diff --git a/crypto/fipsmodule/ml_kem/META.yml b/crypto/fipsmodule/ml_kem/META.yml
new file mode 100644
index 0000000000..6aabc19d86
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/META.yml
@@ -0,0 +1,5 @@
+name: mlkem-native
+source: pq-code-package/mlkem-native.git
+branch: main
+commit: bb8b6838919c27aa68064f538fd6d9eb6cdaf9e6
+imported-at: 2025-02-19T10:08:38+0000
diff --git a/crypto/fipsmodule/ml_kem/README.md b/crypto/fipsmodule/ml_kem/README.md
index 5ad7b64b5d..6b5137386a 100644
--- a/crypto/fipsmodule/ml_kem/README.md
+++ b/crypto/fipsmodule/ml_kem/README.md
@@ -1,17 +1,9 @@
-# AWS-LC ML-KEM readme file
+# ML-KEM
 
 The source code in this folder implements ML-KEM as defined in FIPS 203 Module-Lattice-Based Key-Encapsulation Mechanism Standard ([link](https://csrc.nist.gov/pubs/fips/203/final).
 
-**Source code origin and modifications.** The source code was imported from a branch of the official repository of the Crystals-Kyber team that follows the standard draft: https://github.com/pq-crystals/kyber/tree/standard. The code was taken at [commit](https://github.com/pq-crystals/kyber/commit/11d00ff1f20cfca1f72d819e5a45165c1e0a2816) as of 03/26/2024. At the moment, only the reference C implementation is imported.
+## Source code origin and modifications.**
 
-The code was refactored in [this PR](https://github.com/aws/aws-lc/pull/1763) by parametrizing all functions that depend on values that are specific to a parameter set, i.e., that directly or indirectly depend on the value of KYBER_K. To do this, in `params.h` we defined a structure that holds those ML-KEM parameters and functions
-that initialize a given structure with values corresponding to a parameter set. This structure is then passed to every function that requires it as a function argument. In addition, the following changes were made to the source code in `ml_kem_ref` directory:
-- `randombytes.{h|c}` are deleted because we are using the randomness generation functions provided by AWS-LC.
-- `kem.c`: call to randombytes function is replaced with a call to RAND_bytes and the appropriate header file is included (openssl/rand.h).
-- `fips202.{h|c}` are deleted as all SHA3/SHAKE functionality is provided instead by AWS-LC fipsmodule/sha rather than the reference implementation.
-- `symmetric-shake.c`: unnecessary include of fips202.h is removed.
-- `api.h`: `pqcrystals` prefix substituted with `ml_kem` (to be able to build alongside `crypto/kyber`).
-- `poly.c`: the `poly_frommsg` function was modified to address the constant-time issue described [here](https://github.com/pq-crystals/kyber/commit/9b8d30698a3e7449aeb34e62339d4176f11e3c6c).
-- All internal header files were updated with unique `ML_KEM_*` include guards.
-
-**Testing.** The KATs were obtained from an independent implementation of ML-KEM written in SPARK Ada subset: https://github.com/awslabs/LibMLKEM.
+The source code in [mlkem](mlkem) is imported without change from [mlkem-native](https://github.com/pq-code-package/mlkem-native) using [importer.sh](importer.sh); see [META.yml](META.yml) for
+the exact hash. mlkem-native's FIPS-202 code is not imported, but glue code [fips202_glue.h](fips202_glue.h) and [fips202x4_glue.h](fips202x4_glue.h) provided to use AWS-LC's own FIPS-202
+implementation from [crypto/fipsmodule/sha](../sha). [mlkem_native_bcm.c](mlkem_native_bcm.c) is imported using [importer.sh](importer.sh) from the mlkem-native file `examples/monolithic_build/mlkem_native_monobuild.c` which is a [`crypto/fipsmodule/bcm.c`](../bcm.c)-like file including all mlkem-native compilation units. This file is imported once per security level in [mlkem_c.c](mlkem_c.c) in such a way that level-independent code is shared.
diff --git a/crypto/fipsmodule/ml_kem/fips202_glue.h b/crypto/fipsmodule/ml_kem/fips202_glue.h
new file mode 100644
index 0000000000..a053fe9f86
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/fips202_glue.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef MLK_AWSLC_FIPS202_GLUE_H
+#define MLK_AWSLC_FIPS202_GLUE_H
+#include <stddef.h>
+#include <stdint.h>
+
+#include "../sha/internal.h"
+
+#define SHAKE128_RATE 168
+#define SHAKE256_RATE 136
+#define SHA3_256_RATE 136
+#define SHA3_384_RATE 104
+#define SHA3_512_RATE 72
+
+#define shake128ctx KECCAK1600_CTX
+
+static MLK_INLINE void shake128_init(shake128ctx *state)
+{
+  /* Return code checks can be omitted
+   * SHAKE_Init always returns 1 when called with correct block size value */
+  (void) SHAKE_Init(state, SHAKE128_BLOCKSIZE);
+}
+
+static MLK_INLINE void shake128_release(shake128ctx *state)
+{
+  (void) state;
+}
+
+static MLK_INLINE void shake128_absorb_once(shake128ctx *state,
+                                            const uint8_t *input, size_t inlen)
+{
+  /* TODO: Document why this function does not fail in the context
+   *       of the calls made by mlkem-native. */
+  (void) SHAKE_Absorb(state, input, inlen);
+}
+
+static MLK_INLINE void shake128_squeezeblocks(uint8_t *output, size_t nblocks,
+                                              shake128ctx *state)
+{
+  /* TODO: Document why this function does not fail in the context
+   *       of the calls made by mlkem-native. */
+  (void) SHAKE_Squeeze(output, state, nblocks * SHAKE128_RATE);
+}
+
+static MLK_INLINE void shake256(uint8_t *output, size_t outlen,
+                                const uint8_t *input, size_t inlen)
+{
+  /* TODO: Document why this function does not fail in the context
+   *       of the calls made by mlkem-native. */
+  (void) SHAKE256(input, inlen, output, outlen);
+}
+
+static MLK_INLINE void sha3_256(uint8_t *output, const uint8_t *input,
+                                size_t inlen)
+{
+  /* TODO: Document why this function does not fail in the context
+   *       of the calls made by mlkem-native. */
+  (void) SHA3_256(input, inlen, output);
+}
+
+static MLK_INLINE void sha3_512(uint8_t *output, const uint8_t *input,
+                                size_t inlen)
+{
+  /* TODO: Document why this function does not fail in the context
+   *       of the calls made by mlkem-native. */
+  (void) SHA3_512(input, inlen, output);
+}
+
+#endif /* MLK_AWSLC_FIPS202_GLUE_H */
diff --git a/crypto/fipsmodule/ml_kem/fips202x4_glue.h b/crypto/fipsmodule/ml_kem/fips202x4_glue.h
new file mode 100644
index 0000000000..4992c72d45
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/fips202x4_glue.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+ * This is a shim establishing the FIPS-202 API required by
+ * mlkem-native from the API exposed by AWS-LC.
+ */
+
+#ifndef MLK_AWSLC_FIPS202X4_GLUE_H
+#define MLK_AWSLC_FIPS202X4_GLUE_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "fips202_glue.h"
+
+typedef shake128ctx shake128x4ctx[4];
+
+static MLK_INLINE void shake128x4_absorb_once(shake128x4ctx *state,
+                                              const uint8_t *in0,
+                                              const uint8_t *in1,
+                                              const uint8_t *in2,
+                                              const uint8_t *in3, size_t inlen)
+{
+  shake128_absorb_once(&(*state)[0], in0, inlen);
+  shake128_absorb_once(&(*state)[1], in1, inlen);
+  shake128_absorb_once(&(*state)[2], in2, inlen);
+  shake128_absorb_once(&(*state)[3], in3, inlen);
+}
+
+static MLK_INLINE void shake128x4_squeezeblocks(uint8_t *out0, uint8_t *out1,
+                                                uint8_t *out2, uint8_t *out3,
+                                                size_t nblocks,
+                                                shake128x4ctx *state)
+{
+  shake128_squeezeblocks(out0, nblocks, &(*state)[0]);
+  shake128_squeezeblocks(out1, nblocks, &(*state)[1]);
+  shake128_squeezeblocks(out2, nblocks, &(*state)[2]);
+  shake128_squeezeblocks(out3, nblocks, &(*state)[3]);
+}
+
+static MLK_INLINE void shake128x4_init(shake128x4ctx *state)
+{
+  shake128_init(&(*state)[0]);
+  shake128_init(&(*state)[1]);
+  shake128_init(&(*state)[2]);
+  shake128_init(&(*state)[3]);
+}
+
+static MLK_INLINE void shake128x4_release(shake128x4ctx *state)
+{
+  shake128_release(&(*state)[0]);
+  shake128_release(&(*state)[1]);
+  shake128_release(&(*state)[2]);
+  shake128_release(&(*state)[3]);
+}
+
+static MLK_INLINE void shake256x4(uint8_t *out0, uint8_t *out1, uint8_t *out2,
+                                  uint8_t *out3, size_t outlen, uint8_t *in0,
+                                  uint8_t *in1, uint8_t *in2, uint8_t *in3,
+                                  size_t inlen)
+{
+  shake256(out0, outlen, in0, inlen);
+  shake256(out1, outlen, in1, inlen);
+  shake256(out2, outlen, in2, inlen);
+  shake256(out3, outlen, in3, inlen);
+}
+
+#endif /* MLK_AWSLC_FIPS202X4_GLUE_H */
diff --git a/crypto/fipsmodule/ml_kem/importer.sh b/crypto/fipsmodule/ml_kem/importer.sh
new file mode 100755
index 0000000000..a0ff020b27
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/importer.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+GITHUB_SERVER_URL=https://github.com/
+GITHUB_REPOSITORY=${GITHUB_REPOSITORY:=pq-code-package/mlkem-native.git}
+GITHUB_SHA=${GITHUB_SHA:=main}
+
+SRC=mlkem
+TMP=$(mktemp -d) || exit 1
+echo "Temporary working directory: $TMP"
+
+# Check if source directory already exists
+if [ -d "$SRC" ]; then
+    echo "Source directory or symlink $SRC does already exist -- please remove it before re-running the importer"
+    exit 1
+fi
+
+# Work in temporary directory
+pushd $TMP
+
+# Fetch repository
+echo "Fetching repository ..."
+git init >/dev/null
+git remote add origin $GITHUB_SERVER_URL/$GITHUB_REPOSITORY >/dev/null
+git fetch origin --depth 1 $GITHUB_SHA >/dev/null
+git checkout FETCH_HEAD >/dev/null
+GITHUB_COMMIT=$(git rev-parse FETCH_HEAD)
+
+popd
+
+echo "Pull source code from remote repository..."
+
+# Copy mlkem-native source tree -- C-only, no FIPS-202
+mkdir $SRC
+cp $TMP/mlkem/* $SRC
+# Copy and statically simplify BCM file
+unifdef -DMLK_MONOBUILD_CUSTOM_FIPS202                          \
+        -UMLK_MONOBUILD_WITH_NATIVE_ARITH                       \
+        -UMLK_MONOBUILD_WITH_NATIVE_FIPS202                     \
+        $TMP/examples/monolithic_build/mlkem_native_monobuild.c \
+        > mlkem_native_bcm.c
+
+echo "Remove temporary artifacts ..."
+rm -rf $TMP
+
+echo "Generating META.yml file ..."
+cat <<EOF > META.yml
+name: mlkem-native
+source: $GITHUB_REPOSITORY
+branch: $GITHUB_SHA
+commit: $GITHUB_COMMIT
+imported-at: $(date "+%Y-%m-%dT%H:%M:%S%z")
+EOF
diff --git a/crypto/fipsmodule/ml_kem/ml_kem.c b/crypto/fipsmodule/ml_kem/ml_kem.c
new file mode 100644
index 0000000000..1b9aae4dc7
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/ml_kem.c
@@ -0,0 +1,220 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0 OR ISC
+
+/* mlkem-native source code */
+
+/* This indirection is needed to not clash with the
+ * existing API in crypto/fipsmodule/digest. */
+#define shake128_absorb_once MLK_NAMESPACE(shake128_absorb_once)
+#define shake128_squeezeblocks MLK_NAMESPACE(shake128_squeezeblocks)
+#define shake128_init MLK_NAMESPACE(shake128_init)
+#define shake128_release MLK_NAMESPACE(shake128_release)
+#define shake256 MLK_NAMESPACE(shake256)
+#define sha3_256 MLK_NAMESPACE(sha3_256)
+#define sha3_512 MLK_NAMESPACE(sha3_512)
+
+/* Include level-independent code */
+#define MLK_CONFIG_FILE "../mlkem_native_config.h"
+#define MLK_FIPS202_CUSTOM_HEADER "../fips202_glue.h"
+#define MLK_FIPS202X4_CUSTOM_HEADER "../fips202x4_glue.h"
+
+/* MLKEM-512 */
+#define MLKEM_K 2
+#define MLK_MULTILEVEL_BUILD_WITH_SHARED /* Include level-independent code */
+#define MLK_MONOBUILD_KEEP_SHARED_HEADERS
+#include "mlkem_native_bcm.c"
+/* MLKEM-768 */
+#define MLKEM_K 3
+#define MLK_MULTILEVEL_BUILD_NO_SHARED /* Exclude levelinpendent code */
+#define MLK_MONOBUILD_KEEP_SHARED_HEADERS
+#include "mlkem_native_bcm.c"
+/* MLKEM-1024 */
+#define MLKEM_K 4
+#undef MLK_MONOBUILD_KEEP_SHARED_HEADERS
+#include "mlkem_native_bcm.c"
+
+/* Undefine FIPS-202 indirection */
+#undef shake128_absorb_once
+#undef shake128_squeezeblocks
+#undef shake128_init
+#undef shake128_release
+#undef shake256
+#undef sha3_256
+#undef sha3_512
+
+/* End of mlkem-native source code */
+
+#include "./ml_kem.h"
+#include "openssl/rand.h"
+
+void randombytes(uint8_t *out, size_t outlen)
+{
+    RAND_bytes(out, outlen);
+}
+
+int ml_kem_512_keypair_deterministic(uint8_t *public_key  /* OUT */,
+                                         uint8_t *secret_key  /* OUT */,
+                                         const uint8_t *seed  /* IN */) {
+  boringssl_ensure_ml_kem_self_test();
+  return ml_kem_512_keypair_deterministic_no_self_test(public_key, secret_key, seed);
+}
+
+int ml_kem_512_keypair_deterministic_no_self_test(uint8_t *public_key  /* OUT */,
+                                                  uint8_t *secret_key  /* OUT */,
+                                                  const uint8_t *seed  /* IN */) {
+  int res = mlkem512_keypair_derand(public_key, secret_key, seed);
+#if defined(AWSLC_FIPS)
+  /* PCT failure is the only failure condition for key generation. */
+  if (res != 0) {
+      AWS_LC_FIPS_failure("ML-KEM keygen PCT failed");
+  }
+#endif
+  return res;
+}
+
+int ml_kem_512_keypair(uint8_t *public_key /* OUT */,
+                           uint8_t *secret_key /* OUT */) {
+  boringssl_ensure_ml_kem_self_test();
+  int res = mlkem512_keypair(public_key, secret_key);
+#if defined(AWSLC_FIPS)
+  /* PCT failure is the only failure condition for key generation. */
+  if (res != 0) {
+      AWS_LC_FIPS_failure("ML-KEM keygen PCT failed");
+  }
+#endif
+  return res;
+}
+
+int ml_kem_512_encapsulate_deterministic(uint8_t *ciphertext       /* OUT */,
+                                             uint8_t *shared_secret    /* OUT */,
+                                             const uint8_t *public_key /* IN  */,
+                                             const uint8_t *seed       /* IN */) {
+  boringssl_ensure_ml_kem_self_test();
+  return ml_kem_512_encapsulate_deterministic_no_self_test(ciphertext, shared_secret, public_key, seed);
+}
+
+int ml_kem_512_encapsulate_deterministic_no_self_test(uint8_t *ciphertext       /* OUT */,
+                                                      uint8_t *shared_secret    /* OUT */,
+                                                      const uint8_t *public_key /* IN  */,
+                                                      const uint8_t *seed       /* IN */) {
+  return mlkem512_enc_derand(ciphertext, shared_secret, public_key, seed);
+}
+
+int ml_kem_512_encapsulate(uint8_t *ciphertext       /* OUT */,
+                               uint8_t *shared_secret    /* OUT */,
+                               const uint8_t *public_key /* IN  */) {
+  boringssl_ensure_ml_kem_self_test();
+  return mlkem512_enc(ciphertext, shared_secret, public_key);
+}
+
+int ml_kem_512_decapsulate(uint8_t *shared_secret    /* OUT */,
+                               const uint8_t *ciphertext /* IN  */,
+                               const uint8_t *secret_key /* IN  */) {
+  boringssl_ensure_ml_kem_self_test();
+  return ml_kem_512_decapsulate_no_self_test(shared_secret, ciphertext, secret_key);
+}
+
+int ml_kem_512_decapsulate_no_self_test(uint8_t *shared_secret    /* OUT */,
+                                        const uint8_t *ciphertext /* IN  */,
+                                        const uint8_t *secret_key /* IN  */) {
+  return mlkem512_dec(shared_secret, ciphertext, secret_key);
+}
+
+
+int ml_kem_768_keypair_deterministic(uint8_t *public_key  /* OUT */,
+                                         uint8_t *secret_key  /* OUT */,
+                                         const uint8_t *seed  /* IN */) {
+  boringssl_ensure_ml_kem_self_test();
+  int res = mlkem768_keypair_derand(public_key, secret_key, seed);
+#if defined(AWSLC_FIPS)
+  /* PCT failure is the only failure condition for key generation. */
+  if (res != 0) {
+      AWS_LC_FIPS_failure("ML-KEM keygen PCT failed");
+  }
+#endif
+  return res;
+}
+
+int ml_kem_768_keypair(uint8_t *public_key /* OUT */,
+                           uint8_t *secret_key /* OUT */) {
+  boringssl_ensure_ml_kem_self_test();
+  int res = mlkem768_keypair(public_key, secret_key);
+#if defined(AWSLC_FIPS)
+  /* PCT failure is the only failure condition for key generation. */
+  if (res != 0) {
+      AWS_LC_FIPS_failure("ML-KEM keygen PCT failed");
+  }
+#endif
+  return res;
+}
+
+int ml_kem_768_encapsulate_deterministic(uint8_t *ciphertext       /* OUT */,
+                                             uint8_t *shared_secret    /* OUT */,
+                                             const uint8_t *public_key /* IN  */,
+                                             const uint8_t *seed       /* IN */) {
+  boringssl_ensure_ml_kem_self_test();
+  return mlkem768_enc_derand(ciphertext, shared_secret, public_key, seed);
+}
+
+int ml_kem_768_encapsulate(uint8_t *ciphertext       /* OUT */,
+                               uint8_t *shared_secret    /* OUT */,
+                               const uint8_t *public_key /* IN  */) {
+  boringssl_ensure_ml_kem_self_test();
+  return mlkem768_enc(ciphertext, shared_secret, public_key);
+}
+
+int ml_kem_768_decapsulate(uint8_t *shared_secret    /* OUT */,
+                               const uint8_t *ciphertext /* IN  */,
+                               const uint8_t *secret_key /* IN  */) {
+  boringssl_ensure_ml_kem_self_test();
+  return mlkem768_dec(shared_secret, ciphertext, secret_key);
+}
+
+int ml_kem_1024_keypair_deterministic(uint8_t *public_key  /* OUT */,
+                                          uint8_t *secret_key  /* OUT */,
+                                          const uint8_t *seed  /* IN */) {
+  boringssl_ensure_ml_kem_self_test();
+  int res = mlkem1024_keypair_derand(public_key, secret_key, seed);
+#if defined(AWSLC_FIPS)
+  /* PCT failure is the only failure condition for key generation. */
+  if (res != 0) {
+      AWS_LC_FIPS_failure("ML-KEM keygen PCT failed");
+  }
+#endif
+  return res;
+}
+
+int ml_kem_1024_keypair(uint8_t *public_key /* OUT */,
+                            uint8_t *secret_key /* OUT */) {
+  boringssl_ensure_ml_kem_self_test();
+  int res = mlkem1024_keypair(public_key, secret_key);
+#if defined(AWSLC_FIPS)
+  /* PCT failure is the only failure condition for key generation. */
+  if (res != 0) {
+      AWS_LC_FIPS_failure("ML-KEM keygen PCT failed");
+  }
+#endif
+  return res;
+}
+
+int ml_kem_1024_encapsulate_deterministic(uint8_t *ciphertext       /* OUT */,
+                                              uint8_t *shared_secret    /* OUT */,
+                                              const uint8_t *public_key /* IN  */,
+                                              const uint8_t *seed       /* IN */) {
+  boringssl_ensure_ml_kem_self_test();
+  return mlkem1024_enc_derand(ciphertext, shared_secret, public_key, seed);
+}
+
+int ml_kem_1024_encapsulate(uint8_t *ciphertext       /* OUT */,
+                                uint8_t *shared_secret    /* OUT */,
+                                const uint8_t *public_key /* IN  */) {
+  boringssl_ensure_ml_kem_self_test();
+  return mlkem1024_enc(ciphertext, shared_secret, public_key);
+}
+
+int ml_kem_1024_decapsulate(uint8_t *shared_secret    /* OUT */,
+                                const uint8_t *ciphertext /* IN  */,
+                                const uint8_t *secret_key /* IN  */) {
+  boringssl_ensure_ml_kem_self_test();
+  return mlkem1024_dec(shared_secret, ciphertext, secret_key);
+}
diff --git a/crypto/fipsmodule/ml_kem/ml_kem.h b/crypto/fipsmodule/ml_kem/ml_kem.h
index 752855764f..0341cd3daf 100644
--- a/crypto/fipsmodule/ml_kem/ml_kem.h
+++ b/crypto/fipsmodule/ml_kem/ml_kem.h
@@ -65,6 +65,11 @@ int ml_kem_768_keypair_deterministic(uint8_t *public_key /* OUT */,
                                          uint8_t *secret_key /* OUT */,
                                          const uint8_t *seed /* IN */);
 
+int ml_kem_768_keypair_deterministic_no_self_test(uint8_t *public_key  /* OUT */,
+                                                  uint8_t *secret_key  /* OUT */,
+                                                  const uint8_t *seed  /* IN */);
+
+
 int ml_kem_768_keypair(uint8_t *public_key /* OUT */,
                            uint8_t *secret_key /* OUT */);
 
@@ -73,6 +78,12 @@ int ml_kem_768_encapsulate_deterministic(uint8_t *ciphertext       /* OUT */,
                                              const uint8_t *public_key /* IN  */,
                                              const uint8_t *seed /* IN */);
 
+int ml_kem_768_encapsulate_deterministic_no_self_test(uint8_t *ciphertext       /* OUT */,
+                                                      uint8_t *shared_secret    /* OUT */,
+                                                      const uint8_t *public_key /* IN  */,
+                                                      const uint8_t *seed       /* IN */);
+
+
 int ml_kem_768_encapsulate(uint8_t *ciphertext       /* OUT */,
                                uint8_t *shared_secret    /* OUT */,
                                const uint8_t *public_key /* IN  */);
@@ -81,10 +92,18 @@ int ml_kem_768_decapsulate(uint8_t *shared_secret    /* OUT */,
                                const uint8_t *ciphertext /* IN  */,
                                const uint8_t *secret_key /* IN  */);
 
+int ml_kem_768_decapsulate_no_self_test(uint8_t *shared_secret    /* OUT */,
+                                        const uint8_t *ciphertext /* IN  */,
+                                        const uint8_t *secret_key /* IN  */);
+
 int ml_kem_1024_keypair_deterministic(uint8_t *public_key /* OUT */,
                                           uint8_t *secret_key /* OUT */,
                                           const uint8_t *seed /* IN */);
 
+int ml_kem_1024_keypair_deterministic_no_self_test(uint8_t *public_key  /* OUT */,
+                                                  uint8_t *secret_key  /* OUT */,
+                                                  const uint8_t *seed  /* IN */);
+
 int ml_kem_1024_keypair(uint8_t *public_key /* OUT */,
                             uint8_t *secret_key /* OUT */);
 
@@ -93,6 +112,12 @@ int ml_kem_1024_encapsulate_deterministic(uint8_t *ciphertext       /* OUT */,
                                               const uint8_t *public_key /* IN  */,
                                               const uint8_t *seed /* IN */);
 
+int ml_kem_1024_encapsulate_deterministic_no_self_test(uint8_t *ciphertext       /* OUT */,
+                                                      uint8_t *shared_secret    /* OUT */,
+                                                      const uint8_t *public_key /* IN  */,
+                                                      const uint8_t *seed       /* IN */);
+
+
 int ml_kem_1024_encapsulate(uint8_t *ciphertext       /* OUT */,
                                 uint8_t *shared_secret    /* OUT */,
                                 const uint8_t *public_key /* IN  */);
@@ -100,4 +125,10 @@ int ml_kem_1024_encapsulate(uint8_t *ciphertext       /* OUT */,
 int ml_kem_1024_decapsulate(uint8_t *shared_secret    /* OUT */,
                                 const uint8_t *ciphertext /* IN  */,
                                 const uint8_t *secret_key /* IN  */);
+
+int ml_kem_1024_decapsulate_no_self_test(uint8_t *shared_secret    /* OUT */,
+                                        const uint8_t *ciphertext /* IN  */,
+                                        const uint8_t *secret_key /* IN  */);
+
+
 #endif // ML_KEM_H
diff --git a/crypto/fipsmodule/ml_kem/mlkem/LICENSE b/crypto/fipsmodule/ml_kem/mlkem/LICENSE
new file mode 100644
index 0000000000..7922ab8007
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/LICENSE
@@ -0,0 +1,6 @@
+Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/);
+or Apache 2.0 License (https://www.apache.org/licenses/LICENSE-2.0.html).
+
+For Keccak and AES we are using public-domain
+code from sources and by authors listed in
+comments on top of the respective files.
diff --git a/crypto/fipsmodule/ml_kem/mlkem/arith_backend.h b/crypto/fipsmodule/ml_kem/mlkem/arith_backend.h
new file mode 100644
index 0000000000..dc7685e734
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/arith_backend.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef MLK_ARITH_BACKEND_H
+#define MLK_ARITH_BACKEND_H
+
+#include "common.h"
+
+#if defined(MLK_ARITH_BACKEND_IMPL)
+#include MLK_ARITH_BACKEND_IMPL
+
+/* Include to enforce consistency of API and implementation,
+ * and conduct sanity checks on the backend.
+ *
+ * Keep this _after_ the inclusion of the backend; otherwise,
+ * the sanity checks won't have an effect. */
+#if defined(MLK_CHECK_APIS)
+#include "native/api.h"
+#endif
+#endif
+
+#endif /* MLK_ARITH_BACKEND_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/cbmc.h b/crypto/fipsmodule/ml_kem/mlkem/cbmc.h
new file mode 100644
index 0000000000..f3337d1888
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/cbmc.h
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef MLK_CBMC_H
+#define MLK_CBMC_H
+/***************************************************
+ * Basic replacements for __CPROVER_XXX contracts
+ ***************************************************/
+
+#include "common.h"
+
+#ifndef CBMC
+
+#define __contract__(x)
+#define __loop__(x)
+
+#else /* CBMC _is_ defined, therefore we're doing proof */
+
+#define __contract__(x) x
+#define __loop__(x) x
+
+/* https://diffblue.github.io/cbmc/contracts-assigns.html */
+#define assigns(...) __CPROVER_assigns(__VA_ARGS__)
+
+/* https://diffblue.github.io/cbmc/contracts-requires-ensures.html */
+#define requires(...) __CPROVER_requires(__VA_ARGS__)
+#define ensures(...) __CPROVER_ensures(__VA_ARGS__)
+/* https://diffblue.github.io/cbmc/contracts-loops.html */
+#define invariant(...) __CPROVER_loop_invariant(__VA_ARGS__)
+#define decreases(...) __CPROVER_decreases(__VA_ARGS__)
+/* cassert to avoid confusion with in-built assert */
+#define cassert(x) __CPROVER_assert(x, "cbmc assertion failed")
+#define assume(...) __CPROVER_assume(__VA_ARGS__)
+
+/***************************************************
+ * Macros for "expression" forms that may appear
+ * _inside_ top-level contracts.
+ ***************************************************/
+
+/*
+ * function return value - useful inside ensures
+ * https://diffblue.github.io/cbmc/contracts-functions.html
+ */
+#define return_value (__CPROVER_return_value)
+
+/*
+ * assigns l-value targets
+ * https://diffblue.github.io/cbmc/contracts-assigns.html
+ */
+#define object_whole(...) __CPROVER_object_whole(__VA_ARGS__)
+#define memory_slice(...) __CPROVER_object_upto(__VA_ARGS__)
+#define same_object(...) __CPROVER_same_object(__VA_ARGS__)
+
+/*
+ * Pointer-related predicates
+ * https://diffblue.github.io/cbmc/contracts-memory-predicates.html
+ */
+#define memory_no_alias(...) __CPROVER_is_fresh(__VA_ARGS__)
+#define readable(...) __CPROVER_r_ok(__VA_ARGS__)
+#define writeable(...) __CPROVER_w_ok(__VA_ARGS__)
+
+/*
+ * History variables
+ * https://diffblue.github.io/cbmc/contracts-history-variables.html
+ */
+#define old(...) __CPROVER_old(__VA_ARGS__)
+#define loop_entry(...) __CPROVER_loop_entry(__VA_ARGS__)
+
+/*
+ * Quantifiers
+ * Note that the range on qvar is _exclusive_ between qvar_lb .. qvar_ub
+ * https://diffblue.github.io/cbmc/contracts-quantifiers.html
+ */
+
+/*
+ * Prevent clang-format from corrupting CBMC's special ==> operator
+ */
+/* clang-format off */
+#define forall(qvar, qvar_lb, qvar_ub, predicate)                 \
+  __CPROVER_forall                                                \
+  {                                                               \
+    unsigned qvar;                                                \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==> (predicate)   \
+  }
+
+#define EXISTS(qvar, qvar_lb, qvar_ub, predicate)         \
+  __CPROVER_exists                                              \
+  {                                                             \
+    unsigned qvar;                                              \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) && (predicate)  \
+  }
+/* clang-format on */
+
+/***************************************************
+ * Convenience macros for common contract patterns
+ ***************************************************/
+
+/*
+ * Boolean-value predidate that asserts that "all values of array_var are in
+ * range value_lb (inclusive) .. value_ub (exclusive)"
+ * Example:
+ *  array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)
+ * expands to
+ *  __CPROVER_forall { int k; (0 <= k && k <= MLKEM_N-1) ==> (
+ *  0 <= a->coeffs[k]) && a->coeffs[k] < MLKEM_Q)) }
+ */
+
+/*
+ * Prevent clang-format from corrupting CBMC's special ==> operator
+ */
+/* clang-format off */
+#define CBMC_CONCAT_(left, right) left##right
+#define CBMC_CONCAT(left, right) CBMC_CONCAT_(left, right)
+
+#define array_bound_core(qvar, qvar_lb, qvar_ub, array_var,            \
+                         value_lb, value_ub)                           \
+  __CPROVER_forall                                                     \
+  {                                                                    \
+    unsigned qvar;                                                     \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==>                    \
+        (((int)(value_lb) <= ((array_var)[(qvar)])) &&		       \
+         (((array_var)[(qvar)]) < (int)(value_ub)))		       \
+  }
+
+#define array_bound(array_var, qvar_lb, qvar_ub, value_lb, value_ub) \
+  array_bound_core(CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb),      \
+      (qvar_ub), (array_var), (value_lb), (value_ub))
+/* clang-format on */
+
+/* Wrapper around array_bound operating on absolute values.
+ *
+ * Note that since the absolute bound is inclusive, but the lower
+ * bound in array_bound is inclusive, we have to raise it by 1.
+ */
+#define array_abs_bound(arr, lb, ub, k) \
+  array_bound((arr), (lb), (ub), -((int)(k)) + 1, (k))
+
+#endif
+
+#endif /* MLK_CBMC_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/common.h b/crypto/fipsmodule/ml_kem/mlkem/common.h
new file mode 100644
index 0000000000..3adc929c26
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/common.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef MLK_COMMON_H
+#define MLK_COMMON_H
+
+#if defined(MLK_CONFIG_FILE)
+#include MLK_CONFIG_FILE
+#else
+#include "config.h"
+#endif /* MLK_CONFIG_FILE */
+
+#include "params.h"
+#include "sys.h"
+
+/* Include backend metadata */
+#if defined(MLK_USE_NATIVE_BACKEND_ARITH)
+#if defined(MLK_ARITH_BACKEND_FILE)
+#include MLK_ARITH_BACKEND_FILE
+#else
+#error Bad configuration: MLK_USE_NATIVE_BACKEND_ARITH is set, but MLK_ARITH_BACKEND_FILE is not.
+#endif
+#endif
+
+#if defined(MLK_USE_NATIVE_BACKEND_FIPS202)
+#if defined(MLK_FIPS202_BACKEND_FILE)
+#include MLK_FIPS202_BACKEND_FILE
+#else
+#error Bad configuration: MLK_USE_NATIVE_BACKEND_FIPS202 is set, but MLK_FIPS202_BACKEND_FILE is not.
+#endif
+#endif
+
+#if !defined(MLK_ARITH_BACKEND_NAME)
+#define MLK_ARITH_BACKEND_NAME C
+#endif
+
+#if !defined(MLK_FIPS202_BACKEND_NAME)
+#define MLK_FIPS202_BACKEND_NAME C
+#endif
+
+/* For a monobuild (where all compilation units are merged into one), mark
+ * all non-public API as static since they don't need external linkage. */
+#if !defined(MLK_MONOBUILD)
+#define MLK_INTERNAL_API
+#else
+#define MLK_INTERNAL_API static
+#endif
+
+/* Public API may have internal or external linkage, depending on how
+ * mlkem-native is used in the monobuild. Keep it external by default,
+ * but allow the user to overwrite this in the config. */
+#if !defined(MLK_EXTERNAL_API)
+#define MLK_EXTERNAL_API
+#endif /* MLK_EXTERNAL_API */
+
+#define MLK_MAKE_NAMESPACE_(x1, x2) x1##_##x2
+#define MLK_MAKE_NAMESPACE(x1, x2) MLK_MAKE_NAMESPACE_(x1, x2)
+
+#define MLK_NAMESPACE(s) MLK_MAKE_NAMESPACE(MLK_NAMESPACE_PREFIX, s)
+
+#if defined(MLK_NAMESPACE_PREFIX_ADD_LEVEL)
+#define MLK_MAKE_NAMESPACE_K_(x1, x2, x3) x1##x2##_##x3
+#define MLK_MAKE_NAMESPACE_K(x1, x2, x3) MLK_MAKE_NAMESPACE_K_(x1, x2, x3)
+#define MLK_NAMESPACE_K(s) \
+  MLK_MAKE_NAMESPACE_K(MLK_NAMESPACE_PREFIX, MLKEM_LVL, s)
+#else
+#define MLK_NAMESPACE_K(s) MLK_NAMESPACE(s)
+#endif
+
+/* On Apple platforms, we need to emit leading underscore
+ * in front of assembly symbols. We thus introducee a separate
+ * namespace wrapper for ASM symbols. */
+#if !defined(__APPLE__)
+#define MLK_ASM_NAMESPACE(sym) MLK_NAMESPACE(sym)
+#else
+#define MLK_PREFIX_UNDERSCORE_(sym) _##sym
+#define MLK_PREFIX_UNDERSCORE(sym) MLK_PREFIX_UNDERSCORE_(sym)
+#define MLK_ASM_NAMESPACE(sym) MLK_PREFIX_UNDERSCORE(MLK_NAMESPACE(sym))
+#endif
+
+/*
+ * On X86_64 if control-flow protections (CET) are enabled (through
+ * -fcf-protection=), we add an endbr64 instruction at every global function
+ * label.  See sys.h for more details
+ */
+#if defined(MLK_SYS_X86_64)
+#define MLK_ASM_FN_SYMBOL(sym) MLK_ASM_NAMESPACE(sym) : MLK_CET_ENDBR
+#else
+#define MLK_ASM_FN_SYMBOL(sym) MLK_ASM_NAMESPACE(sym) :
+#endif
+
+/* We aim to simplify the user's life by supporting builds where
+ * all source files are included, even those that are not needed.
+ * Those files are appropriately guarded and will be empty when unneeded.
+ * The following is to avoid compilers complaining about this. */
+#define MLK_EMPTY_CU(s) extern int MLK_NAMESPACE_K(empty_cu_##s);
+
+#if !defined(MLK_FIPS202_CUSTOM_HEADER)
+#define MLK_FIPS202_HEADER_FILE "fips202/fips202.h"
+#else
+#define MLK_FIPS202_HEADER_FILE MLK_FIPS202_CUSTOM_HEADER
+#endif
+
+#if !defined(MLK_FIPS202X4_CUSTOM_HEADER)
+#define MLK_FIPS202X4_HEADER_FILE "fips202/fips202x4.h"
+#else
+#define MLK_FIPS202X4_HEADER_FILE MLK_FIPS202X4_CUSTOM_HEADER
+#endif
+
+#endif /* MLK_COMMON_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/compress.c b/crypto/fipsmodule/ml_kem/mlkem/compress.c
new file mode 100644
index 0000000000..f290e8a11d
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/compress.c
@@ -0,0 +1,472 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#include "common.h"
+#if !defined(MLK_MULTILEVEL_BUILD_NO_SHARED)
+
+#include <stdint.h>
+#include <string.h>
+#include "arith_backend.h"
+#include "cbmc.h"
+#include "compress.h"
+#include "debug.h"
+#include "verify.h"
+
+#if defined(MLK_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 || MLKEM_K == 3)
+#if !defined(MLK_USE_NATIVE_POLY_COMPRESS_D4)
+MLK_INTERNAL_API
+void poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4], const poly *a)
+{
+  unsigned i;
+  debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q);
+
+  for (i = 0; i < MLKEM_N / 8; i++)
+  __loop__(invariant(i <= MLKEM_N / 8))
+  {
+    unsigned j;
+    uint8_t t[8] = {0};
+    for (j = 0; j < 8; j++)
+    __loop__(
+      invariant(i <= MLKEM_N / 8 && j <= 8)
+      invariant(array_bound(t, 0, j, 0, 16)))
+    {
+      t[j] = scalar_compress_d4(a->coeffs[8 * i + j]);
+    }
+
+    r[i * 4] = t[0] | (t[1] << 4);
+    r[i * 4 + 1] = t[2] | (t[3] << 4);
+    r[i * 4 + 2] = t[4] | (t[5] << 4);
+    r[i * 4 + 3] = t[6] | (t[7] << 4);
+  }
+}
+#else  /* MLK_USE_NATIVE_POLY_COMPRESS_D4 */
+MLK_INTERNAL_API
+void poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4], const poly *a)
+{
+  debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q);
+  poly_compress_d4_native(r, a->coeffs);
+}
+#endif /* MLK_USE_NATIVE_POLY_COMPRESS_D4 */
+
+#if !defined(MLK_USE_NATIVE_POLY_COMPRESS_D10)
+MLK_INTERNAL_API
+void poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10], const poly *a)
+{
+  unsigned j;
+  debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q);
+  for (j = 0; j < MLKEM_N / 4; j++)
+  __loop__(invariant(j <= MLKEM_N / 4))
+  {
+    unsigned k;
+    uint16_t t[4];
+    for (k = 0; k < 4; k++)
+    __loop__(
+      invariant(k <= 4)
+      invariant(forall(r, 0, k, t[r] < (1u << 10))))
+    {
+      t[k] = scalar_compress_d10(a->coeffs[4 * j + k]);
+    }
+
+    /*
+     * Make all implicit truncation explicit. No data is being
+     * truncated for the LHS's since each t[i] is 10-bit in size.
+     */
+    r[5 * j + 0] = (t[0] >> 0) & 0xFF;
+    r[5 * j + 1] = (t[0] >> 8) | ((t[1] << 2) & 0xFF);
+    r[5 * j + 2] = (t[1] >> 6) | ((t[2] << 4) & 0xFF);
+    r[5 * j + 3] = (t[2] >> 4) | ((t[3] << 6) & 0xFF);
+    r[5 * j + 4] = (t[3] >> 2);
+  }
+}
+#else  /* MLK_USE_NATIVE_POLY_COMPRESS_D10 */
+MLK_INTERNAL_API
+void poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10], const poly *a)
+{
+  debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q);
+  poly_compress_d10_native(r, a->coeffs);
+}
+#endif /* MLK_USE_NATIVE_POLY_COMPRESS_D10 */
+
+#if !defined(MLK_USE_NATIVE_POLY_DECOMPRESS_D4)
+MLK_INTERNAL_API
+void poly_decompress_d4(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4])
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_N / 2; i++)
+  __loop__(
+    invariant(i <= MLKEM_N / 2)
+    invariant(array_bound(r->coeffs, 0, 2 * i, 0, MLKEM_Q)))
+  {
+    r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF);
+    r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF);
+  }
+
+  debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q);
+}
+#else  /* MLK_USE_NATIVE_POLY_DECOMPRESS_D4 */
+MLK_INTERNAL_API
+void poly_decompress_d4(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4])
+{
+  poly_decompress_d4_native(r->coeffs, a);
+  debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q);
+}
+#endif /* MLK_USE_NATIVE_POLY_DECOMPRESS_D4 */
+
+#if !defined(MLK_USE_NATIVE_POLY_DECOMPRESS_D10)
+MLK_INTERNAL_API
+void poly_decompress_d10(poly *r,
+                         const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10])
+{
+  unsigned j;
+  for (j = 0; j < MLKEM_N / 4; j++)
+  __loop__(
+    invariant(j <= MLKEM_N / 4)
+    invariant(array_bound(r->coeffs, 0, 4 * j, 0, MLKEM_Q)))
+  {
+    unsigned k;
+    uint16_t t[4];
+    uint8_t const *base = &a[5 * j];
+
+    t[0] = 0x3FF & ((base[0] >> 0) | ((uint16_t)base[1] << 8));
+    t[1] = 0x3FF & ((base[1] >> 2) | ((uint16_t)base[2] << 6));
+    t[2] = 0x3FF & ((base[2] >> 4) | ((uint16_t)base[3] << 4));
+    t[3] = 0x3FF & ((base[3] >> 6) | ((uint16_t)base[4] << 2));
+
+    for (k = 0; k < 4; k++)
+    __loop__(
+      invariant(k <= 4)
+      invariant(array_bound(r->coeffs, 0, 4 * j + k, 0, MLKEM_Q)))
+    {
+      r->coeffs[4 * j + k] = scalar_decompress_d10(t[k]);
+    }
+  }
+
+  debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q);
+}
+#else  /* MLK_USE_NATIVE_POLY_DECOMPRESS_D10 */
+MLK_INTERNAL_API
+void poly_decompress_d10(poly *r,
+                         const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10])
+{
+  poly_decompress_d10_native(r->coeffs, a);
+  debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q);
+}
+#endif /* MLK_USE_NATIVE_POLY_DECOMPRESS_D10 */
+#endif /* defined(MLK_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 \
+          || MLKEM_K == 3) */
+
+#if defined(MLK_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4
+#if !defined(MLK_USE_NATIVE_POLY_COMPRESS_D5)
+MLK_INTERNAL_API
+void poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5], const poly *a)
+{
+  unsigned i;
+  debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q);
+
+  for (i = 0; i < MLKEM_N / 8; i++)
+  __loop__(invariant(i <= MLKEM_N / 8))
+  {
+    unsigned j;
+    uint8_t t[8] = {0};
+    for (j = 0; j < 8; j++)
+    __loop__(
+      invariant(i <= MLKEM_N / 8 && j <= 8)
+      invariant(array_bound(t, 0, j, 0, 32)))
+    {
+      t[j] = scalar_compress_d5(a->coeffs[8 * i + j]);
+    }
+
+    /*
+     * Explicitly truncate to avoid warning about
+     * implicit truncation in CBMC, and use array indexing into
+     * r rather than pointer-arithmetic to simplify verification
+     */
+    r[i * 5] = 0xFF & ((t[0] >> 0) | (t[1] << 5));
+    r[i * 5 + 1] = 0xFF & ((t[1] >> 3) | (t[2] << 2) | (t[3] << 7));
+    r[i * 5 + 2] = 0xFF & ((t[3] >> 1) | (t[4] << 4));
+    r[i * 5 + 3] = 0xFF & ((t[4] >> 4) | (t[5] << 1) | (t[6] << 6));
+    r[i * 5 + 4] = 0xFF & ((t[6] >> 2) | (t[7] << 3));
+  }
+}
+#else  /* MLK_USE_NATIVE_POLY_COMPRESS_D5 */
+MLK_INTERNAL_API
+void poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5], const poly *a)
+{
+  debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q);
+  poly_compress_d5_native(r, a->coeffs);
+}
+#endif /* MLK_USE_NATIVE_POLY_COMPRESS_D5 */
+
+#if !defined(MLK_USE_NATIVE_POLY_COMPRESS_D11)
+MLK_INTERNAL_API
+void poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11], const poly *a)
+{
+  unsigned j;
+  debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q);
+
+  for (j = 0; j < MLKEM_N / 8; j++)
+  __loop__(invariant(j <= MLKEM_N / 8))
+  {
+    unsigned k;
+    uint16_t t[8];
+    for (k = 0; k < 8; k++)
+    __loop__(
+      invariant(k <= 8)
+      invariant(forall(r, 0, k, t[r] < (1u << 11))))
+    {
+      t[k] = scalar_compress_d11(a->coeffs[8 * j + k]);
+    }
+
+    /*
+     * Make all implicit truncation explicit. No data is being
+     * truncated for the LHS's since each t[i] is 11-bit in size.
+     */
+    r[11 * j + 0] = (t[0] >> 0) & 0xFF;
+    r[11 * j + 1] = (t[0] >> 8) | ((t[1] << 3) & 0xFF);
+    r[11 * j + 2] = (t[1] >> 5) | ((t[2] << 6) & 0xFF);
+    r[11 * j + 3] = (t[2] >> 2) & 0xFF;
+    r[11 * j + 4] = (t[2] >> 10) | ((t[3] << 1) & 0xFF);
+    r[11 * j + 5] = (t[3] >> 7) | ((t[4] << 4) & 0xFF);
+    r[11 * j + 6] = (t[4] >> 4) | ((t[5] << 7) & 0xFF);
+    r[11 * j + 7] = (t[5] >> 1) & 0xFF;
+    r[11 * j + 8] = (t[5] >> 9) | ((t[6] << 2) & 0xFF);
+    r[11 * j + 9] = (t[6] >> 6) | ((t[7] << 5) & 0xFF);
+    r[11 * j + 10] = (t[7] >> 3);
+  }
+}
+#else  /* MLK_USE_NATIVE_POLY_COMPRESS_D11 */
+MLK_INTERNAL_API
+void poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11], const poly *a)
+{
+  debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q);
+  poly_compress_d11_native(r, a->coeffs);
+}
+#endif /* MLK_USE_NATIVE_POLY_COMPRESS_D11 */
+
+#if !defined(MLK_USE_NATIVE_POLY_DECOMPRESS_D5)
+MLK_INTERNAL_API
+void poly_decompress_d5(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5])
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_N / 8; i++)
+  __loop__(
+    invariant(i <= MLKEM_N / 8)
+    invariant(array_bound(r->coeffs, 0, 8 * i, 0, MLKEM_Q)))
+  {
+    unsigned j;
+    uint8_t t[8];
+    const unsigned offset = i * 5;
+    /*
+     * Explicitly truncate to avoid warning about
+     * implicit truncation in CBMC and unwind loop for ease
+     * of proof.
+     */
+
+    /*
+     * Decompress 5 8-bit bytes (so 40 bits) into
+     * 8 5-bit values stored in t[]
+     */
+    t[0] = 0x1F & (a[offset + 0] >> 0);
+    t[1] = 0x1F & ((a[offset + 0] >> 5) | (a[offset + 1] << 3));
+    t[2] = 0x1F & (a[offset + 1] >> 2);
+    t[3] = 0x1F & ((a[offset + 1] >> 7) | (a[offset + 2] << 1));
+    t[4] = 0x1F & ((a[offset + 2] >> 4) | (a[offset + 3] << 4));
+    t[5] = 0x1F & (a[offset + 3] >> 1);
+    t[6] = 0x1F & ((a[offset + 3] >> 6) | (a[offset + 4] << 2));
+    t[7] = 0x1F & (a[offset + 4] >> 3);
+
+    /* and copy to the correct slice in r[] */
+    for (j = 0; j < 8; j++)
+    __loop__(
+      invariant(j <= 8 && i <= MLKEM_N / 8)
+      invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, MLKEM_Q)))
+    {
+      r->coeffs[8 * i + j] = scalar_decompress_d5(t[j]);
+    }
+  }
+
+  debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q);
+}
+#else  /* MLK_USE_NATIVE_POLY_DECOMPRESS_D5 */
+MLK_INTERNAL_API
+void poly_decompress_d5(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5])
+{
+  poly_decompress_d5_native(r->coeffs, a);
+  debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q);
+}
+#endif /* MLK_USE_NATIVE_POLY_DECOMPRESS_D5 */
+
+#if !defined(MLK_USE_NATIVE_POLY_DECOMPRESS_D11)
+MLK_INTERNAL_API
+void poly_decompress_d11(poly *r,
+                         const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11])
+{
+  unsigned j;
+  for (j = 0; j < MLKEM_N / 8; j++)
+  __loop__(
+    invariant(j <= MLKEM_N / 8)
+    invariant(array_bound(r->coeffs, 0, 8 * j, 0, MLKEM_Q)))
+  {
+    unsigned k;
+    uint16_t t[8];
+    uint8_t const *base = &a[11 * j];
+    t[0] = 0x7FF & ((base[0] >> 0) | ((uint16_t)base[1] << 8));
+    t[1] = 0x7FF & ((base[1] >> 3) | ((uint16_t)base[2] << 5));
+    t[2] = 0x7FF & ((base[2] >> 6) | ((uint16_t)base[3] << 2) |
+                    ((uint16_t)base[4] << 10));
+    t[3] = 0x7FF & ((base[4] >> 1) | ((uint16_t)base[5] << 7));
+    t[4] = 0x7FF & ((base[5] >> 4) | ((uint16_t)base[6] << 4));
+    t[5] = 0x7FF & ((base[6] >> 7) | ((uint16_t)base[7] << 1) |
+                    ((uint16_t)base[8] << 9));
+    t[6] = 0x7FF & ((base[8] >> 2) | ((uint16_t)base[9] << 6));
+    t[7] = 0x7FF & ((base[9] >> 5) | ((uint16_t)base[10] << 3));
+
+    for (k = 0; k < 8; k++)
+    __loop__(
+      invariant(k <= 8)
+      invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, MLKEM_Q)))
+    {
+      r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]);
+    }
+  }
+
+  debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q);
+}
+#else  /* MLK_USE_NATIVE_POLY_DECOMPRESS_D11 */
+MLK_INTERNAL_API
+void poly_decompress_d11(poly *r,
+                         const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11])
+{
+  poly_decompress_d11_native(r->coeffs, a);
+  debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q);
+}
+#endif /* MLK_USE_NATIVE_POLY_DECOMPRESS_D11 */
+
+#endif /* MLK_MULTILEVEL_BUILD) || MLKEM_K == 4 */
+
+#if !defined(MLK_USE_NATIVE_POLY_TOBYTES)
+MLK_INTERNAL_API
+void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
+{
+  unsigned i;
+  debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q);
+
+  for (i = 0; i < MLKEM_N / 2; i++)
+  __loop__(invariant(i <= MLKEM_N / 2))
+  {
+    const uint16_t t0 = a->coeffs[2 * i];
+    const uint16_t t1 = a->coeffs[2 * i + 1];
+    /*
+     * t0 and t1 are both < MLKEM_Q, so contain at most 12 bits each of
+     * significant data, so these can be packed into 24 bits or exactly
+     * 3 bytes, as follows.
+     */
+
+    /* Least significant bits 0 - 7 of t0. */
+    r[3 * i + 0] = t0 & 0xFF;
+
+    /*
+     * Most significant bits 8 - 11 of t0 become the least significant
+     * nibble of the second byte. The least significant 4 bits
+     * of t1 become the upper nibble of the second byte.
+     */
+    r[3 * i + 1] = (t0 >> 8) | ((t1 << 4) & 0xF0);
+
+    /* Bits 4 - 11 of t1 become the third byte. */
+    r[3 * i + 2] = t1 >> 4;
+  }
+}
+#else  /* MLK_USE_NATIVE_POLY_TOBYTES */
+MLK_INTERNAL_API
+void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
+{
+  debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q);
+  poly_tobytes_native(r, a->coeffs);
+}
+#endif /* MLK_USE_NATIVE_POLY_TOBYTES */
+
+#if !defined(MLK_USE_NATIVE_POLY_FROMBYTES)
+MLK_INTERNAL_API
+void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_N / 2; i++)
+  __loop__(
+    invariant(i <= MLKEM_N / 2)
+    invariant(array_bound(r->coeffs, 0, 2 * i, 0, MLKEM_UINT12_LIMIT)))
+  {
+    const uint8_t t0 = a[3 * i + 0];
+    const uint8_t t1 = a[3 * i + 1];
+    const uint8_t t2 = a[3 * i + 2];
+    r->coeffs[2 * i + 0] = t0 | ((t1 << 8) & 0xFFF);
+    r->coeffs[2 * i + 1] = (t1 >> 4) | (t2 << 4);
+  }
+
+  /* Note that the coefficients are not canonical */
+  debug_assert_bound(r, MLKEM_N, 0, MLKEM_UINT12_LIMIT);
+}
+#else  /* MLK_USE_NATIVE_POLY_FROMBYTES */
+MLK_INTERNAL_API
+void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
+{
+  poly_frombytes_native(r->coeffs, a);
+}
+#endif /* MLK_USE_NATIVE_POLY_FROMBYTES */
+
+MLK_INTERNAL_API
+void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
+{
+  unsigned i;
+#if (MLKEM_INDCPA_MSGBYTES != MLKEM_N / 8)
+#error "MLKEM_INDCPA_MSGBYTES must be equal to MLKEM_N/8 bytes!"
+#endif
+
+  for (i = 0; i < MLKEM_N / 8; i++)
+  __loop__(
+    invariant(i <= MLKEM_N / 8)
+    invariant(array_bound(r->coeffs, 0, 8 * i, 0, MLKEM_Q)))
+  {
+    unsigned j;
+    for (j = 0; j < 8; j++)
+    __loop__(
+      invariant(i <  MLKEM_N / 8 && j <= 8)
+      invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, MLKEM_Q)))
+    {
+      /* ct_sel_int16(MLKEM_Q_HALF, 0, b) is `Decompress_1(b != 0)`
+       * as per [FIPS 203, Eq (4.8)]. */
+
+      /* Prevent the compiler from recognizing this as a bit selection */
+      uint8_t mask = value_barrier_u8(1u << j);
+      r->coeffs[8 * i + j] = ct_sel_int16(MLKEM_Q_HALF, 0, msg[i] & mask);
+    }
+  }
+  debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q);
+}
+
+MLK_INTERNAL_API
+void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a)
+{
+  unsigned i;
+  debug_assert_bound(a, MLKEM_N, 0, MLKEM_Q);
+
+  for (i = 0; i < MLKEM_N / 8; i++)
+  __loop__(invariant(i <= MLKEM_N / 8))
+  {
+    unsigned j;
+    msg[i] = 0;
+    for (j = 0; j < 8; j++)
+    __loop__(
+      invariant(i <= MLKEM_N / 8 && j <= 8))
+    {
+      uint32_t t = scalar_compress_d1(a->coeffs[8 * i + j]);
+      msg[i] |= t << j;
+    }
+  }
+}
+
+#else /* MLK_MULTILEVEL_BUILD_NO_SHARED */
+
+MLK_EMPTY_CU(compress)
+
+#endif /* MLK_MULTILEVEL_BUILD_NO_SHARED */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/compress.h b/crypto/fipsmodule/ml_kem/mlkem/compress.h
new file mode 100644
index 0000000000..d3ec13b2af
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/compress.h
@@ -0,0 +1,635 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef MLK_COMPRESS_H
+#define MLK_COMPRESS_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include "cbmc.h"
+#include "common.h"
+#include "debug.h"
+#include "poly.h"
+#include "verify.h"
+
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define scalar_compress_d1 MLK_NAMESPACE(scalar_compress_d1)
+#define scalar_compress_d4 MLK_NAMESPACE(scalar_compress_d4)
+#define scalar_compress_d5 MLK_NAMESPACE(scalar_compress_d5)
+#define scalar_compress_d10 MLK_NAMESPACE(scalar_compress_d10)
+#define scalar_compress_d11 MLK_NAMESPACE(scalar_compress_d11)
+#define scalar_decompress_d4 MLK_NAMESPACE(scalar_decompress_d4)
+#define scalar_decompress_d5 MLK_NAMESPACE(scalar_decompress_d5)
+#define scalar_decompress_d10 MLK_NAMESPACE(scalar_decompress_d10)
+#define scalar_decompress_d11 MLK_NAMESPACE(scalar_decompress_d11)
+/* End of static namespacing */
+
+/************************************************************
+ * Name: scalar_compress_d1
+ *
+ * Description: Computes round(u * 2 / q)
+ *
+ * Arguments: - u: Unsigned canonical modulus modulo q
+ *                 to be compressed.
+ *
+ * Specification: Compress_1 from [FIPS 203, Eq (4.7)].
+ *
+ ************************************************************/
+/*
+ * The multiplication in this routine will exceed UINT32_MAX
+ * and wrap around for large values of u. This is expected and required.
+ */
+#ifdef CBMC
+#pragma CPROVER check push
+#pragma CPROVER check disable "unsigned-overflow"
+#endif
+static MLK_INLINE uint32_t scalar_compress_d1(uint16_t u)
+__contract__(
+  requires(u <= MLKEM_Q - 1)
+  ensures(return_value < 2)
+  ensures(return_value == (((uint32_t)u * 2 + MLKEM_Q / 2) / MLKEM_Q) % 2)  )
+{
+  /* Compute as follows:
+   * ```
+   * round(u * 2 / MLKEM_Q)
+   *   = round(u * 2 * (2^31 / MLKEM_Q) / 2^31)
+   *  ~= round(u * 2 * round(2^31 / MLKEM_Q) / 2^31)
+   * ```
+   */
+  /* check-magic: 1290168 == 2*round(2^31 / MLKEM_Q) */
+  uint32_t d0 = (uint32_t)u * 1290168;
+  return (d0 + (1u << 30)) >> 31;
+}
+#ifdef CBMC
+#pragma CPROVER check pop
+#endif
+
+/************************************************************
+ * Name: scalar_compress_d4
+ *
+ * Description: Computes round(u * 16 / q) % 16
+ *
+ * Arguments: - u: Unsigned canonical modulus modulo q
+ *                 to be compressed.
+ *
+ * Specification: Compress_4 from [FIPS 203, Eq (4.7)].
+ *
+ ************************************************************/
+/*
+ * The multiplication in this routine will exceed UINT32_MAX
+ * and wrap around for large values of u. This is expected and required.
+ */
+#ifdef CBMC
+#pragma CPROVER check push
+#pragma CPROVER check disable "unsigned-overflow"
+#endif
+static MLK_INLINE uint32_t scalar_compress_d4(uint16_t u)
+__contract__(
+  requires(u <= MLKEM_Q - 1)
+  ensures(return_value < 16)
+  ensures(return_value == (((uint32_t)u * 16 + MLKEM_Q / 2) / MLKEM_Q) % 16))
+{
+  /* Compute as follows:
+   * ```
+   * round(u * 16 / MLKEM_Q)
+   *   = round(u * 16 * (2^28 / MLKEM_Q) / 2^28)
+   *  ~= round(u * 16 * round(2^28 / MLKEM_Q) / 2^28)
+   * ```
+   */
+  /* check-magic: 1290160 == 16 * round(2^28 / MLKEM_Q) */
+  uint32_t d0 = (uint32_t)u * 1290160;
+  return (d0 + (1u << 27)) >> 28; /* round(d0/2^28) */
+}
+#ifdef CBMC
+#pragma CPROVER check pop
+#endif
+
+/************************************************************
+ * Name: scalar_decompress_d4
+ *
+ * Description: Computes round(u * q / 16)
+ *
+ * Arguments: - u: Unsigned canonical modulus modulo 16
+ *                 to be decompressed.
+ *
+ * Specification: Decompress_4 from [FIPS 203, Eq (4.8)].
+ *
+ ************************************************************/
+static MLK_INLINE uint16_t scalar_decompress_d4(uint32_t u)
+__contract__(
+  requires(0 <= u && u < 16)
+  ensures(return_value <= (MLKEM_Q - 1))
+) { return ((u * MLKEM_Q) + 8) >> 4; }
+
+/************************************************************
+ * Name: scalar_compress_d5
+ *
+ * Description: Computes round(u * 32 / q) % 32
+ *
+ * Arguments: - u: Unsigned canonical modulus modulo q
+ *                 to be compressed.
+ *
+ * Specification: Compress_5 from [FIPS 203, Eq (4.7)].
+ *
+ ************************************************************/
+/*
+ * The multiplication in this routine will exceed UINT32_MAX
+ * and wrap around for large values of u. This is expected and required.
+ */
+#ifdef CBMC
+#pragma CPROVER check push
+#pragma CPROVER check disable "unsigned-overflow"
+#endif
+static MLK_INLINE uint32_t scalar_compress_d5(uint16_t u)
+__contract__(
+  requires(u <= MLKEM_Q - 1)
+  ensures(return_value < 32)
+  ensures(return_value == (((uint32_t)u * 32 + MLKEM_Q / 2) / MLKEM_Q) % 32)  )
+{
+  /* Compute as follows:
+   * ```
+   * round(u * 32 / MLKEM_Q)
+   *   = round(u * 32 * (2^27 / MLKEM_Q) / 2^27)
+   *  ~= round(u * 32 * round(2^27 / MLKEM_Q) / 2^27)
+   * ```
+   */
+  /* check-magic: 1290176 == 2^5 * round(2^27 / MLKEM_Q) */
+  uint32_t d0 = (uint32_t)u * 1290176;
+  return (d0 + (1u << 26)) >> 27; /* round(d0/2^27) */
+}
+#ifdef CBMC
+#pragma CPROVER check pop
+#endif
+
+/************************************************************
+ * Name: scalar_decompress_d5
+ *
+ * Description: Computes round(u * q / 32)
+ *
+ * Arguments: - u: Unsigned canonical modulus modulo 32
+ *                 to be decompressed.
+ *
+ * Specification: Decompress_5 from [FIPS 203, Eq (4.8)].
+ *
+ ************************************************************/
+static MLK_INLINE uint16_t scalar_decompress_d5(uint32_t u)
+__contract__(
+  requires(0 <= u && u < 32)
+  ensures(return_value <= MLKEM_Q - 1)
+) { return ((u * MLKEM_Q) + 16) >> 5; }
+
+/************************************************************
+ * Name: scalar_compress_d10
+ *
+ * Description: Computes round(u * 2**10 / q) % 2**10
+ *
+ * Arguments: - u: Unsigned canonical modulus modulo q
+ *                 to be compressed.
+ *
+ * Specification: Compress_10 from [FIPS 203, Eq (4.7)].
+ *
+ ************************************************************/
+/*
+ * The multiplication in this routine will exceed UINT32_MAX
+ * and wrap around for large values of u. This is expected and required.
+ */
+#ifdef CBMC
+#pragma CPROVER check push
+#pragma CPROVER check disable "unsigned-overflow"
+#endif
+static MLK_INLINE uint32_t scalar_compress_d10(uint16_t u)
+__contract__(
+  requires(u <= MLKEM_Q - 1)
+  ensures(return_value < (1u << 10))
+  ensures(return_value == (((uint32_t)u * (1u << 10) + MLKEM_Q / 2) / MLKEM_Q) % (1 << 10)))
+{
+  /* Compute as follows:
+   * ```
+   * round(u * 1024 / MLKEM_Q)
+   *   = round(u * 1024 * (2^33 / MLKEM_Q) / 2^33)
+   *  ~= round(u * 1024 * round(2^33 / MLKEM_Q) / 2^33)
+   * ```
+   */
+  /* check-magic: 2642263040 == 2^10 * round(2^33 / MLKEM_Q) */
+  uint64_t d0 = (uint64_t)u * 2642263040;
+  d0 = (d0 + ((uint64_t)1u << 32)) >> 33; /* round(d0/2^33) */
+  return (d0 & 0x3FF);
+}
+#ifdef CBMC
+#pragma CPROVER check pop
+#endif
+
+/************************************************************
+ * Name: scalar_decompress_d10
+ *
+ * Description: Computes round(u * q / 1024)
+ *
+ * Arguments: - u: Unsigned canonical modulus modulo 1024
+ *                 to be decompressed.
+ *
+ * Specification: Decompress_10 from [FIPS 203, Eq (4.8)].
+ *
+ ************************************************************/
+static MLK_INLINE uint16_t scalar_decompress_d10(uint32_t u)
+__contract__(
+  requires(0 <= u && u < 1024)
+  ensures(return_value <= (MLKEM_Q - 1))
+) { return ((u * MLKEM_Q) + 512) >> 10; }
+
+/************************************************************
+ * Name: scalar_compress_d11
+ *
+ * Description: Computes round(u * 2**11 / q) % 2**11
+ *
+ * Arguments: - u: Unsigned canonical modulus modulo q
+ *                 to be compressed.
+ *
+ * Specification: Compress_11 from [FIPS 203, Eq (4.7)].
+ *
+ ************************************************************/
+/*
+ * The multiplication in this routine will exceed UINT32_MAX
+ * and wrap around for large values of u. This is expected and required.
+ */
+#ifdef CBMC
+#pragma CPROVER check push
+#pragma CPROVER check disable "unsigned-overflow"
+#endif
+static MLK_INLINE uint32_t scalar_compress_d11(uint16_t u)
+__contract__(
+  requires(u <= MLKEM_Q - 1)
+  ensures(return_value < (1u << 11))
+  ensures(return_value == (((uint32_t)u * (1u << 11) + MLKEM_Q / 2) / MLKEM_Q) % (1 << 11)))
+{
+  /* Compute as follows:
+   * ```
+   * round(u * 2048 / MLKEM_Q)
+   *   = round(u * 2048 * (2^33 / MLKEM_Q) / 2^33)
+   *  ~= round(u * 2048 * round(2^33 / MLKEM_Q) / 2^33)
+   * ```
+   */
+  /* check-magic: 5284526080 == 2^11 * round(2^33 / MLKEM_Q) */
+  uint64_t d0 = (uint64_t)u * 5284526080;
+  d0 = (d0 + ((uint64_t)1u << 32)) >> 33; /* round(d0/2^33) */
+  return (d0 & 0x7FF);
+}
+#ifdef CBMC
+#pragma CPROVER check pop
+#endif
+
+/************************************************************
+ * Name: scalar_decompress_d11
+ *
+ * Description: Computes round(u * q / 2048)
+ *
+ * Arguments: - u: Unsigned canonical modulus modulo 2048
+ *                 to be decompressed.
+ *
+ * Specification: Decompress_11 from [FIPS 203, Eq (4.8)].
+ *
+ ************************************************************/
+static MLK_INLINE uint16_t scalar_decompress_d11(uint32_t u)
+__contract__(
+  requires(0 <= u && u < 2048)
+  ensures(return_value <= (MLKEM_Q - 1))
+) { return ((u * MLKEM_Q) + 1024) >> 11; }
+
+#if defined(MLK_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 || MLKEM_K == 3)
+#define poly_compress_d4 MLK_NAMESPACE(poly_compress_d4)
+/*************************************************
+ * Name:        poly_compress_d4
+ *
+ * Description: Compression (4 bits) and subsequent serialization of a
+ *              polynomial
+ *
+ * Arguments:   - uint8_t *r: pointer to output byte array
+ *                   (of length MLKEM_POLYCOMPRESSEDBYTES_D4 bytes)
+ *              - const poly *a: pointer to input polynomial
+ *                  Coefficients must be unsigned canonical,
+ *                  i.e. in [0,1,..,MLKEM_Q-1].
+ *
+ * Specification: Implements `ByteEncode_4 (Compress_4 (a))`:
+ *                - ByteEncode_d: [FIPS 203, Algorithm 5],
+ *                - Compress_d: [FIPS 203, Eq (4.7)]
+ *                  Extended to vectors as per
+ *                  [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *                - `ByteEncode_{d_v} (Compress_{d_v} (v))` appears in
+ *                  [FIPS 203, Algorithm 14 (K-PKE.Encrypt), L23],
+ *                  where `d_v=4` for ML-KEM-{512,768} [FIPS 203, Table 2].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4], const poly *a);
+
+#define poly_compress_d10 MLK_NAMESPACE(poly_compress_d10)
+/*************************************************
+ * Name:        poly_compress_d10
+ *
+ * Description: Compression (10 bits) and subsequent serialization of a
+ *              polynomial
+ *
+ * Arguments:   - uint8_t *r: pointer to output byte array
+ *                   (of length MLKEM_POLYCOMPRESSEDBYTES_D10 bytes)
+ *              - const poly *a: pointer to input polynomial
+ *                  Coefficients must be unsigned canonical,
+ *                  i.e. in [0,1,..,MLKEM_Q-1].
+ *
+ * Specification: Implements `ByteEncode_10 (Compress_10 (a))`:
+ *                - ByteEncode_d: [FIPS 203, Algorithm 5],
+ *                - Compress_d: [FIPS 203, Eq (4.7)]
+ *                  Extended to vectors as per
+ *                  [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *                - `ByteEncode_{d_u} (Compress_{d_u} (u))` appears in
+ *                  [FIPS 203, Algorithm 14 (K-PKE.Encrypt), L22],
+ *                  where `d_u=10` for ML-KEM-{512,768} [FIPS 203, Table 2].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10], const poly *a);
+
+#define poly_decompress_d4 MLK_NAMESPACE(poly_decompress_d4)
+/*************************************************
+ * Name:        poly_decompress_d4
+ *
+ * Description: De-serialization and subsequent decompression (dv bits) of a
+ *              polynomial; approximate inverse of poly_compress
+ *
+ * Arguments:   - poly *r: pointer to output polynomial
+ *              - const uint8_t *a: pointer to input byte array
+ *                   (of length MLKEM_POLYCOMPRESSEDBYTES_D4 bytes)
+ *
+ * Upon return, the coefficients of the output polynomial are unsigned-canonical
+ * (non-negative and smaller than MLKEM_Q).
+ *
+ * Specification: Implements `Decompress_4 (ByteDecode_4 (a))`:
+ *                - ByteDecode_d: [FIPS 203, Algorithm 6],
+ *                - Decompress_d: [FIPS 203, Eq (4.8)]
+ *                  Extended to vectors as per
+ *                  [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *                - `Decompress_{d_v} (ByteDecode_{d_v} (v))` appears in
+ *                  [FIPS 203, Algorithm 15 (K-PKE.Decrypt), L4],
+ *                  where `d_v=4` for ML-KEM-{512,768} [FIPS 203, Table 2].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void poly_decompress_d4(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4]);
+
+#define poly_decompress_d10 MLK_NAMESPACE(poly_decompress_d10)
+/*************************************************
+ * Name:        poly_decompress_d10
+ *
+ * Description: De-serialization and subsequent decompression (10 bits) of a
+ *              polynomial; approximate inverse of poly_compress_d10
+ *
+ * Arguments:   - poly *r: pointer to output polynomial
+ *              - const uint8_t *a: pointer to input byte array
+ *                   (of length MLKEM_POLYCOMPRESSEDBYTES_D10 bytes)
+ *
+ * Upon return, the coefficients of the output polynomial are unsigned-canonical
+ * (non-negative and smaller than MLKEM_Q).
+ *
+ * Specification: Implements `Decompress_10 (ByteDecode_10 (a))`:
+ *                - ByteDecode_d: [FIPS 203, Algorithm 6],
+ *                - Decompress_d: [FIPS 203, Eq (4.8)]
+ *                  Extended to vectors as per
+ *                  [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *                - `Decompress_{d_u} (ByteDecode_{d_u} (u))` appears in
+ *                  [FIPS 203, Algorithm 15 (K-PKE.Decrypt), L3],
+ *                  where `d_u=10` for ML-KEM-{512,768} [FIPS 203, Table 2].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void poly_decompress_d10(poly *r,
+                         const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10]);
+#endif /* defined(MLK_MULTILEVEL_BUILD_WITH_SHARED) || (MLKEM_K == 2 \
+          || MLKEM_K == 3) */
+
+#if defined(MLK_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4
+#define poly_compress_d5 MLK_NAMESPACE(poly_compress_d5)
+/*************************************************
+ * Name:        poly_compress_d5
+ *
+ * Description: Compression (5 bits) and subsequent serialization of a
+ *              polynomial
+ *
+ * Arguments:   - uint8_t *r: pointer to output byte array
+ *                   (of length MLKEM_POLYCOMPRESSEDBYTES_D5 bytes)
+ *              - const poly *a: pointer to input polynomial
+ *                  Coefficients must be unsigned canonical,
+ *                  i.e. in [0,1,..,MLKEM_Q-1].
+ *
+ * Specification: Implements `ByteEncode_5 (Compress_5 (a))`:
+ *                - ByteEncode_d: [FIPS 203, Algorithm 5],
+ *                - Compress_d: [FIPS 203, Eq (4.7)]
+ *                  Extended to vectors as per
+ *                  [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *                - `ByteEncode_{d_v} (Compress_{d_v} (v))` appears in
+ *                  [FIPS 203, Algorithm 14 (K-PKE.Encrypt), L23],
+ *                  where `d_v=5` for ML-KEM-1024 [FIPS 203, Table 2].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5], const poly *a);
+
+#define poly_compress_d11 MLK_NAMESPACE(poly_compress_d11)
+/*************************************************
+ * Name:        poly_compress_d11
+ *
+ * Description: Compression (11 bits) and subsequent serialization of a
+ *              polynomial
+ *
+ * Arguments:   - uint8_t *r: pointer to output byte array
+ *                   (of length MLKEM_POLYCOMPRESSEDBYTES_D11 bytes)
+ *              - const poly *a: pointer to input polynomial
+ *                  Coefficients must be unsigned canonical,
+ *                  i.e. in [0,1,..,MLKEM_Q-1].
+ *
+ * Specification: `ByteEncode_11 (Compress_11 (a))`:
+ *                - ByteEncode_d: [FIPS 203, Algorithm 5],
+ *                - Compress_d: [FIPS 203, Eq (4.7)]
+ *                  Extended to vectors as per
+ *                  [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *                - `ByteEncode_{d_u} (Compress_{d_u} (u))` appears in
+ *                  [FIPS 203, Algorithm 14 (K-PKE.Encrypt), L22],
+ *                  where `d_u=11` for ML-KEM-1024 [FIPS 203, Table 2].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11], const poly *a);
+
+#define poly_decompress_d5 MLK_NAMESPACE(poly_decompress_d5)
+/*************************************************
+ * Name:        poly_decompress_d5
+ *
+ * Description: De-serialization and subsequent decompression (dv bits) of a
+ *              polynomial; approximate inverse of poly_compress
+ *
+ * Arguments:   - poly *r: pointer to output polynomial
+ *              - const uint8_t *a: pointer to input byte array
+ *                   (of length MLKEM_POLYCOMPRESSEDBYTES_D5 bytes)
+ *
+ * Upon return, the coefficients of the output polynomial are unsigned-canonical
+ * (non-negative and smaller than MLKEM_Q).
+ *
+ * Specification: Implements `Decompress_5 (ByteDecode_5 (a))`:
+ *                - ByteDecode_d: [FIPS 203, Algorithm 6],
+ *                - Decompress_d: [FIPS 203, Eq (4.8)]
+ *                  Extended to vectors as per
+ *                  [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *                - `Decompress_{d_v} (ByteDecode_{d_v} (v))` appears in
+ *                  [FIPS 203, Algorithm 15 (K-PKE.Decrypt), L4],
+ *                  where `d_v=5` for ML-KEM-1024 [FIPS 203, Table 2].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void poly_decompress_d5(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5]);
+
+#define poly_decompress_d11 MLK_NAMESPACE(poly_decompress_d11)
+/*************************************************
+ * Name:        poly_decompress_d11
+ *
+ * Description: De-serialization and subsequent decompression (11 bits) of a
+ *              polynomial; approximate inverse of poly_compress_d11
+ *
+ * Arguments:   - poly *r: pointer to output polynomial
+ *              - const uint8_t *a: pointer to input byte array
+ *                   (of length MLKEM_POLYCOMPRESSEDBYTES_D11 bytes)
+ *
+ * Upon return, the coefficients of the output polynomial are unsigned-canonical
+ * (non-negative and smaller than MLKEM_Q).
+ *
+ * Specification: Implements `Decompress_11 (ByteDecode_11 (a))`:
+ *                - ByteDecode_d: [FIPS 203, Algorithm 6],
+ *                - Decompress_d: [FIPS 203, Eq (4.8)]
+ *                  Extended to vectors as per
+ *                  [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *                - `Decompress_{d_u} (ByteDecode_{d_u} (u))` appears in
+ *                  [FIPS 203, Algorithm 15 (K-PKE.Decrypt), L3],
+ *                  where `d_u=11` for ML-KEM-1024 [FIPS 203, Table 2].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void poly_decompress_d11(poly *r,
+                         const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11]);
+#endif /* defined(MLK_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_K == 4 \
+        */
+
+#define poly_tobytes MLK_NAMESPACE(poly_tobytes)
+/*************************************************
+ * Name:        poly_tobytes
+ *
+ * Description: Serialization of a polynomial.
+ *              Signed coefficients are converted to
+ *              unsigned form before serialization.
+ *
+ * Arguments:   INPUT:
+ *              - a: const pointer to input polynomial,
+ *                with each coefficient in the range [0,1,..,Q-1]
+ *              OUTPUT
+ *              - r: pointer to output byte array
+ *                   (of MLKEM_POLYBYTES bytes)
+ *
+ * Specification: Implements ByteEncode_12 [FIPS 203, Algorithm 5].
+ *                Extended to vectors as per
+ *                [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
+__contract__(
+  requires(memory_no_alias(r, MLKEM_POLYBYTES))
+  requires(memory_no_alias(a, sizeof(poly)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q))
+  assigns(object_whole(r))
+);
+
+
+#define poly_frombytes MLK_NAMESPACE(poly_frombytes)
+/*************************************************
+ * Name:        poly_frombytes
+ *
+ * Description: De-serialization of a polynomial.
+ *
+ * Arguments:   INPUT
+ *              - a: pointer to input byte array
+ *                   (of MLKEM_POLYBYTES bytes)
+ *              OUTPUT
+ *              - r: pointer to output polynomial, with
+ *                   each coefficient unsigned and in the range
+ *                   0 .. 4095
+ *
+ * Specification: Implements ByteDecode_12 [FIPS 203, Algorithm 6].
+ *                Extended to vectors as per
+ *                [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
+__contract__(
+  requires(memory_no_alias(a, MLKEM_POLYBYTES))
+  requires(memory_no_alias(r, sizeof(poly)))
+  assigns(memory_slice(r, sizeof(poly)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_UINT12_LIMIT))
+);
+
+
+#define poly_frommsg MLK_NAMESPACE(poly_frommsg)
+/*************************************************
+ * Name:        poly_frommsg
+ *
+ * Description: Convert 32-byte message to polynomial
+ *
+ * Arguments:   - poly *r: pointer to output polynomial
+ *              - const uint8_t *msg: pointer to input message
+ *
+ * Specification: Implements `Decompress_1 (ByteDecode_1 (a))`:
+ *                - ByteDecode_d: [FIPS 203, Algorithm 6],
+ *                - Decompress_d: [FIPS 203, Eq (4.8)]
+ *                  Extended to vectors as per
+ *                  [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *                - `Decompress_1 (ByteDecode_1 (w))` appears in
+ *                  [FIPS 203, Algorithm 15 (K-PKE.Encrypt), L20].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
+__contract__(
+  requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES))
+  requires(memory_no_alias(r, sizeof(poly)))
+  assigns(object_whole(r))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q))
+);
+
+#define poly_tomsg MLK_NAMESPACE(poly_tomsg)
+/*************************************************
+ * Name:        poly_tomsg
+ *
+ * Description: Convert polynomial to 32-byte message
+ *
+ * Arguments:   - uint8_t *msg: pointer to output message
+ *              - const poly *r: pointer to input polynomial
+ *                Coefficients must be unsigned canonical
+ *
+ * Specification: Implements `ByteEncode_1 (Compress_1 (a))`:
+ *                - ByteEncode_d: [FIPS 203, Algorithm 5],
+ *                - Compress_d: [FIPS 203, Eq (4.7)]
+ *                  Extended to vectors as per
+ *                  [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *                - `ByteEncode_1 (Compress_1 (w))` appears in
+ *                  [FIPS 203, Algorithm 14 (K-PKE.Decrypt), L7].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *r)
+__contract__(
+  requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES))
+  requires(memory_no_alias(r, sizeof(poly)))
+  requires(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q))
+  assigns(object_whole(msg))
+);
+
+#endif /* MLK_COMPRESS_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/config.h b/crypto/fipsmodule/ml_kem/mlkem/config.h
new file mode 100644
index 0000000000..526b9efb51
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/config.h
@@ -0,0 +1,338 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef MLK_CONFIG_H
+#define MLK_CONFIG_H
+
+/******************************************************************************
+ * Name:        MLKEM_K
+ *
+ * Description: Determines the security level for ML-KEM
+ *              - MLKEM_K=2 corresponds to ML-KEM-512
+ *              - MLKEM_K=3 corresponds to ML-KEM-768
+ *              - MLKEM_K=4 corresponds to ML-KEM-1024
+ *
+ *              This can also be set using CFLAGS.
+ *
+ *****************************************************************************/
+#ifndef MLKEM_K
+#define MLKEM_K 3 /* Change this for different security strengths */
+#endif
+
+/******************************************************************************
+ * Name:        MLK_CONFIG_FILE
+ *
+ * Description: If defined, this is a header that will be included instead
+ *              of this default configuration file mlkem/config.h.
+ *
+ *              When you need to build mlkem-native in multiple configurations,
+ *              using varying MLK_CONFIG_FILE can be more convenient
+ *              then configuring everything through CFLAGS.
+ *
+ *              To use, MLK_CONFIG_FILE _must_ be defined prior
+ *              to the inclusion of any mlkem-native headers. For example,
+ *              it can be set by passing `-DMLK_CONFIG_FILE="..."`
+ *              on the command line.
+ *
+ *****************************************************************************/
+/* #define MLK_CONFIG_FILE "config.h" */
+
+/******************************************************************************
+ * Name:        MLK_NAMESPACE_PREFIX
+ *
+ * Description: The prefix to use to namespace global symbols from mlkem/.
+ *
+ *              Level-dependent symbols will additionally be prefixed with the
+ *              security level if MLK_NAMESPACE_PREFIX_ADD_LEVEL is set.
+ *
+ *              This can also be set using CFLAGS.
+ *
+ *****************************************************************************/
+#if !defined(MLK_NAMESPACE_PREFIX)
+#define MLK_NAMESPACE_PREFIX MLK_DEFAULT_NAMESPACE_PREFIX
+#endif
+
+/******************************************************************************
+ * Name:        MLK_NAMESPACE_PREFIX_ADD_LEVEL
+ *
+ * Description: If set, the level (512, 768, 1024) is added to the namespace
+ *              prefix MLK_NAMESPACE_PREFIX for all functions which are
+ *              level-dependent. Level-independent functions will have there
+ *              symbol prefixed by MLK_NAMESPACE_PREFIX only.
+ *
+ *              This is intended to be used for multi-level builds where
+ *              level-independent code should be shared across levels.
+ *
+ *              This can also be set using CFLAGS.
+ *
+ *****************************************************************************/
+/* #define MLK_NAMESPACE_PREFIX_ADD_LEVEL */
+
+/******************************************************************************
+ * Name:        MLK_MULTILEVEL_BUILD_WITH_SHARED
+ *
+ * Description: This is for multi-level builds of mlkem-native only. If you
+ *              need only a single security level build of mlkem-native,
+ *              keep this unset.
+ *
+ *              If this is set, all MLKEM_K-independent code will be included
+ *              in the build, including code needed only for other security
+ *              levels.
+ *
+ *              Example: poly_cbd3 is only needed for MLKEM_K == 2. Yet, if
+ *              this option is set for a build with MLKEM_K==3/4, it would
+ *              be included.
+ *
+ *              To build mlkem-native with support for all security levels,
+ *              build it three times -- once per level -- and set the option
+ *              MLK_MULTILEVEL_BUILD_WITH_SHARED for exactly one of
+ *              them, and MLK_MULTILEVEL_BUILD_NO_SHARED for the
+ *              others.
+ *
+ *              See examples/multilevel_build for an example.
+ *
+ *              This can also be set using CFLAGS.
+ *
+ *****************************************************************************/
+/* #define MLK_MULTILEVEL_BUILD_WITH_SHARED */
+
+/******************************************************************************
+ * Name:        MLK_MULTILEVEL_BUILD_NO_SHARED
+ *
+ * Description: This is for multi-level builds of mlkem-native only. If you
+ *              need only a single security level build of mlkem-native,
+ *              keep this unset.
+ *
+ *              If this is set, no MLKEM_K-independent code will be included
+ *              in the build.
+ *
+ *              To build mlkem-native with support for all security levels,
+ *              build it three times -- once per level -- and set the option
+ *              MLK_MULTILEVEL_BUILD_WITH_SHARED for exactly one of
+ *              them, and MLK_MULTILEVEL_BUILD_NO_SHARED for the
+ *              others.
+ *
+ *              See examples/multilevel_build for an example.
+ *
+ *              This can also be set using CFLAGS.
+ *
+ *****************************************************************************/
+/* #define MLK_MULTILEVEL_BUILD_NO_SHARED */
+
+/******************************************************************************
+ * Name:        MLK_USE_NATIVE_BACKEND_ARITH
+ *
+ * Description: Determines whether an native arithmetic backend should be used.
+ *
+ *              The arithmetic backend covers performance critical functions
+ *              such as the number-theoretic transform (NTT).
+ *
+ *              If this option is unset, the C backend will be used.
+ *
+ *              If this option is set, the arithmetic backend to be use is
+ *              determined by MLK_ARITH_BACKEND: If the latter is
+ *              unset, the default backend for your the target architecture
+ *              will be used. If set, it must be the name of a backend metadata
+ *              file.
+ *
+ *              This can also be set using CFLAGS.
+ *
+ *****************************************************************************/
+#if !defined(MLK_USE_NATIVE_BACKEND_ARITH)
+/* #define MLK_USE_NATIVE_BACKEND_ARITH */
+#endif
+
+/******************************************************************************
+ * Name:        MLK_ARITH_BACKEND_FILE
+ *
+ * Description: The arithmetic backend to use.
+ *
+ *              If MLK_USE_NATIVE_BACKEND_ARITH is unset, this option
+ *              is ignored.
+ *
+ *              If MLK_USE_NATIVE_BACKEND_ARITH is set, this option must
+ *              either be undefined or the filename of an arithmetic backend.
+ *              If unset, the default backend will be used.
+ *
+ *              This can be set using CFLAGS.
+ *
+ *****************************************************************************/
+#if defined(MLK_USE_NATIVE_BACKEND_ARITH) && !defined(MLK_ARITH_BACKEND_FILE)
+#define MLK_ARITH_BACKEND_FILE "native/meta.h"
+#endif
+
+/******************************************************************************
+ * Name:        MLK_USE_NATIVE_BACKEND_FIPS202
+ *
+ * Description: Determines whether an native FIPS202 backend should be used.
+ *
+ *              The FIPS202 backend covers 1x/2x/4x-fold Keccak-f1600, which is
+ *              the performance bottleneck of SHA3 and SHAKE.
+ *
+ *              If this option is unset, the C backend will be used.
+ *
+ *              If this option is set, the FIPS202 backend to be use is
+ *              determined by MLK_FIPS202_BACKEND: If the latter is
+ *              unset, the default backend for your the target architecture
+ *              will be used. If set, it must be the name of a backend metadata
+ *              file.
+ *
+ *              This can also be set using CFLAGS.
+ *
+ *****************************************************************************/
+#if !defined(MLK_USE_NATIVE_BACKEND_FIPS202)
+/* #define MLK_USE_NATIVE_BACKEND_FIPS202 */
+#endif
+
+/******************************************************************************
+ * Name:        MLK_FIPS202_BACKEND_FILE
+ *
+ * Description: The FIPS-202 backend to use.
+ *
+ *              If MLK_USE_NATIVE_BACKEND_FIPS202 is set, this option must
+ *              either be undefined or the filename of a FIPS202 backend.
+ *              If unset, the default backend will be used.
+ *
+ *              This can be set using CFLAGS.
+ *
+ *****************************************************************************/
+#if defined(MLK_USE_NATIVE_BACKEND_FIPS202) && \
+    !defined(MLK_FIPS202_BACKEND_FILE)
+#define MLK_FIPS202_BACKEND_FILE "fips202/native/meta.h"
+#endif
+
+/******************************************************************************
+ * Name:        MLK_FIPS202_CUSTOM_HEADER
+ *
+ * Description: Custom header to use for FIPS-202
+ *
+ *              This should only be set if you intend to use a custom
+ *              FIPS-202 implementation, different from the one shipped
+ *              with mlkem-native.
+ *
+ *              If set, it must be the name of a file serving as the
+ *              replacement for mlkem/fips202/fips202.h, and exposing
+ *              the same API (see FIPS202.md).
+ *
+ *****************************************************************************/
+/* #define MLK_FIPS202_CUSTOM_HEADER "SOME_FILE.h" */
+
+/******************************************************************************
+ * Name:        MLK_FIPS202X4_CUSTOM_HEADER
+ *
+ * Description: Custom header to use for FIPS-202-X4
+ *
+ *              This should only be set if you intend to use a custom
+ *              FIPS-202 implementation, different from the one shipped
+ *              with mlkem-native.
+ *
+ *              If set, it must be the name of a file serving as the
+ *              replacement for mlkem/fips202/fips202x4.h, and exposing
+ *              the same API (see FIPS202.md).
+ *
+ *****************************************************************************/
+/* #define MLK_FIPS202X4_CUSTOM_HEADER "SOME_FILE.h" */
+
+/******************************************************************************
+ * Name:        MLK_USE_CT_ZEROIZE_NATIVE
+ *
+ * Description: In compliance with FIPS 203 Section 3.3, mlkem-native zeroizes
+ *              intermediate stack buffers before returning from function calls.
+ *
+ *              Set this option and define `ct_zeroize_native` if you want to
+ *              use a custom method to zeroize intermediate stack buffers.
+ *              The default implementation uses SecureZeroMemory on Windows
+ *              and a memset + compiler barrier otherwise. If neither of those
+ *              is available on the target platform, compilation will fail,
+ *              and you will need to use MLK_USE_CT_ZEROIZE_NATIVE to provide
+ *              a custom implementation of `ct_zeroize_native()`.
+ *
+ *              WARNING:
+ *              The explicit stack zeroization conducted by mlkem-native
+ *              reduces the likelihood of data leaking on the stack, but
+ *              does not eliminate it! The C standard makes no guarantee about
+ *              where a compiler allocates structures and whether/where it makes
+ *              copies of them. Also, in addition to entire structures, there
+ *              may also be potentially exploitable leakage of individual values
+ *              on the stack.
+ *
+ *              If you need bullet-proof zeroization of the stack, you need to
+ *              consider additional measures instead of of what this feature
+ *              provides. In this case, you can set ct_zeroize_native to a
+ *              no-op.
+ *
+ *****************************************************************************/
+/* #define MLK_USE_CT_ZEROIZE_NATIVE
+   #if !defined(__ASSEMBLER__)
+   #include <stdint.h>
+   #include "sys.h"
+   static MLK_INLINE void ct_zeroize_native(void *ptr, size_t len)
+   {
+       ... your implementation ...
+   }
+   #endif
+*/
+
+/******************************************************************************
+ * Name:        MLK_KEYGEN_PCT
+ *
+ * Description: Compliance with [FIPS 140-3
+ *IG](https://csrc.nist.gov/csrc/media/Projects/cryptographic-module-validation-program/documents/fips%20140-3/FIPS%20140-3%20IG.pdf)
+ *              requires a Pairwise Consistency Test (PCT) to be carried out
+ *              on a freshly generated keypair before it can be exported.
+ *
+ *              Set this option if such a check should be implemented.
+ *              In this case, crypto_kem_keypair_derand and crypto_kem_keypair
+ *              will return a non-zero error code if the PCT failed.
+ *
+ *              NOTE: This feature will drastically lower the performance of
+ *              key generation.
+ *
+ *****************************************************************************/
+/* #define MLK_KEYGEN_PCT */
+
+/******************************************************************************
+ * Name:        MLK_KEYGEN_PCT_BREAKAGE_TEST
+ *
+ * Description: If this option is set, the user must provide a runtime
+ *              function `static inline int mlk_break_pct() { ... }` to
+ *              indicate whether the PCT should be made fail.
+ *
+ *              This option only has an effect if MLK_KEYGEN_PCT is set.
+ *
+ *****************************************************************************/
+/* #define MLK_KEYGEN_PCT_BREAKAGE_TEST
+   #if !defined(__ASSEMBLER__)
+   #include "sys.h"
+   static MLK_INLINE int mlk_break_pct(void)
+   {
+       ... return 0/1 depending on whether PCT should be broken ...
+   }
+   #endif
+*/
+
+/*************************  Config internals  ********************************/
+
+/* Default namespace
+ *
+ * Don't change this. If you need a different namespace, re-define
+ * MLK_NAMESPACE_PREFIX above instead, and remove the following.
+ *
+ * The default MLKEM namespace is
+ *
+ *   PQCP_MLKEM_NATIVE_MLKEM<LEVEL>_
+ *
+ * e.g., PQCP_MLKEM_NATIVE_MLKEM512_
+ */
+
+#if MLKEM_K == 2
+#define MLK_DEFAULT_NAMESPACE_PREFIX PQCP_MLKEM_NATIVE_MLKEM512
+#elif MLKEM_K == 3
+#define MLK_DEFAULT_NAMESPACE_PREFIX PQCP_MLKEM_NATIVE_MLKEM768
+#elif MLKEM_K == 4
+#define MLK_DEFAULT_NAMESPACE_PREFIX PQCP_MLKEM_NATIVE_MLKEM1024
+#endif
+
+#endif /* MLK_CONFIG_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/debug.c b/crypto/fipsmodule/ml_kem/mlkem/debug.c
new file mode 100644
index 0000000000..66fed302e7
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/debug.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/* NOTE: You can remove this file unless you compile with MLKEM_DEBUG. */
+
+#include "common.h"
+
+#if !defined(MLK_MULTILEVEL_BUILD_NO_SHARED) && defined(MLKEM_DEBUG)
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "debug.h"
+
+#define MLK_DEBUG_ERROR_HEADER "[ERROR:%s:%04d] "
+
+void mlkem_debug_assert(const char *file, int line, const int val)
+{
+  if (val == 0)
+  {
+    fprintf(stderr, MLK_DEBUG_ERROR_HEADER "Assertion failed (value %d)\n",
+            file, line, val);
+    exit(1);
+  }
+}
+
+void mlkem_debug_check_bounds(const char *file, int line, const int16_t *ptr,
+                              unsigned len, int lower_bound_exclusive,
+                              int upper_bound_exclusive)
+{
+  int err = 0;
+  unsigned i;
+  for (i = 0; i < len; i++)
+  {
+    int16_t val = ptr[i];
+    if (!(val > lower_bound_exclusive && val < upper_bound_exclusive))
+    {
+      fprintf(
+          stderr,
+          MLK_DEBUG_ERROR_HEADER
+          "Bounds assertion failed: Index %u, value %d out of bounds (%d,%d)\n",
+          file, line, i, (int)val, lower_bound_exclusive,
+          upper_bound_exclusive);
+      err = 1;
+    }
+  }
+
+  if (err == 1)
+    exit(1);
+}
+
+#else /* !MLK_MULTILEVEL_BUILD_NO_SHARED && MLKEM_DEBUG */
+
+MLK_EMPTY_CU(debug)
+
+#endif /* !MLK_MULTILEVEL_BUILD_NO_SHARED && MLKEM_DEBUG */
+
+/* To facilitate single-compilation-unit (SCU) builds, undefine all macros.
+ * Don't modify by hand -- this is auto-generated by scripts/autogen. */
+#undef MLK_DEBUG_ERROR_HEADER
diff --git a/crypto/fipsmodule/ml_kem/mlkem/debug.h b/crypto/fipsmodule/ml_kem/mlkem/debug.h
new file mode 100644
index 0000000000..6623aa2a50
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/debug.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef MLK_DEBUG_H
+#define MLK_DEBUG_H
+#include "common.h"
+
+#if defined(MLKEM_DEBUG)
+#include <stdint.h>
+
+/*************************************************
+ * Name:        mlkem_debug_assert
+ *
+ * Description: Check debug assertion
+ *
+ *              Prints an error message to stderr and calls
+ *              exit(1) if not.
+ *
+ * Arguments:   - file: filename
+ *              - line: line number
+ *              - val: Value asserted to be non-zero
+ **************************************************/
+#define mlkem_debug_assert MLK_NAMESPACE(mlkem_debug_assert)
+void mlkem_debug_assert(const char *file, int line, const int val);
+
+/*************************************************
+ * Name:        mlkem_debug_check_bounds
+ *
+ * Description: Check whether values in an array of int16_t
+ *              are within specified bounds.
+ *
+ *              Prints an error message to stderr and calls
+ *              exit(1) if not.
+ *
+ * Arguments:   - file: filename
+ *              - line: line number
+ *              - ptr: Base of array to be checked
+ *              - len: Number of int16_t in ptr
+ *              - lower_bound_exclusive: Exclusive lower bound
+ *              - upper_bound_exclusive: Exclusive upper bound
+ **************************************************/
+#define mlkem_debug_check_bounds MLK_NAMESPACE(mlkem_debug_check_bounds)
+void mlkem_debug_check_bounds(const char *file, int line, const int16_t *ptr,
+                              unsigned len, int lower_bound_exclusive,
+                              int upper_bound_exclusive);
+
+/* Check assertion, calling exit() upon failure
+ *
+ * val: Value that's asserted to be non-zero
+ */
+#define debug_assert(val) mlkem_debug_assert(__FILE__, __LINE__, (val))
+
+/* Check bounds in array of int16_t's
+ * ptr: Base of int16_t array; will be explicitly cast to int16_t*,
+ *      so you may pass a byte-compatible type such as poly or polyvec.
+ * len: Number of int16_t in array
+ * value_lb: Inclusive lower value bound
+ * value_ub: Exclusive upper value bound */
+#define debug_assert_bound(ptr, len, value_lb, value_ub)                      \
+  mlkem_debug_check_bounds(__FILE__, __LINE__, (const int16_t *)(ptr), (len), \
+                           (value_lb)-1, (value_ub))
+
+/* Check absolute bounds in array of int16_t's
+ * ptr: Base of array, expression of type int16_t*
+ * len: Number of int16_t in array
+ * value_abs_bd: Exclusive absolute upper bound */
+#define debug_assert_abs_bound(ptr, len, value_abs_bd) \
+  debug_assert_bound((ptr), (len), (-(value_abs_bd) + 1), (value_abs_bd))
+
+/* Version of bounds assertions for 2-dimensional arrays */
+#define debug_assert_bound_2d(ptr, len0, len1, value_lb, value_ub) \
+  debug_assert_bound((ptr), ((len0) * (len1)), (value_lb), (value_ub))
+
+#define debug_assert_abs_bound_2d(ptr, len0, len1, value_abs_bd) \
+  debug_assert_abs_bound((ptr), ((len0) * (len1)), (value_abs_bd))
+
+/* When running CBMC, convert debug assertions into proof obligations */
+#elif defined(CBMC)
+
+#include "../cbmc.h"
+
+#define debug_assert(val) cassert(val)
+
+#define debug_assert_bound(ptr, len, value_lb, value_ub) \
+  cassert(array_bound(((int16_t *)(ptr)), 0, (len), (value_lb), (value_ub)))
+
+#define debug_assert_abs_bound(ptr, len, value_abs_bd) \
+  cassert(array_abs_bound(((int16_t *)(ptr)), 0, (len), (value_abs_bd)))
+
+/* Because of https://github.com/diffblue/cbmc/issues/8570, we can't
+ * just use a single flattened array_bound(...) here. */
+#define debug_assert_bound_2d(ptr, M, N, value_lb, value_ub)           \
+  cassert(forall(kN, 0, (M),                                           \
+                 array_bound(&((int16_t(*)[(N)])(ptr))[kN][0], 0, (N), \
+                             (value_lb), (value_ub))))
+
+#define debug_assert_abs_bound_2d(ptr, M, N, value_abs_bd)                 \
+  cassert(forall(kN, 0, (M),                                               \
+                 array_abs_bound(&((int16_t(*)[(N)])(ptr))[kN][0], 0, (N), \
+                                 (value_abs_bd))))
+
+#else /* MLKEM_DEBUG */
+
+#define debug_assert(val) \
+  do                      \
+  {                       \
+  } while (0)
+#define debug_assert_bound(ptr, len, value_lb, value_ub) \
+  do                                                     \
+  {                                                      \
+  } while (0)
+#define debug_assert_abs_bound(ptr, len, value_abs_bd) \
+  do                                                   \
+  {                                                    \
+  } while (0)
+
+#define debug_assert_bound_2d(ptr, len0, len1, value_lb, value_ub) \
+  do                                                               \
+  {                                                                \
+  } while (0)
+
+#define debug_assert_abs_bound_2d(ptr, len0, len1, value_abs_bd) \
+  do                                                             \
+  {                                                              \
+  } while (0)
+
+
+#endif /* MLKEM_DEBUG */
+#endif /* MLK_DEBUG_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/indcpa.c b/crypto/fipsmodule/ml_kem/mlkem/indcpa.c
new file mode 100644
index 0000000000..5b1e8d3f69
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/indcpa.c
@@ -0,0 +1,509 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "arith_backend.h"
+#include "cbmc.h"
+#include "debug.h"
+#include "indcpa.h"
+#include "poly.h"
+#include "poly_k.h"
+#include "randombytes.h"
+#include "sampling.h"
+#include "symmetric.h"
+
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define pack_pk MLK_NAMESPACE_K(pack_pk)
+#define unpack_pk MLK_NAMESPACE_K(unpack_pk)
+#define pack_sk MLK_NAMESPACE_K(pack_sk)
+#define unpack_sk MLK_NAMESPACE_K(unpack_sk)
+#define pack_ciphertext MLK_NAMESPACE_K(pack_ciphertext)
+#define unpack_ciphertext MLK_NAMESPACE_K(unpack_ciphertext)
+#define matvec_mul MLK_NAMESPACE_K(matvec_mul)
+/* End of static namespacing */
+
+/*************************************************
+ * Name:        pack_pk
+ *
+ * Description: Serialize the public key as concatenation of the
+ *              serialized vector of polynomials pk
+ *              and the public seed used to generate the matrix A.
+ *
+ * Arguments:   uint8_t *r: pointer to the output serialized public key
+ *              polyvec *pk: pointer to the input public-key polyvec.
+ *                Must have coefficients within [0,..,q-1].
+ *              const uint8_t *seed: pointer to the input public seed
+ *
+ * Specification:
+ * Implements [FIPS 203, Algorithm 13 (K-PKE.KeyGen), L19]
+ *
+ **************************************************/
+static void pack_pk(uint8_t r[MLKEM_INDCPA_PUBLICKEYBYTES], polyvec *pk,
+                    const uint8_t seed[MLKEM_SYMBYTES])
+{
+  debug_assert_bound_2d(pk, MLKEM_K, MLKEM_N, 0, MLKEM_Q);
+  polyvec_tobytes(r, pk);
+  memcpy(r + MLKEM_POLYVECBYTES, seed, MLKEM_SYMBYTES);
+}
+
+/*************************************************
+ * Name:        unpack_pk
+ *
+ * Description: De-serialize public key from a byte array;
+ *              approximate inverse of pack_pk
+ *
+ * Arguments:   - polyvec *pk: pointer to output public-key polynomial vector
+ *                  Coefficients will be normalized to [0,..,q-1].
+ *              - uint8_t *seed: pointer to output seed to generate matrix A
+ *              - const uint8_t *packedpk: pointer to input serialized public
+ *                  key.
+ *
+ * Specification:
+ * Implements [FIPS 203, Algorithm 14 (K-PKE.Encrypt), L2-3]
+ *
+ **************************************************/
+static void unpack_pk(polyvec *pk, uint8_t seed[MLKEM_SYMBYTES],
+                      const uint8_t packedpk[MLKEM_INDCPA_PUBLICKEYBYTES])
+{
+  polyvec_frombytes(pk, packedpk);
+  memcpy(seed, packedpk + MLKEM_POLYVECBYTES, MLKEM_SYMBYTES);
+
+  /* NOTE: If a modulus check was conducted on the PK, we know at this
+   * point that the coefficients of `pk` are unsigned canonical. The
+   * specifications and proofs, however, do _not_ assume this, and instead
+   * work with the easily provable bound by MLKEM_UINT12_LIMIT. */
+}
+
+/*************************************************
+ * Name:        pack_sk
+ *
+ * Description: Serialize the secret key
+ *
+ * Arguments:   - uint8_t *r: pointer to output serialized secret key
+ *              - polyvec *sk: pointer to input vector of polynomials (secret
+ *                key)
+ *
+ * Specification:
+ * Implements [FIPS 203, Algorithm 13 (K-PKE.KeyGen), L20]
+ *
+ **************************************************/
+static void pack_sk(uint8_t r[MLKEM_INDCPA_SECRETKEYBYTES], polyvec *sk)
+{
+  debug_assert_bound_2d(sk, MLKEM_K, MLKEM_N, 0, MLKEM_Q);
+  polyvec_tobytes(r, sk);
+}
+
+/*************************************************
+ * Name:        unpack_sk
+ *
+ * Description: De-serialize the secret key; inverse of pack_sk
+ *
+ * Arguments:   - polyvec *sk: pointer to output vector of polynomials (secret
+ *                key)
+ *              - const uint8_t *packedsk: pointer to input serialized secret
+ *                key
+ *
+ * Specification:
+ * Implements [FIPS 203, Algorithm 15 (K-PKE.Decrypt), L5]
+ *
+ **************************************************/
+static void unpack_sk(polyvec *sk,
+                      const uint8_t packedsk[MLKEM_INDCPA_SECRETKEYBYTES])
+{
+  polyvec_frombytes(sk, packedsk);
+}
+
+/*************************************************
+ * Name:        pack_ciphertext
+ *
+ * Description: Serialize the ciphertext as concatenation of the
+ *              compressed and serialized vector of polynomials b
+ *              and the compressed and serialized polynomial v
+ *
+ * Arguments:   uint8_t *r: pointer to the output serialized ciphertext
+ *              poly *pk: pointer to the input vector of polynomials b
+ *              poly *v: pointer to the input polynomial v
+ *
+ * Specification:
+ * Implements [FIPS 203, Algorithm 14 (K-PKE.Encrypt), L22-23]
+ *
+ **************************************************/
+static void pack_ciphertext(uint8_t r[MLKEM_INDCPA_BYTES], polyvec *b, poly *v)
+{
+  polyvec_compress_du(r, b);
+  poly_compress_dv(r + MLKEM_POLYVECCOMPRESSEDBYTES_DU, v);
+}
+
+/*************************************************
+ * Name:        unpack_ciphertext
+ *
+ * Description: De-serialize and decompress ciphertext from a byte array;
+ *              approximate inverse of pack_ciphertext
+ *
+ * Arguments:   - polyvec *b: pointer to the output vector of polynomials b
+ *              - poly *v: pointer to the output polynomial v
+ *              - const uint8_t *c: pointer to the input serialized ciphertext
+ *
+ * Specification:
+ * Implements [FIPS 203, Algorithm 15 (K-PKE.Decrypt), L1-4]
+ *
+ **************************************************/
+static void unpack_ciphertext(polyvec *b, poly *v,
+                              const uint8_t c[MLKEM_INDCPA_BYTES])
+{
+  polyvec_decompress_du(b, c);
+  poly_decompress_dv(v, c + MLKEM_POLYVECCOMPRESSEDBYTES_DU);
+}
+
+#if !defined(MLK_USE_NATIVE_NTT_CUSTOM_ORDER)
+/* This namespacing is not done at the top to avoid a naming conflict
+ * with native backends, which are currently not yet namespaced. */
+#define poly_permute_bitrev_to_custom \
+  MLK_NAMESPACE_K(poly_permute_bitrev_to_custom)
+
+static MLK_INLINE void poly_permute_bitrev_to_custom(int16_t data[MLKEM_N])
+__contract__(
+  /* We don't specify that this should be a permutation, but only
+   * that it does not change the bound established at the end of gen_matrix. */
+  requires(memory_no_alias(data, sizeof(int16_t) * MLKEM_N))
+  requires(array_bound(data, 0, MLKEM_N, 0, MLKEM_Q))
+  assigns(memory_slice(data, sizeof(poly)))
+  ensures(array_bound(data, 0, MLKEM_N, 0, MLKEM_Q))) { ((void)data); }
+#endif /* MLK_USE_NATIVE_NTT_CUSTOM_ORDER */
+
+/* Not static for benchmarking */
+MLK_INTERNAL_API
+void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
+{
+  unsigned i, j;
+  /*
+   * We generate four separate seed arrays rather than a single one to work
+   * around limitations in CBMC function contracts dealing with disjoint slices
+   * of the same parent object.
+   */
+
+  MLK_ALIGN uint8_t seed0[MLKEM_SYMBYTES + 2];
+  MLK_ALIGN uint8_t seed1[MLKEM_SYMBYTES + 2];
+  MLK_ALIGN uint8_t seed2[MLKEM_SYMBYTES + 2];
+  MLK_ALIGN uint8_t seed3[MLKEM_SYMBYTES + 2];
+  uint8_t *seedxy[4];
+  seedxy[0] = seed0;
+  seedxy[1] = seed1;
+  seedxy[2] = seed2;
+  seedxy[3] = seed3;
+
+  for (j = 0; j < 4; j++)
+  {
+    memcpy(seedxy[j], seed, MLKEM_SYMBYTES);
+  }
+
+  /* Sample 4 matrix entries a time. */
+  for (i = 0; i < (MLKEM_K * MLKEM_K / 4) * 4; i += 4)
+  {
+    uint8_t x, y;
+
+    for (j = 0; j < 4; j++)
+    {
+      x = (i + j) / MLKEM_K;
+      y = (i + j) % MLKEM_K;
+      if (transposed)
+      {
+        seedxy[j][MLKEM_SYMBYTES + 0] = x;
+        seedxy[j][MLKEM_SYMBYTES + 1] = y;
+      }
+      else
+      {
+        seedxy[j][MLKEM_SYMBYTES + 0] = y;
+        seedxy[j][MLKEM_SYMBYTES + 1] = x;
+      }
+    }
+
+    /*
+     * This call writes across polyvec boundaries for K=2 and K=3.
+     * This is intentional and safe.
+     */
+    poly_rej_uniform_x4(&a[0].vec[0] + i, seedxy);
+  }
+
+  /* For MLKEM_K == 3, sample the last entry individually. */
+  if (i < MLKEM_K * MLKEM_K)
+  {
+    uint8_t x, y;
+    x = i / MLKEM_K;
+    y = i % MLKEM_K;
+
+    if (transposed)
+    {
+      seed0[MLKEM_SYMBYTES + 0] = x;
+      seed0[MLKEM_SYMBYTES + 1] = y;
+    }
+    else
+    {
+      seed0[MLKEM_SYMBYTES + 0] = y;
+      seed0[MLKEM_SYMBYTES + 1] = x;
+    }
+
+    poly_rej_uniform(&a[0].vec[0] + i, seed0);
+    i++;
+  }
+
+  debug_assert(i == MLKEM_K * MLKEM_K);
+
+  /*
+   * The public matrix is generated in NTT domain. If the native backend
+   * uses a custom order in NTT domain, permute A accordingly.
+   */
+  for (i = 0; i < MLKEM_K; i++)
+  {
+    for (j = 0; j < MLKEM_K; j++)
+    {
+      poly_permute_bitrev_to_custom(a[i].vec[j].coeffs);
+    }
+  }
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  ct_zeroize(seed0, sizeof(seed0));
+  ct_zeroize(seed1, sizeof(seed1));
+  ct_zeroize(seed2, sizeof(seed2));
+  ct_zeroize(seed3, sizeof(seed3));
+}
+
+/*************************************************
+ * Name:        matvec_mul
+ *
+ * Description: Computes matrix-vector product in NTT domain,
+ *              via Montgomery multiplication.
+ *
+ * Arguments:   - polyvec *out: Pointer to output polynomial vector
+ *              - polyvec a[MLKEM_K]: Input matrix. Must be in NTT domain
+ *                  and have coefficients of absolute value < 4096.
+ *              - polyvec *v: Input polynomial vector. Must be in NTT domain.
+ *              - polyvec *vc: Mulcache for v, computed via
+ *                  polyvec_mulcache_compute().
+ *
+ * Specification: Implements [FIPS 203, Section 2.4.7, Eq (2.12), (2.13)]
+ *
+ **************************************************/
+static void matvec_mul(polyvec *out, const polyvec a[MLKEM_K], const polyvec *v,
+                       const polyvec_mulcache *vc)
+__contract__(
+  requires(memory_no_alias(out, sizeof(polyvec)))
+  requires(memory_no_alias(a, sizeof(polyvec) * MLKEM_K))
+  requires(memory_no_alias(v, sizeof(polyvec)))
+  requires(memory_no_alias(vc, sizeof(polyvec_mulcache)))
+  requires(forall(k0, 0, MLKEM_K,
+    forall(k1, 0, MLKEM_K,
+      array_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N, 0, MLKEM_UINT12_LIMIT))))
+  assigns(object_whole(out)))
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_K; i++)
+  __loop__(
+    assigns(i, object_whole(out))
+    invariant(i <= MLKEM_K))
+  {
+    polyvec_basemul_acc_montgomery_cached(&out->vec[i], &a[i], v, vc);
+  }
+}
+
+MLK_INTERNAL_API
+void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
+                           uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
+                           const uint8_t coins[MLKEM_SYMBYTES])
+{
+  MLK_ALIGN uint8_t buf[2 * MLKEM_SYMBYTES];
+  const uint8_t *publicseed = buf;
+  const uint8_t *noiseseed = buf + MLKEM_SYMBYTES;
+  polyvec a[MLKEM_K], e, pkpv, skpv;
+  polyvec_mulcache skpv_cache;
+
+  MLK_ALIGN uint8_t coins_with_domain_separator[MLKEM_SYMBYTES + 1];
+  /* Concatenate coins with MLKEM_K for domain separation of security levels */
+  memcpy(coins_with_domain_separator, coins, MLKEM_SYMBYTES);
+  coins_with_domain_separator[MLKEM_SYMBYTES] = MLKEM_K;
+
+  hash_g(buf, coins_with_domain_separator, MLKEM_SYMBYTES + 1);
+
+  /*
+   * Declassify the public seed.
+   * Required to use it in conditional-branches in rejection sampling.
+   * This is needed because all output of randombytes is marked as secret
+   * (=undefined)
+   */
+  MLK_CT_TESTING_DECLASSIFY(publicseed, MLKEM_SYMBYTES);
+
+  gen_matrix(a, publicseed, 0 /* no transpose */);
+
+#if MLKEM_K == 2
+  poly_getnoise_eta1_4x(skpv.vec + 0, skpv.vec + 1, e.vec + 0, e.vec + 1,
+                        noiseseed, 0, 1, 2, 3);
+#elif MLKEM_K == 3
+  /*
+   * Only the first three output buffers are needed.
+   * The laster parameter is a dummy that's overwritten later.
+   */
+  poly_getnoise_eta1_4x(skpv.vec + 0, skpv.vec + 1, skpv.vec + 2,
+                        pkpv.vec + 0 /* irrelevant */, noiseseed, 0, 1, 2,
+                        0xFF /* irrelevant */);
+  /* Same here */
+  poly_getnoise_eta1_4x(e.vec + 0, e.vec + 1, e.vec + 2,
+                        pkpv.vec + 0 /* irrelevant */, noiseseed, 3, 4, 5,
+                        0xFF /* irrelevant */);
+#elif MLKEM_K == 4
+  poly_getnoise_eta1_4x(skpv.vec + 0, skpv.vec + 1, skpv.vec + 2, skpv.vec + 3,
+                        noiseseed, 0, 1, 2, 3);
+  poly_getnoise_eta1_4x(e.vec + 0, e.vec + 1, e.vec + 2, e.vec + 3, noiseseed,
+                        4, 5, 6, 7);
+#endif
+
+  polyvec_ntt(&skpv);
+  polyvec_ntt(&e);
+
+  polyvec_mulcache_compute(&skpv_cache, &skpv);
+  matvec_mul(&pkpv, a, &skpv, &skpv_cache);
+  polyvec_tomont(&pkpv);
+
+  polyvec_add(&pkpv, &e);
+  polyvec_reduce(&pkpv);
+  polyvec_reduce(&skpv);
+
+  pack_sk(sk, &skpv);
+  pack_pk(pk, &pkpv, publicseed);
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  ct_zeroize(buf, sizeof(buf));
+  ct_zeroize(coins_with_domain_separator, sizeof(coins_with_domain_separator));
+  ct_zeroize(a, sizeof(a));
+  ct_zeroize(&e, sizeof(e));
+  ct_zeroize(&skpv, sizeof(skpv));
+  ct_zeroize(&skpv_cache, sizeof(skpv_cache));
+}
+
+
+MLK_INTERNAL_API
+void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
+                const uint8_t m[MLKEM_INDCPA_MSGBYTES],
+                const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
+                const uint8_t coins[MLKEM_SYMBYTES])
+{
+  MLK_ALIGN uint8_t seed[MLKEM_SYMBYTES];
+  polyvec sp, pkpv, ep, at[MLKEM_K], b;
+  poly v, k, epp;
+  polyvec_mulcache sp_cache;
+
+  unpack_pk(&pkpv, seed, pk);
+  poly_frommsg(&k, m);
+
+  /*
+   * Declassify the public seed.
+   * Required to use it in conditional-branches in rejection sampling.
+   * This is needed because in re-encryption the publicseed originated from sk
+   * which is marked undefined.
+   */
+  MLK_CT_TESTING_DECLASSIFY(seed, MLKEM_SYMBYTES);
+
+  gen_matrix(at, seed, 1 /* transpose */);
+
+#if MLKEM_K == 2
+  poly_getnoise_eta1122_4x(sp.vec + 0, sp.vec + 1, ep.vec + 0, ep.vec + 1,
+                           coins, 0, 1, 2, 3);
+  poly_getnoise_eta2(&epp, coins, 4);
+#elif MLKEM_K == 3
+  /*
+   * In this call, only the first three output buffers are needed.
+   * The last parameter is a dummy that's overwritten later.
+   */
+  poly_getnoise_eta1_4x(sp.vec + 0, sp.vec + 1, sp.vec + 2, &b.vec[0], coins, 0,
+                        1, 2, 0xFF);
+  /* The fourth output buffer in this call _is_ used. */
+  poly_getnoise_eta2_4x(ep.vec + 0, ep.vec + 1, ep.vec + 2, &epp, coins, 3, 4,
+                        5, 6);
+#elif MLKEM_K == 4
+  poly_getnoise_eta1_4x(sp.vec + 0, sp.vec + 1, sp.vec + 2, sp.vec + 3, coins,
+                        0, 1, 2, 3);
+  poly_getnoise_eta2_4x(ep.vec + 0, ep.vec + 1, ep.vec + 2, ep.vec + 3, coins,
+                        4, 5, 6, 7);
+  poly_getnoise_eta2(&epp, coins, 8);
+#endif
+
+  polyvec_ntt(&sp);
+
+  polyvec_mulcache_compute(&sp_cache, &sp);
+  matvec_mul(&b, at, &sp, &sp_cache);
+  polyvec_basemul_acc_montgomery_cached(&v, &pkpv, &sp, &sp_cache);
+
+  polyvec_invntt_tomont(&b);
+  poly_invntt_tomont(&v);
+
+  polyvec_add(&b, &ep);
+  poly_add(&v, &epp);
+  poly_add(&v, &k);
+
+  polyvec_reduce(&b);
+  poly_reduce(&v);
+
+  pack_ciphertext(c, &b, &v);
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  ct_zeroize(seed, sizeof(seed));
+  ct_zeroize(&sp, sizeof(sp));
+  ct_zeroize(&sp_cache, sizeof(sp_cache));
+  ct_zeroize(&b, sizeof(b));
+  ct_zeroize(&v, sizeof(v));
+  ct_zeroize(at, sizeof(at));
+  ct_zeroize(&k, sizeof(k));
+  ct_zeroize(&ep, sizeof(ep));
+  ct_zeroize(&epp, sizeof(epp));
+}
+
+MLK_INTERNAL_API
+void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES],
+                const uint8_t c[MLKEM_INDCPA_BYTES],
+                const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES])
+{
+  polyvec b, skpv;
+  poly v, sb;
+  polyvec_mulcache b_cache;
+
+  unpack_ciphertext(&b, &v, c);
+  unpack_sk(&skpv, sk);
+
+  polyvec_ntt(&b);
+  polyvec_mulcache_compute(&b_cache, &b);
+  polyvec_basemul_acc_montgomery_cached(&sb, &skpv, &b, &b_cache);
+  poly_invntt_tomont(&sb);
+
+  poly_sub(&v, &sb);
+  poly_reduce(&v);
+
+  poly_tomsg(m, &v);
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  ct_zeroize(&skpv, sizeof(skpv));
+  ct_zeroize(&b, sizeof(b));
+  ct_zeroize(&b_cache, sizeof(b_cache));
+  ct_zeroize(&v, sizeof(v));
+  ct_zeroize(&sb, sizeof(sb));
+}
+
+/* To facilitate single-compilation-unit (SCU) builds, undefine all macros.
+ * Don't modify by hand -- this is auto-generated by scripts/autogen. */
+#undef pack_pk
+#undef unpack_pk
+#undef pack_sk
+#undef unpack_sk
+#undef pack_ciphertext
+#undef unpack_ciphertext
+#undef matvec_mul
+#undef poly_permute_bitrev_to_custom
diff --git a/crypto/fipsmodule/ml_kem/mlkem/indcpa.h b/crypto/fipsmodule/ml_kem/mlkem/indcpa.h
new file mode 100644
index 0000000000..b974370dea
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/indcpa.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef MLK_INDCPA_H
+#define MLK_INDCPA_H
+
+#include <stdint.h>
+#include "cbmc.h"
+#include "common.h"
+#include "poly_k.h"
+
+#define gen_matrix MLK_NAMESPACE_K(gen_matrix)
+/*************************************************
+ * Name:        gen_matrix
+ *
+ * Description: Deterministically generate matrix A (or the transpose of A)
+ *              from a seed. Entries of the matrix are polynomials that look
+ *              uniformly random. Performs rejection sampling on output of
+ *              a XOF
+ *
+ * Arguments:   - polyvec *a: pointer to output matrix A
+ *              - const uint8_t *seed: pointer to input seed
+ *              - int transposed: boolean deciding whether A or A^T is generated
+ *
+ * Specification: Implements [FIPS 203, Algorithm 13 (K-PKE.KeyGen), L3-7]
+ *                and [FIPS 203, Algorithm 14 (K-PKE.Encrypt), L4-8].
+ *                The `transposed` parameter only affects internal presentation.
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
+__contract__(
+  requires(memory_no_alias(a, sizeof(polyvec) * MLKEM_K))
+  requires(memory_no_alias(seed, MLKEM_SYMBYTES))
+  requires(transposed == 0 || transposed == 1)
+  assigns(object_whole(a))
+  ensures(forall(x, 0, MLKEM_K, forall(y, 0, MLKEM_K,
+  array_bound(a[x].vec[y].coeffs, 0, MLKEM_N, 0, MLKEM_Q))));
+);
+
+#define indcpa_keypair_derand MLK_NAMESPACE_K(indcpa_keypair_derand)
+/*************************************************
+ * Name:        indcpa_keypair_derand
+ *
+ * Description: Generates public and private key for the CPA-secure
+ *              public-key encryption scheme underlying ML-KEM
+ *
+ * Arguments:   - uint8_t *pk: pointer to output public key
+ *                             (of length MLKEM_INDCPA_PUBLICKEYBYTES bytes)
+ *              - uint8_t *sk: pointer to output private key
+ *                             (of length MLKEM_INDCPA_SECRETKEYBYTES bytes)
+ *              - const uint8_t *coins: pointer to input randomness
+ *                             (of length MLKEM_SYMBYTES bytes)
+ *
+ * Specification: Implements [FIPS 203, Algorithm 13 (K-PKE.KeyGen)].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
+                           uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
+                           const uint8_t coins[MLKEM_SYMBYTES])
+__contract__(
+  requires(memory_no_alias(pk, MLKEM_INDCPA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCPA_SECRETKEYBYTES))
+  requires(memory_no_alias(coins, MLKEM_SYMBYTES))
+  assigns(object_whole(pk))
+  assigns(object_whole(sk))
+);
+
+#define indcpa_enc MLK_NAMESPACE_K(indcpa_enc)
+/*************************************************
+ * Name:        indcpa_enc
+ *
+ * Description: Encryption function of the CPA-secure
+ *              public-key encryption scheme underlying Kyber.
+ *
+ * Arguments:   - uint8_t *c: pointer to output ciphertext
+ *                            (of length MLKEM_INDCPA_BYTES bytes)
+ *              - const uint8_t *m: pointer to input message
+ *                                  (of length MLKEM_INDCPA_MSGBYTES bytes)
+ *              - const uint8_t *pk: pointer to input public key
+ *                                   (of length MLKEM_INDCPA_PUBLICKEYBYTES)
+ *              - const uint8_t *coins: pointer to input random coins used as
+ *                 seed (of length MLKEM_SYMBYTES) to deterministically generate
+ *                 all randomness
+ *
+ * Specification: Implements [FIPS 203, Algorithm 14 (K-PKE.Encrypt)].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
+                const uint8_t m[MLKEM_INDCPA_MSGBYTES],
+                const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
+                const uint8_t coins[MLKEM_SYMBYTES])
+__contract__(
+  requires(memory_no_alias(c, MLKEM_INDCPA_BYTES))
+  requires(memory_no_alias(m, MLKEM_INDCPA_MSGBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCPA_PUBLICKEYBYTES))
+  requires(memory_no_alias(coins, MLKEM_SYMBYTES))
+  assigns(object_whole(c))
+);
+
+#define indcpa_dec MLK_NAMESPACE_K(indcpa_dec)
+/*************************************************
+ * Name:        indcpa_dec
+ *
+ * Description: Decryption function of the CPA-secure
+ *              public-key encryption scheme underlying Kyber.
+ *
+ * Arguments:   - uint8_t *m: pointer to output decrypted message
+ *                            (of length MLKEM_INDCPA_MSGBYTES)
+ *              - const uint8_t *c: pointer to input ciphertext
+ *                                  (of length MLKEM_INDCPA_BYTES)
+ *              - const uint8_t *sk: pointer to input secret key
+ *                                   (of length MLKEM_INDCPA_SECRETKEYBYTES)
+ *
+ * Specification: Implements [FIPS 203, Algorithm 15 (K-PKE.Decrypt)].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES],
+                const uint8_t c[MLKEM_INDCPA_BYTES],
+                const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES])
+__contract__(
+  requires(memory_no_alias(c, MLKEM_INDCPA_BYTES))
+  requires(memory_no_alias(m, MLKEM_INDCPA_MSGBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCPA_SECRETKEYBYTES))
+  assigns(object_whole(m))
+);
+
+#endif /* MLK_INDCPA_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/kem.c b/crypto/fipsmodule/ml_kem/mlkem/kem.c
new file mode 100644
index 0000000000..b98d6ce5ef
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/kem.c
@@ -0,0 +1,331 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "indcpa.h"
+#include "kem.h"
+#include "randombytes.h"
+#include "symmetric.h"
+#include "verify.h"
+
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define check_pk MLK_NAMESPACE_K(check_pk)
+#define check_sk MLK_NAMESPACE_K(check_sk)
+#define check_pct MLK_NAMESPACE_K(check_pct)
+/* End of static namespacing */
+
+#if defined(CBMC)
+/* Redeclaration with contract needed for CBMC only */
+int memcmp(const void *str1, const void *str2, size_t n)
+__contract__(
+  requires(memory_no_alias(str1, n))
+  requires(memory_no_alias(str2, n))
+);
+#endif
+
+/*************************************************
+ * Name:        check_pk
+ *
+ * Description: Implements modulus check mandated by FIPS 203,
+ *              i.e., ensures that coefficients are in [0,q-1].
+ *
+ * Arguments:   - const uint8_t *pk: pointer to input public key
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
+ *
+ * Returns: - 0 on success
+ *          - -1 on failure
+ *
+ * Specification: Implements [FIPS 203, Section 7.2, 'modulus check']
+ *
+ **************************************************/
+MLK_MUST_CHECK_RETURN_VALUE
+static int check_pk(const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
+{
+  int res;
+  polyvec p;
+  uint8_t p_reencoded[MLKEM_POLYVECBYTES];
+
+  polyvec_frombytes(&p, pk);
+  polyvec_reduce(&p);
+  polyvec_tobytes(p_reencoded, &p);
+
+  /* We use a constant-time memcmp here to avoid having to
+   * declassify the PK before the PCT has succeeded. */
+  res = ct_memcmp(pk, p_reencoded, MLKEM_POLYVECBYTES) ? -1 : 0;
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  ct_zeroize(p_reencoded, sizeof(p_reencoded));
+  ct_zeroize(&p, sizeof(p));
+  return res;
+}
+
+/*************************************************
+ * Name:        check_sk
+ *
+ * Description: Implements public key hash check mandated by FIPS 203,
+ *              i.e., ensures that
+ *              sk[768𝑘+32 ∶ 768𝑘+64] = H(pk)= H(sk[384𝑘 : 768𝑘+32])
+ *
+ * Arguments:   - const uint8_t *sk: pointer to input private key
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
+ *
+ * Returns: - 0 on success
+ *          - -1 on failure
+ *
+ * Specification: Implements [FIPS 203, Section 7.3, 'hash check']
+ *
+ **************************************************/
+MLK_MUST_CHECK_RETURN_VALUE
+static int check_sk(const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
+{
+  int res;
+  MLK_ALIGN uint8_t test[MLKEM_SYMBYTES];
+  /*
+   * The parts of `sk` being hashed and compared here are public, so
+   * no public information is leaked through the runtime or the return value
+   * of this function.
+   */
+
+  /* Declassify the public part of the secret key */
+  MLK_CT_TESTING_DECLASSIFY(sk + MLKEM_INDCPA_SECRETKEYBYTES,
+                            MLKEM_INDCCA_PUBLICKEYBYTES);
+  MLK_CT_TESTING_DECLASSIFY(
+      sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, MLKEM_SYMBYTES);
+
+  hash_h(test, sk + MLKEM_INDCPA_SECRETKEYBYTES, MLKEM_INDCCA_PUBLICKEYBYTES);
+  res = memcmp(sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, test,
+               MLKEM_SYMBYTES)
+            ? -1
+            : 0;
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  ct_zeroize(test, sizeof(test));
+  return res;
+}
+
+MLK_MUST_CHECK_RETURN_VALUE
+static int check_pct(uint8_t const pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                     uint8_t const sk[MLKEM_INDCCA_SECRETKEYBYTES])
+__contract__(
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES)));
+
+#if defined(MLK_KEYGEN_PCT)
+/* Specification:
+ * Partially implements 'Pairwise Consistency Test' [FIPS 140-3 IG] and
+ * [FIPS 203, Section 7.1, Pairwise Consistency].
+ */
+static int check_pct(uint8_t const pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                     uint8_t const sk[MLKEM_INDCCA_SECRETKEYBYTES])
+{
+  int res;
+  uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES];
+  uint8_t ss_enc[MLKEM_SSBYTES], ss_dec[MLKEM_SSBYTES];
+
+  res = crypto_kem_enc(ct, ss_enc, pk);
+  if (res != 0)
+  {
+    goto cleanup;
+  }
+
+  res = crypto_kem_dec(ss_dec, ct, sk);
+  if (res != 0)
+  {
+    goto cleanup;
+  }
+
+#if defined(MLK_KEYGEN_PCT_BREAKAGE_TEST)
+  /* Deliberately break PCT for testing purposes */
+  if (mlk_break_pct())
+  {
+    ss_enc[0] = ~ss_enc[0];
+  }
+#endif /* MLK_KEYGEN_PCT_BREAKAGE_TEST */
+
+  res = ct_memcmp(ss_enc, ss_dec, sizeof(ss_dec));
+
+cleanup:
+  /* The result of the PCT is public. */
+  MLK_CT_TESTING_DECLASSIFY(&res, sizeof(res));
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  ct_zeroize(ct, sizeof(ct));
+  ct_zeroize(ss_enc, sizeof(ss_enc));
+  ct_zeroize(ss_dec, sizeof(ss_dec));
+  return res;
+}
+#else  /* !MLKEM_KEYGEN_PCT */
+static int check_pct(uint8_t const pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                     uint8_t const sk[MLKEM_INDCCA_SECRETKEYBYTES])
+{
+  /* Skip PCT */
+  ((void)pk);
+  ((void)sk);
+  return 0;
+}
+#endif /* MLKEM_KEYGEN_PCT */
+
+MLK_EXTERNAL_API
+int crypto_kem_keypair_derand(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                              uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                              const uint8_t *coins)
+{
+  indcpa_keypair_derand(pk, sk, coins);
+  memcpy(sk + MLKEM_INDCPA_SECRETKEYBYTES, pk, MLKEM_INDCCA_PUBLICKEYBYTES);
+  hash_h(sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, pk,
+         MLKEM_INDCCA_PUBLICKEYBYTES);
+  /* Value z for pseudo-random output on reject */
+  memcpy(sk + MLKEM_INDCCA_SECRETKEYBYTES - MLKEM_SYMBYTES,
+         coins + MLKEM_SYMBYTES, MLKEM_SYMBYTES);
+
+  /* Declassify public key */
+  MLK_CT_TESTING_DECLASSIFY(pk, MLKEM_INDCCA_PUBLICKEYBYTES);
+
+  /* Pairwise Consistency Test (PCT) (FIPS 140-3 IPG) */
+  if (check_pct(pk, sk))
+  {
+    return -1;
+  }
+
+  return 0;
+}
+
+MLK_EXTERNAL_API
+int crypto_kem_keypair(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                       uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
+{
+  int res;
+  MLK_ALIGN uint8_t coins[2 * MLKEM_SYMBYTES];
+
+  /* Acquire necessary randomness, and mark it as secret. */
+  randombytes(coins, 2 * MLKEM_SYMBYTES);
+  MLK_CT_TESTING_SECRET(coins, sizeof(coins));
+
+  res = crypto_kem_keypair_derand(pk, sk, coins);
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  ct_zeroize(coins, sizeof(coins));
+  return res;
+}
+
+MLK_EXTERNAL_API
+int crypto_kem_enc_derand(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                          uint8_t ss[MLKEM_SSBYTES],
+                          const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                          const uint8_t coins[MLKEM_SYMBYTES])
+{
+  MLK_ALIGN uint8_t buf[2 * MLKEM_SYMBYTES];
+  /* Will contain key, coins */
+  MLK_ALIGN uint8_t kr[2 * MLKEM_SYMBYTES];
+
+  /* Specification: Implements [FIPS 203, Section 7.2, Modulus check] */
+  if (check_pk(pk))
+  {
+    return -1;
+  }
+
+  memcpy(buf, coins, MLKEM_SYMBYTES);
+
+  /* Multitarget countermeasure for coins + contributory KEM */
+  hash_h(buf + MLKEM_SYMBYTES, pk, MLKEM_INDCCA_PUBLICKEYBYTES);
+  hash_g(kr, buf, 2 * MLKEM_SYMBYTES);
+
+  /* coins are in kr+MLKEM_SYMBYTES */
+  indcpa_enc(ct, buf, pk, kr + MLKEM_SYMBYTES);
+
+  memcpy(ss, kr, MLKEM_SYMBYTES);
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  ct_zeroize(buf, sizeof(buf));
+  ct_zeroize(kr, sizeof(kr));
+
+  return 0;
+}
+
+MLK_EXTERNAL_API
+int crypto_kem_enc(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
+{
+  int res;
+  MLK_ALIGN uint8_t coins[MLKEM_SYMBYTES];
+
+  randombytes(coins, MLKEM_SYMBYTES);
+  MLK_CT_TESTING_SECRET(coins, sizeof(coins));
+
+  res = crypto_kem_enc_derand(ct, ss, pk, coins);
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  ct_zeroize(coins, sizeof(coins));
+  return res;
+}
+
+MLK_EXTERNAL_API
+int crypto_kem_dec(uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
+{
+  uint8_t fail;
+  MLK_ALIGN uint8_t buf[2 * MLKEM_SYMBYTES];
+  /* Will contain key, coins */
+  MLK_ALIGN uint8_t kr[2 * MLKEM_SYMBYTES];
+  MLK_ALIGN uint8_t tmp[MLKEM_SYMBYTES + MLKEM_INDCCA_CIPHERTEXTBYTES];
+
+  const uint8_t *pk = sk + MLKEM_INDCPA_SECRETKEYBYTES;
+
+  /* Specification: Implements [FIPS 203, Section 7.3, Hash check] */
+  if (check_sk(sk))
+  {
+    return -1;
+  }
+
+  indcpa_dec(buf, ct, sk);
+
+  /* Multitarget countermeasure for coins + contributory KEM */
+  memcpy(buf + MLKEM_SYMBYTES,
+         sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, MLKEM_SYMBYTES);
+  hash_g(kr, buf, 2 * MLKEM_SYMBYTES);
+
+  /* Recompute and compare ciphertext */
+  /* coins are in kr+MLKEM_SYMBYTES */
+  indcpa_enc(tmp, buf, pk, kr + MLKEM_SYMBYTES);
+  fail = ct_memcmp(ct, tmp, MLKEM_INDCCA_CIPHERTEXTBYTES);
+
+  /* Compute rejection key */
+  memcpy(tmp, sk + MLKEM_INDCCA_SECRETKEYBYTES - MLKEM_SYMBYTES,
+         MLKEM_SYMBYTES);
+  memcpy(tmp + MLKEM_SYMBYTES, ct, MLKEM_INDCCA_CIPHERTEXTBYTES);
+  hash_j(ss, tmp, sizeof(tmp));
+
+  /* Copy true key to return buffer if fail is 0 */
+  ct_cmov_zero(ss, kr, MLKEM_SYMBYTES, fail);
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  ct_zeroize(buf, sizeof(buf));
+  ct_zeroize(kr, sizeof(kr));
+  ct_zeroize(tmp, sizeof(tmp));
+
+  return 0;
+}
+
+/* To facilitate single-compilation-unit (SCU) builds, undefine all macros.
+ * Don't modify by hand -- this is auto-generated by scripts/autogen. */
+#undef check_pk
+#undef check_sk
+#undef check_pct
diff --git a/crypto/fipsmodule/ml_kem/mlkem/kem.h b/crypto/fipsmodule/ml_kem/mlkem/kem.h
new file mode 100644
index 0000000000..eab52704ce
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/kem.h
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef MLK_KEM_H
+#define MLK_KEM_H
+
+#include <stdint.h>
+#include "cbmc.h"
+#include "common.h"
+#include "sys.h"
+
+#if defined(MLK_CHECK_APIS)
+/* Include to ensure consistency between internal kem.h
+ * and external mlkem_native.h. */
+#include "mlkem_native.h"
+
+#if MLKEM_INDCCA_SECRETKEYBYTES != MLKEM_SECRETKEYBYTES(MLKEM_LVL)
+#error Mismatch for SECRETKEYBYTES between kem.h and mlkem_native.h
+#endif
+
+#if MLKEM_INDCCA_PUBLICKEYBYTES != MLKEM_PUBLICKEYBYTES(MLKEM_LVL)
+#error Mismatch for PUBLICKEYBYTES between kem.h and mlkem_native.h
+#endif
+
+#if MLKEM_INDCCA_CIPHERTEXTBYTES != MLKEM_CIPHERTEXTBYTES(MLKEM_LVL)
+#error Mismatch for CIPHERTEXTBYTES between kem.h and mlkem_native.h
+#endif
+
+#else
+#define crypto_kem_keypair_derand MLK_NAMESPACE_K(keypair_derand)
+#define crypto_kem_keypair MLK_NAMESPACE_K(keypair)
+#define crypto_kem_enc_derand MLK_NAMESPACE_K(enc_derand)
+#define crypto_kem_enc MLK_NAMESPACE_K(enc)
+#define crypto_kem_dec MLK_NAMESPACE_K(dec)
+#endif
+
+/*************************************************
+ * Name:        crypto_kem_keypair_derand
+ *
+ * Description: Generates public and private key
+ *              for CCA-secure ML-KEM key encapsulation mechanism
+ *
+ * Arguments:   - uint8_t *pk: pointer to output public key
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
+ *              - uint8_t *sk: pointer to output private key
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
+ *              - uint8_t *coins: pointer to input randomness
+ *                (an already allocated array filled with 2*MLKEM_SYMBYTES
+ *                 random bytes)
+ *
+ * Returns:     - 0: On success
+ *              - -1: On PCT failure (if MLK_KEYGEN_PCT) is enabled.
+ *
+ * Specification: Implements [FIPS 203, Algorithm 16, ML-KEM.KeyGen_Internal]
+ *
+ **************************************************/
+MLK_EXTERNAL_API
+MLK_MUST_CHECK_RETURN_VALUE
+int crypto_kem_keypair_derand(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                              uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                              const uint8_t *coins)
+__contract__(
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
+  requires(memory_no_alias(coins, 2 * MLKEM_SYMBYTES))
+  assigns(object_whole(pk))
+  assigns(object_whole(sk))
+);
+
+/*************************************************
+ * Name:        crypto_kem_keypair
+ *
+ * Description: Generates public and private key
+ *              for CCA-secure ML-KEM key encapsulation mechanism
+ *
+ * Arguments:   - uint8_t *pk: pointer to output public key
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
+ *              - uint8_t *sk: pointer to output private key
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
+ *
+ * Returns:     - 0: On success
+ *              - -1: On PCT failure (if MLK_KEYGEN_PCT) is enabled.
+ *
+ * Specification: Implements [FIPS 203, Algorithm 19, ML-KEM.KeyGen]
+ *
+ **************************************************/
+MLK_EXTERNAL_API
+MLK_MUST_CHECK_RETURN_VALUE
+int crypto_kem_keypair(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                       uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
+__contract__(
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
+  assigns(object_whole(pk))
+  assigns(object_whole(sk))
+);
+
+/*************************************************
+ * Name:        crypto_kem_enc_derand
+ *
+ * Description: Generates cipher text and shared
+ *              secret for given public key
+ *
+ * Arguments:   - uint8_t *ct: pointer to output cipher text
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
+ *              - uint8_t *ss: pointer to output shared secret
+ *                (an already allocated array of MLKEM_SSBYTES bytes)
+ *              - const uint8_t *pk: pointer to input public key
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
+ *              - const uint8_t *coins: pointer to input randomness
+ *                (an already allocated array filled with MLKEM_SYMBYTES random
+ *                 bytes)
+ *
+ * Returns: - 0 on success
+ *          - -1 if the 'modulus check' [FIPS 203, Section 7.2]
+ *            for the public key fails.
+ *
+ * Specification: Implements [FIPS 203, Algorithm 17, ML-KEM.Encaps_Internal]
+ *
+ **************************************************/
+MLK_EXTERNAL_API
+MLK_MUST_CHECK_RETURN_VALUE
+int crypto_kem_enc_derand(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                          uint8_t ss[MLKEM_SSBYTES],
+                          const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                          const uint8_t coins[MLKEM_SYMBYTES])
+__contract__(
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
+  requires(memory_no_alias(ss, MLKEM_SSBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  requires(memory_no_alias(coins, MLKEM_SYMBYTES))
+  assigns(object_whole(ct))
+  assigns(object_whole(ss))
+);
+
+/*************************************************
+ * Name:        crypto_kem_enc
+ *
+ * Description: Generates cipher text and shared
+ *              secret for given public key
+ *
+ * Arguments:   - uint8_t *ct: pointer to output cipher text
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
+ *              - uint8_t *ss: pointer to output shared secret
+ *                (an already allocated array of MLKEM_SSBYTES bytes)
+ *              - const uint8_t *pk: pointer to input public key
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
+ *
+ * Returns: - 0 on success
+ *          - -1 if the 'modulus check' [FIPS 203, Section 7.2]
+ *            for the public key fails.
+ *
+ * Specification: Implements [FIPS 203, Algorithm 20, ML-KEM.Encaps]
+ *
+ **************************************************/
+MLK_EXTERNAL_API
+MLK_MUST_CHECK_RETURN_VALUE
+int crypto_kem_enc(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
+__contract__(
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
+  requires(memory_no_alias(ss, MLKEM_SSBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  assigns(object_whole(ct))
+  assigns(object_whole(ss))
+);
+
+/*************************************************
+ * Name:        crypto_kem_dec
+ *
+ * Description: Generates shared secret for given
+ *              cipher text and private key
+ *
+ * Arguments:   - uint8_t *ss: pointer to output shared secret
+ *                (an already allocated array of MLKEM_SSBYTES bytes)
+ *              - const uint8_t *ct: pointer to input cipher text
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
+ *              - const uint8_t *sk: pointer to input private key
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
+ *
+ * Returns: - 0 on success
+ *          - -1 if the 'hash check' [FIPS 203, Section 7.3]
+ *            for the secret key fails.
+ *
+ * Specification: Implements [FIPS 203, Algorithm 21, ML-KEM.Decaps]
+ *
+ **************************************************/
+MLK_EXTERNAL_API
+MLK_MUST_CHECK_RETURN_VALUE
+int crypto_kem_dec(uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
+__contract__(
+  requires(memory_no_alias(ss, MLKEM_SSBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
+  assigns(object_whole(ss))
+);
+
+#endif /* MLK_KEM_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/mlkem_native.h b/crypto/fipsmodule/ml_kem/mlkem/mlkem_native.h
new file mode 100644
index 0000000000..ed74c53427
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/mlkem_native.h
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef MLK_H
+#define MLK_H
+/*
+ * Public API for mlkem-native
+ *
+ * This header defines the public API of a single build of mlkem-native.
+ *
+ * To use this header, make sure one of the following holds:
+ *
+ * - The config.h used for the build is available in the include paths.
+ * - The values of MLK_BUILD_INFO_LVL and MLK_BUILD_INFO_NAMESPACE are set,
+ * reflecting the security level (512/768/1024) and namespace of the build.
+ *
+ * This header specifies a build of mlkem-native for a fixed security level.
+ * If you need multiple builds, e.g. to build a library offering multiple
+ * security levels, you need multiple instances of this header. In this case,
+ * make sure to rename or #undefine the header guard
+ */
+
+#include <stdint.h>
+
+/*************************** Build information ********************************/
+
+/*
+ * Provide security level (MLK_BUILD_INFO_LVL) and namespacing
+ * (MLK_BUILD_INFO_NAMESPACE)
+ *
+ * By default, this is extracted from the configuration used for the build,
+ * but you can also set it manually to avoid a dependency on the build config.
+ */
+
+/* Skip this if MLK_BUILD_INFO_LVL has already been set */
+#if !defined(MLK_BUILD_INFO_LVL)
+
+/* Option 1: Extract from config */
+#if defined(MLK_CONFIG_FILE)
+#include MLK_CONFIG_FILE
+#else
+#include "config.h"
+#endif
+
+#if MLKEM_K == 2
+#define MLK_BUILD_INFO_LVL 512
+#elif MLKEM_K == 3
+#define MLK_BUILD_INFO_LVL 768
+#elif MLKEM_K == 4
+#define MLK_BUILD_INFO_LVL 1024
+#else
+#error MLKEM_K not set by config file
+#endif
+
+#ifndef MLK_NAMESPACE_PREFIX
+#error MLK_NAMESPACE_PREFIX not set by config file
+#endif
+
+#if defined(MLK_NAMESPACE_PREFIX_ADD_LEVEL)
+#define MLK_BUILD_INFO_CONCAT3_(x, y, z) x##y##_##z
+#define MLK_BUILD_INFO_CONCAT3(x, y, z) MLK_BUILD_INFO_CONCAT_(x, y, z)
+#define MLK_BUILD_INFO_NAMESPACE(sym) \
+  MLK_BUILD_INFO_CONCAT3(MLK_NAMESPACE_PREFIX, MLK_BUILD_INFO_LVL, sym)
+#else
+#define MLK_BUILD_INFO_CONCAT2_(x, y) x##_##y
+#define MLK_BUILD_INFO_CONCAT2(x, y) MLK_BUILD_INFO_CONCAT2_(x, y)
+#define MLK_BUILD_INFO_NAMESPACE(sym) \
+  MLK_BUILD_INFO_CONCAT2(MLK_NAMESPACE_PREFIX, sym)
+#endif
+
+#endif /* MLK_BUILD_INFO_LVL */
+
+/* Option 2: Provide MLK_BUILD_INFO_LVL and MLK_BUILD_INFO_NAMESPACE manually */
+
+/* #define MLK_BUILD_INFO_LVL            ADJUSTME */
+/* #define MLK_BUILD_INFO_NAMESPACE(sym) ADJUSTME */
+
+/******************************* Key sizes ************************************/
+
+/* Sizes of cryptographic material, per level */
+/* See mlke/common.h for the arithmetic expressions giving rise to these */
+/* check-magic: off */
+#define MLKEM512_SECRETKEYBYTES 1632
+#define MLKEM512_PUBLICKEYBYTES 800
+#define MLKEM512_CIPHERTEXTBYTES 768
+
+#define MLKEM768_SECRETKEYBYTES 2400
+#define MLKEM768_PUBLICKEYBYTES 1184
+#define MLKEM768_CIPHERTEXTBYTES 1088
+
+#define MLKEM1024_SECRETKEYBYTES 3168
+#define MLKEM1024_PUBLICKEYBYTES 1568
+#define MLKEM1024_CIPHERTEXTBYTES 1568
+/* check-magic: on */
+
+/* Size of randomness coins in bytes (level-independent) */
+#define MLKEM_SYMBYTES 32
+#define MLKEM512_SYMBYTES MLKEM_SYMBYTES
+#define MLKEM768_SYMBYTES MLKEM_SYMBYTES
+#define MLKEM1024_SYMBYTES MLKEM_SYMBYTES
+/* Size of shared secret in bytes (level-independent) */
+#define MLKEM_BYTES 32
+#define MLKEM512_BYTES MLKEM_BYTES
+#define MLKEM768_BYTES MLKEM_BYTES
+#define MLKEM1024_BYTES MLKEM_BYTES
+
+/* Sizes of cryptographic material, as a function of LVL=512,768,1024 */
+#define MLKEM_SECRETKEYBYTES_(LVL) MLKEM##LVL##_SECRETKEYBYTES
+#define MLKEM_PUBLICKEYBYTES_(LVL) MLKEM##LVL##_PUBLICKEYBYTES
+#define MLKEM_CIPHERTEXTBYTES_(LVL) MLKEM##LVL##_CIPHERTEXTBYTES
+#define MLKEM_SECRETKEYBYTES(LVL) MLKEM_SECRETKEYBYTES_(LVL)
+#define MLKEM_PUBLICKEYBYTES(LVL) MLKEM_PUBLICKEYBYTES_(LVL)
+#define MLKEM_CIPHERTEXTBYTES(LVL) MLKEM_CIPHERTEXTBYTES_(LVL)
+
+/****************************** Function API **********************************/
+
+#if defined(__GNUC__) || defined(clang)
+#define MLK_MUST_CHECK_RETURN_VALUE __attribute__((warn_unused_result))
+#else
+#define MLK_MUST_CHECK_RETURN_VALUE
+#endif
+
+/*************************************************
+ * Name:        crypto_kem_keypair_derand
+ *
+ * Description: Generates public and private key
+ *              for CCA-secure ML-KEM key encapsulation mechanism
+ *
+ * Arguments:   - uint8_t pk[]: pointer to output public key, an array of
+ *                 length MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - uint8_t sk[]: pointer to output private key, an array of
+ *                  of MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *              - uint8_t *coins: pointer to input randomness, an array of
+ *                  2*MLKEM_SYMBYTES uniformly random bytes.
+ *
+ * Returns:     - 0: On success
+ *              - -1: On PCT failure (if MLK_KEYGEN_PCT) is enabled.
+ *
+ * Specification: Implements [FIPS 203, Algorithm 16, ML-KEM.KeyGen_Internal]
+ *
+ **************************************************/
+MLK_MUST_CHECK_RETURN_VALUE
+int MLK_BUILD_INFO_NAMESPACE(keypair_derand)(
+    uint8_t pk[MLKEM_PUBLICKEYBYTES(MLK_BUILD_INFO_LVL)],
+    uint8_t sk[MLKEM_SECRETKEYBYTES(MLK_BUILD_INFO_LVL)], const uint8_t *coins);
+
+/*************************************************
+ * Name:        crypto_kem_keypair
+ *
+ * Description: Generates public and private key
+ *              for CCA-secure ML-KEM key encapsulation mechanism
+ *
+ * Arguments:   - uint8_t *pk: pointer to output public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - uint8_t *sk: pointer to output private key, an array of
+ *                 MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *
+ * Returns:     - 0: On success
+ *              - -1: On PCT failure (if MLK_KEYGEN_PCT) is enabled.
+ *
+ * Specification: Implements [FIPS 203, Algorithm 19, ML-KEM.KeyGen]
+ *
+ **************************************************/
+MLK_MUST_CHECK_RETURN_VALUE
+int MLK_BUILD_INFO_NAMESPACE(keypair)(
+    uint8_t pk[MLKEM_PUBLICKEYBYTES(MLK_BUILD_INFO_LVL)],
+    uint8_t sk[MLKEM_SECRETKEYBYTES(MLK_BUILD_INFO_LVL)]);
+
+/*************************************************
+ * Name:        crypto_kem_enc_derand
+ *
+ * Description: Generates cipher text and shared
+ *              secret for given public key
+ *
+ * Arguments:   - uint8_t *ct: pointer to output cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *pk: pointer to input public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - const uint8_t *coins: pointer to input randomness, an array of
+ *                 MLKEM_SYMBYTES bytes.
+ *
+ * Returns: - 0 on success
+ *          - -1 if the 'modulus check' [FIPS 203, Section 7.2]
+ *            for the public key fails.
+ *
+ * Specification: Implements [FIPS 203, Algorithm 17, ML-KEM.Encaps_Internal]
+ *
+ **************************************************/
+MLK_MUST_CHECK_RETURN_VALUE
+int MLK_BUILD_INFO_NAMESPACE(enc_derand)(
+    uint8_t ct[MLKEM_CIPHERTEXTBYTES(MLK_BUILD_INFO_LVL)],
+    uint8_t ss[MLKEM_BYTES],
+    const uint8_t pk[MLKEM_PUBLICKEYBYTES(MLK_BUILD_INFO_LVL)],
+    const uint8_t coins[MLKEM_SYMBYTES]);
+
+/*************************************************
+ * Name:        crypto_kem_enc
+ *
+ * Description: Generates cipher text and shared
+ *              secret for given public key
+ *
+ * Arguments:   - uint8_t *ct: pointer to output cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *pk: pointer to input public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *
+ * Returns: - 0 on success
+ *          - -1 if the 'modulus check' [FIPS 203, Section 7.2]
+ *            for the public key fails.
+ *
+ * Specification: Implements [FIPS 203, Algorithm 20, ML-KEM.Encaps]
+ *
+ **************************************************/
+MLK_MUST_CHECK_RETURN_VALUE
+int MLK_BUILD_INFO_NAMESPACE(enc)(
+    uint8_t ct[MLKEM_CIPHERTEXTBYTES(MLK_BUILD_INFO_LVL)],
+    uint8_t ss[MLKEM_BYTES],
+    const uint8_t pk[MLKEM_PUBLICKEYBYTES(MLK_BUILD_INFO_LVL)]);
+
+/*************************************************
+ * Name:        crypto_kem_dec
+ *
+ * Description: Generates shared secret for given
+ *              cipher text and private key
+ *
+ * Arguments:   - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *ct: pointer to input cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - const uint8_t *sk: pointer to input private key, an array of
+ *                 MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *
+ * Returns: - 0 on success
+ *          - -1 if the 'hash check' [FIPS 203, Section 7.3]
+ *            for the secret key fails.
+ *
+ * Specification: Implements [FIPS 203, Algorithm 21, ML-KEM.Decaps]
+ *
+ **************************************************/
+MLK_MUST_CHECK_RETURN_VALUE
+int MLK_BUILD_INFO_NAMESPACE(dec)(
+    uint8_t ss[MLKEM_BYTES],
+    const uint8_t ct[MLKEM_CIPHERTEXTBYTES(MLK_BUILD_INFO_LVL)],
+    const uint8_t sk[MLKEM_SECRETKEYBYTES(MLK_BUILD_INFO_LVL)]);
+
+/****************************** Standard API *********************************/
+
+/* If desired, export API in CRYPTO_xxx and crypto_kem_xxx format as used
+ * e.g. by SUPERCOP and NIST.
+ *
+ * Remove this if you don't need it, or if you need multiple instances
+ * of this header. */
+
+#if !defined(MLK_BUILD_INFO_NO_STANDARD_API)
+#define CRYPTO_SECRETKEYBYTES MLKEM_SECRETKEYBYTES(MLK_BUILD_INFO_LVL)
+#define CRYPTO_PUBLICKEYBYTES MLKEM_PUBLICKEYBYTES(MLK_BUILD_INFO_LVL)
+#define CRYPTO_CIPHERTEXTBYTES MLKEM_CIPHERTEXTBYTES(MLK_BUILD_INFO_LVL)
+
+#define CRYPTO_SYMBYTES MLKEM_SYMBYTES
+#define CRYPTO_BYTES MLKEM_BYTES
+
+#define crypto_kem_keypair_derand MLK_BUILD_INFO_NAMESPACE(keypair_derand)
+#define crypto_kem_keypair MLK_BUILD_INFO_NAMESPACE(keypair)
+#define crypto_kem_enc_derand MLK_BUILD_INFO_NAMESPACE(enc_derand)
+#define crypto_kem_enc MLK_BUILD_INFO_NAMESPACE(enc)
+#define crypto_kem_dec MLK_BUILD_INFO_NAMESPACE(dec)
+#endif /* MLK_BUILD_INFO_NO_STANDARD_API */
+
+/********************************* Cleanup ************************************/
+
+/* Unset build information to allow multiple instances of this header.
+ * Keep this commented out when using the standard API. */
+/* #undef MLK_BUILD_INFO_LVL */
+/* #undef MLK_BUILD_INFO_NAMESPACE */
+
+#endif /* MLK_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/params.h b/crypto/fipsmodule/ml_kem/mlkem/params.h
new file mode 100644
index 0000000000..2b859ac27d
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/params.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef MLK_PARAMS_H
+#define MLK_PARAMS_H
+
+#if defined(MLK_CONFIG_FILE)
+#include MLK_CONFIG_FILE
+#else
+#include "config.h"
+#endif /* MLK_CONFIG_FILE */
+
+#if !defined(MLKEM_K)
+#error MLKEM_K is not defined
+#endif
+
+#define MLKEM_N 256
+#define MLKEM_Q 3329
+#define MLKEM_Q_HALF ((MLKEM_Q + 1) / 2) /* 1665 */
+#define MLKEM_UINT12_LIMIT 4096
+
+#define MLKEM_SYMBYTES 32 /* size in bytes of hashes, and seeds */
+#define MLKEM_SSBYTES 32  /* size in bytes of shared key */
+
+#define MLKEM_POLYBYTES 384
+#define MLKEM_POLYVECBYTES (MLKEM_K * MLKEM_POLYBYTES)
+
+#define MLKEM_POLYCOMPRESSEDBYTES_D4 128
+#define MLKEM_POLYCOMPRESSEDBYTES_D5 160
+#define MLKEM_POLYCOMPRESSEDBYTES_D10 320
+#define MLKEM_POLYCOMPRESSEDBYTES_D11 352
+
+#if MLKEM_K == 2
+#define MLKEM_LVL 512
+#define MLKEM_ETA1 3
+#define MLKEM_DU 10
+#define MLKEM_DV 4
+#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D4
+#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D10
+#define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU)
+#elif MLKEM_K == 3
+#define MLKEM_LVL 768
+#define MLKEM_ETA1 2
+#define MLKEM_DU 10
+#define MLKEM_DV 4
+#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D4
+#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D10
+#define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU)
+#elif MLKEM_K == 4
+#define MLKEM_LVL 1024
+#define MLKEM_ETA1 2
+#define MLKEM_DU 11
+#define MLKEM_DV 5
+#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D5
+#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D11
+#define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU)
+#endif
+
+#define MLKEM_ETA2 2
+
+#define MLKEM_INDCPA_MSGBYTES (MLKEM_SYMBYTES)
+#define MLKEM_INDCPA_PUBLICKEYBYTES (MLKEM_POLYVECBYTES + MLKEM_SYMBYTES)
+#define MLKEM_INDCPA_SECRETKEYBYTES (MLKEM_POLYVECBYTES)
+#define MLKEM_INDCPA_BYTES \
+  (MLKEM_POLYVECCOMPRESSEDBYTES_DU + MLKEM_POLYCOMPRESSEDBYTES_DV)
+
+#define MLKEM_INDCCA_PUBLICKEYBYTES (MLKEM_INDCPA_PUBLICKEYBYTES)
+/* 32 bytes of additional space to save H(pk) */
+#define MLKEM_INDCCA_SECRETKEYBYTES                            \
+  (MLKEM_INDCPA_SECRETKEYBYTES + MLKEM_INDCPA_PUBLICKEYBYTES + \
+   2 * MLKEM_SYMBYTES)
+#define MLKEM_INDCCA_CIPHERTEXTBYTES (MLKEM_INDCPA_BYTES)
+
+#endif /* MLK_PARAMS_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/poly.c b/crypto/fipsmodule/ml_kem/mlkem/poly.c
new file mode 100644
index 0000000000..e077d5d1d2
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/poly.c
@@ -0,0 +1,495 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#include "common.h"
+#if !defined(MLK_MULTILEVEL_BUILD_NO_SHARED)
+
+#include <stdint.h>
+#include <string.h>
+#include "arith_backend.h"
+#include "cbmc.h"
+#include "debug.h"
+#include "poly.h"
+#include "sampling.h"
+#include "symmetric.h"
+#include "verify.h"
+
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define fqmul MLK_NAMESPACE(fqmul)
+#define barrett_reduce MLK_NAMESPACE(barrett_reduce)
+#define scalar_signed_to_unsigned_q MLK_NAMESPACE(scalar_signed_to_unsigned_q)
+#define ntt_butterfly_block MLK_NAMESPACE(ntt_butterfly_block)
+#define ntt_layer MLK_NAMESPACE(ntt_layer)
+#define invntt_layer MLK_NAMESPACE(invntt_layer)
+/* End of static namespacing */
+
+#if !defined(MLK_USE_NATIVE_POLY_TOMONT) ||           \
+    !defined(MLK_USE_NATIVE_POLY_MULCACHE_COMPUTE) || \
+    !defined(MLK_USE_NATIVE_NTT) || !defined(MLK_USE_NATIVE_INTT)
+/*************************************************
+ * Name:        fqmul
+ *
+ * Description: Montgomery multiplication modulo MLKEM_Q
+ *
+ * Arguments:   - int16_t a: first factor
+ *                  Can be any int16_t.
+ *              - int16_t b: second factor.
+ *                  Must be signed canonical (abs value <(MLKEM_Q+1)/2)
+ *
+ * Returns 16-bit integer congruent to a*b*R^{-1} mod MLKEM_Q, and
+ * smaller than MLKEM_Q in absolute value.
+ *
+ **************************************************/
+static MLK_INLINE int16_t fqmul(int16_t a, int16_t b)
+__contract__(
+  requires(b > -MLKEM_Q_HALF && b < MLKEM_Q_HALF)
+  ensures(return_value > -MLKEM_Q && return_value < MLKEM_Q)
+)
+{
+  int16_t res;
+  debug_assert_abs_bound(&b, 1, MLKEM_Q_HALF);
+
+  res = montgomery_reduce((int32_t)a * (int32_t)b);
+  /* Bounds:
+   * |res| <= ceil(|a| * |b| / 2^16) + (MLKEM_Q + 1) / 2
+   *       <= ceil(2^15 * ((MLKEM_Q - 1)/2) / 2^16) + (MLKEM_Q + 1) / 2
+   *       <= ceil((MLKEM_Q - 1) / 4) + (MLKEM_Q + 1) / 2
+   *        < MLKEM_Q
+   */
+
+  debug_assert_abs_bound(&res, 1, MLKEM_Q);
+  return res;
+}
+#endif /* !defined(MLK_USE_NATIVE_POLY_TOMONT) ||           \
+          !defined(MLK_USE_NATIVE_POLY_MULCACHE_COMPUTE) || \
+          !defined(MLK_USE_NATIVE_NTT) ||                   \
+          !defined(MLK_USE_NATIVE_INTT) */
+
+#if !defined(MLK_USE_NATIVE_POLY_REDUCE) || !defined(MLK_USE_NATIVE_INTT)
+/*************************************************
+ * Name:        barrett_reduce
+ *
+ * Description: Barrett reduction; given a 16-bit integer a, computes
+ *              centered representative congruent to a mod q in
+ *              {-(q-1)/2,...,(q-1)/2}
+ *
+ * Arguments:   - int16_t a: input integer to be reduced
+ *
+ * Returns:     integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q.
+ **************************************************/
+static MLK_INLINE int16_t barrett_reduce(int16_t a)
+__contract__(
+  ensures(return_value > -MLKEM_Q_HALF && return_value < MLKEM_Q_HALF)
+)
+{
+  /*
+   * To divide by MLKEM_Q using Barrett multiplication, the "magic number"
+   * multiplier is round_to_nearest(2**26/MLKEM_Q)
+   */
+  const int BPOWER = 26;
+  const int32_t barrett_multiplier = ((1 << BPOWER) + MLKEM_Q / 2) / MLKEM_Q;
+
+  /*
+   * Compute round_to_nearest(a/MLKEM_Q) using the multiplier
+   * above and shift by BPOWER places.
+   * PORTABILITY: Right-shift on a signed integer is, strictly-speaking,
+   * implementation-defined for negative left argument. Here,
+   * we assume it's sign-preserving "arithmetic" shift right. (C99 6.5.7 (5))
+   */
+  const int32_t t = (barrett_multiplier * a + (1 << (BPOWER - 1))) >> BPOWER;
+
+  /*
+   * t is in -10 .. +10, so we need 32-bit math to
+   * evaluate t * MLKEM_Q and the subsequent subtraction
+   */
+  int16_t res = (int16_t)(a - t * MLKEM_Q);
+
+  debug_assert_abs_bound(&res, 1, MLKEM_Q_HALF);
+  return res;
+}
+#endif /* !defined(MLK_USE_NATIVE_POLY_REDUCE) || \
+          !defined(MLK_USE_NATIVE_INTT) */
+
+#if !defined(MLK_USE_NATIVE_POLY_TOMONT)
+MLK_INTERNAL_API
+void poly_tomont(poly *r)
+{
+  unsigned i;
+  const int16_t f = 1353; /* check-magic: 1353 == signed_mod(2^32, MLKEM_Q) */
+  for (i = 0; i < MLKEM_N; i++)
+  __loop__(
+    invariant(i <= MLKEM_N)
+    invariant(array_abs_bound(r->coeffs, 0, i, MLKEM_Q)))
+  {
+    r->coeffs[i] = fqmul(r->coeffs[i], f);
+  }
+
+  debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q);
+}
+#else  /* MLK_USE_NATIVE_POLY_TOMONT */
+MLK_INTERNAL_API
+void poly_tomont(poly *r)
+{
+  poly_tomont_native(r->coeffs);
+  debug_assert_abs_bound(r, MLKEM_N, MLKEM_Q);
+}
+#endif /* MLK_USE_NATIVE_POLY_TOMONT */
+
+#if !defined(MLK_USE_NATIVE_POLY_REDUCE)
+/************************************************************
+ * Name: scalar_signed_to_unsigned_q
+ *
+ * Description: Constant-time conversion of signed representatives
+ *              modulo MLKEM_Q within range (-(MLKEM_Q-1) .. (MLKEM_Q-1))
+ *              into unsigned representatives within range (0..(MLKEM_Q-1)).
+ *
+ * Arguments: c: signed coefficient to be converted
+ ************************************************************/
+static MLK_INLINE uint16_t scalar_signed_to_unsigned_q(int16_t c)
+__contract__(
+  requires(c > -MLKEM_Q && c < MLKEM_Q)
+  ensures(return_value >= 0 && return_value < MLKEM_Q)
+  ensures(return_value == (int32_t)c + (((int32_t)c < 0) * MLKEM_Q)))
+{
+  debug_assert_abs_bound(&c, 1, MLKEM_Q);
+
+  /* Add Q if c is negative, but in constant time */
+  c = ct_sel_int16(c + MLKEM_Q, c, ct_cmask_neg_i16(c));
+
+  /* and therefore cast to uint16_t is safe. */
+  debug_assert_bound(&c, 1, 0, MLKEM_Q);
+  return (uint16_t)c;
+}
+
+MLK_INTERNAL_API
+void poly_reduce(poly *r)
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_N; i++)
+  __loop__(
+    invariant(i <= MLKEM_N)
+    invariant(array_bound(r->coeffs, 0, i, 0, MLKEM_Q)))
+  {
+    /* Barrett reduction, giving signed canonical representative */
+    int16_t t = barrett_reduce(r->coeffs[i]);
+    /* Conditional addition to get unsigned canonical representative */
+    r->coeffs[i] = scalar_signed_to_unsigned_q(t);
+  }
+
+  debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q);
+}
+#else  /* MLK_USE_NATIVE_POLY_REDUCE */
+MLK_INTERNAL_API
+void poly_reduce(poly *r)
+{
+  poly_reduce_native(r->coeffs);
+  debug_assert_bound(r, MLKEM_N, 0, MLKEM_Q);
+}
+#endif /* MLK_USE_NATIVE_POLY_REDUCE */
+
+MLK_INTERNAL_API
+void poly_add(poly *r, const poly *b)
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_N; i++)
+  __loop__(
+    invariant(i <= MLKEM_N)
+    invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
+    invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1])))
+  {
+    r->coeffs[i] = r->coeffs[i] + b->coeffs[i];
+  }
+}
+
+MLK_INTERNAL_API
+void poly_sub(poly *r, const poly *b)
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_N; i++)
+  __loop__(
+    invariant(i <= MLKEM_N)
+    invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
+    invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1])))
+  {
+    r->coeffs[i] = r->coeffs[i] - b->coeffs[i];
+  }
+}
+
+/* Include zeta table unless NTT, invNTT and mulcache computation
+ * have been replaced by native implementations. */
+#if !defined(MLK_USE_NATIVE_POLY_MULCACHE_COMPUTE) || \
+    !defined(MLK_USE_NATIVE_NTT) || !defined(MLK_USE_NATIVE_INTT)
+#include "zetas.inc"
+#endif /* !MLK_USE_NATIVE_POLY_MULCACHE_COMPUTE && \
+    !MLK_USE_NATIVE_NTT && !MLK_USE_NATIVE_INTT */
+
+#if !defined(MLK_USE_NATIVE_POLY_MULCACHE_COMPUTE)
+MLK_INTERNAL_API
+void poly_mulcache_compute(poly_mulcache *x, const poly *a)
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_N / 4; i++)
+  __loop__(
+    invariant(i <= MLKEM_N / 4)
+    invariant(array_abs_bound(x->coeffs, 0, 2 * i, MLKEM_Q)))
+  {
+    x->coeffs[2 * i + 0] = fqmul(a->coeffs[4 * i + 1], zetas[64 + i]);
+    x->coeffs[2 * i + 1] = fqmul(a->coeffs[4 * i + 3], -zetas[64 + i]);
+  }
+
+  /*
+   * This bound is true for the C implementation, but not needed
+   * in the higher level bounds reasoning. It is thus omitted
+   * them from the spec to not unnecessarily constrain native
+   * implementations, but checked here nonetheless.
+   */
+  debug_assert_abs_bound(x, MLKEM_N / 2, MLKEM_Q);
+}
+#else  /* MLK_USE_NATIVE_POLY_MULCACHE_COMPUTE */
+MLK_INTERNAL_API
+void poly_mulcache_compute(poly_mulcache *x, const poly *a)
+{
+  poly_mulcache_compute_native(x->coeffs, a->coeffs);
+  /* Omitting bounds assertion since native implementations may
+   * decide not to use a mulcache. Note that the C backend implementation
+   * of poly_basemul_montgomery_cached() does still include the check. */
+}
+#endif /* MLK_USE_NATIVE_POLY_MULCACHE_COMPUTE */
+
+#if !defined(MLK_USE_NATIVE_NTT)
+/*
+ * Computes a block CT butterflies with a fixed twiddle factor,
+ * using Montgomery multiplication.
+ * Parameters:
+ * - r: Pointer to base of polynomial (_not_ the base of butterfly block)
+ * - root: Twiddle factor to use for the butterfly. This must be in
+ *         Montgomery form and signed canonical.
+ * - start: Offset to the beginning of the butterfly block
+ * - len: Index difference between coefficients subject to a butterfly
+ * - bound: Ghost variable describing coefficient bound: Prior to `start`,
+ *          coefficients must be bound by `bound + MLKEM_Q`. Post `start`,
+ *          they must be bound by `bound`.
+ * When this function returns, output coefficients in the index range
+ * [start, start+2*len) have bound bumped to `bound + MLKEM_Q`.
+ * Example:
+ * - start=8, len=4
+ *   This would compute the following four butterflies
+ *          8     --    12
+ *             9    --     13
+ *                10   --     14
+ *                   11   --     15
+ * - start=4, len=2
+ *   This would compute the following two butterflies
+ *          4 -- 6
+ *             5 -- 7
+ */
+static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta,
+                                unsigned start, unsigned len, int bound)
+__contract__(
+  requires(start < MLKEM_N)
+  requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N)
+  requires(0 <= bound && bound < INT16_MAX - MLKEM_Q)
+  requires(-MLKEM_Q_HALF < zeta && zeta < MLKEM_Q_HALF)
+  requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
+  requires(array_abs_bound(r, 0, start, bound + MLKEM_Q))
+  requires(array_abs_bound(r, start, MLKEM_N, bound))
+  assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
+  ensures(array_abs_bound(r, 0, start + 2*len, bound + MLKEM_Q))
+  ensures(array_abs_bound(r, start + 2 * len, MLKEM_N, bound)))
+{
+  /* `bound` is a ghost variable only needed in the CBMC specification */
+  unsigned j;
+  ((void)bound);
+  for (j = start; j < start + len; j++)
+  __loop__(
+    invariant(start <= j && j <= start + len)
+    /*
+     * Coefficients are updated in strided pairs, so the bounds for the
+     * intermediate states alternate twice between the old and new bound
+     */
+    invariant(array_abs_bound(r, 0,           j,           bound + MLKEM_Q))
+    invariant(array_abs_bound(r, j,           start + len, bound))
+    invariant(array_abs_bound(r, start + len, j + len,     bound + MLKEM_Q))
+    invariant(array_abs_bound(r, j + len,     MLKEM_N,     bound)))
+  {
+    int16_t t;
+    t = fqmul(r[j + len], zeta);
+    r[j + len] = r[j] - t;
+    r[j] = r[j] + t;
+  }
+}
+
+/*
+ *Compute one layer of forward NTT
+ * Parameters:
+ * - r: Pointer to base of polynomial
+ * - len: Stride of butterflies in this layer.
+ * - layer: Ghost variable indicating which layer is being applied.
+ *          Must match `len` via `len == MLKEM_N >> layer`.
+ * Note: `len` could be dropped and computed in the function, but
+ *   we are following the structure of the reference NTT from the
+ *   official Kyber implementation here, merely adding `layer` as
+ *   a ghost variable for the specifications.
+ */
+static void ntt_layer(int16_t r[MLKEM_N], unsigned len, unsigned layer)
+__contract__(
+  requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
+  requires(1 <= layer && layer <= 7 && len == (MLKEM_N >> layer))
+  requires(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q))
+  assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
+  ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 1) * MLKEM_Q)))
+{
+  unsigned start, k;
+  /* `layer` is a ghost variable only needed in the CBMC specification */
+  ((void)layer);
+  /* Twiddle factors for layer n start at index 2^(layer-1) */
+  k = MLKEM_N / (2 * len);
+  for (start = 0; start < MLKEM_N; start += 2 * len)
+  __loop__(
+    invariant(start < MLKEM_N + 2 * len)
+    invariant(k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N)
+    invariant(array_abs_bound(r, 0, start, layer * MLKEM_Q + MLKEM_Q))
+    invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q)))
+  {
+    int16_t zeta = zetas[k++];
+    ntt_butterfly_block(r, zeta, start, len, layer * MLKEM_Q);
+  }
+}
+
+/*
+ * Compute full forward NTT
+ * NOTE: This particular implementation satisfies a much tighter
+ * bound on the output coefficients (5*q) than the contractual one (8*q),
+ * but this is not needed in the calling code. Should we change the
+ * base multiplication strategy to require smaller NTT output bounds,
+ * the proof may need strengthening.
+ */
+
+MLK_INTERNAL_API
+void poly_ntt(poly *p)
+{
+  unsigned len, layer;
+  int16_t *r;
+  debug_assert_abs_bound(p, MLKEM_N, MLKEM_Q);
+  r = p->coeffs;
+
+  for (len = 128, layer = 1; len >= 2; len >>= 1, layer++)
+  __loop__(
+    invariant(1 <= layer && layer <= 8 && len == (MLKEM_N >> layer))
+    invariant(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q)))
+  {
+    ntt_layer(r, len, layer);
+  }
+
+  /* Check the stronger bound */
+  debug_assert_abs_bound(p, MLKEM_N, MLK_NTT_BOUND);
+}
+#else  /* MLK_USE_NATIVE_NTT */
+
+MLK_INTERNAL_API
+void poly_ntt(poly *p)
+{
+  debug_assert_abs_bound(p, MLKEM_N, MLKEM_Q);
+  ntt_native(p->coeffs);
+  debug_assert_abs_bound(p, MLKEM_N, MLK_NTT_BOUND);
+}
+#endif /* MLK_USE_NATIVE_NTT */
+
+#if !defined(MLK_USE_NATIVE_INTT)
+
+/* Compute one layer of inverse NTT */
+static void invntt_layer(int16_t *r, unsigned len, unsigned layer)
+__contract__(
+  requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
+  requires(2 <= len && len <= 128 && 1 <= layer && layer <= 7)
+  requires(len == (1 << (8 - layer)))
+  requires(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))
+  assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
+  ensures(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
+{
+  unsigned start, k;
+  /* `layer` is a ghost variable used only in the specification */
+  ((void)layer);
+  k = MLKEM_N / len - 1;
+  for (start = 0; start < MLKEM_N; start += 2 * len)
+  __loop__(
+    invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))
+    invariant(start <= MLKEM_N && k <= 127)
+    /* Normalised form of k == MLKEM_N / len - 1 - start / (2 * len) */
+    invariant(2 * len * k + start == 2 * MLKEM_N - 2 * len))
+  {
+    unsigned j;
+    int16_t zeta = zetas[k--];
+    for (j = start; j < start + len; j++)
+    __loop__(
+      invariant(start <= j && j <= start + len)
+      invariant(start <= MLKEM_N && k <= 127)
+      invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
+    {
+      int16_t t = r[j];
+      r[j] = barrett_reduce(t + r[j + len]);
+      r[j + len] = r[j + len] - t;
+      r[j + len] = fqmul(r[j + len], zeta);
+    }
+  }
+}
+
+MLK_INTERNAL_API
+void poly_invntt_tomont(poly *p)
+{
+  /*
+   * Scale input polynomial to account for Montgomery factor
+   * and NTT twist. This also brings coefficients down to
+   * absolute value < MLKEM_Q.
+   */
+  unsigned j, len, layer;
+  const int16_t f = 1441; /* check-magic: 1441 == pow(2,32 - 7,MLKEM_Q) */
+  int16_t *r = p->coeffs;
+
+  for (j = 0; j < MLKEM_N; j++)
+  __loop__(
+    invariant(j <= MLKEM_N)
+    invariant(array_abs_bound(r, 0, j, MLKEM_Q)))
+  {
+    r[j] = fqmul(r[j], f);
+  }
+
+  /* Run the invNTT layers */
+  for (len = 2, layer = 7; len <= 128; len <<= 1, layer--)
+  __loop__(
+    invariant(2 <= len && len <= 256 && layer <= 7 && len == (1 << (8 - layer)))
+    invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
+  {
+    invntt_layer(p->coeffs, len, layer);
+  }
+
+  debug_assert_abs_bound(p, MLKEM_N, MLK_INVNTT_BOUND);
+}
+#else  /* MLK_USE_NATIVE_INTT */
+
+MLK_INTERNAL_API
+void poly_invntt_tomont(poly *p)
+{
+  intt_native(p->coeffs);
+  debug_assert_abs_bound(p, MLKEM_N, MLK_INVNTT_BOUND);
+}
+#endif /* MLK_USE_NATIVE_INTT */
+
+#else /* MLK_MULTILEVEL_BUILD_NO_SHARED */
+
+MLK_EMPTY_CU(poly)
+
+#endif /* MLK_MULTILEVEL_BUILD_NO_SHARED */
+
+/* To facilitate single-compilation-unit (SCU) builds, undefine all macros.
+ * Don't modify by hand -- this is auto-generated by scripts/autogen. */
+#undef fqmul
+#undef barrett_reduce
+#undef scalar_signed_to_unsigned_q
+#undef ntt_butterfly_block
+#undef ntt_layer
+#undef invntt_layer
diff --git a/crypto/fipsmodule/ml_kem/mlkem/poly.h b/crypto/fipsmodule/ml_kem/mlkem/poly.h
new file mode 100644
index 0000000000..8b509043d0
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/poly.h
@@ -0,0 +1,340 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef MLK_POLY_H
+#define MLK_POLY_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include "cbmc.h"
+#include "common.h"
+#include "debug.h"
+#include "verify.h"
+
+/* Absolute exclusive upper bound for the output of the inverse NTT */
+#define MLK_INVNTT_BOUND (8 * MLKEM_Q)
+
+/* Absolute exclusive upper bound for the output of the forward NTT */
+#define MLK_NTT_BOUND (8 * MLKEM_Q)
+
+/*
+ * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial
+ * coeffs[0] + X*coeffs[1] + X^2*coeffs[2] + ... + X^{n-1}*coeffs[n-1]
+ */
+#define poly MLK_NAMESPACE(poly)
+typedef struct
+{
+  int16_t coeffs[MLKEM_N];
+} MLK_ALIGN poly;
+
+/*
+ * INTERNAL presentation of precomputed data speeding up
+ * the base multiplication of two polynomials in NTT domain.
+ */
+#define poly_mulcache MLK_NAMESPACE(poly_mulcache)
+typedef struct
+{
+  int16_t coeffs[MLKEM_N >> 1];
+} poly_mulcache;
+
+#define cast_uint16_to_int16 MLK_NAMESPACE(cast_uint16_to_int16)
+/*************************************************
+ * Name:        cast_uint16_to_int16
+ *
+ * Description: Cast uint16 value to int16
+ *
+ * Returns:
+ *   input x in     0 .. 32767: returns value unchanged
+ *   input x in 32768 .. 65535: returns (x - 65536)
+ **************************************************/
+#ifdef CBMC
+#pragma CPROVER check push
+#pragma CPROVER check disable "conversion"
+#endif
+static MLK_ALWAYS_INLINE int16_t cast_uint16_to_int16(uint16_t x)
+{
+  /*
+   * PORTABILITY: This relies on uint16_t -> int16_t
+   * being implemented as the inverse of int16_t -> uint16_t,
+   * which is implementation-defined (C99 6.3.1.3 (3))
+   * CBMC (correctly) fails to prove this conversion is OK,
+   * so we have to suppress that check here
+   */
+  return (int16_t)x;
+}
+#ifdef CBMC
+#pragma CPROVER check pop
+#endif
+
+#define montgomery_reduce MLK_NAMESPACE(montgomery_reduce)
+/*************************************************
+ * Name:        montgomery_reduce
+ *
+ * Description: Generic Montgomery reduction; given a 32-bit integer a, computes
+ *              16-bit integer congruent to a * R^-1 mod q, where R=2^16
+ *
+ * Arguments:   - int32_t a: input integer to be reduced, of absolute value
+ *                smaller or equal to INT32_MAX - 2^15 * MLKEM_Q.
+ *
+ * Returns:     integer congruent to a * R^-1 modulo q, with absolute value
+ *                <= ceil(|a| / 2^16) + (MLKEM_Q + 1)/2
+ *
+ **************************************************/
+static MLK_ALWAYS_INLINE int16_t montgomery_reduce(int32_t a)
+__contract__(
+    requires(a < +(INT32_MAX - (((int32_t)1 << 15) * MLKEM_Q)) &&
+	     a > -(INT32_MAX - (((int32_t)1 << 15) * MLKEM_Q)))
+    /* We don't attempt to express an input-dependent output bound
+     * as the post-condition here. There are two call-sites for this
+     * function:
+     * - The base multiplication: Here, we need no output bound.
+     * - fqmul: Here, we inline this function and prove another spec
+     *          for fqmul which does have a post-condition bound. */
+)
+{
+  /* check-magic: 62209 == unsigned_mod(pow(MLKEM_Q, -1, 2^16), 2^16) */
+  const uint32_t QINV = 62209;
+
+  /*  Compute a*q^{-1} mod 2^16 in unsigned representatives */
+  const uint16_t a_reduced = a & UINT16_MAX;
+  const uint16_t a_inverted = (a_reduced * QINV) & UINT16_MAX;
+
+  /* Lift to signed canonical representative mod 2^16. */
+  const int16_t t = cast_uint16_to_int16(a_inverted);
+
+  int32_t r;
+
+  debug_assert(a < +(INT32_MAX - (((int32_t)1 << 15) * MLKEM_Q)) &&
+               a > -(INT32_MAX - (((int32_t)1 << 15) * MLKEM_Q)));
+
+  r = a - ((int32_t)t * MLKEM_Q);
+
+  /*
+   * PORTABILITY: Right-shift on a signed integer is, strictly-speaking,
+   * implementation-defined for negative left argument. Here,
+   * we assume it's sign-preserving "arithmetic" shift right. (C99 6.5.7 (5))
+   */
+  r = r >> 16;
+  /* Bounds: |r >> 16| <= ceil(|r| / 2^16)
+   *                   <= ceil(|a| / 2^16 + MLKEM_Q / 2)
+   *                   <= ceil(|a| / 2^16) + (MLKEM_Q + 1) / 2
+   *
+   * (Note that |a >> n| = ceil(|a| / 2^16) for negative a)
+   */
+  return (int16_t)r;
+}
+
+#define poly_tomont MLK_NAMESPACE(poly_tomont)
+/*************************************************
+ * Name:        poly_tomont
+ *
+ * Description: Inplace conversion of all coefficients of a polynomial
+ *              from normal domain to Montgomery domain
+ *
+ *              Bounds: Output < q in absolute value.
+ *
+ * Arguments:   - poly *r: pointer to input/output polynomial
+ *
+ * Specification: Internal normalization required in `indcpa_keypair_derand`
+ *                as part of matrix-vector multiplication
+ *                [FIPS 203, Algorithm 13, K-PKE.KeyGen, L18].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void poly_tomont(poly *r)
+__contract__(
+  requires(memory_no_alias(r, sizeof(poly)))
+  assigns(memory_slice(r, sizeof(poly)))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_Q))
+);
+
+#define poly_mulcache_compute MLK_NAMESPACE(poly_mulcache_compute)
+/************************************************************
+ * Name: poly_mulcache_compute
+ *
+ * Description: Computes the mulcache for a polynomial in NTT domain
+ *
+ *              The mulcache of a degree-2 polynomial b := b0 + b1*X
+ *              in Fq[X]/(X^2-zeta) is the value b1*zeta, needed when
+ *              computing products of b in Fq[X]/(X^2-zeta).
+ *
+ *              The mulcache of a polynomial in NTT domain -- which is
+ *              a 128-tuple of degree-2 polynomials in Fq[X]/(X^2-zeta),
+ *              for varying zeta, is the 128-tuple of mulcaches of those
+ *              polynomials.
+ *
+ * Arguments: - x: Pointer to mulcache to be populated
+ *            - a: Pointer to input polynomial
+ *
+ * Specification:
+ * - Caches `b_1 * \gamma` in [FIPS 203, Algorithm 12, BaseCaseMultiply, L1]
+ *
+ ************************************************************/
+/*
+ * NOTE: The default C implementation of this function populates
+ * the mulcache with values in (-q,q), but this is not needed for the
+ * higher level safety proofs, and thus not part of the spec.
+ */
+MLK_INTERNAL_API
+void poly_mulcache_compute(poly_mulcache *x, const poly *a)
+__contract__(
+  requires(memory_no_alias(x, sizeof(poly_mulcache)))
+  requires(memory_no_alias(a, sizeof(poly)))
+  assigns(object_whole(x))
+);
+
+#define poly_reduce MLK_NAMESPACE(poly_reduce)
+/*************************************************
+ * Name:        poly_reduce
+ *
+ * Description: Converts polynomial to _unsigned canonical_ representatives.
+ *
+ *              The input coefficients can be arbitrary integers in int16_t.
+ *              The output coefficients are in [0,1,...,MLKEM_Q-1].
+ *
+ * Arguments:   - poly *r: pointer to input/output polynomial
+ *
+ * Specification: Normalizes on unsigned canoncial representatives
+ *                ahead of calling [FIPS 203, Compress_d, Eq (4.7)].
+ *                This is not made explicit in FIPS 203.
+ *
+ **************************************************/
+/*
+ * NOTE: The semantics of poly_reduce() is different in
+ * the reference implementation, which requires
+ * signed canonical output data. Unsigned canonical
+ * outputs are better suited to the only remaining
+ * use of poly_reduce() in the context of (de)serialization.
+ */
+MLK_INTERNAL_API
+void poly_reduce(poly *r)
+__contract__(
+  requires(memory_no_alias(r, sizeof(poly)))
+  assigns(memory_slice(r, sizeof(poly)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q))
+);
+
+#define poly_add MLK_NAMESPACE(poly_add)
+/************************************************************
+ * Name: poly_add
+ *
+ * Description: Adds two polynomials in place
+ *
+ * Arguments: - r: Pointer to input-output polynomial to be added to.
+ *            - b: Pointer to input polynomial that should be added
+ *                 to r. Must be disjoint from r.
+ *
+ * The coefficients of r and b must be so that the addition does
+ * not overflow. Otherwise, the behaviour of this function is undefined.
+ *
+ * Specification:
+ * - [FIPS 203, 2.4.5, Arithmetic With Polynomials and NTT Representations]
+ * - Used in [FIPS 203, Algorithm 14 (K-PKE.Encrypt), L21]
+ *
+ ************************************************************/
+/*
+ * NOTE: The reference implementation uses a 3-argument poly_add.
+ * We specialize to the accumulator form to avoid reasoning about aliasing.
+ */
+MLK_INTERNAL_API
+void poly_add(poly *r, const poly *b)
+__contract__(
+  requires(memory_no_alias(r, sizeof(poly)))
+  requires(memory_no_alias(b, sizeof(poly)))
+  requires(forall(k0, 0, MLKEM_N, (int32_t) r->coeffs[k0] + b->coeffs[k0] <= INT16_MAX))
+  requires(forall(k1, 0, MLKEM_N, (int32_t) r->coeffs[k1] + b->coeffs[k1] >= INT16_MIN))
+  ensures(forall(k, 0, MLKEM_N, r->coeffs[k] == old(*r).coeffs[k] + b->coeffs[k]))
+  assigns(memory_slice(r, sizeof(poly)))
+);
+
+#define poly_sub MLK_NAMESPACE(poly_sub)
+/*************************************************
+ * Name:        poly_sub
+ *
+ * Description: Subtract two polynomials; no modular reduction is performed
+ *
+ * Arguments: - poly *r: Pointer to input-output polynomial to be added to.
+ *            - const poly *b: Pointer to second input polynomial
+ *
+ * Specification:
+ * - [FIPS 203, 2.4.5, Arithmetic With Polynomials and NTT Representations]
+ * - Used in [FIPS 203, Algorithm 15, K-PKE.Decrypt, L6]
+ *
+ **************************************************/
+/*
+ * NOTE: The reference implementation uses a 3-argument poly_sub.
+ * We specialize to the accumulator form to avoid reasoning about aliasing.
+ */
+MLK_INTERNAL_API
+void poly_sub(poly *r, const poly *b)
+__contract__(
+  requires(memory_no_alias(r, sizeof(poly)))
+  requires(memory_no_alias(b, sizeof(poly)))
+  requires(forall(k0, 0, MLKEM_N, (int32_t) r->coeffs[k0] - b->coeffs[k0] <= INT16_MAX))
+  requires(forall(k1, 0, MLKEM_N, (int32_t) r->coeffs[k1] - b->coeffs[k1] >= INT16_MIN))
+  ensures(forall(k, 0, MLKEM_N, r->coeffs[k] == old(*r).coeffs[k] - b->coeffs[k]))
+  assigns(object_whole(r))
+);
+
+#define poly_ntt MLK_NAMESPACE(poly_ntt)
+/*************************************************
+ * Name:        poly_ntt
+ *
+ * Description: Computes negacyclic number-theoretic transform (NTT) of
+ *              a polynomial in place.
+ *
+ *              The input is assumed to be in normal order and
+ *              coefficient-wise bound by MLKEM_Q in absolute value.
+ *
+ *              The output polynomial is in bitreversed order, and
+ *              coefficient-wise bound by MLK_NTT_BOUND in absolute value.
+ *
+ *              (NOTE: Sometimes the input to the NTT is actually smaller,
+ *               which gives better bounds.)
+ *
+ * Arguments:   - poly *p: pointer to in/output polynomial
+ *
+ * Specification: Implements [FIPS 203, Algorithm 9, NTT]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void poly_ntt(poly *r)
+__contract__(
+  requires(memory_no_alias(r, sizeof(poly)))
+  requires(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_Q))
+  assigns(memory_slice(r, sizeof(poly)))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLK_NTT_BOUND))
+);
+
+#define poly_invntt_tomont MLK_NAMESPACE(poly_invntt_tomont)
+/*************************************************
+ * Name:        poly_invntt_tomont
+ *
+ * Description: Computes inverse of negacyclic number-theoretic transform (NTT)
+ *              of a polynomial in place;
+ *              inputs assumed to be in bitreversed order, output in normal
+ *              order
+ *
+ *              The input is assumed to be in bitreversed order, and can
+ *              have arbitrary coefficients in int16_t.
+ *
+ *              The output polynomial is in normal order, and
+ *              coefficient-wise bound by MLK_INVNTT_BOUND in absolute value.
+ *
+ * Arguments:   - uint16_t *a: pointer to in/output polynomial
+ *
+ * Specification: Implements composition of [FIPS 203, Algorithm 10, NTT^{-1}]
+ *                and elementwise modular multiplication with a suitable
+ *                Montgomery factor introduced during the base multiplication.
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void poly_invntt_tomont(poly *r)
+__contract__(
+  requires(memory_no_alias(r, sizeof(poly)))
+  assigns(memory_slice(r, sizeof(poly)))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLK_INVNTT_BOUND))
+);
+
+#endif /* MLK_POLY_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/poly_k.c b/crypto/fipsmodule/ml_kem/mlkem/poly_k.c
new file mode 100644
index 0000000000..87f042e5f0
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/poly_k.c
@@ -0,0 +1,398 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#include <stdint.h>
+#include <string.h>
+
+#include "arith_backend.h"
+#include "compress.h"
+#include "debug.h"
+#include "poly_k.h"
+#include "sampling.h"
+#include "symmetric.h"
+
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define poly_cbd_eta1 MLK_NAMESPACE_K(poly_cbd_eta1)
+#define poly_cbd_eta2 MLK_NAMESPACE_K(poly_cbd_eta2)
+/* End of static namespacing */
+
+MLK_INTERNAL_API
+void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
+                         const polyvec *a)
+{
+  unsigned i;
+  debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, MLKEM_Q);
+
+  for (i = 0; i < MLKEM_K; i++)
+  {
+    poly_compress_du(r + i * MLKEM_POLYCOMPRESSEDBYTES_DU, &a->vec[i]);
+  }
+}
+
+MLK_INTERNAL_API
+void polyvec_decompress_du(polyvec *r,
+                           const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU])
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_K; i++)
+  {
+    poly_decompress_du(&r->vec[i], a + i * MLKEM_POLYCOMPRESSEDBYTES_DU);
+  }
+
+  debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, MLKEM_Q);
+}
+
+MLK_INTERNAL_API
+void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a)
+{
+  unsigned i;
+  debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, MLKEM_Q);
+
+  for (i = 0; i < MLKEM_K; i++)
+  {
+    poly_tobytes(r + i * MLKEM_POLYBYTES, &a->vec[i]);
+  }
+}
+
+MLK_INTERNAL_API
+void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES])
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_K; i++)
+  {
+    poly_frombytes(&r->vec[i], a + i * MLKEM_POLYBYTES);
+  }
+
+  debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, MLKEM_UINT12_LIMIT);
+}
+
+MLK_INTERNAL_API
+void polyvec_ntt(polyvec *r)
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_K; i++)
+  {
+    poly_ntt(&r->vec[i]);
+  }
+
+  debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, MLK_NTT_BOUND);
+}
+
+MLK_INTERNAL_API
+void polyvec_invntt_tomont(polyvec *r)
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_K; i++)
+  {
+    poly_invntt_tomont(&r->vec[i]);
+  }
+
+  debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, MLK_INVNTT_BOUND);
+}
+
+#if !defined(MLK_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED)
+MLK_INTERNAL_API
+void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
+                                           const polyvec *b,
+                                           const polyvec_mulcache *b_cache)
+{
+  unsigned i;
+  debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, MLKEM_UINT12_LIMIT);
+  for (i = 0; i < MLKEM_N / 2; i++)
+  __loop__(invariant(i <= MLKEM_N / 2))
+  {
+    unsigned k;
+    int32_t t[2] = {0};
+    for (k = 0; k < MLKEM_K; k++)
+    __loop__(
+      invariant(k <= MLKEM_K &&
+         t[0] <=    (int32_t) k * 2 * MLKEM_UINT12_LIMIT * 32768  &&
+         t[0] >= - ((int32_t) k * 2 * MLKEM_UINT12_LIMIT * 32768) &&
+         t[1] <=   ((int32_t) k * 2 * MLKEM_UINT12_LIMIT * 32768) &&
+         t[1] >= - ((int32_t) k * 2 * MLKEM_UINT12_LIMIT * 32768)))
+    {
+      t[0] += (int32_t)a->vec[k].coeffs[2 * i + 1] * b_cache->vec[k].coeffs[i];
+      t[0] += (int32_t)a->vec[k].coeffs[2 * i] * b->vec[k].coeffs[2 * i];
+      t[1] += (int32_t)a->vec[k].coeffs[2 * i] * b->vec[k].coeffs[2 * i + 1];
+      t[1] += (int32_t)a->vec[k].coeffs[2 * i + 1] * b->vec[k].coeffs[2 * i];
+    }
+    r->coeffs[2 * i + 0] = montgomery_reduce(t[0]);
+    r->coeffs[2 * i + 1] = montgomery_reduce(t[1]);
+  }
+}
+
+#else /* !MLK_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */
+MLK_INTERNAL_API
+void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
+                                           const polyvec *b,
+                                           const polyvec_mulcache *b_cache)
+{
+  debug_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, MLKEM_UINT12_LIMIT);
+  /* Omitting bounds assertion for cache since native implementations may
+   * decide not to use a mulcache. Note that the C backend implementation
+   * of poly_basemul_montgomery_cached() does still include the check. */
+#if MLKEM_K == 2
+  polyvec_basemul_acc_montgomery_cached_k2_native(r->coeffs, (const int16_t *)a,
+                                                  (const int16_t *)b,
+                                                  (const int16_t *)b_cache);
+#elif MLKEM_K == 3
+  polyvec_basemul_acc_montgomery_cached_k3_native(r->coeffs, (const int16_t *)a,
+                                                  (const int16_t *)b,
+                                                  (const int16_t *)b_cache);
+#elif MLKEM_K == 4
+  polyvec_basemul_acc_montgomery_cached_k4_native(r->coeffs, (const int16_t *)a,
+                                                  (const int16_t *)b,
+                                                  (const int16_t *)b_cache);
+#endif
+}
+#endif /* MLK_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */
+
+MLK_INTERNAL_API
+void polyvec_mulcache_compute(polyvec_mulcache *x, const polyvec *a)
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_K; i++)
+  {
+    poly_mulcache_compute(&x->vec[i], &a->vec[i]);
+  }
+}
+
+MLK_INTERNAL_API
+void polyvec_reduce(polyvec *r)
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_K; i++)
+  {
+    poly_reduce(&r->vec[i]);
+  }
+
+  debug_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, MLKEM_Q);
+}
+
+MLK_INTERNAL_API
+void polyvec_add(polyvec *r, const polyvec *b)
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_K; i++)
+  {
+    poly_add(&r->vec[i], &b->vec[i]);
+  }
+}
+
+MLK_INTERNAL_API
+void polyvec_tomont(polyvec *r)
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_K; i++)
+  {
+    poly_tomont(&r->vec[i]);
+  }
+
+  debug_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, MLKEM_Q);
+}
+
+
+/*************************************************
+ * Name:        poly_cbd_eta1
+ *
+ * Description: Given an array of uniformly random bytes, compute
+ *              polynomial with coefficients distributed according to
+ *              a centered binomial distribution with parameter MLKEM_ETA1.
+ *
+ * Arguments:   - poly *r: pointer to output polynomial
+ *              - const uint8_t *buf: pointer to input byte array
+ *
+ * Specification: Implements [FIPS 203, Algorithm 8, SamplePolyCBD_eta1], where
+ *                eta1 is specified per level in [FIPS 203, Table 2]
+ *                and represented as MLKEM_ETA1 here.
+ *
+ **************************************************/
+static MLK_INLINE void poly_cbd_eta1(
+    poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
+__contract__(
+  requires(memory_no_alias(r, sizeof(poly)))
+  requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4))
+  assigns(memory_slice(r, sizeof(poly)))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1 + 1))
+)
+{
+#if MLKEM_ETA1 == 2
+  poly_cbd2(r, buf);
+#elif MLKEM_ETA1 == 3
+  poly_cbd3(r, buf);
+#else
+#error "Invalid value of MLKEM_ETA1"
+#endif
+}
+
+MLK_INTERNAL_API
+void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
+                           const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0,
+                           uint8_t nonce1, uint8_t nonce2, uint8_t nonce3)
+{
+  MLK_ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4];
+  MLK_ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4];
+  MLK_ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4];
+  MLK_ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4];
+  MLK_ALIGN uint8_t extkey0[MLKEM_SYMBYTES + 1];
+  MLK_ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1];
+  MLK_ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1];
+  MLK_ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1];
+  memcpy(extkey0, seed, MLKEM_SYMBYTES);
+  memcpy(extkey1, seed, MLKEM_SYMBYTES);
+  memcpy(extkey2, seed, MLKEM_SYMBYTES);
+  memcpy(extkey3, seed, MLKEM_SYMBYTES);
+  extkey0[MLKEM_SYMBYTES] = nonce0;
+  extkey1[MLKEM_SYMBYTES] = nonce1;
+  extkey2[MLKEM_SYMBYTES] = nonce2;
+  extkey3[MLKEM_SYMBYTES] = nonce3;
+  prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3);
+  poly_cbd_eta1(r0, buf0);
+  poly_cbd_eta1(r1, buf1);
+  poly_cbd_eta1(r2, buf2);
+  poly_cbd_eta1(r3, buf3);
+
+  debug_assert_abs_bound(r0, MLKEM_N, MLKEM_ETA1 + 1);
+  debug_assert_abs_bound(r1, MLKEM_N, MLKEM_ETA1 + 1);
+  debug_assert_abs_bound(r2, MLKEM_N, MLKEM_ETA1 + 1);
+  debug_assert_abs_bound(r3, MLKEM_N, MLKEM_ETA1 + 1);
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  ct_zeroize(buf0, sizeof(buf0));
+  ct_zeroize(buf1, sizeof(buf1));
+  ct_zeroize(buf2, sizeof(buf2));
+  ct_zeroize(buf3, sizeof(buf3));
+  ct_zeroize(extkey0, sizeof(extkey0));
+  ct_zeroize(extkey1, sizeof(extkey1));
+  ct_zeroize(extkey2, sizeof(extkey2));
+  ct_zeroize(extkey3, sizeof(extkey3));
+}
+
+#if MLKEM_K == 2 || MLKEM_K == 4
+/*************************************************
+ * Name:        poly_cbd_eta2
+ *
+ * Description: Given an array of uniformly random bytes, compute
+ *              polynomial with coefficients distributed according to
+ *              a centered binomial distribution with parameter MLKEM_ETA2.
+ *
+ * Arguments:   - poly *r: pointer to output polynomial
+ *              - const uint8_t *buf: pointer to input byte array
+ *
+ * Specification: Implements [FIPS 203, Algorithm 8, SamplePolyCBD_eta2], where
+ *                eta2 is specified per level in [FIPS 203, Table 2]
+ *                and represented as MLKEM_ETA2 here.
+ *
+ **************************************************/
+static MLK_INLINE void poly_cbd_eta2(
+    poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
+__contract__(
+  requires(memory_no_alias(r, sizeof(poly)))
+  requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4))
+  assigns(memory_slice(r, sizeof(poly)))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1)))
+{
+#if MLKEM_ETA2 == 2
+  poly_cbd2(r, buf);
+#else
+#error "Invalid value of MLKEM_ETA2"
+#endif
+}
+
+MLK_INTERNAL_API
+void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
+                        uint8_t nonce)
+{
+  MLK_ALIGN uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4];
+  MLK_ALIGN uint8_t extkey[MLKEM_SYMBYTES + 1];
+
+  memcpy(extkey, seed, MLKEM_SYMBYTES);
+  extkey[MLKEM_SYMBYTES] = nonce;
+  prf_eta2(buf, extkey);
+
+  poly_cbd_eta2(r, buf);
+
+  debug_assert_abs_bound(r, MLKEM_N, MLKEM_ETA1 + 1);
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  ct_zeroize(buf, sizeof(buf));
+  ct_zeroize(extkey, sizeof(extkey));
+}
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
+
+
+#if MLKEM_K == 2
+MLK_INTERNAL_API
+void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
+                              const uint8_t seed[MLKEM_SYMBYTES],
+                              uint8_t nonce0, uint8_t nonce1, uint8_t nonce2,
+                              uint8_t nonce3)
+{
+#if MLKEM_ETA2 >= MLKEM_ETA1
+#error poly_getnoise_eta1122_4x assumes MLKEM_ETA1 > MLKEM_ETA2
+#endif
+  MLK_ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4];
+  MLK_ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4];
+  /* Pad to larger buffer */
+  MLK_ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4];
+  MLK_ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4];
+
+  MLK_ALIGN uint8_t extkey0[MLKEM_SYMBYTES + 1];
+  MLK_ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1];
+  MLK_ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1];
+  MLK_ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1];
+
+  memcpy(extkey0, seed, MLKEM_SYMBYTES);
+  memcpy(extkey1, seed, MLKEM_SYMBYTES);
+  memcpy(extkey2, seed, MLKEM_SYMBYTES);
+  memcpy(extkey3, seed, MLKEM_SYMBYTES);
+  extkey0[MLKEM_SYMBYTES] = nonce0;
+  extkey1[MLKEM_SYMBYTES] = nonce1;
+  extkey2[MLKEM_SYMBYTES] = nonce2;
+  extkey3[MLKEM_SYMBYTES] = nonce3;
+
+  /* On systems with fast batched Keccak, we use 4-fold batched PRF,
+   * even though that means generating more random data in buf2 and buf3
+   * than necessary. */
+#if !defined(FIPS202_X4_DEFAULT_IMPLEMENTATION)
+  prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3);
+#else  /* FIPS202_X4_DEFAULT_IMPLEMENTATION */
+  prf_eta1(buf0, extkey0);
+  prf_eta1(buf1, extkey1);
+  prf_eta2(buf2, extkey2);
+  prf_eta2(buf3, extkey3);
+#endif /* FIPS202_X4_DEFAULT_IMPLEMENTATION */
+
+  poly_cbd_eta1(r0, buf0);
+  poly_cbd_eta1(r1, buf1);
+  poly_cbd_eta2(r2, buf2);
+  poly_cbd_eta2(r3, buf3);
+
+  debug_assert_abs_bound(r0, MLKEM_N, MLKEM_ETA1 + 1);
+  debug_assert_abs_bound(r1, MLKEM_N, MLKEM_ETA1 + 1);
+  debug_assert_abs_bound(r2, MLKEM_N, MLKEM_ETA2 + 1);
+  debug_assert_abs_bound(r3, MLKEM_N, MLKEM_ETA2 + 1);
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  ct_zeroize(buf0, sizeof(buf0));
+  ct_zeroize(buf1, sizeof(buf1));
+  ct_zeroize(buf2, sizeof(buf2));
+  ct_zeroize(buf3, sizeof(buf3));
+  ct_zeroize(extkey0, sizeof(extkey0));
+  ct_zeroize(extkey1, sizeof(extkey1));
+  ct_zeroize(extkey2, sizeof(extkey2));
+  ct_zeroize(extkey3, sizeof(extkey3));
+}
+#endif /* MLKEM_K == 2 */
+
+/* To facilitate single-compilation-unit (SCU) builds, undefine all macros.
+ * Don't modify by hand -- this is auto-generated by scripts/autogen. */
+#undef poly_cbd_eta1
+#undef poly_cbd_eta2
diff --git a/crypto/fipsmodule/ml_kem/mlkem/poly_k.h b/crypto/fipsmodule/ml_kem/mlkem/poly_k.h
new file mode 100644
index 0000000000..8be2d1bc38
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/poly_k.h
@@ -0,0 +1,678 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef MLK_POLY_K_H
+#define MLK_POLY_K_H
+
+#include <stdint.h>
+#include "common.h"
+#include "compress.h"
+#include "poly.h"
+
+#define polyvec MLK_NAMESPACE_K(polyvec)
+typedef struct
+{
+  poly vec[MLKEM_K];
+} MLK_ALIGN polyvec;
+
+#define polyvec_mulcache MLK_NAMESPACE_K(polyvec_mulcache)
+typedef struct
+{
+  poly_mulcache vec[MLKEM_K];
+} polyvec_mulcache;
+
+#define poly_compress_du MLK_NAMESPACE_K(poly_compress_du)
+/*************************************************
+ * Name:        poly_compress_du
+ *
+ * Description: Compression (du bits) and subsequent serialization of a
+ *              polynomial
+ *
+ * Arguments:   - uint8_t *r: pointer to output byte array
+ *                  (of length MLKEM_POLYCOMPRESSEDBYTES_DU bytes)
+ *              - const poly *a: pointer to input polynomial
+ *                  Coefficients must be unsigned canonical,
+ *                  i.e. in [0,1,..,MLKEM_Q-1].
+ *
+ * Specification: Implements `ByteEncode_{d_u} (Compress_{d_u} (u))`
+ *                in [FIPS 203, Algorithm 14 (K-PKE.Encrypt), L22],
+ *                with level-specific d_u defined in [FIPS 203, Table 2],
+ *                and given by MLKEM_DU here.
+ *
+ **************************************************/
+static MLK_INLINE void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU],
+                                        const poly *a)
+__contract__(
+  requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU))
+  requires(memory_no_alias(a, sizeof(poly)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q))
+  assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU)))
+{
+#if MLKEM_DU == 10
+  poly_compress_d10(r, a);
+#elif MLKEM_DU == 11
+  poly_compress_d11(r, a);
+#else
+#error "Invalid value of MLKEM_DU"
+#endif
+}
+
+#define poly_decompress_du MLK_NAMESPACE_K(poly_decompress_du)
+/*************************************************
+ * Name:        poly_decompress_du
+ *
+ * Description: De-serialization and subsequent decompression (du bits) of a
+ *              polynomial; approximate inverse of poly_compress_du
+ *
+ * Arguments:   - poly *r: pointer to output polynomial
+ *              - const uint8_t *a: pointer to input byte array
+ *                   (of length MLKEM_POLYCOMPRESSEDBYTES_DU bytes)
+ *
+ * Upon return, the coefficients of the output polynomial are unsigned-canonical
+ * (non-negative and smaller than MLKEM_Q).
+ *
+ * Specification: Implements `Decompress_{d_u} (ByteDecode_{d_u} (u))`
+ *                in [FIPS 203, Algorithm 15 (K-PKE.Decrypt), L3].
+ *                with level-specific d_u defined in [FIPS 203, Table 2],
+ *                and given by MLKEM_DU here.
+ *
+ **************************************************/
+static MLK_INLINE void poly_decompress_du(
+    poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
+__contract__(
+  requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU))
+  requires(memory_no_alias(r, sizeof(poly)))
+  assigns(memory_slice(r, sizeof(poly)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)))
+{
+#if MLKEM_DU == 10
+  poly_decompress_d10(r, a);
+#elif MLKEM_DU == 11
+  poly_decompress_d11(r, a);
+#else
+#error "Invalid value of MLKEM_DU"
+#endif
+}
+
+#define poly_compress_dv MLK_NAMESPACE_K(poly_compress_dv)
+/*************************************************
+ * Name:        poly_compress_dv
+ *
+ * Description: Compression (dv bits) and subsequent serialization of a
+ *              polynomial
+ *
+ * Arguments:   - uint8_t *r: pointer to output byte array
+ *                  (of length MLKEM_POLYCOMPRESSEDBYTES_DV bytes)
+ *              - const poly *a: pointer to input polynomial
+ *                  Coefficients must be unsigned canonical,
+ *                  i.e. in [0,1,..,MLKEM_Q-1].
+ *
+ * Specification: Implements `ByteEncode_{d_v} (Compress_{d_v} (v))`
+ *                in [FIPS 203, Algorithm 14 (K-PKE.Encrypt), L23].
+ *                with level-specific d_v defined in [FIPS 203, Table 2],
+ *                and given by MLKEM_DV here.
+ *
+ **************************************************/
+static MLK_INLINE void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV],
+                                        const poly *a)
+__contract__(
+  requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV))
+  requires(memory_no_alias(a, sizeof(poly)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q))
+  assigns(object_whole(r)))
+{
+#if MLKEM_DV == 4
+  poly_compress_d4(r, a);
+#elif MLKEM_DV == 5
+  poly_compress_d5(r, a);
+#else
+#error "Invalid value of MLKEM_DV"
+#endif
+}
+
+
+#define poly_decompress_dv MLK_NAMESPACE_K(poly_decompress_dv)
+/*************************************************
+ * Name:        poly_decompress_dv
+ *
+ * Description: De-serialization and subsequent decompression (dv bits) of a
+ *              polynomial; approximate inverse of poly_compress
+ *
+ * Arguments:   - poly *r: pointer to output polynomial
+ *              - const uint8_t *a: pointer to input byte array
+ *                  (of length MLKEM_POLYCOMPRESSEDBYTES_DV bytes)
+ *
+ * Upon return, the coefficients of the output polynomial are unsigned-canonical
+ * (non-negative and smaller than MLKEM_Q).
+ *
+ * Specification: Implements `Decompress_{d_v} (ByteDecode_{d_v} (v))`
+ *                in [FIPS 203, Algorithm 15 (K-PKE.Decrypt), L4].
+ *                with level-specific d_v defined in [FIPS 203, Table 2],
+ *                and given by MLKEM_DV here.
+ *
+ **************************************************/
+static MLK_INLINE void poly_decompress_dv(
+    poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
+__contract__(
+  requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV))
+  requires(memory_no_alias(r, sizeof(poly)))
+  assigns(object_whole(r))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)))
+{
+#if MLKEM_DV == 4
+  poly_decompress_d4(r, a);
+#elif MLKEM_DV == 5
+  poly_decompress_d5(r, a);
+#else
+#error "Invalid value of MLKEM_DV"
+#endif
+}
+
+#define polyvec_compress_du MLK_NAMESPACE_K(polyvec_compress_du)
+/*************************************************
+ * Name:        polyvec_compress_du
+ *
+ * Description: Compress and serialize vector of polynomials
+ *
+ * Arguments:   - uint8_t *r: pointer to output byte array
+ *                            (needs space for MLKEM_POLYVECCOMPRESSEDBYTES_DU)
+ *              - const polyvec *a: pointer to input vector of polynomials.
+ *                                  Coefficients must be unsigned canonical,
+ *                                  i.e. in [0,1,..,MLKEM_Q-1].
+ *
+ * Specification: Implements `ByteEncode_{d_u} (Compress_{d_u} (u))`
+ *                in [FIPS 203, Algorithm 14 (K-PKE.Encrypt), L22].
+ *                with level-specific d_u defined in [FIPS 203, Table 2],
+ *                and given by MLKEM_DU here.
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
+                         const polyvec *a)
+__contract__(
+  requires(memory_no_alias(r, MLKEM_POLYVECCOMPRESSEDBYTES_DU))
+  requires(memory_no_alias(a, sizeof(polyvec)))
+  requires(forall(k0, 0, MLKEM_K,
+         array_bound(a->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q)))
+  assigns(object_whole(r))
+);
+
+#define polyvec_decompress_du MLK_NAMESPACE_K(polyvec_decompress_du)
+/*************************************************
+ * Name:        polyvec_decompress_du
+ *
+ * Description: De-serialize and decompress vector of polynomials;
+ *              approximate inverse of polyvec_compress_du
+ *
+ * Arguments:   - polyvec *r:       pointer to output vector of polynomials.
+ *                Output will have coefficients normalized to [0,..,q-1].
+ *              - const uint8_t *a: pointer to input byte array
+ *                                  (of length MLKEM_POLYVECCOMPRESSEDBYTES_DU)
+ *
+ * Specification: Implements `Decompress_{d_u} (ByteDecode_{d_u} (u))`
+ *                in [FIPS 203, Algorithm 15 (K-PKE.Decrypt), L3].
+ *                with level-specific d_u defined in [FIPS 203, Table 2],
+ *                and given by MLKEM_DU here.
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void polyvec_decompress_du(polyvec *r,
+                           const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU])
+__contract__(
+  requires(memory_no_alias(a, MLKEM_POLYVECCOMPRESSEDBYTES_DU))
+  requires(memory_no_alias(r, sizeof(polyvec)))
+  assigns(object_whole(r))
+  ensures(forall(k0, 0, MLKEM_K,
+         array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q)))
+);
+
+#define polyvec_tobytes MLK_NAMESPACE_K(polyvec_tobytes)
+/*************************************************
+ * Name:        polyvec_tobytes
+ *
+ * Description: Serialize vector of polynomials
+ *
+ * Arguments:   - uint8_t *r: pointer to output byte array
+ *                            (needs space for MLKEM_POLYVECBYTES)
+ *              - const polyvec *a: pointer to input vector of polynomials
+ *                  Each polynomial must have coefficients in [0,..,q-1].
+ *
+ * Specification: Implements ByteEncode_12 [FIPS 203, Algorithm 5].
+ *                Extended to vectors as per
+ *                [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *                and [FIPS 203, 2.4.6, Matrices and Vectors]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a)
+__contract__(
+  requires(memory_no_alias(a, sizeof(polyvec)))
+  requires(memory_no_alias(r, MLKEM_POLYVECBYTES))
+  requires(forall(k0, 0, MLKEM_K,
+         array_bound(a->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q)))
+  assigns(object_whole(r))
+);
+
+#define polyvec_frombytes MLK_NAMESPACE_K(polyvec_frombytes)
+/*************************************************
+ * Name:        polyvec_frombytes
+ *
+ * Description: De-serialize vector of polynomials;
+ *              inverse of polyvec_tobytes
+ *
+ * Arguments:   - const polyvec *a: pointer to output vector of polynomials
+ *                 (of length MLKEM_POLYVECBYTES). Output will have coefficients
+ *                 normalized in [0..4095].
+ *              - uint8_t *r: pointer to input byte array
+ *
+ * Specification: Implements ByteDecode_12 [FIPS 203, Algorithm 6].
+ *                Extended to vectors as per
+ *                [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *                and [FIPS 203, 2.4.6, Matrices and Vectors]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES])
+__contract__(
+  requires(memory_no_alias(r, sizeof(polyvec)))
+  requires(memory_no_alias(a, MLKEM_POLYVECBYTES))
+  assigns(object_whole(r))
+  ensures(forall(k0, 0, MLKEM_K,
+        array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_UINT12_LIMIT)))
+);
+
+#define polyvec_ntt MLK_NAMESPACE_K(polyvec_ntt)
+/*************************************************
+ * Name:        polyvec_ntt
+ *
+ * Description: Apply forward NTT to all elements of a vector of polynomials.
+ *
+ *              The input is assumed to be in normal order and
+ *              coefficient-wise bound by MLKEM_Q in absolute value.
+ *
+ *              The output polynomial is in bitreversed order, and
+ *              coefficient-wise bound by MLK_NTT_BOUND in absolute value.
+ *
+ * Arguments:   - polyvec *r: pointer to in/output vector of polynomials
+ *
+ * Specification:
+ * - Implements [FIPS 203, Algorithm 9, NTT]
+ * - Extended to vectors as per [FIPS 203, 2.4.6, Matrices and Vectors]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void polyvec_ntt(polyvec *r)
+__contract__(
+  requires(memory_no_alias(r, sizeof(polyvec)))
+  requires(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, MLKEM_Q)))
+  assigns(object_whole(r))
+  ensures(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, MLK_NTT_BOUND)))
+);
+
+#define polyvec_invntt_tomont MLK_NAMESPACE_K(polyvec_invntt_tomont)
+/*************************************************
+ * Name:        polyvec_invntt_tomont
+ *
+ * Description: Apply inverse NTT to all elements of a vector of polynomials
+ *              and multiply by Montgomery factor 2^16
+ *
+ *              The input is assumed to be in bitreversed order, and can
+ *              have arbitrary coefficients in int16_t.
+ *
+ *              The output polynomial is in normal order, and
+ *              coefficient-wise bound by MLK_INVNTT_BOUND in absolute value.
+ *
+ * Arguments:   - polyvec *r: pointer to in/output vector of polynomials
+ *
+ * Specification:
+ * - Implements [FIPS 203, Algorithm 10, NTT^{-1}]
+ * - Extended to vectors as per [FIPS 203, 2.4.6, Matrices and Vectors]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void polyvec_invntt_tomont(polyvec *r)
+__contract__(
+  requires(memory_no_alias(r, sizeof(polyvec)))
+  assigns(object_whole(r))
+  ensures(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, MLK_INVNTT_BOUND)))
+);
+
+#define polyvec_basemul_acc_montgomery_cached \
+  MLK_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached)
+/*************************************************
+ * Name:        polyvec_basemul_acc_montgomery_cached
+ *
+ * Description: Scalar product of two vectors of polynomials in NTT domain,
+ *              using mulcache for second operand.
+ *
+ *              Bounds:
+ *              - Every coefficient of a is assumed to be in [0..4095]
+ *              - No bounds guarantees for the coefficients in the result.
+ *
+ * Arguments:   - poly *r: pointer to output polynomial
+ *              - const polyvec *a: pointer to first input polynomial vector
+ *              - const polyvec *b: pointer to second input polynomial vector
+ *              - const polyvec_mulcache *b_cache: pointer to mulcache
+ *                  for second input polynomial vector. Can be computed
+ *                  via polyvec_mulcache_compute().
+ *
+ * Specification: Implements
+ *                - [FIPS 203, Section 2.4.7, Eq (2.14)]
+ *                - [FIPS 203, Algorithm 11, MultiplyNTTs]
+ *                - [FIPS 203, Algorithm 12, BaseCaseMultiply]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
+                                           const polyvec *b,
+                                           const polyvec_mulcache *b_cache)
+__contract__(
+  requires(memory_no_alias(r, sizeof(poly)))
+  requires(memory_no_alias(a, sizeof(polyvec)))
+  requires(memory_no_alias(b, sizeof(polyvec)))
+  requires(memory_no_alias(b_cache, sizeof(polyvec_mulcache)))
+  requires(forall(k1, 0, MLKEM_K,
+     array_bound(a->vec[k1].coeffs, 0, MLKEM_N, 0, MLKEM_UINT12_LIMIT)))
+  assigns(object_whole(r))
+);
+
+#define polyvec_mulcache_compute MLK_NAMESPACE_K(polyvec_mulcache_compute)
+/************************************************************
+ * Name: polyvec_mulcache_compute
+ *
+ * Description: Computes the mulcache for a vector of polynomials in NTT domain
+ *
+ *              The mulcache of a degree-2 polynomial b := b0 + b1*X
+ *              in Fq[X]/(X^2-zeta) is the value b1*zeta, needed when
+ *              computing products of b in Fq[X]/(X^2-zeta).
+ *
+ *              The mulcache of a polynomial in NTT domain -- which is
+ *              a 128-tuple of degree-2 polynomials in Fq[X]/(X^2-zeta),
+ *              for varying zeta, is the 128-tuple of mulcaches of those
+ *              polynomials.
+ *
+ *              The mulcache of a vector of polynomials is the vector
+ *              of mulcaches of its entries.
+ *
+ * Arguments: - x: Pointer to mulcache to be populated
+ *            - a: Pointer to input polynomial vector
+ *
+ * Specification:
+ * - Caches `b_1 * \gamma` in [FIPS 203, Algorithm 12, BaseCaseMultiply, L1]
+ *
+ ************************************************************/
+/*
+ * NOTE: The default C implementation of this function populates
+ * the mulcache with values in (-q,q), but this is not needed for the
+ * higher level safety proofs, and thus not part of the spec.
+ */
+MLK_INTERNAL_API
+void polyvec_mulcache_compute(polyvec_mulcache *x, const polyvec *a)
+__contract__(
+  requires(memory_no_alias(x, sizeof(polyvec_mulcache)))
+  requires(memory_no_alias(a, sizeof(polyvec)))
+  assigns(object_whole(x))
+);
+
+#define polyvec_reduce MLK_NAMESPACE_K(polyvec_reduce)
+/*************************************************
+ * Name:        polyvec_reduce
+ *
+ * Description: Applies Barrett reduction to each coefficient
+ *              of each element of a vector of polynomials;
+ *              for details of the Barrett reduction see comments in reduce.c
+ *
+ * Arguments:   - polyvec *r: pointer to input/output polynomial
+ *
+ * Specification: Normalizes on unsigned canoncial representatives
+ *                ahead of calling [FIPS 203, Compress_d, Eq (4.7)].
+ *                This is not made explicit in FIPS 203.
+ *
+ **************************************************/
+/*
+ * NOTE: The semantics of polyvec_reduce() is different in
+ *       the reference implementation, which requires
+ *       signed canonical output data. Unsigned canonical
+ *       outputs are better suited to the only remaining
+ *       use of poly_reduce() in the context of (de)serialization.
+ */
+MLK_INTERNAL_API
+void polyvec_reduce(polyvec *r)
+__contract__(
+  requires(memory_no_alias(r, sizeof(polyvec)))
+  assigns(object_whole(r))
+  ensures(forall(k0, 0, MLKEM_K,
+    array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q)))
+);
+
+#define polyvec_add MLK_NAMESPACE_K(polyvec_add)
+/*************************************************
+ * Name:        polyvec_add
+ *
+ * Description: Add vectors of polynomials
+ *
+ * Arguments: - polyvec *r: pointer to input-output vector of polynomials to be
+ *              added to
+ *            - const polyvec *b: pointer to second input vector of polynomials
+ *
+ * The coefficients of r and b must be so that the addition does
+ * not overflow. Otherwise, the behaviour of this function is undefined.
+ *
+ * The coefficients returned in *r are in int16_t which is sufficient
+ * to prove type-safety of calling units. Therefore, no stronger
+ * ensures clause is required on this function.
+ *
+ * Specification:
+ * - [FIPS 203, 2.4.5, Arithmetic With Polynomials and NTT Representations]
+ * - Used in [FIPS 203, Algorithm 14 (K-PKE.Encrypt), L19]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void polyvec_add(polyvec *r, const polyvec *b)
+__contract__(
+  requires(memory_no_alias(r, sizeof(polyvec)))
+  requires(memory_no_alias(b, sizeof(polyvec)))
+  requires(forall(j0, 0, MLKEM_K,
+          forall(k0, 0, MLKEM_N,
+            (int32_t)r->vec[j0].coeffs[k0] + b->vec[j0].coeffs[k0] <= INT16_MAX)))
+  requires(forall(j1, 0, MLKEM_K,
+          forall(k1, 0, MLKEM_N,
+            (int32_t)r->vec[j1].coeffs[k1] + b->vec[j1].coeffs[k1] >= INT16_MIN)))
+  assigns(object_whole(r))
+);
+
+#define polyvec_tomont MLK_NAMESPACE_K(polyvec_tomont)
+/*************************************************
+ * Name:        polyvec_tomont
+ *
+ * Description: Inplace conversion of all coefficients of a polynomial
+ *              vector from normal domain to Montgomery domain
+ *
+ *              Bounds: Output < q in absolute value.
+ *
+ *
+ * Specification: Internal normalization required in `indcpa_keypair_derand`
+ *                as part of matrix-vector multiplication
+ *                [FIPS 203, Algorithm 13, K-PKE.KeyGen, L18].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void polyvec_tomont(polyvec *r)
+__contract__(
+  requires(memory_no_alias(r, sizeof(polyvec)))
+  assigns(memory_slice(r, sizeof(polyvec)))
+  assigns(object_whole(r))
+  ensures(forall(j, 0, MLKEM_K,
+    array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, MLKEM_Q)))
+);
+
+#define poly_getnoise_eta1_4x MLK_NAMESPACE_K(poly_getnoise_eta1_4x)
+/*************************************************
+ * Name:        poly_getnoise_eta1_4x
+ *
+ * Description: Batch sample four polynomials deterministically from a seed
+ *              and nonces, with output polynomials close to centered binomial
+ *              distribution with parameter MLKEM_ETA1.
+ *
+ * Arguments:   - poly *r{0,1,2,3}: pointer to output polynomial
+ *              - const uint8_t *seed: pointer to input seed
+ *                                     (of length MLKEM_SYMBYTES bytes)
+ *              - uint8_t nonce{0,1,2,3}: one-byte input nonce
+ *
+ * Specification:
+ * Implements 4x `SamplePolyCBD_{eta1} (PRF_{eta1} (sigma, N))`:
+ * - [FIPS 203, Algorithm 8, SamplePolyCBD_eta]
+ * - [FIPS 203, Eq (4.3), PRF_eta]
+ * - `SamplePolyCBD_{eta1} (PRF_{eta1} (sigma, N))` appears in
+ *   [FIPS 203, Algorithm 13, K-PKE.KeyGen, L{9, 13}]
+ *   [FIPS 203, Algorithm 14, K-PKE.Encrypt, L10]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
+                           const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0,
+                           uint8_t nonce1, uint8_t nonce2, uint8_t nonce3)
+/* Depending on MLKEM_K, the pointers passed to this function belong
+   to the same objects, so we cannot use memory_no_alias for r0-r3.
+
+   NOTE: Somehow it is important to use memory_no_alias() first in the
+         conjunctions defining each case.
+*/
+#if MLKEM_K == 2
+__contract__(
+  requires(memory_no_alias(seed, MLKEM_SYMBYTES))
+  requires( /* Case A: r0, r1 consecutive, r2, r3 consecutive */
+    (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) &&
+     r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2)))
+  assigns(memory_slice(r0, sizeof(poly)))
+  assigns(memory_slice(r1, sizeof(poly)))
+  assigns(memory_slice(r2, sizeof(poly)))
+  assigns(memory_slice(r3, sizeof(poly)))
+  ensures(
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1));
+);
+#elif MLKEM_K == 4
+__contract__(
+  requires(memory_no_alias(seed, MLKEM_SYMBYTES))
+  requires( /* Case B: r0, r1, r2, r3 consecutive */
+    (memory_no_alias(r0, 4 * sizeof(poly)) && r1 == r0 + 1 && r2 == r0 + 2 && r3 == r0 + 3))
+  assigns(memory_slice(r0, sizeof(poly)))
+  assigns(memory_slice(r1, sizeof(poly)))
+  assigns(memory_slice(r2, sizeof(poly)))
+  assigns(memory_slice(r3, sizeof(poly)))
+  ensures(
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1));
+);
+#elif MLKEM_K == 3
+__contract__(
+  requires(memory_no_alias(seed, MLKEM_SYMBYTES))
+  requires( /* Case C: r0, r1, r2 consecutive */
+ (memory_no_alias(r0, 3 * sizeof(poly)) && memory_no_alias(r3, 1 * sizeof(poly)) &&
+  r1 == r0 + 1 && r2 == r0 + 2 && !same_object(r3, r0)))
+  assigns(memory_slice(r0, sizeof(poly)))
+  assigns(memory_slice(r1, sizeof(poly)))
+  assigns(memory_slice(r2, sizeof(poly)))
+  assigns(memory_slice(r3, sizeof(poly)))
+  ensures(
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1));
+);
+#endif /* MLKEM_K */
+
+#if MLKEM_ETA1 == MLKEM_ETA2
+/*
+ * We only require poly_getnoise_eta2_4x for ml-kem-768 and ml-kem-1024
+ * where MLKEM_ETA2 = MLKEM_ETA1 = 2.
+ * For ml-kem-512, poly_getnoise_eta1122_4x is used instead.
+ */
+#define poly_getnoise_eta2_4x poly_getnoise_eta1_4x
+#endif /* MLKEM_ETA1 == MLKEM_ETA2 */
+
+#if MLKEM_K == 2 || MLKEM_K == 4
+#define poly_getnoise_eta2 MLK_NAMESPACE_K(poly_getnoise_eta2)
+/*************************************************
+ * Name:        poly_getnoise_eta2
+ *
+ * Description: Sample a polynomial deterministically from a seed and a nonce,
+ *              with output polynomial close to centered binomial distribution
+ *              with parameter MLKEM_ETA2
+ *
+ * Arguments:   - poly *r: pointer to output polynomial
+ *              - const uint8_t *seed: pointer to input seed
+ *                                     (of length MLKEM_SYMBYTES bytes)
+ *              - uint8_t nonce: one-byte input nonce
+ *
+ * Specification:
+ * Implements `SamplePolyCBD_{eta2} (PRF_{eta2} (sigma, N))`:
+ * - [FIPS 203, Algorithm 8, SamplePolyCBD_eta]
+ * - [FIPS 203, Eq (4.3), PRF_eta]
+ * - `SamplePolyCBD_{eta2} (PRF_{eta2} (sigma, N))` appears in
+ *   [FIPS 203, Algorithm 14, K-PKE.Encrypt, L14]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
+                        uint8_t nonce)
+__contract__(
+  requires(memory_no_alias(r, sizeof(poly)))
+  requires(memory_no_alias(seed, MLKEM_SYMBYTES))
+  assigns(object_whole(r))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1))
+);
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
+
+#if MLKEM_K == 2
+#define poly_getnoise_eta1122_4x MLK_NAMESPACE_K(poly_getnoise_eta1122_4x)
+/*************************************************
+ * Name:        poly_getnoise_eta1122_4x
+ *
+ * Description: Batch sample four polynomials deterministically from a seed
+ * and a nonces, with output polynomials close to centered binomial
+ * distribution with parameter MLKEM_ETA1 and MLKEM_ETA2
+ *
+ * Arguments:   - poly *r{0,1,2,3}: pointer to output polynomial
+ *              - const uint8_t *seed: pointer to input seed
+ *                                     (of length MLKEM_SYMBYTES bytes)
+ *              - uint8_t nonce{0,1,2,3}: one-byte input nonce
+ *
+ * Specification:
+ * Implements two instances each of
+ * `SamplePolyCBD_{eta1} (PRF_{eta1} (sigma, N))` and
+ * `SamplePolyCBD_{eta2} (PRF_{eta2} (sigma, N))`:
+ * - [FIPS 203, Algorithm 8, SamplePolyCBD_eta]
+ * - [FIPS 203, Eq (4.3), PRF_eta]
+ * - `SamplePolyCBD_{eta2} (PRF_{eta2} (sigma, N))` appears in
+ *   [FIPS 203, Algorithm 14, K-PKE.Encrypt, L14]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
+                              const uint8_t seed[MLKEM_SYMBYTES],
+                              uint8_t nonce0, uint8_t nonce1, uint8_t nonce2,
+                              uint8_t nonce3)
+__contract__(
+  requires( /* r0, r1 consecutive, r2, r3 consecutive */
+ (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) &&
+   r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2)))
+  requires(memory_no_alias(seed, MLKEM_SYMBYTES))
+  assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3))
+  ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)
+     && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)
+     && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1)
+     && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1));
+);
+#endif /* MLKEM_K == 2 */
+
+#endif /* MLK_POLY_K_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/randombytes.h b/crypto/fipsmodule/ml_kem/mlkem/randombytes.h
new file mode 100644
index 0000000000..585a8a0656
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/randombytes.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef MLK_RANDOMBYTES_H
+#define MLK_RANDOMBYTES_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "cbmc.h"
+
+void randombytes(uint8_t *out, size_t outlen)
+__contract__(
+  requires(memory_no_alias(out, outlen))
+  assigns(memory_slice(out, outlen))
+);
+
+#endif /* MLK_RANDOMBYTES_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/sampling.c b/crypto/fipsmodule/ml_kem/mlkem/sampling.c
new file mode 100644
index 0000000000..29e8d23c36
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/sampling.c
@@ -0,0 +1,354 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#include "common.h"
+#if !defined(MLK_MULTILEVEL_BUILD_NO_SHARED)
+
+#include "arith_backend.h"
+#include "debug.h"
+#include "sampling.h"
+#include "symmetric.h"
+
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define rej_uniform MLK_NAMESPACE(rej_uniform)
+#define rej_uniform_scalar MLK_NAMESPACE(rej_uniform_scalar)
+#define load32_littleendian MLK_NAMESPACE(load32_littleendian)
+#define load24_littleendian MLK_NAMESPACE(load24_littleendian)
+/* End of static namespacing */
+
+static unsigned rej_uniform_scalar(int16_t *r, unsigned target, unsigned offset,
+                                   const uint8_t *buf, unsigned buflen)
+__contract__(
+  requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0)
+  requires(memory_no_alias(r, sizeof(int16_t) * target))
+  requires(memory_no_alias(buf, buflen))
+  requires(array_bound(r, 0, offset, 0, MLKEM_Q))
+  assigns(memory_slice(r, sizeof(int16_t) * target))
+  ensures(offset <= return_value && return_value <= target)
+  ensures(array_bound(r, 0, return_value, 0, MLKEM_Q))
+)
+{
+  unsigned ctr, pos;
+  uint16_t val0, val1;
+
+  debug_assert_bound(r, offset, 0, MLKEM_Q);
+
+  ctr = offset;
+  pos = 0;
+  /* pos + 3 cannot overflow due to the assumption buflen <= 4096 */
+  while (ctr < target && pos + 3 <= buflen)
+  __loop__(
+    invariant(offset <= ctr && ctr <= target && pos <= buflen)
+    invariant(ctr > 0 ==> array_bound(r, 0, ctr, 0, MLKEM_Q)))
+  {
+    val0 = ((buf[pos + 0] >> 0) | ((uint16_t)buf[pos + 1] << 8)) & 0xFFF;
+    val1 = ((buf[pos + 1] >> 4) | ((uint16_t)buf[pos + 2] << 4)) & 0xFFF;
+    pos += 3;
+
+    if (val0 < MLKEM_Q)
+    {
+      r[ctr++] = val0;
+    }
+    if (ctr < target && val1 < MLKEM_Q)
+    {
+      r[ctr++] = val1;
+    }
+  }
+
+  debug_assert_bound(r, ctr, 0, MLKEM_Q);
+  return ctr;
+}
+
+/*************************************************
+ * Name:        rej_uniform
+ *
+ * Description: Run rejection sampling on uniform random bytes to generate
+ *              uniform random integers mod q
+ *
+ * Arguments:   - int16_t *r:          pointer to output buffer
+ *              - unsigned target:     requested number of 16-bit integers
+ *                                     (uniform mod q).
+ *                                     Must be <= 4096.
+ *              - unsigned offset:     number of 16-bit integers that have
+ *                                     already been sampled.
+ *                                     Must be <= target.
+ *              - const uint8_t *buf:  pointer to input buffer
+ *                                     (assumed to be uniform random bytes)
+ *              - unsigned buflen:     length of input buffer in bytes
+ *                                     Must be <= 4096.
+ *                                     Must be a multiple of 3.
+ *
+ * Note: Strictly speaking, only a few values of buflen near UINT_MAX need
+ * excluding. The limit of 128 is somewhat arbitrary but sufficient for all
+ * uses of this function. Similarly, the actual limit for target is UINT_MAX/2.
+ *
+ * Returns the new offset of sampled 16-bit integers, at most target,
+ * and at least the initial offset.
+ * If the new offset is strictly less than len, all of the input buffers
+ * is guaranteed to have been consumed. If it is equal to len, no information
+ * is provided on how many bytes of the input buffer have been consumed.
+ **************************************************/
+
+/*
+ * NOTE: The signature differs from the Kyber reference implementation
+ * in that it adds the offset and always expects the base of the target
+ * buffer. This avoids shifting the buffer base in the caller, which appears
+ * tricky to reason about.
+ */
+static unsigned rej_uniform(int16_t *r, unsigned target, unsigned offset,
+                            const uint8_t *buf, unsigned buflen)
+__contract__(
+  requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0)
+  requires(memory_no_alias(r, sizeof(int16_t) * target))
+  requires(memory_no_alias(buf, buflen))
+  requires(array_bound(r, 0, offset, 0, MLKEM_Q))
+  assigns(memory_slice(r, sizeof(int16_t) * target))
+  ensures(offset <= return_value && return_value <= target)
+  ensures(array_bound(r, 0, return_value, 0, MLKEM_Q))
+)
+{
+#if defined(MLK_USE_NATIVE_REJ_UNIFORM)
+  if (offset == 0)
+  {
+    int ret = rej_uniform_native(r, target, buf, buflen);
+    if (ret != -1)
+    {
+      unsigned res = (unsigned)ret;
+      debug_assert_bound(r, res, 0, MLKEM_Q);
+      return res;
+    }
+  }
+#endif /* MLK_USE_NATIVE_REJ_UNIFORM */
+
+  return rej_uniform_scalar(r, target, offset, buf, buflen);
+}
+
+#ifndef MLKEM_GEN_MATRIX_NBLOCKS
+#define MLKEM_GEN_MATRIX_NBLOCKS \
+  ((12 * MLKEM_N / 8 * (1 << 12) / MLKEM_Q + XOF_RATE) / XOF_RATE)
+#endif
+
+MLK_INTERNAL_API
+void poly_rej_uniform_x4(poly *vec, uint8_t *seed[4])
+{
+  /* Temporary buffers for XOF output before rejection sampling */
+  MLK_ALIGN uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE];
+  MLK_ALIGN uint8_t buf1[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE];
+  MLK_ALIGN uint8_t buf2[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE];
+  MLK_ALIGN uint8_t buf3[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE];
+
+  /* Tracks the number of coefficients we have already sampled */
+  unsigned ctr[4];
+  xof_x4_ctx statex;
+  unsigned buflen;
+
+  /* seed is MLKEM_SYMBYTES + 2 bytes long, but padded to MLKEM_SYMBYTES + 16 */
+  xof_x4_init(&statex);
+  xof_x4_absorb(&statex, seed[0], seed[1], seed[2], seed[3],
+                MLKEM_SYMBYTES + 2);
+
+  /*
+   * Initially, squeeze heuristic number of MLKEM_GEN_MATRIX_NBLOCKS.
+   * This should generate the matrix entries with high probability.
+   */
+  xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, MLKEM_GEN_MATRIX_NBLOCKS,
+                       &statex);
+  buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE;
+  ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, 0, buf0, buflen);
+  ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, 0, buf1, buflen);
+  ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, 0, buf2, buflen);
+  ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, 0, buf3, buflen);
+
+  /*
+   * So long as not all matrix entries have been generated, squeeze
+   * one more block a time until we're done.
+   */
+  buflen = XOF_RATE;
+  while (ctr[0] < MLKEM_N || ctr[1] < MLKEM_N || ctr[2] < MLKEM_N ||
+         ctr[3] < MLKEM_N)
+  __loop__(
+    assigns(ctr, statex, memory_slice(vec, sizeof(poly) * 4), object_whole(buf0),
+       object_whole(buf1), object_whole(buf2), object_whole(buf3))
+    invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N)
+    invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N)
+    invariant(array_bound(vec[0].coeffs, 0, ctr[0], 0, MLKEM_Q))
+    invariant(array_bound(vec[1].coeffs, 0, ctr[1], 0, MLKEM_Q))
+    invariant(array_bound(vec[2].coeffs, 0, ctr[2], 0, MLKEM_Q))
+    invariant(array_bound(vec[3].coeffs, 0, ctr[3], 0, MLKEM_Q)))
+  {
+    xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex);
+    ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen);
+    ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, ctr[1], buf1, buflen);
+    ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, ctr[2], buf2, buflen);
+    ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, ctr[3], buf3, buflen);
+  }
+
+  xof_x4_release(&statex);
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  ct_zeroize(buf0, sizeof(buf0));
+  ct_zeroize(buf1, sizeof(buf1));
+  ct_zeroize(buf2, sizeof(buf2));
+  ct_zeroize(buf3, sizeof(buf3));
+}
+
+MLK_INTERNAL_API
+void poly_rej_uniform(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2])
+{
+  xof_ctx state;
+  MLK_ALIGN uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE];
+  unsigned ctr, buflen;
+
+  xof_init(&state);
+  xof_absorb(&state, seed, MLKEM_SYMBYTES + 2);
+
+  /* Initially, squeeze + sample heuristic number of MLKEM_GEN_MATRIX_NBLOCKS.
+   */
+  /* This should generate the matrix entry with high probability. */
+  xof_squeezeblocks(buf, MLKEM_GEN_MATRIX_NBLOCKS, &state);
+  buflen = MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE;
+  ctr = rej_uniform(entry->coeffs, MLKEM_N, 0, buf, buflen);
+
+  /* Squeeze + sample one more block a time until we're done */
+  buflen = XOF_RATE;
+  while (ctr < MLKEM_N)
+  __loop__(
+    assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf))
+    invariant(ctr <= MLKEM_N)
+    invariant(array_bound(entry->coeffs, 0, ctr, 0, MLKEM_Q)))
+  {
+    xof_squeezeblocks(buf, 1, &state);
+    ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen);
+  }
+
+  xof_release(&state);
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  ct_zeroize(buf, sizeof(buf));
+}
+
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define load32_littleendian MLK_NAMESPACE(load32_littleendian)
+#define load24_littleendian MLK_NAMESPACE(load24_littleendian)
+/* End of static namespacing */
+
+/*************************************************
+ * Name:        load32_littleendian
+ *
+ * Description: load 4 bytes into a 32-bit integer
+ *              in little-endian order
+ *
+ * Arguments:   - const uint8_t *x: pointer to input byte array
+ *
+ * Returns 32-bit unsigned integer loaded from x
+ **************************************************/
+static uint32_t load32_littleendian(const uint8_t x[4])
+{
+  uint32_t r;
+  r = (uint32_t)x[0];
+  r |= (uint32_t)x[1] << 8;
+  r |= (uint32_t)x[2] << 16;
+  r |= (uint32_t)x[3] << 24;
+  return r;
+}
+
+MLK_INTERNAL_API
+void poly_cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_N / 8; i++)
+  __loop__(
+    invariant(i <= MLKEM_N / 8)
+    invariant(array_abs_bound(r->coeffs, 0, 8 * i, 3)))
+  {
+    unsigned j;
+    uint32_t t = load32_littleendian(buf + 4 * i);
+    uint32_t d = t & 0x55555555;
+    d += (t >> 1) & 0x55555555;
+
+    for (j = 0; j < 8; j++)
+    __loop__(
+      invariant(i <= MLKEM_N / 8 && j <= 8)
+      invariant(array_abs_bound(r->coeffs, 0, 8 * i + j, 3)))
+    {
+      const int16_t a = (d >> (4 * j + 0)) & 0x3;
+      const int16_t b = (d >> (4 * j + 2)) & 0x3;
+      r->coeffs[8 * i + j] = a - b;
+    }
+  }
+}
+
+#if defined(MLK_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == 3
+/*************************************************
+ * Name:        load24_littleendian
+ *
+ * Description: load 3 bytes into a 32-bit integer
+ *              in little-endian order.
+ *              This function is only needed for ML-KEM-512
+ *
+ * Arguments:   - const uint8_t *x: pointer to input byte array
+ *
+ * Returns 32-bit unsigned integer loaded from x (most significant byte is zero)
+ **************************************************/
+static uint32_t load24_littleendian(const uint8_t x[3])
+{
+  uint32_t r;
+  r = (uint32_t)x[0];
+  r |= (uint32_t)x[1] << 8;
+  r |= (uint32_t)x[2] << 16;
+  return r;
+}
+
+MLK_INTERNAL_API
+void poly_cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_N / 4; i++)
+  __loop__(
+    invariant(i <= MLKEM_N / 4)
+    invariant(array_abs_bound(r->coeffs, 0, 4 * i, 4)))
+  {
+    unsigned j;
+    const uint32_t t = load24_littleendian(buf + 3 * i);
+    uint32_t d = t & 0x00249249;
+    d += (t >> 1) & 0x00249249;
+    d += (t >> 2) & 0x00249249;
+
+    for (j = 0; j < 4; j++)
+    __loop__(
+      invariant(i <= MLKEM_N / 4 && j <= 4)
+      invariant(array_abs_bound(r->coeffs, 0, 4 * i + j, 4)))
+    {
+      const int16_t a = (d >> (6 * j + 0)) & 0x7;
+      const int16_t b = (d >> (6 * j + 3)) & 0x7;
+      r->coeffs[4 * i + j] = a - b;
+    }
+  }
+}
+#endif /* defined(MLK_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == \
+          3 */
+
+#else /* MLK_MULTILEVEL_BUILD_NO_SHARED */
+
+MLK_EMPTY_CU(sampling)
+
+#endif /* MLK_MULTILEVEL_BUILD_NO_SHARED */
+
+/* To facilitate single-compilation-unit (SCU) builds, undefine all macros.
+ * Don't modify by hand -- this is auto-generated by scripts/autogen. */
+#undef rej_uniform
+#undef rej_uniform_scalar
+#undef load32_littleendian
+#undef load24_littleendian
+#undef MLKEM_GEN_MATRIX_NBLOCKS
+#undef load32_littleendian
+#undef load24_littleendian
diff --git a/crypto/fipsmodule/ml_kem/mlkem/sampling.h b/crypto/fipsmodule/ml_kem/mlkem/sampling.h
new file mode 100644
index 0000000000..ba50567b44
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/sampling.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef MLK_SAMPLING_H
+#define MLK_SAMPLING_H
+
+#include <stdint.h>
+#include <stdlib.h>
+#include "cbmc.h"
+#include "common.h"
+#include "poly.h"
+
+#define poly_cbd2 MLK_NAMESPACE(poly_cbd2)
+/*************************************************
+ * Name:        poly_cbd2
+ *
+ * Description: Given an array of uniformly random bytes, compute
+ *              polynomial with coefficients distributed according to
+ *              a centered binomial distribution with parameter eta=2
+ *
+ * Arguments:   - poly *r: pointer to output polynomial
+ *              - const uint8_t *buf: pointer to input byte array
+ *
+ * Specification: Implements [FIPS 203, Algorithm 8, SamplePolyCBD_2]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void poly_cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]);
+
+#if defined(MLK_MULTILEVEL_BUILD_WITH_SHARED) || MLKEM_ETA1 == 3
+#define poly_cbd3 MLK_NAMESPACE(poly_cbd3)
+/*************************************************
+ * Name:        poly_cbd3
+ *
+ * Description: Given an array of uniformly random bytes, compute
+ *              polynomial with coefficients distributed according to
+ *              a centered binomial distribution with parameter eta=3.
+ *              This function is only needed for ML-KEM-512
+ *
+ * Arguments:   - poly *r: pointer to output polynomial
+ *              - const uint8_t *buf: pointer to input byte array
+ *
+ * Specification: Implements [FIPS 203, Algorithm 8, SamplePolyCBD_3]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void poly_cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]);
+#endif /* MLK_MULTILEVEL_BUILD || MLKEM_ETA1 == 3 */
+
+#define poly_rej_uniform_x4 MLK_NAMESPACE(poly_rej_uniform_x4)
+/*************************************************
+ * Name:        poly_rej_uniform_x4
+ *
+ * Description: Generate four polynomials using rejection sampling
+ *              on (pseudo-)uniformly random bytes sampled from a seed.
+ *
+ * Arguments:   - poly *vec:           Pointer to an array of 4 polynomials
+ *                                     to be sampled.
+ *              - uint8_t *seed[4]:    Pointer to array of four pointers
+ *                                     pointing to the seed buffers of size
+ *                                     MLKEM_SYMBYTES + 2 each.
+ *
+ * Specification: Implements [FIPS 203, Algorithm 7, SampleNTT]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void poly_rej_uniform_x4(poly *vec, uint8_t *seed[4])
+__contract__(
+  requires(memory_no_alias(vec, sizeof(poly) * 4))
+  requires(memory_no_alias(seed, sizeof(uint8_t*) * 4))
+  requires(memory_no_alias(seed[0], MLKEM_SYMBYTES + 2))
+  requires(memory_no_alias(seed[1], MLKEM_SYMBYTES + 2))
+  requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2))
+  requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2))
+  assigns(memory_slice(vec, sizeof(poly) * 4))
+  ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, MLKEM_Q))
+  ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, MLKEM_Q))
+  ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, MLKEM_Q))
+  ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, MLKEM_Q)));
+
+#define poly_rej_uniform MLK_NAMESPACE(poly_rej_uniform)
+/*************************************************
+ * Name:        poly_rej_uniform
+ *
+ * Description: Generate polynomial using rejection sampling
+ *              on (pseudo-)uniformly random bytes sampled from a seed.
+ *
+ * Arguments:   - poly *vec:           Pointer to polynomial to be sampled.
+ *              - uint8_t *seed:       Pointer to seed buffer of size
+ *                                     MLKEM_SYMBYTES + 2 each.
+ *
+ * Specification: Implements [FIPS 203, Algorithm 7, SampleNTT]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void poly_rej_uniform(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2])
+__contract__(
+  requires(memory_no_alias(entry, sizeof(poly)))
+  requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2))
+  assigns(memory_slice(entry, sizeof(poly)))
+  ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, MLKEM_Q)));
+
+#endif /* MLK_SAMPLING_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/symmetric.h b/crypto/fipsmodule/ml_kem/mlkem/symmetric.h
new file mode 100644
index 0000000000..060e1c021c
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/symmetric.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef MLK_SYMMETRIC_H
+#define MLK_SYMMETRIC_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include "cbmc.h"
+#include "common.h"
+#include MLK_FIPS202_HEADER_FILE
+#include MLK_FIPS202X4_HEADER_FILE
+
+/* Macros denoting FIPS 203 specific Hash functions */
+
+/* Hash function H, [FIPS 203, Section 4.1, Eq (4.4)] */
+#define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES)
+
+/* Hash function G, [FIPS 203, Section 4.1, Eq (4.5)] */
+#define hash_g(OUT, IN, INBYTES) sha3_512(OUT, IN, INBYTES)
+
+/* Hash function J, [FIPS 203, Section 4.1, Eq (4.4)] */
+#define hash_j(OUT, IN, INBYTES) shake256(OUT, MLKEM_SYMBYTES, IN, INBYTES)
+
+/* PRF function, [FIPS 203, Section 4.1, Eq (4.3)]
+ * Referring to (eq 4.3), `OUT` is assumed to contain `s || b`. */
+#define prf_eta(ETA, OUT, IN) \
+  shake256(OUT, (ETA) * MLKEM_N / 4, IN, MLKEM_SYMBYTES + 1)
+#define prf_eta1(OUT, IN) prf_eta(MLKEM_ETA1, OUT, IN)
+#define prf_eta2(OUT, IN) prf_eta(MLKEM_ETA2, OUT, IN)
+#define prf_eta1_x4(OUT0, OUT1, OUT2, OUT3, IN0, IN1, IN2, IN3)            \
+  shake256x4(OUT0, OUT1, OUT2, OUT3, (MLKEM_ETA1 * MLKEM_N / 4), IN0, IN1, \
+             IN2, IN3, MLKEM_SYMBYTES + 1)
+
+/* XOF function, FIPS 203 4.1 */
+#define xof_ctx shake128ctx
+#define xof_x4_ctx shake128x4ctx
+#define xof_init(CTX) shake128_init((CTX))
+#define xof_absorb(CTX, IN, INBYTES) \
+  shake128_absorb_once((CTX), (IN), (INBYTES))
+#define xof_squeezeblocks(BUF, NBLOCKS, CTX) \
+  shake128_squeezeblocks((BUF), (NBLOCKS), (CTX))
+#define xof_release(CTX) shake128_release((CTX))
+
+#define xof_x4_init(CTX) shake128x4_init((CTX))
+#define xof_x4_absorb(CTX, IN0, IN1, IN2, IN3, INBYTES) \
+  shake128x4_absorb_once((CTX), (IN0), (IN1), (IN2), (IN3), (INBYTES))
+#define xof_x4_squeezeblocks(BUF0, BUF1, BUF2, BUF3, NBLOCKS, CTX) \
+  shake128x4_squeezeblocks((BUF0), (BUF1), (BUF2), (BUF3), (NBLOCKS), (CTX))
+#define xof_x4_release(CTX) shake128x4_release((CTX))
+
+#define XOF_RATE SHAKE128_RATE
+
+#endif /* MLK_SYMMETRIC_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/sys.h b/crypto/fipsmodule/ml_kem/mlkem/sys.h
new file mode 100644
index 0000000000..f61d75eb74
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/sys.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef MLK_SYS_H
+#define MLK_SYS_H
+
+/* Check if we're running on an AArch64 little endian system. _M_ARM64 is set by
+ * MSVC. */
+#if defined(__AARCH64EL__) || defined(_M_ARM64)
+#define MLK_SYS_AARCH64
+#endif
+
+/* Check if we're running on an AArch64 big endian system. */
+#if defined(__AARCH64EB__)
+#define MLK_SYS_AARCH64_EB
+#endif
+
+#if defined(__x86_64__)
+#define MLK_SYS_X86_64
+#if defined(__AVX2__)
+#define MLK_SYS_X86_64_AVX2
+#endif
+#endif /* __x86_64__ */
+
+#if defined(_WIN32)
+#define MLK_SYS_WINDOWS
+#endif /* _WIN32 */
+
+#if !defined(MLK_NO_ASM) && (defined(__GNUC__) || defined(__clang__))
+#define MLK_HAVE_INLINE_ASM
+#endif
+
+/* Try to find endianness, if not forced through CFLAGS already */
+#if !defined(MLK_SYS_LITTLE_ENDIAN) && !defined(MLK_SYS_BIG_ENDIAN)
+#if defined(__BYTE_ORDER__)
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define MLK_SYS_LITTLE_ENDIAN
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define MLK_SYS_BIG_ENDIAN
+#else /* __BYTE_ORDER__ */
+#error "__BYTE_ORDER__ defined, but don't recognize value."
+#endif /* __BYTE_ORDER__ */
+#endif /* !defined(__BYTE_ORDER__) */
+#endif /* defined(MLK_SYS_LITTLE_ENDIAN) || defined(MLK_SYS_BIG_ENDIAN) */
+
+/* If MLK_FORCE_AARCH64 is set, assert that we're indeed on an AArch64 system.
+ */
+#if defined(MLK_FORCE_AARCH64) && !defined(MLK_SYS_AARCH64)
+#error "MLK_FORCE_AARCH64 is set, but we don't seem to be on an AArch64 system."
+#endif
+
+/* If MLK_FORCE_AARCH64_EB is set, assert that we're indeed on a big endian
+ * AArch64 system. */
+#if defined(MLK_FORCE_AARCH64_EB) && !defined(MLK_SYS_AARCH64_EB)
+#error \
+    "MLK_FORCE_AARCH64_EB is set, but we don't seem to be on an AArch64 system."
+#endif
+
+/* If MLK_FORCE_X86_64 is set, assert that we're indeed on an X86_64 system. */
+#if defined(MLK_FORCE_X86_64) && !defined(MLK_SYS_X86_64)
+#error "MLK_FORCE_X86_64 is set, but we don't seem to be on an X86_64 system."
+#endif
+
+/*
+ * C90 does not have the inline compiler directive yet.
+ * We don't use it in C90 builds.
+ * However, in that case the compiler warns about some inline functions in
+ * header files not being used in every compilation unit that includes that
+ * header. To work around it we silence that warning in that case using
+ * __attribute__((unused)).
+ */
+
+/* Do not use inline for C90 builds*/
+#if !defined(MLK_INLINE)
+#if !defined(inline)
+#if defined(_MSC_VER)
+#define MLK_INLINE __inline
+/* Don't combine __inline and __forceinline */
+#define MLK_ALWAYS_INLINE __forceinline
+#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#define MLK_INLINE inline
+#define MLK_ALWAYS_INLINE MLK_INLINE __attribute__((always_inline))
+#else
+#define MLK_INLINE __attribute__((unused))
+#define MLK_ALWAYS_INLINE MLK_INLINE
+#endif
+
+#else
+#define MLK_INLINE inline
+#define MLK_ALWAYS_INLINE MLK_INLINE __attribute__((always_inline))
+#endif
+#endif
+
+/*
+ * C90 does not have the restrict compiler directive yet.
+ * We don't use it in C90 builds.
+ */
+#if !defined(restrict)
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#define MLK_RESTRICT restrict
+#else
+#define MLK_RESTRICT
+#endif
+
+#else
+
+#define MLK_RESTRICT restrict
+#endif
+
+#define MLK_DEFAULT_ALIGN 32
+#if defined(__GNUC__)
+#define MLK_ALIGN __attribute__((aligned(MLK_DEFAULT_ALIGN)))
+#elif defined(_MSC_VER)
+#define MLK_ALIGN __declspec(align(MLK_DEFAULT_ALIGN))
+#else
+#define MLK_ALIGN /* No known support for alignment constraints */
+#endif
+
+
+/* New X86_64 CPUs support Conflow-flow protection using the CET instructions.
+ * When enabled (through -fcf-protection=), all compilation units (including
+ * empty ones) need to support CET for this to work.
+ * For assembly, this means that source files need to signal support for
+ * CET by setting the appropriate note.gnu.property section.
+ * This can be achieved by including the <cet.h> header in all assembly file.
+ * This file also provides the _CET_ENDBR macro which needs to be placed at
+ * every potential target of an indirect branch.
+ * If CET is enabled _CET_ENDBR maps to the endbr64 instruction, otherwise
+ * it is empty.
+ * In case the compiler does not support CET (e.g., <gcc8, <clang11),
+ * the __CET__ macro is not set and we default to nothing.
+ * Note that we only issue _CET_ENDBR instructions through the MLK_ASM_FN_SYMBOL
+ * macro as the global symbols are the only possible targets of indirect
+ * branches in our code.
+ */
+#if defined(MLK_SYS_X86_64)
+#if defined(__CET__)
+#include <cet.h>
+#define MLK_CET_ENDBR _CET_ENDBR
+#else
+#define MLK_CET_ENDBR
+#endif
+#endif
+
+#if defined(MLK_CT_TESTING_ENABLED) && !defined(__ASSEMBLER__)
+#include <valgrind/memcheck.h>
+#define MLK_CT_TESTING_SECRET(ptr, len) \
+  VALGRIND_MAKE_MEM_UNDEFINED((ptr), (len))
+#define MLK_CT_TESTING_DECLASSIFY(ptr, len) \
+  VALGRIND_MAKE_MEM_DEFINED((ptr), (len))
+#else
+#define MLK_CT_TESTING_SECRET(ptr, len) \
+  do                                    \
+  {                                     \
+  } while (0)
+#define MLK_CT_TESTING_DECLASSIFY(ptr, len) \
+  do                                        \
+  {                                         \
+  } while (0)
+#endif /* MLK_CT_TESTING_ENABLED */
+
+#if defined(__GNUC__) || defined(clang)
+#define MLK_MUST_CHECK_RETURN_VALUE __attribute__((warn_unused_result))
+#else
+#define MLK_MUST_CHECK_RETURN_VALUE
+#endif
+
+#endif /* MLK_SYS_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/verify.c b/crypto/fipsmodule/ml_kem/mlkem/verify.c
new file mode 100644
index 0000000000..343061584d
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/verify.c
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#include "verify.h"
+
+#if !defined(MLK_USE_ASM_VALUE_BARRIER) && \
+    !defined(MLK_MULTILEVEL_BUILD_NO_SHARED)
+/*
+ * Masking value used in constant-time functions from
+ * verify.h to block the compiler's range analysis and
+ * thereby reduce the risk of compiler-introduced branches.
+ */
+volatile uint64_t ct_opt_blocker_u64 = 0;
+
+#else /* MLK_USE_ASM_VALUE_BARRIER && \
+         !MLK_MULTILEVEL_BUILD_NO_SHARED */
+
+MLK_EMPTY_CU(verify)
+
+#endif /* MLK_USE_ASM_VALUE_BARRIER && \
+          !MLK_MULTILEVEL_BUILD_NO_SHARED */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/verify.h b/crypto/fipsmodule/ml_kem/mlkem/verify.h
new file mode 100644
index 0000000000..44a72f6697
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/verify.h
@@ -0,0 +1,375 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef MLK_VERIFY_H
+#define MLK_VERIFY_H
+
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+#include "cbmc.h"
+#include "common.h"
+
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define value_barrier_u8 MLK_NAMESPACE(value_barrier_u8)
+#define value_barrier_u32 MLK_NAMESPACE(value_barrier_u32)
+#define value_barrier_i32 MLK_NAMESPACE(value_barrier_i32)
+#define ct_cmask_neg_i16 MLK_NAMESPACE(ct_cmask_neg_i16)
+#define ct_cmask_nonzero_u8 MLK_NAMESPACE(ct_cmask_nonzero_u8)
+#define ct_cmask_nonzero_u16 MLK_NAMESPACE(ct_cmask_nonzero_u16)
+#define ct_sel_uint8 MLK_NAMESPACE(ct_sel_uint8)
+#define ct_sel_int16 MLK_NAMESPACE(ct_sel_int16)
+#define ct_memcmp MLK_NAMESPACE(ct_memcmp)
+#define ct_cmov_zero MLK_NAMESPACE(ct_cmov_zero)
+#define ct_zeroize MLK_NAMESPACE(ct_zeroize)
+/* End of static namespacing */
+
+/* Constant-time comparisons and conditional operations
+
+   We reduce the risk for compilation into variable-time code
+   through the use of 'value barriers'.
+
+   Functionally, a value barrier is a no-op. To the compiler, however,
+   it constitutes an arbitrary modification of its input, and therefore
+   harden's value propagation and range analysis.
+
+   We consider two approaches to implement a value barrier:
+   - An empty inline asm block which marks the target value as clobbered.
+   - XOR'ing with the value of a volatile global that's set to 0;
+     for a discussion / implementation of this idea, see e.g.
+     * https://groups.google.com/a/list.nist.gov/g/pqc-forum/c/hqbtIGFKIpU/m/H14H0wOlBgAJ
+     * https://lib.mceliece.org/libmceliece-20240513/inttypes/crypto_intN.h.html
+
+   The first approach is cheap because it only prevents the compiler
+   from reasoning about the value of the variable past the barrier,
+   but does not directly generate additional instructions.
+
+   The second approach generates redundant loads and XOR operations
+   and therefore comes at a higher runtime cost. However, it appears
+   more robust towards optimization, as compilers should never drop
+   a volatile load.
+
+   We use the empty-ASM value barrier for GCC and clang, and fall
+   back to the global volatile barrier otherwise.
+
+   The global value barrier can be forced by setting MLKEM_NO_ASM_VALUE_BARRIER.
+
+*/
+
+#if defined(MLK_HAVE_INLINE_ASM) && !defined(MLKEM_NO_ASM_VALUE_BARRIER)
+#define MLK_USE_ASM_VALUE_BARRIER
+#endif
+
+#if !defined(MLK_USE_ASM_VALUE_BARRIER)
+
+/*
+ * Declaration of global volatile that the global value barrier
+ * is loading from and masking with.
+ */
+#define ct_opt_blocker_u64 MLK_NAMESPACE(ct_opt_blocker_u64)
+extern volatile uint64_t ct_opt_blocker_u64;
+
+/* Helper functions for obtaining masks of various sizes */
+static MLK_INLINE uint8_t get_optblocker_u8(void)
+__contract__(ensures(return_value == 0)) { return (uint8_t)ct_opt_blocker_u64; }
+
+static MLK_INLINE uint32_t get_optblocker_u32(void)
+__contract__(ensures(return_value == 0)) { return ct_opt_blocker_u64; }
+
+static MLK_INLINE uint32_t get_optblocker_i32(void)
+__contract__(ensures(return_value == 0)) { return ct_opt_blocker_u64; }
+
+static MLK_INLINE uint32_t value_barrier_u32(uint32_t b)
+__contract__(ensures(return_value == b)) { return (b ^ get_optblocker_u32()); }
+
+static MLK_INLINE int32_t value_barrier_i32(int32_t b)
+__contract__(ensures(return_value == b)) { return (b ^ get_optblocker_i32()); }
+
+static MLK_INLINE uint8_t value_barrier_u8(uint8_t b)
+__contract__(ensures(return_value == b)) { return (b ^ get_optblocker_u8()); }
+
+#else /* !MLK_USE_ASM_VALUE_BARRIER */
+
+static MLK_INLINE uint32_t value_barrier_u32(uint32_t b)
+__contract__(ensures(return_value == b))
+{
+  __asm__("" : "+r"(b));
+  return b;
+}
+
+static MLK_INLINE int32_t value_barrier_i32(int32_t b)
+__contract__(ensures(return_value == b))
+{
+  __asm__("" : "+r"(b));
+  return b;
+}
+
+static MLK_INLINE uint8_t value_barrier_u8(uint8_t b)
+__contract__(ensures(return_value == b))
+{
+  __asm__("" : "+r"(b));
+  return b;
+}
+
+#endif /* MLK_USE_ASM_VALUE_BARRIER */
+
+/*
+ * The ct_cmask_nonzero_xxx functions below make deliberate use of unsigned
+ * overflow, which is fully defined behaviour in C. It is thus safe to disable
+ * this warning.
+ */
+#ifdef CBMC
+#pragma CPROVER check push
+#pragma CPROVER check disable "unsigned-overflow"
+#endif
+
+/*************************************************
+ * Name:        ct_cmask_nonzero_u16
+ *
+ * Description: Return 0 if input is zero, and -1 otherwise.
+ *
+ * Arguments:   uint16_t x: Value to be converted into a mask
+ **************************************************/
+static MLK_INLINE uint16_t ct_cmask_nonzero_u16(uint16_t x)
+__contract__(ensures(return_value == ((x == 0) ? 0 : 0xFFFF)))
+{
+  uint32_t tmp = value_barrier_u32(-((uint32_t)x));
+  tmp >>= 16;
+  return tmp;
+}
+
+/*************************************************
+ * Name:        ct_cmask_nonzero_u8
+ *
+ * Description: Return 0 if input is zero, and -1 otherwise.
+ *
+ * Arguments:   uint8_t x: Value to be converted into a mask
+ **************************************************/
+static MLK_INLINE uint8_t ct_cmask_nonzero_u8(uint8_t x)
+__contract__(ensures(return_value == ((x == 0) ? 0 : 0xFF)))
+{
+  uint32_t tmp = value_barrier_u32(-((uint32_t)x));
+  tmp >>= 24;
+  return tmp;
+}
+
+/* Put unsigned overflow warnings in CBMC back into scope */
+#ifdef CBMC
+#pragma CPROVER check pop
+#endif
+
+/*
+ * The ct_cmask_neg_i16 function below makes deliberate use of
+ * signed to unsigned integer conversion, which is fully defined
+ * behaviour in C. It is thus safe to disable this warning.
+ */
+#ifdef CBMC
+#pragma CPROVER check push
+#pragma CPROVER check disable "conversion"
+#endif
+
+/*************************************************
+ * Name:        ct_cmask_neg_i16
+ *
+ * Description: Return 0 if input is non-negative, and -1 otherwise.
+ *
+ * Arguments:   uint16_t x: Value to be converted into a mask
+ **************************************************/
+static MLK_INLINE uint16_t ct_cmask_neg_i16(int16_t x)
+__contract__(ensures(return_value == ((x < 0) ? 0xFFFF : 0)))
+{
+  int32_t tmp = value_barrier_i32((int32_t)x);
+  tmp >>= 16;
+  return (int16_t)tmp;
+}
+
+/* Put unsigned-to-signed warnings in CBMC back into scope */
+#ifdef CBMC
+#pragma CPROVER check pop
+#endif
+
+/*
+ * The ct_csel_xxx functions below make deliberate use of unsigned
+ * to signed integer conversion, which is implementation-defined
+ * behaviour. Here, we assume that uint16_t -> int16_t is inverse
+ * to int16_t -> uint16_t.
+ */
+#ifdef CBMC
+#pragma CPROVER check push
+#pragma CPROVER check disable "conversion"
+#endif
+
+/*************************************************
+ * Name:        ct_sel_int16
+ *
+ * Description: Functionally equivalent to cond ? a : b,
+ *              but implemented with guards against
+ *              compiler-introduced branches.
+ *
+ * Arguments:   int16_t a:       First alternative
+ *              int16_t b:       Second alternative
+ *              uint16_t cond:   Condition variable.
+ *
+ * Specification:
+ * - With `a = MLKEM_Q_HALF` and `b=0`, this essentially
+ *   implements `Decompress_1` [FIPS 203, Eq (4.8)] in `poly_frommsg()`.
+ *
+ **************************************************/
+static MLK_INLINE int16_t ct_sel_int16(int16_t a, int16_t b, uint16_t cond)
+__contract__(ensures(return_value == (cond ? a : b)))
+{
+  uint16_t au = a, bu = b;
+  uint16_t res = bu ^ (ct_cmask_nonzero_u16(cond) & (au ^ bu));
+  return (int16_t)res;
+}
+
+/* Put unsigned-to-signed warnings in CBMC back into scope */
+#ifdef CBMC
+#pragma CPROVER check pop
+#endif
+
+/*************************************************
+ * Name:        ct_sel_uint8
+ *
+ * Description: Functionally equivalent to cond ? a : b,
+ *              but implemented with guards against
+ *              compiler-introduced branches.
+ *
+ * Arguments:   uint8_t a:       First alternative
+ *              uint8_t b:       Second alternative
+ *              uuint8_t cond:   Condition variable.
+ **************************************************/
+static MLK_INLINE uint8_t ct_sel_uint8(uint8_t a, uint8_t b, uint8_t cond)
+__contract__(ensures(return_value == (cond ? a : b)))
+{
+  return b ^ (ct_cmask_nonzero_u8(cond) & (a ^ b));
+}
+
+/*************************************************
+ * Name:        ct_memcmp
+ *
+ * Description: Compare two arrays for equality in constant time.
+ *
+ * Arguments:   const uint8_t *a: pointer to first byte array
+ *              const uint8_t *b: pointer to second byte array
+ *              size_t len:       length of the byte arrays
+ *
+ * Returns 0 if the byte arrays are equal, a non-zero value otherwise
+ *
+ * Specification:
+ * - Used to securely compute conditional move in
+ *   [FIPS 203, Algorithm 18 (ML-KEM.Decaps_Internal, L9-11]
+ **************************************************/
+static MLK_INLINE uint8_t ct_memcmp(const uint8_t *a, const uint8_t *b,
+                                    const size_t len)
+__contract__(
+  requires(memory_no_alias(a, len))
+  requires(memory_no_alias(b, len))
+  requires(len <= INT_MAX)
+  ensures((return_value == 0) == forall(i, 0, len, (a[i] == b[i]))))
+{
+  uint8_t r = 0, s = 0;
+  unsigned i;
+
+  for (i = 0; i < len; i++)
+  __loop__(
+    invariant(i <= len)
+    invariant((r == 0) == (forall(k, 0, i, (a[k] == b[k])))))
+  {
+    r |= a[i] ^ b[i];
+    /* s is useless, but prevents the loop from being aborted once r=0xff. */
+    s ^= a[i] ^ b[i];
+  }
+
+  /*
+   * - Convert r into a mask; this may not be necessary, but is an additional
+   *   safeguard
+   *   towards leaking information about a and b.
+   * - XOR twice with s, separated by a value barrier, to prevent the compile
+   *   from dropping the s computation in the loop.
+   */
+  return (value_barrier_u8(ct_cmask_nonzero_u8(r) ^ s) ^ s);
+}
+
+/*************************************************
+ * Name:        ct_cmov_zero
+ *
+ * Description: Copy len bytes from x to r if b is zero;
+ *              don't modify x if b is non-zero.
+ *              assumes two's complement representation of negative integers.
+ *              Runs in constant time.
+ *
+ * Arguments:   uint8_t *r:       pointer to output byte array
+ *              const uint8_t *x: pointer to input byte array
+ *              size_t len:       Amount of bytes to be copied
+ *              uint8_t b:        Condition value.
+ *
+ * Specification:
+ * - Used to securely compute conditional move in
+ *   [FIPS 203, Algorithm 18 (ML-KEM.Decaps_Internal, L9-11]
+ **************************************************/
+static MLK_INLINE void ct_cmov_zero(uint8_t *r, const uint8_t *x, size_t len,
+                                    uint8_t b)
+__contract__(
+  requires(memory_no_alias(r, len))
+  requires(memory_no_alias(x, len))
+  assigns(memory_slice(r, len)))
+{
+  size_t i;
+  for (i = 0; i < len; i++)
+  __loop__(invariant(i <= len))
+  {
+    r[i] = ct_sel_uint8(r[i], x[i], b);
+  }
+}
+
+/*************************************************
+ * Name:        ct_zeroize
+ *
+ * Description: Force-zeroize a buffer.
+ *
+ * Arguments:   uint8_t *r:       pointer to byte array to be zeroed
+ *              size_t len:       Amount of bytes to be zeroed
+ *
+ * Specification: Used to implement
+ * [FIPS 203, Section 3.3, Destruction of intermediate values]
+ *
+ **************************************************/
+static MLK_INLINE void ct_zeroize(void *r, size_t len)
+__contract__(
+  requires(memory_no_alias(r, len))
+  assigns(memory_slice(r, len))
+);
+
+#if defined(MLK_USE_CT_ZEROIZE_NATIVE)
+static MLK_INLINE void ct_zeroize(void *ptr, size_t len)
+{
+  ct_zeroize_native(ptr, len);
+}
+#elif defined(MLK_SYS_WINDOWS)
+#include <windows.h>
+static MLK_INLINE void ct_zeroize(void *ptr, size_t len)
+{
+  SecureZeroMemory(ptr, len);
+}
+#elif defined(MLK_HAVE_INLINE_ASM)
+#include <string.h>
+static MLK_INLINE void ct_zeroize(void *ptr, size_t len)
+{
+  memset(ptr, 0, len);
+  /* This follows OpenSSL and seems sufficient to prevent the compiler
+   * from optimizing away the memset.
+   *
+   * If there was a reliable way to detect availability of memset_s(),
+   * that would be preferred. */
+  __asm__ __volatile__("" : : "r"(ptr) : "memory");
+}
+#else
+#error No plausibly-secure implementation of ct_zeroize available. Please provide your own using MLK_USE_CT_ZEROIZE_NATIVE.
+#endif
+
+#endif /* MLK_VERIFY_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/zetas.inc b/crypto/fipsmodule/ml_kem/mlkem/zetas.inc
new file mode 100644
index 0000000000..66cb7d1518
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/zetas.inc
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+ * WARNING: This file is auto-generated from scripts/autogen
+ *          Do not modify it directly.
+ */
+
+#include <stdint.h>
+
+/*
+ * Table of zeta values used in the reference NTT and inverse NTT.
+ * See autogen for details.
+ */
+static MLK_ALIGN const int16_t zetas[128] = {
+    -1044, -758,  -359,  -1517, 1493,  1422,  287,   202,  -171,  622,   1577,
+    182,   962,   -1202, -1474, 1468,  573,   -1325, 264,  383,   -829,  1458,
+    -1602, -130,  -681,  1017,  732,   608,   -1542, 411,  -205,  -1571, 1223,
+    652,   -552,  1015,  -1293, 1491,  -282,  -1544, 516,  -8,    -320,  -666,
+    -1618, -1162, 126,   1469,  -853,  -90,   -271,  830,  107,   -1421, -247,
+    -951,  -398,  961,   -1508, -725,  448,   -1065, 677,  -1275, -1103, 430,
+    555,   843,   -1251, 871,   1550,  105,   422,   587,  177,   -235,  -291,
+    -460,  1574,  1653,  -246,  778,   1159,  -147,  -777, 1483,  -602,  1119,
+    -1590, 644,   -872,  349,   418,   329,   -156,  -75,  817,   1097,  603,
+    610,   1322,  -1285, -1465, 384,   -1215, -136,  1218, -1335, -874,  220,
+    -1187, -1659, -1185, -1530, -1278, 794,   -1510, -854, -870,  478,   -108,
+    -308,  996,   991,   958,   -1460, 1522,  1628,
+};
diff --git a/crypto/fipsmodule/ml_kem/mlkem_native_bcm.c b/crypto/fipsmodule/ml_kem/mlkem_native_bcm.c
new file mode 100644
index 0000000000..b8b553f132
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem_native_bcm.c
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+ * WARNING: This file is auto-generated from scripts/autogen
+ *          Do not modify it directly.
+ */
+
+/*
+ * Monolithic compilation unit bundling all compilation units within
+ * mlkem-native
+ */
+
+/* If parts of the mlkem-native source tree are not used,
+ * consider reducing this header via `unifdef`.
+ *
+ * Example:
+ * ```bash
+ * unifdef -UMLK_MONOBUILD_WITH_NATIVE_ARITH mlkem_native_monobuild.c
+ * ```
+ */
+
+#include "mlkem/sys.h"
+
+#include "mlkem/compress.c"
+#include "mlkem/debug.c"
+#include "mlkem/indcpa.c"
+#include "mlkem/kem.c"
+#include "mlkem/poly.c"
+#include "mlkem/poly_k.c"
+#include "mlkem/sampling.c"
+#include "mlkem/verify.c"
+
+
+
+
+/*
+ * Undefine macros from MLKEM_K-specific files
+ */
+/* mlkem/common.h */
+#undef MLK_ARITH_BACKEND_NAME
+#undef MLK_ASM_FN_SYMBOL
+#undef MLK_ASM_NAMESPACE
+#undef MLK_COMMON_H
+#undef MLK_EMPTY_CU
+#undef MLK_EXTERNAL_API
+#undef MLK_FIPS202X4_HEADER_FILE
+#undef MLK_FIPS202_BACKEND_NAME
+#undef MLK_FIPS202_HEADER_FILE
+#undef MLK_INTERNAL_API
+#undef MLK_MAKE_NAMESPACE
+#undef MLK_MAKE_NAMESPACE_
+#undef MLK_MAKE_NAMESPACE_K
+#undef MLK_MAKE_NAMESPACE_K_
+#undef MLK_NAMESPACE
+#undef MLK_NAMESPACE_K
+#undef MLK_PREFIX_UNDERSCORE
+#undef MLK_PREFIX_UNDERSCORE_
+/* mlkem/config.h */
+#undef MLKEM_K
+#undef MLK_ARITH_BACKEND_FILE
+#undef MLK_CONFIG_H
+#undef MLK_DEFAULT_NAMESPACE_PREFIX
+#undef MLK_FIPS202_BACKEND_FILE
+#undef MLK_NAMESPACE_PREFIX
+/* mlkem/indcpa.h */
+#undef MLK_INDCPA_H
+#undef gen_matrix
+#undef indcpa_dec
+#undef indcpa_enc
+#undef indcpa_keypair_derand
+/* mlkem/kem.h */
+#undef MLK_KEM_H
+#undef crypto_kem_dec
+#undef crypto_kem_enc
+#undef crypto_kem_enc_derand
+#undef crypto_kem_keypair
+#undef crypto_kem_keypair_derand
+/* mlkem/mlkem_native.h */
+#undef CRYPTO_BYTES
+#undef CRYPTO_CIPHERTEXTBYTES
+#undef CRYPTO_PUBLICKEYBYTES
+#undef CRYPTO_SECRETKEYBYTES
+#undef CRYPTO_SYMBYTES
+#undef MLKEM1024_BYTES
+#undef MLKEM1024_CIPHERTEXTBYTES
+#undef MLKEM1024_PUBLICKEYBYTES
+#undef MLKEM1024_SECRETKEYBYTES
+#undef MLKEM1024_SYMBYTES
+#undef MLKEM512_BYTES
+#undef MLKEM512_CIPHERTEXTBYTES
+#undef MLKEM512_PUBLICKEYBYTES
+#undef MLKEM512_SECRETKEYBYTES
+#undef MLKEM512_SYMBYTES
+#undef MLKEM768_BYTES
+#undef MLKEM768_CIPHERTEXTBYTES
+#undef MLKEM768_PUBLICKEYBYTES
+#undef MLKEM768_SECRETKEYBYTES
+#undef MLKEM768_SYMBYTES
+#undef MLKEM_BYTES
+#undef MLKEM_CIPHERTEXTBYTES
+#undef MLKEM_CIPHERTEXTBYTES_
+#undef MLKEM_PUBLICKEYBYTES
+#undef MLKEM_PUBLICKEYBYTES_
+#undef MLKEM_SECRETKEYBYTES
+#undef MLKEM_SECRETKEYBYTES_
+#undef MLKEM_SYMBYTES
+#undef MLK_BUILD_INFO_CONCAT2
+#undef MLK_BUILD_INFO_CONCAT2_
+#undef MLK_BUILD_INFO_CONCAT3
+#undef MLK_BUILD_INFO_CONCAT3_
+#undef MLK_BUILD_INFO_LVL
+#undef MLK_BUILD_INFO_NAMESPACE
+#undef MLK_H
+#undef MLK_MUST_CHECK_RETURN_VALUE
+#undef crypto_kem_dec
+#undef crypto_kem_enc
+#undef crypto_kem_enc_derand
+#undef crypto_kem_keypair
+#undef crypto_kem_keypair_derand
+/* mlkem/params.h */
+#undef MLKEM_DU
+#undef MLKEM_DV
+#undef MLKEM_ETA1
+#undef MLKEM_ETA2
+#undef MLKEM_INDCCA_CIPHERTEXTBYTES
+#undef MLKEM_INDCCA_PUBLICKEYBYTES
+#undef MLKEM_INDCCA_SECRETKEYBYTES
+#undef MLKEM_INDCPA_BYTES
+#undef MLKEM_INDCPA_MSGBYTES
+#undef MLKEM_INDCPA_PUBLICKEYBYTES
+#undef MLKEM_INDCPA_SECRETKEYBYTES
+#undef MLKEM_LVL
+#undef MLKEM_N
+#undef MLKEM_POLYBYTES
+#undef MLKEM_POLYCOMPRESSEDBYTES_D10
+#undef MLKEM_POLYCOMPRESSEDBYTES_D11
+#undef MLKEM_POLYCOMPRESSEDBYTES_D4
+#undef MLKEM_POLYCOMPRESSEDBYTES_D5
+#undef MLKEM_POLYCOMPRESSEDBYTES_DU
+#undef MLKEM_POLYCOMPRESSEDBYTES_DV
+#undef MLKEM_POLYVECBYTES
+#undef MLKEM_POLYVECCOMPRESSEDBYTES_DU
+#undef MLKEM_Q
+#undef MLKEM_Q_HALF
+#undef MLKEM_SSBYTES
+#undef MLKEM_SYMBYTES
+#undef MLKEM_UINT12_LIMIT
+#undef MLK_PARAMS_H
+/* mlkem/poly_k.h */
+#undef MLK_POLY_K_H
+#undef poly_compress_du
+#undef poly_compress_dv
+#undef poly_decompress_du
+#undef poly_decompress_dv
+#undef poly_getnoise_eta1122_4x
+#undef poly_getnoise_eta1_4x
+#undef poly_getnoise_eta2
+#undef poly_getnoise_eta2_4x
+#undef polyvec
+#undef polyvec_add
+#undef polyvec_basemul_acc_montgomery_cached
+#undef polyvec_compress_du
+#undef polyvec_decompress_du
+#undef polyvec_frombytes
+#undef polyvec_invntt_tomont
+#undef polyvec_mulcache
+#undef polyvec_mulcache_compute
+#undef polyvec_ntt
+#undef polyvec_reduce
+#undef polyvec_tobytes
+#undef polyvec_tomont
+/* mlkem/sys.h */
+#undef MLK_ALIGN
+#undef MLK_ALWAYS_INLINE
+#undef MLK_CET_ENDBR
+#undef MLK_CT_TESTING_DECLASSIFY
+#undef MLK_CT_TESTING_SECRET
+#undef MLK_DEFAULT_ALIGN
+#undef MLK_HAVE_INLINE_ASM
+#undef MLK_INLINE
+#undef MLK_MUST_CHECK_RETURN_VALUE
+#undef MLK_RESTRICT
+#undef MLK_SYS_AARCH64
+#undef MLK_SYS_AARCH64_EB
+#undef MLK_SYS_BIG_ENDIAN
+#undef MLK_SYS_H
+#undef MLK_SYS_LITTLE_ENDIAN
+#undef MLK_SYS_WINDOWS
+#undef MLK_SYS_X86_64
+#undef MLK_SYS_X86_64_AVX2
+
+#if !defined(MLK_MONOBUILD_KEEP_SHARED_HEADERS)
+/*
+ * Undefine macros from MLKEM_K-generic files
+ */
+/* mlkem/arith_backend.h */
+#undef MLK_ARITH_BACKEND_H
+/* mlkem/compress.h */
+#undef MLK_COMPRESS_H
+#undef poly_compress_d10
+#undef poly_compress_d11
+#undef poly_compress_d4
+#undef poly_compress_d5
+#undef poly_decompress_d10
+#undef poly_decompress_d11
+#undef poly_decompress_d4
+#undef poly_decompress_d5
+#undef poly_frombytes
+#undef poly_frommsg
+#undef poly_tobytes
+#undef poly_tomsg
+#undef scalar_compress_d1
+#undef scalar_compress_d10
+#undef scalar_compress_d11
+#undef scalar_compress_d4
+#undef scalar_compress_d5
+#undef scalar_decompress_d10
+#undef scalar_decompress_d11
+#undef scalar_decompress_d4
+#undef scalar_decompress_d5
+/* mlkem/debug.h */
+#undef MLK_DEBUG_H
+#undef debug_assert
+#undef debug_assert_abs_bound
+#undef debug_assert_abs_bound_2d
+#undef debug_assert_bound
+#undef debug_assert_bound_2d
+#undef mlkem_debug_assert
+#undef mlkem_debug_check_bounds
+/* mlkem/poly.h */
+#undef MLK_INVNTT_BOUND
+#undef MLK_NTT_BOUND
+#undef MLK_POLY_H
+#undef cast_uint16_to_int16
+#undef montgomery_reduce
+#undef poly
+#undef poly_add
+#undef poly_invntt_tomont
+#undef poly_mulcache
+#undef poly_mulcache_compute
+#undef poly_ntt
+#undef poly_reduce
+#undef poly_sub
+#undef poly_tomont
+/* mlkem/randombytes.h */
+#undef MLK_RANDOMBYTES_H
+/* mlkem/sampling.h */
+#undef MLK_SAMPLING_H
+#undef poly_cbd2
+#undef poly_cbd3
+#undef poly_rej_uniform
+#undef poly_rej_uniform_x4
+/* mlkem/symmetric.h */
+#undef MLK_SYMMETRIC_H
+#undef XOF_RATE
+#undef hash_g
+#undef hash_h
+#undef hash_j
+#undef prf_eta
+#undef prf_eta1
+#undef prf_eta1_x4
+#undef prf_eta2
+#undef xof_absorb
+#undef xof_ctx
+#undef xof_init
+#undef xof_release
+#undef xof_squeezeblocks
+#undef xof_x4_absorb
+#undef xof_x4_ctx
+#undef xof_x4_init
+#undef xof_x4_release
+#undef xof_x4_squeezeblocks
+/* mlkem/verify.h */
+#undef MLK_USE_ASM_VALUE_BARRIER
+#undef MLK_VERIFY_H
+#undef ct_cmask_neg_i16
+#undef ct_cmask_nonzero_u16
+#undef ct_cmask_nonzero_u8
+#undef ct_cmov_zero
+#undef ct_memcmp
+#undef ct_opt_blocker_u64
+#undef ct_sel_int16
+#undef ct_sel_uint8
+#undef ct_zeroize
+#undef value_barrier_i32
+#undef value_barrier_u32
+#undef value_barrier_u8
+/* mlkem/cbmc.h */
+#undef MLK_CBMC_H
+#undef __contract__
+#undef __loop__
+
+
+#endif /* MLK_MONOBUILD_KEEP_SHARED_HEADERS */
diff --git a/crypto/fipsmodule/ml_kem/mlkem_native_config.h b/crypto/fipsmodule/ml_kem/mlkem_native_config.h
new file mode 100644
index 0000000000..4b28819a2a
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem_native_config.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef MLK_CONFIG_H
+#define MLK_CONFIG_H
+
+#include "../../internal.h"
+
+/* Namespacing: All symbols are of the form mlkem*. Level-specific
+ * symbols are further prefixed with their security level, e.g.
+ * mlkem512*, mlkem768*, mlkem1024*. */
+#define MLK_NAMESPACE_PREFIX mlkem
+#define MLK_NAMESPACE_PREFIX_ADD_LEVEL
+#define MLK_MONOBUILD
+
+/* Everything is built in a single CU, so even the top-level
+ * mlkem-native API can have internal linkage. */
+#define MLK_EXTERNAL_API static
+
+/* Enable PCT if and only if AWS-LC is built in FIPS-mode. */
+#if defined(AWSLC_FIPS)
+#define MLK_KEYGEN_PCT
+#endif /* AWSLC_FIPS */
+
+#if defined(BORINGSSL_FIPS_BREAK_TESTS)
+#define MLK_KEYGEN_PCT_BREAKAGE_TEST
+#if !defined(__ASSEMBLER__) && !defined(MLK_MULTILEVEL_BUILD_NO_SHARED)
+#include "mlkem/sys.h"
+static MLK_INLINE int mlk_break_pct(void)
+{
+  return boringssl_fips_break_test("MLKEM_PWCT");
+}
+#endif /* !__ASSEMBLER__ */
+#endif /* BORINGSSL_FIPS_BREAK_TESTS */
+
+/* Enable valgrind-based assertions in mlkem-native through macro
+ * from AWS-LC/BoringSSL. */
+#if defined(BORINGSSL_CONSTANT_TIME_VALIDATION)
+#define MLK_CT_TESTING_ENABLED
+#endif /* BORINGSSL_CONSTANT_TIME_VALIDATION */
+
+/* Map zeroization function to the one used by AWS-LC */
+#define MLK_USE_CT_ZEROIZE_NATIVE
+#if !defined(__ASSEMBLER__) && !defined(MLK_MULTILEVEL_BUILD_NO_SHARED)
+#include <stdint.h>
+#include "mlkem/sys.h"
+#include <openssl/base.h>
+static MLK_INLINE void ct_zeroize_native(void *ptr, size_t len)
+{
+    OPENSSL_cleanse(ptr, len);
+}
+#endif /* !__ASSEMBLER__ */
+
+#endif /* MLkEM_NATIVE_CONFIG_H */