From 920b1493364a1e9046a207389bac5ae77ab71546 Mon Sep 17 00:00:00 2001 From: YooSunyoung Date: Fri, 10 May 2024 16:15:56 +0200 Subject: [PATCH 01/11] Get docker compose file --- .github/workflows/integration.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index d19989d..406d4ca 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -25,3 +25,4 @@ jobs: - run: python -m pip install --upgrade pip - run: python -m pip install -r requirements/ci.txt - run: docker-compose version + - run: wget https://gitlab.esss.lu.se/ecdc/ess-dmsc/kafka-to-nexus/-/blob/main/integration-tests/docker-compose.yml From ca79461046f0beb55571cf303401c47156caa598 Mon Sep 17 00:00:00 2001 From: YooSunyoung Date: Fri, 10 May 2024 16:21:08 +0200 Subject: [PATCH 02/11] Docker compose kafka. --- .github/workflows/integration.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 406d4ca..215a17e 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -26,3 +26,4 @@ jobs: - run: python -m pip install -r requirements/ci.txt - run: docker-compose version - run: wget https://gitlab.esss.lu.se/ecdc/ess-dmsc/kafka-to-nexus/-/blob/main/integration-tests/docker-compose.yml + - run: docker compose up -d From 77b79830e764cae05fc4f4a4fb63b9e1717511c9 Mon Sep 17 00:00:00 2001 From: Sunyoung Yoo Date: Fri, 10 May 2024 16:28:57 +0200 Subject: [PATCH 03/11] Update integration.yml --- .github/workflows/integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 215a17e..12e1fb8 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -26,4 +26,4 @@ jobs: - run: python -m pip install -r requirements/ci.txt - run: docker-compose version - run: wget https://gitlab.esss.lu.se/ecdc/ess-dmsc/kafka-to-nexus/-/blob/main/integration-tests/docker-compose.yml - - run: docker compose up -d + - run: docker-compose up -d From 98b06793197c95dabbc24351cabc3bd6ae737a0d Mon Sep 17 00:00:00 2001 From: Sunyoung Yoo Date: Fri, 10 May 2024 16:37:15 +0200 Subject: [PATCH 04/11] Update integration.yml --- .github/workflows/integration.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 12e1fb8..3959c5c 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -16,6 +16,8 @@ jobs: tests: name: Integration Tests runs-on: 'ubuntu-22.04' + env: + FILEWRITER_FILEWRITER_CONTAINER_NAME: filewriter-$GITHUB_JOB # For ECDC docker compose recipe. steps: - run: sudo apt install --yes docker-compose - uses: actions/checkout@v4 From 592b44149d8618d9f8a5dd8d436571474e881a23 Mon Sep 17 00:00:00 2001 From: Sunyoung Yoo Date: Fri, 10 May 2024 16:41:28 +0200 Subject: [PATCH 05/11] Update integration.yml --- .github/workflows/integration.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 3959c5c..91f5441 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -11,13 +11,16 @@ on: inputs: python-version: type: string +env: + # For ECDC docker compose recipe. + FILEWRITER_ZOOKEEPER_CONTAINER_NAME: zookeeper-$GITHUB_JOB + FILEWRITER_KAFKA_CONTAINER_NAME: kafka-$GITHUB_JOB + FILEWRITER_FILEWRITER_CONTAINER_NAME: filewriter-$GITHUB_JOB jobs: tests: name: Integration Tests runs-on: 'ubuntu-22.04' - env: - FILEWRITER_FILEWRITER_CONTAINER_NAME: filewriter-$GITHUB_JOB # For ECDC docker compose recipe. steps: - run: sudo apt install --yes docker-compose - uses: actions/checkout@v4 From a7434ea533e03b7fb1b854dae69fc8aca3233ffb Mon Sep 17 00:00:00 2001 From: YooSunyoung Date: Mon, 27 May 2024 16:53:34 +0200 Subject: [PATCH 06/11] Update docker compose file writer. --- .github/workflows/integration.yml | 9 +--- tests/docker-compose-file-writer.yml | 71 ++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 7 deletions(-) create mode 100644 tests/docker-compose-file-writer.yml diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 91f5441..64ec7ba 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -11,11 +11,6 @@ on: inputs: python-version: type: string -env: - # For ECDC docker compose recipe. - FILEWRITER_ZOOKEEPER_CONTAINER_NAME: zookeeper-$GITHUB_JOB - FILEWRITER_KAFKA_CONTAINER_NAME: kafka-$GITHUB_JOB - FILEWRITER_FILEWRITER_CONTAINER_NAME: filewriter-$GITHUB_JOB jobs: tests: @@ -30,5 +25,5 @@ jobs: - run: python -m pip install --upgrade pip - run: python -m pip install -r requirements/ci.txt - run: docker-compose version - - run: wget https://gitlab.esss.lu.se/ecdc/ess-dmsc/kafka-to-nexus/-/blob/main/integration-tests/docker-compose.yml - - run: docker-compose up -d + - run: docker-compose -f tests/docker-compose-file-writer.yml up -d + - run: docker-compose -f tests/docker-compose-file-writer.yml down diff --git a/tests/docker-compose-file-writer.yml b/tests/docker-compose-file-writer.yml new file mode 100644 index 0000000..eaeb3bc --- /dev/null +++ b/tests/docker-compose-file-writer.yml @@ -0,0 +1,71 @@ +version: "3.5" + +services: +# Kafka and file-writer services are copied from +# https://gitlab.esss.lu.se/ecdc/ess-dmsc/kafka-to-nexus/-/blob/main/integration-tests/docker-compose.yml +# Currently github-ci fails to run the original docker-compose.yml file in the ecdc repository +# so we copied and modified the file here. + kafka: + container_name: file-writer-kafka + hostname: file-writer-kafka + image: confluentinc/cp-kafka:7.4.3 + deploy: + resources: + limits: + memory: 600M + restart: always + depends_on: + - zookeeper + ports: + - "9093:9093" + networks: + - frontend + environment: + KAFKA_ZOOKEEPER_CONNECT: file-writer-zookeeper:2181 + KAFKA_BROKER_ID: 0 + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 + KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1 + KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1 + KAFKA_MESSAGE_MAX_BYTES: 300000000 + KAFKA_SOCKET_REQUEST_MAX_BYTES: 300000000 + KAFKA_REPLICA_FETCH_MAX_BYTES: 300000000 + KAFKA_LOG_RETENTION_MS: -1 # keep data forever, required for tests involving fake "historical" data + ## listeners + KAFKA_LISTENERS: INSIDE://:9092,OUTSIDE://:9093 + KAFKA_ADVERTISED_LISTENERS: INSIDE://file-writer-kafka:9092,OUTSIDE://file-writer-kafka:9093 + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT + KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE + healthcheck: + test: ["CMD", "kafka-topics", "--bootstrap-server", "localhost:9092", "--list"] + interval: 5s + timeout: 5s + retries: 5 + start_period: 10s + + zookeeper: + container_name: file-writer-zookeeper + hostname: file-writer-zookeeper + image: confluentinc/cp-zookeeper:7.4.3 + deploy: + resources: + limits: + memory: 200M + restart: always + environment: + ZOOKEEPER_CLIENT_PORT: 2181 + ZOOKEEPER_TICK_TIME: 2000 + networks: + - frontend + + filewriter: + container_name: file-writer-file-writer + image: registry.esss.lu.se/ecdc/ess-dmsc/docker-centos7-build-node:latest + depends_on: + kafka: + condition: service_healthy + tty: true + networks: + - frontend + +networks: + frontend: From 675df356deb74b9b8c9a5225aad6d19bb52fc1ec Mon Sep 17 00:00:00 2001 From: YooSunyoung Date: Mon, 27 May 2024 17:24:43 +0200 Subject: [PATCH 07/11] Add simple instiatiation test. --- .github/workflows/integration.yml | 2 ++ config.20240405.json | 4 +--- src/scicat_ingestor.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 64ec7ba..58ce3e2 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -24,6 +24,8 @@ jobs: python-version: ${{ inputs.python-version }} - run: python -m pip install --upgrade pip - run: python -m pip install -r requirements/ci.txt + - run: python -m pip install -e . - run: docker-compose version - run: docker-compose -f tests/docker-compose-file-writer.yml up -d + - run: scicat_ingestor --verbose - run: docker-compose -f tests/docker-compose-file-writer.yml down diff --git a/config.20240405.json b/config.20240405.json index e4954af..bc58626 100644 --- a/config.20240405.json +++ b/config.20240405.json @@ -2,9 +2,7 @@ "kafka": { "topics": ["KAFKA_TOPIC_1","KAFKA_TOPIC_2"], "group_id": "GROUP_ID", - "bootstrap_servers": [ - "HOST:9092" - ], + "bootstrap_servers": "file-writer-kafka:9093", "enable_auto_commit": true, "auto_offset_reset": "earliest" }, diff --git a/src/scicat_ingestor.py b/src/scicat_ingestor.py index b9bf2c2..879de0c 100644 --- a/src/scicat_ingestor.py +++ b/src/scicat_ingestor.py @@ -7,12 +7,12 @@ from scicat_logging import build_logger -def quit(logger: logging.Logger) -> None: +def quit(logger: logging.Logger, unexpected: bool = True) -> None: """Log the message and exit the program.""" import sys logger.info("Exiting ingestor") - sys.exit() + sys.exit(1 if unexpected else 0) def main() -> None: From 8a76c64edfe17df0db062e964f48d66e5f4058a4 Mon Sep 17 00:00:00 2001 From: YooSunyoung Date: Mon, 27 May 2024 17:28:58 +0200 Subject: [PATCH 08/11] Update advertised kafka listner address. --- config.20240405.json | 2 +- tests/docker-compose-file-writer.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config.20240405.json b/config.20240405.json index bc58626..1c09f99 100644 --- a/config.20240405.json +++ b/config.20240405.json @@ -2,7 +2,7 @@ "kafka": { "topics": ["KAFKA_TOPIC_1","KAFKA_TOPIC_2"], "group_id": "GROUP_ID", - "bootstrap_servers": "file-writer-kafka:9093", + "bootstrap_servers": ["localhost:9093"], "enable_auto_commit": true, "auto_offset_reset": "earliest" }, diff --git a/tests/docker-compose-file-writer.yml b/tests/docker-compose-file-writer.yml index eaeb3bc..eb39197 100644 --- a/tests/docker-compose-file-writer.yml +++ b/tests/docker-compose-file-writer.yml @@ -32,7 +32,7 @@ services: KAFKA_LOG_RETENTION_MS: -1 # keep data forever, required for tests involving fake "historical" data ## listeners KAFKA_LISTENERS: INSIDE://:9092,OUTSIDE://:9093 - KAFKA_ADVERTISED_LISTENERS: INSIDE://file-writer-kafka:9092,OUTSIDE://file-writer-kafka:9093 + KAFKA_ADVERTISED_LISTENERS: INSIDE://file-writer-kafka:9092,OUTSIDE://localhost:9093 KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE healthcheck: From d72192044768d6277acc45a339f2cd2f7322fd57 Mon Sep 17 00:00:00 2001 From: YooSunyoung Date: Mon, 27 May 2024 17:41:15 +0200 Subject: [PATCH 09/11] Parse bootstrap server address. --- src/scicat_kafka.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/scicat_kafka.py b/src/scicat_kafka.py index 11fcd36..a832867 100644 --- a/src/scicat_kafka.py +++ b/src/scicat_kafka.py @@ -13,37 +13,43 @@ def collect_consumer_options(options: kafkaOptions) -> dict: # Build logger and formatter config_dict = { - key.replace('_', '.'): value + key.replace("_", "."): value for key, value in asdict(options).items() - if key not in ('topics', 'individual_message_commit') + if key not in ("topics", "individual_message_commit") } - config_dict['enable.auto.commit'] = ( + config_dict["enable.auto.commit"] = ( not options.individual_message_commit ) and options.enable_auto_commit + if isinstance(bootstrap_servers := options.bootstrap_servers, list): + # Convert the list to a comma-separated string + config_dict["bootstrap.servers"] = ",".join(bootstrap_servers) + else: + config_dict["bootstrap.servers"] = bootstrap_servers + return config_dict def collect_kafka_topics(options: kafkaOptions) -> list[str]: """Return the Kafka topics as a list.""" if isinstance(options.topics, str): - return options.topics.split(',') + return options.topics.split(",") elif isinstance(options.topics, list): return options.topics else: - raise TypeError('The topics must be a list or a comma-separated string.') + raise TypeError("The topics must be a list or a comma-separated string.") def build_consumer(kafka_options: kafkaOptions, logger: logging.Logger) -> Consumer: """Build a Kafka consumer and configure it according to the ``options``.""" consumer_options = collect_consumer_options(kafka_options) - logger.info('Connecting to Kafka with the following parameters:') + logger.info("Connecting to Kafka with the following parameters:") logger.info(consumer_options) consumer = Consumer(consumer_options) if not validate_consumer(consumer, logger): return None kafka_topics = collect_kafka_topics(kafka_options) - logger.info(f'Subscribing to the following Kafka topics: {kafka_topics}') + logger.info(f"Subscribing to the following Kafka topics: {kafka_topics}") consumer.subscribe(kafka_topics) return Consumer(consumer_options) @@ -58,5 +64,5 @@ def validate_consumer(consumer: Consumer, logger: logging.Logger) -> bool: ) return False else: - logger.info('Kafka consumer successfully instantiated') + logger.info("Kafka consumer successfully instantiated") return True From 177cf445c456f8aaf5bd6d9f4b9f5ccadf26b376 Mon Sep 17 00:00:00 2001 From: YooSunyoung Date: Mon, 27 May 2024 17:46:19 +0200 Subject: [PATCH 10/11] Select configuration file for integration test. --- .github/workflows/integration.yml | 2 +- resources/config.sample.json | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 58ce3e2..dfeb8b2 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -27,5 +27,5 @@ jobs: - run: python -m pip install -e . - run: docker-compose version - run: docker-compose -f tests/docker-compose-file-writer.yml up -d - - run: scicat_ingestor --verbose + - run: scicat_ingestor -c resources/config.sample.json --verbose - run: docker-compose -f tests/docker-compose-file-writer.yml down diff --git a/resources/config.sample.json b/resources/config.sample.json index 12dd806..1ef930f 100644 --- a/resources/config.sample.json +++ b/resources/config.sample.json @@ -1,10 +1,8 @@ { "kafka": { - "topics": ["KAFKA_TOPIC_1","KAFKA_TOPIC_2"], + "topics": ["KAFKA_TOPIC_1", "KAFKA_TOPIC_2"], "group_id": "GROUP_ID", - "bootstrap_servers": [ - "HOST:9092" - ], + "bootstrap_servers": ["localhost:9093"], "individual_message_commit": false, "enable_auto_commit": true, "auto_offset_reset": "earliest" From fadb2eef91fe4dc696ea50431241c6ee6e0f1648 Mon Sep 17 00:00:00 2001 From: YooSunyoung Date: Mon, 27 May 2024 17:53:05 +0200 Subject: [PATCH 11/11] Update integration test dependency. --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f7789d6..039cb30 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -49,7 +49,7 @@ jobs: intergration-tests: name: Integration Tests - needs: tests + needs: [tests, formatting] uses: ./.github/workflows/integration.yml with: python-version: '${{needs.formatting.outputs.min_python}}'