diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml new file mode 100644 index 00000000..f4193307 --- /dev/null +++ b/.github/workflows/build-and-test.yml @@ -0,0 +1,188 @@ +name: Build/Test + +on: + workflow_call: + workflow_dispatch: + inputs: + part: + description: 'Name of the charm to build/test manually. Defaults to all charms' + required: false + default: '' + +jobs: + modifiedparts: + runs-on: ubuntu-latest + outputs: + parts: ${{ steps.determine-parts.outputs.parts }} + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + # For non-manual triggered runs + - name: Get modified files + id: changed-files + if: ${{ github.event_name != 'workflow_dispatch' }} + uses: tj-actions/changed-files@v35 + + - name: Determine charms to build/test + id: determine-parts + env: + INPUT_PART: ${{ inputs.part }} + GITHUB_EVENT_NAME: ${{ github.event_name }} + ALL_MOD_FILES: ${{ steps.changed-files.outputs.all_modified_files }} + run: | + if [ "$GITHUB_EVENT_NAME" = "workflow_dispatch" ]; then + if [ -n "$INPUT_PART" ]; then + # Manual run with a specified charm + components=($INPUT_PART) + else + # Manual run, no charm specified -> run all + components=($(find . -maxdepth 1 -type d ! -path '.' -exec bash -c '[[ -f "$0/charmcraft.yaml" ]] && basename "$0"' {} \; | sort)) + fi + else + # Automatic run: use changed-files to determine modified charms + echo "Modified files to eval: ${ALL_MOD_FILES}" + components=() + # Retrieve components with a 'tox.ini' file. + for file in ${ALL_MOD_FILES} ; do + component=$(echo "$file" | cut -d "/" -f1) + if [[ -f "./$component/charmcraft.yaml" ]]; then + # This is a charm. + components+=("$component") + elif [[ -f "./$component/tox.ini" ]]; then + # Assume this is a library. + # TODO: Add dependent charms here. 
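+            # One possible (hypothetical) shape for that mapping, left
+            # commented out until the library->charm dependencies are
+            # curated:
+            #   declare -A LIB_DEPENDENTS=( ["charms.ceph"]="ceph-mon ceph-osd" )
+            #   components+=( ${LIB_DEPENDENTS["$component"]:-} )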
+ : + fi + done + # Remove dups + components=($(echo "${components[@]}" | tr ' ' '\n' | sort -u)) + fi + json_output=$(jq --compact-output --null-input '$ARGS.positional' --args -- "${components[@]}") + echo "Modified parts: $json_output" + echo "parts=$json_output" >> $GITHUB_OUTPUT + + build: + needs: modifiedparts + name: Build the charm + runs-on: ubuntu-latest + if: ${{ needs.modifiedparts.outputs.parts != '[]' }} + strategy: + matrix: + part: ${{ fromJson(needs.modifiedparts.outputs.parts) }} + fail-fast: false + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install dependencies + run: | + sudo apt-get -qq install libxslt-dev libxml2-dev python3-lxml tox + + - name: Run linters + run: tox -c ${{ matrix.part }} -e pep8 + + - name: Run unit tests + run: tox -c ${{ matrix.part }} -e py3 + + - name: Setup LXD + uses: canonical/setup-lxd@v0.1.1 + with: + channel: 5.21/stable + + - name: Build charm(s) + id: builder + run: | + sudo snap install charmcraft --classic + tox -c ${{ matrix.part }} -e build + + - name: Upload built charm + uses: actions/upload-artifact@v4 + with: + name: charm-artifact-${{ matrix.part }} + path: "./${{ matrix.part }}/*.charm" + + functional-test: + needs: + - modifiedparts + - build + name: Functional tests + runs-on: [self-hosted, linux, amd64, X64, large, noble] + if: ${{ needs.modifiedparts.outputs.parts != '[]' }} + strategy: + matrix: + part: ${{ fromJson(needs.modifiedparts.outputs.parts) }} + fail-fast: false + steps: + - name: Download charm + uses: actions/download-artifact@v4 + with: + name: charm-artifact-${{ matrix.part }} + pattern: charm-artifact-* + merge-multiple: true + path: ~/artifacts/ + + - name: Checkout code + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Copy utils + run: cp tests/scripts/actionutils.sh $HOME + + - name: Clear FORWARD firewall rules + run: ~/actionutils.sh cleaript + + - name: Setup LXD + run: | + if [[ "$(snap list | grep -c lxd)" -eq 0 ]]; then + sudo snap install lxd --channel=5.21/stable + sudo usermod -aG lxd "$USER" + newgrp lxd + lxd init --minimal + fi + + - name: Install and configure tests + run: | + date + ~/actionutils.sh setup_functest + ~/actionutils.sh cacheimgs "ubuntu@22.04" + + - name: Run the tests + run: | + date + mv ~/artifacts/*.charm ./ + if [[ -f "./${{ matrix.part }}/src/tox.ini" ]]; then + tox -c ${{ matrix.part }}/src -e func-dev + else + tox -c ${{ matrix.part }} -e func-dev + fi + + - name: Generate crash dumps + if: failure() + run: | + models=$(juju models | grep zaza | awk '{print $1}' | tr -d '*') + rm -rf ./crashdumps + mkdir ./crashdumps + for model in $models; do + juju-crashdump -m $model -o ./crashdumps + done + + - name: Upload artifacts on failure + uses: actions/upload-artifact@v4 + with: + name: crashdumps-${{ matrix.part }} + path: "./crashdumps/*" + if: failure() + + - name: Setup tmate session + if: ${{ failure() && runner.debug }} + uses: canonical/action-tmate@main + + - name: Tear down models + if: always() + run: | + models=$(juju models | grep zaza | awk '{print $1}' | tr -d '*') + for model in $models; do + juju destroy-model --no-prompt --force --destroy-storage $model + done diff --git a/.github/workflows/commits.yml b/.github/workflows/commits.yml new file mode 100644 index 00000000..74adde12 --- /dev/null +++ b/.github/workflows/commits.yml @@ -0,0 +1,33 @@ +name: Commits +on: + - pull_request + +permissions: + contents: read + +jobs: + cla-check: + permissions: + pull-requests: read + name: Canonical CLA signed + runs-on: 
ubuntu-20.04
+    steps:
+    - name: Check if CLA signed
+      uses: canonical/has-signed-canonical-cla@v2
+
+  dco-check:
+    permissions:
+      pull-requests: read  # for tim-actions/get-pr-commits to get list of commits from the PR
+    name: Signed-off-by (DCO)
+    runs-on: ubuntu-20.04
+    steps:
+    - name: Get PR Commits
+      id: 'get-pr-commits'
+      uses: tim-actions/get-pr-commits@master
+      with:
+        token: ${{ secrets.GITHUB_TOKEN }}
+
+    - name: Check that all commits are signed-off
+      uses: tim-actions/dco@master
+      with:
+        commits: ${{ steps.get-pr-commits.outputs.commits }}
diff --git a/.github/workflows/plan-terraform.yml b/.github/workflows/plan-terraform.yml
new file mode 100644
index 00000000..ae93e64b
--- /dev/null
+++ b/.github/workflows/plan-terraform.yml
@@ -0,0 +1,88 @@
+# Copyright 2025 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+name: Plan Terraform tests
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+
+jobs:
+  plan-terraform:
+    name: Plan Terraform with Juju
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        test:
+          - name: default
+            yaml: ../tests/terraform/default.yaml
+    env:
+      TF_VAR_model: test
+      TF_VAR_manifest_yaml: ${{ matrix.test.yaml }}
+      WORKING_DIR: 'terraform'
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+      - uses: charmed-kubernetes/actions-operator@main
+        with:
+          provider: lxd
+          channel: latest/stable
+      - name: Prepare juju tf provider environment
+        run: |
+          CONTROLLER=$(juju whoami | yq .Controller)
+          JUJU_CONTROLLER_ADDRESSES="$(juju show-controller | yq '.[$CONTROLLER]'.details.\"api-endpoints\" | tr -d "[]' "|tr -d '"'|tr -d '\n')"
+          JUJU_USERNAME="$(cat ~/.local/share/juju/accounts.yaml | yq .controllers.$CONTROLLER.user|tr -d '"')"
+          JUJU_PASSWORD="$(cat ~/.local/share/juju/accounts.yaml | yq .controllers.$CONTROLLER.password|tr -d '"')"

+          echo "JUJU_CONTROLLER_ADDRESSES=$JUJU_CONTROLLER_ADDRESSES" >> "$GITHUB_ENV"
+          echo "JUJU_USERNAME=$JUJU_USERNAME" >> "$GITHUB_ENV"
+          echo "JUJU_PASSWORD=$JUJU_PASSWORD" >> "$GITHUB_ENV"
+          {
+            echo 'JUJU_CA_CERT<<EOF'
+            juju show-controller | yq '.[$CONTROLLER]'.details.\"ca-cert\"
+            echo 'EOF'
+          } >> "$GITHUB_ENV"
+      - uses: hashicorp/setup-terraform@v3
+      - run: terraform init
+        working-directory: ${{env.WORKING_DIR}}
+      - run: terraform plan -out=tfplan
+        working-directory: ${{env.WORKING_DIR}}
+      - run: terraform show tfplan
+        working-directory: ${{env.WORKING_DIR}}
+      - run: |
+          juju add-model test
+          set -e  # Exit on error
+
+          # Apply Terraform changes
+          terraform apply -auto-approve || { echo "Terraform apply failed"; exit 1; }
+
+          # Wait for Juju applications to become active
+          MAX_RETRIES=30
+          for i in $(seq 1 $MAX_RETRIES); do
+            echo "Checking Juju application statuses... Attempt $i/$MAX_RETRIES"
+
+            # Fetch status JSON once and store it
+            STATUS_JSON=$(juju status --format=json)
+
+            # Check if all applications are active
+            if echo "$STATUS_JSON" | jq -e '.applications | all(.["application-status"].current == "active")' > /dev/null; then
+              echo "✅ All applications are active"
+              exit 0
+            fi
+
+            echo "⏳ Waiting for applications to become active..."
+ sleep 10 + done + + echo "❌ Timeout waiting for applications to become active" + exit 1 + working-directory: ${{env.WORKING_DIR}} + - uses: actions/upload-artifact@v4 + with: + name: ${{matrix.test.name}}-terraform-plan + path: ${{env.WORKING_DIR}}/tfplan diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml new file mode 100644 index 00000000..6e045d55 --- /dev/null +++ b/.github/workflows/pr.yaml @@ -0,0 +1,8 @@ +name: PR + +on: + pull_request: + +jobs: + build: + uses: ./.github/workflows/build-and-test.yml diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..901e8bd5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +bin +.idea +.coverage +.testrepository +.tox +*.sw[nop] +*.charm +.idea +*.pyc +func-results.json +.stestr +__pycache__ diff --git a/ceph-dashboard/.flake8 b/ceph-dashboard/.flake8 new file mode 100644 index 00000000..8ef84fcd --- /dev/null +++ b/ceph-dashboard/.flake8 @@ -0,0 +1,9 @@ +[flake8] +max-line-length = 99 +select: E,W,F,C,N +exclude: + venv + .git + build + dist + *.egg_info diff --git a/ceph-dashboard/.gitignore b/ceph-dashboard/.gitignore new file mode 100644 index 00000000..0c9e80f2 --- /dev/null +++ b/ceph-dashboard/.gitignore @@ -0,0 +1,8 @@ +.tox +**/*.swp +__pycache__ +.stestr/ +lib/* +!lib/README.txt +build +*.charm diff --git a/ceph-dashboard/.gitreview b/ceph-dashboard/.gitreview new file mode 100644 index 00000000..7568dc50 --- /dev/null +++ b/ceph-dashboard/.gitreview @@ -0,0 +1,6 @@ +[gerrit] +host=review.opendev.org +port=29418 +project=openstack/charm-ceph-dashboard.git + +defaultbranch=stable/squid-jammy diff --git a/ceph-dashboard/.jujuignore b/ceph-dashboard/.jujuignore new file mode 100644 index 00000000..6ccd559e --- /dev/null +++ b/ceph-dashboard/.jujuignore @@ -0,0 +1,3 @@ +/venv +*.py[cod] +*.charm diff --git a/ceph-dashboard/.stestr.conf b/ceph-dashboard/.stestr.conf new file mode 100644 index 00000000..5fcccaca --- /dev/null +++ b/ceph-dashboard/.stestr.conf @@ -0,0 +1,3 @@ +[DEFAULT] +test_path=./unit_tests +top_dir=./ diff --git a/ceph-dashboard/.zuul.yaml b/ceph-dashboard/.zuul.yaml new file mode 100644 index 00000000..7ffc71cb --- /dev/null +++ b/ceph-dashboard/.zuul.yaml @@ -0,0 +1,4 @@ +- project: + templates: + - openstack-python3-charm-yoga-jobs + - openstack-cover-jobs diff --git a/ceph-dashboard/LICENSE b/ceph-dashboard/LICENSE new file mode 100644 index 00000000..d6456956 --- /dev/null +++ b/ceph-dashboard/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/ceph-dashboard/README.md b/ceph-dashboard/README.md new file mode 100644 index 00000000..4b8b6bcf --- /dev/null +++ b/ceph-dashboard/README.md @@ -0,0 +1,209 @@ +# Overview + +The ceph-dashboard charm deploys the [Ceph Dashboard][upstream-ceph-dashboard], +a built-in web-based Ceph management and monitoring application. 
It works in +conjunction with the [openstack-loadbalancer][loadbalancer-charm] charm, which +in turn utilises the [hacluster][hacluster-charm] charm. + +# Usage + +## Configuration + +This section covers common and/or important configuration options. See file +`config.yaml` for the full list of options, along with their descriptions and +default values. See the [Juju documentation][juju-docs-config-apps] for details +on configuring applications. + +#### `grafana-api-url` + +Sets the URL of the Grafana API when using embedded graphs. See +[Embedded Grafana dashboards][anchor-grafana-dashboards]. + +#### `public-hostname` + +Sets the hostname or address of the public endpoint used to access +the dashboard. + +#### `enable-password-policy` + +Sets whether certain password restrictions are enforced when a user +is created or changes their password. + +#### `password-*` + +There are a number of `password-*` options which impose constraints on which +passwords can be used. These options are ignored unless +`enable-password-policy` is set to 'True'. + +## Deployment + +We are assuming a pre-existing Ceph cluster. + +Deploy ceph-dashboard as a subordinate to the ceph-mon charm: + + juju deploy ceph-dashboard + juju add-relation ceph-dashboard:dashboard ceph-mon:dashboard + +TLS is a requirement for this charm. Enable it by adding a relation to the +vault application: + + juju add-relation ceph-dashboard:certificates vault:certificates + +See [Managing TLS certificates][cdg-tls] in the +[OpenStack Charms Deployment Guide][cdg] for more information on TLS. + +> **Note**: This charm also supports TLS configuration via charm options + `ssl_cert`, `ssl_key`, and `ssl_ca`. + +### Load balancer + +The dashboard is accessed via a load balancer using VIPs and implemented via +the openstack-loadbalancer and hacluster charms: + + juju deploy -n 3 --config vip=10.5.20.200 openstack-loadbalancer + juju deploy hacluster openstack-loadbalancer-hacluster + juju add-relation openstack-loadbalancer:ha openstack-loadbalancer-hacluster:ha + +Now add a relation between the openstack-loadbalancer and ceph-dashboard +applications: + + juju add-relation ceph-dashboard:loadbalancer openstack-loadbalancer:loadbalancer + +### Dashboard user + +Credentials are needed to log in to the dashboard. Set these up by applying an +action to any ceph-dashboard unit. For example, to create an administrator user +called 'admin': + + juju run-action --wait ceph-dashboard/0 add-user username=admin role=administrator + +The command's output will include a generated password. 
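+
+For example, a hypothetical 'viewer' account could be created with the
+dashboard's built-in read-only role:
+
+    juju run-action --wait ceph-dashboard/0 add-user username=viewer role=read-only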
+
+The dashboard can then be accessed on the configured VIP and on port 8443:
+
+https://10.5.20.200:8443
+
+## Embedded Grafana dashboards
+
+To embed Grafana dashboards within the Ceph dashboard some additional relations
+are required (Grafana, Telegraf, and Prometheus are assumed to be
+pre-existing):
+
+    juju add-relation ceph-dashboard:grafana-dashboard grafana:dashboards
+    juju add-relation ceph-dashboard:prometheus prometheus:website
+    juju add-relation ceph-mon:prometheus prometheus:target
+    juju add-relation ceph-osd:juju-info telegraf:juju-info
+    juju add-relation ceph-mon:juju-info telegraf:juju-info
+
+Grafana, Telegraf, and Prometheus should be related in the standard way:
+
+    juju add-relation grafana:grafana-source prometheus:grafana-source
+    juju add-relation telegraf:prometheus-client prometheus:target
+    juju add-relation telegraf:dashboards grafana:dashboards
+
+When Grafana is integrated with the Ceph Dashboard it requires TLS, so
+add a relation to Vault (the grafana charm also supports TLS configuration via
+`ssl_*` charm options):
+
+    juju add-relation grafana:certificates vault:certificates
+
+> **Important**: Ceph Dashboard will (silently) fail to display Grafana output
+  if the client browser cannot validate the Grafana server's TLS certificate.
+  Either ensure the signing CA certificate is known to the browser or, if in a
+  testing environment, contact the Grafana dashboard directly and have the
+  browser accept the unverified certificate.
+
+Grafana should be configured with the following charm options:
+
+    juju config grafana anonymous=True
+    juju config grafana allow_embedding=True
+
+The grafana charm also requires the vonage-status-panel and
+grafana-piechart-panel plugins. The `install_plugins` configuration option
+should be set to include URLs from which these plugins can be downloaded. They
+are currently available from https://storage.googleapis.com/plugins-community.
+For example:
+
+    juju config grafana install_plugins="https://storage.googleapis.com/plugins-community/vonage-status-panel/release/1.0.11/vonage-status-panel-1.0.11.zip,https://storage.googleapis.com/plugins-community/grafana-piechart-panel/release/1.6.2/grafana-piechart-panel-1.6.2.zip"
+
+Telegraf should be configured with the following charm option:
+
+    juju config telegraf hostname="{host}"
+
+> **Note**: The above command is to be invoked verbatim; no substitution is
+  required.
+
+Currently the dashboard does not autodetect the API endpoint of the Grafana
+service. It needs to be provided via a configuration option:
+
+    juju config ceph-dashboard grafana-api-url="https://<grafana-host>:3000"
+
+## Prometheus alerting
+
+To enable alerting for an existing Prometheus service add the following
+relations:
+
+    juju add-relation ceph-dashboard:prometheus prometheus:website
+    juju add-relation ceph-mon:prometheus prometheus:target
+    juju add-relation ceph-dashboard:alertmanager-service prometheus-alertmanager:alertmanager-service
+    juju add-relation prometheus:alertmanager-service prometheus-alertmanager:alertmanager-service
+
+## Ceph Object storage
+
+To enable Object storage management of an existing Ceph RADOS Gateway service
+add the following relation:
+
+    juju add-relation ceph-dashboard:radosgw-dashboard ceph-radosgw:radosgw-user
+
+> **Note**: For Ceph versions older than Pacific the dashboard can only be
+  related to a single ceph-radosgw application.
+
+## Actions
+
+This section lists Juju [actions][juju-docs-actions] supported by the charm.
+Actions allow specific operations to be performed on a per-unit basis. To +display action descriptions run `juju actions --schema ceph-dashboard`. If the +charm is not deployed then see file `actions.yaml`. + +* `add-user` +* `delete-user` + +# Documentation + +The OpenStack Charms project maintains two documentation guides: + +* [OpenStack Charm Guide][cg]: for project information, including development + and support notes +* [OpenStack Charms Deployment Guide][cdg]: for charm usage information + +# Contributing + +Please see the [OpenStack Charm Guide community section][cgc] for contribution guidelines. Specifically, see the [software contributions section][swc] for software contribution guidelines, and the [documentation contributions section][docc] for guidelines on documentation contribution. + + +# Bugs + +Please report bugs on [Launchpad][lp-bugs-charm-ceph-dashboard]. Note the [bug submission section][bugc] on guidelines for reporting bugs. + + +# License + +The ceph-dashboard charm is free software, distributed under the Apache 2.0 software licence. See the LICENSE file for more information. + + + + +[juju-docs-actions]: https://juju.is/docs/working-with-actions +[juju-docs-config-apps]: https://juju.is/docs/configuring-applications +[upstream-ceph-dashboard]: https://docs.ceph.com/en/latest/mgr/dashboard/ +[cg]: https://docs.openstack.org/charm-guide +[cdg]: https://docs.openstack.org/project-deploy-guide/charm-deployment-guide +[cdg-tls]: https://docs.openstack.org/project-deploy-guide/charm-deployment-guide/latest/app-certificate-management.html +[cgc]: https://docs.openstack.org/charm-guide/latest/community/ +[swc]: https://docs.openstack.org/charm-guide/latest/community/software-contrib/ +[docc]: https://docs.openstack.org/charm-guide/latest/community/doc-contrib/ +[bugc]: https://docs.openstack.org/charm-guide/latest/community/software-bug.html +[lp-bugs-charm-ceph-dashboard]: https://bugs.launchpad.net/charm-ceph-dashboard +[anchor-grafana-dashboards]: #embedded-grafana-dashboards +[loadbalancer-charm]: https://jaas.ai/u/openstack-charmers/openstack-loadbalancer +[hacluster-charm]: https://jaas.ai/hacluster diff --git a/ceph-dashboard/actions.yaml b/ceph-dashboard/actions.yaml new file mode 100644 index 00000000..df585b55 --- /dev/null +++ b/ceph-dashboard/actions.yaml @@ -0,0 +1,21 @@ +# Copyright 2021 Canonical +# See LICENSE file for licensing details. + +add-user: + description: add a dashboard user + params: + username: + description: Name of user to create + type: string + default: "" + role: + description: Role to give user + type: string + default: "" +delete-user: + description: delete a dashboard user + params: + username: + description: Name of user to delete + type: string + default: "" diff --git a/ceph-dashboard/build-requirements.txt b/ceph-dashboard/build-requirements.txt new file mode 100644 index 00000000..b6d2452f --- /dev/null +++ b/ceph-dashboard/build-requirements.txt @@ -0,0 +1,7 @@ +# NOTES(lourot): +# * We don't install charmcraft via pip anymore because it anyway spins up a +# container and scp the system's charmcraft snap inside it. So the charmcraft +# snap is necessary on the system anyway. +# * `tox -e build` successfully validated with charmcraft 1.2.1 + +cffi==1.14.6; python_version < '3.6' # cffi 1.15.0 drops support for py35. 
diff --git a/ceph-dashboard/charmcraft.yaml b/ceph-dashboard/charmcraft.yaml
new file mode 100644
index 00000000..0fbd75be
--- /dev/null
+++ b/ceph-dashboard/charmcraft.yaml
@@ -0,0 +1,35 @@
+type: charm
+
+parts:
+  charm:
+    after:
+      - update-certificates
+    charm-python-packages:
+      # NOTE(lourot): see
+      # * https://github.com/canonical/charmcraft/issues/551
+      - setuptools
+    build-packages:
+      - git
+
+  update-certificates:
+    plugin: nil
+    # See https://github.com/canonical/charmcraft/issues/658
+    override-build: |
+      apt update
+      apt install -y ca-certificates
+      update-ca-certificates
+
+base: ubuntu@22.04
+platforms:
+  amd64:
+    build-on: amd64
+    build-for: amd64
+  arm64:
+    build-on: arm64
+    build-for: arm64
+  ppc64el:
+    build-on: ppc64el
+    build-for: ppc64el
+  s390x:
+    build-on: s390x
+    build-for: s390x
diff --git a/ceph-dashboard/config.yaml b/ceph-dashboard/config.yaml
new file mode 100644
index 00000000..1460d023
--- /dev/null
+++ b/ceph-dashboard/config.yaml
@@ -0,0 +1,123 @@
+# Copyright 2021 Canonical
+# See LICENSE file for licensing details.
+
+options:
+  debug:
+    type: boolean
+    default: False
+    description: |
+      Control debug mode. It is recommended that debug be disabled in
+      production deployments.
+  grafana-api-url:
+    type: string
+    default:
+    description: |
+      URL of the Grafana API. The URL must use https.
+  public-hostname:
+    type: string
+    default:
+    description: |
+      The hostname or address of the public endpoints created for the
+      dashboard.
+  enable-password-policy:
+    type: boolean
+    default: True
+    description: Enable password policy
+  password-policy-check-length:
+    type: boolean
+    default: True
+    description: |
+      Reject password if it is shorter than password-policy-min-length
+  password-policy-check-oldpwd:
+    type: boolean
+    default: True
+    description: Reject password if it matches the previous password.
+  password-policy-check-username:
+    type: boolean
+    default: True
+    description: Reject password if username is included in password.
+  password-policy-check-exclusion-list:
+    type: boolean
+    default: True
+    description: Reject password if it contains a word from a forbidden list.
+  password-policy-check-complexity:
+    type: boolean
+    default: True
+    description: |
+      Check password meets a complexity score of password-policy-min-complexity.
+      See https://docs.ceph.com/en/latest/mgr/dashboard/#password-policy
+  password-policy-check-sequential-chars:
+    type: boolean
+    default: True
+    description: |
+      Reject password if it contains a sequence of sequential characters. e.g.
+      a password containing '123' or 'efg' would be rejected.
+  password-policy-check-repetitive-chars:
+    type: boolean
+    default: True
+    description: |
+      Reject password if password contains consecutive repeating characters.
+  password-policy-min-length:
+    type: int
+    default: 8
+    description: Set minimum password length.
+  password-policy-min-complexity:
+    type: int
+    default: 10
+    description: |
+      Set minimum password complexity score.
+      See https://docs.ceph.com/en/latest/mgr/dashboard/#password-policy
+  audit-api-enabled:
+    type: boolean
+    default: False
+    description: |
+      Log requests made to the dashboard REST API to the Ceph audit log.
+  audit-api-log-payload:
+    type: boolean
+    default: True
+    description: |
+      Include payload in Ceph audit logs. audit-api-enabled must be set to True
+      to enable this.
+  motd:
+    type: string
+    default: ""
+    description: |
+      Message of the day settings. Should be in the format
+      "severity|expires|message". Set to "" to disable.
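+  # A hypothetical motd example (severity is one of info|warning|danger;
+  # expires takes durations such as "7d", or 0 to never expire):
+  #
+  #   juju config ceph-dashboard motd="info|7d|Maintenance window on Friday"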
+  saml-base-url:
+    type: string
+    default: ""
+    description: |
+      The base URL from which the Ceph dashboard is accessed. Must support
+      the SAML protocol.
+  saml-idp-metadata:
+    type: string
+    default: ""
+    description: |
+      URL that points to the IdP metadata XML. Can be remote or local.
+  saml-username-attribute:
+    type: string
+    default: ""
+    description: |
+      The attribute that is used to get the username from the authentication
+      response.
+  saml-idp-entity-id:
+    type: string
+    default: "uid"
+    description: |
+      Unique ID to disambiguate when more than one entity id exists on the
+      IdP metadata.
+  ssl_cert:
+    type: string
+    default:
+    description: |
+      SSL certificate to install and use for the dashboard. Setting this
+      value and ssl_key enables TLS for the dashboard and overrides any
+      certificate and key obtained via the certificates relation.
+  ssl_key:
+    type: string
+    default:
+    description: SSL key to use with certificate specified as ssl_cert.
+  ssl_ca:
+    type: string
+    default:
+    description: |
+      SSL CA to use with the certificate and key provided - this is only
+      required if you are providing a privately signed ssl_cert and ssl_key.
diff --git a/ceph-dashboard/copyright b/ceph-dashboard/copyright
new file mode 100644
index 00000000..d0b7f44f
--- /dev/null
+++ b/ceph-dashboard/copyright
@@ -0,0 +1,16 @@
+Format: http://dep.debian.net/deps/dep5/
+
+Files: *
+Copyright: Copyright 2015-2020, Canonical Ltd., All Rights Reserved.
+License: Apache License 2.0
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ .
+ http://www.apache.org/licenses/LICENSE-2.0
+ .
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/ceph-dashboard/metadata.yaml b/ceph-dashboard/metadata.yaml
new file mode 100644
index 00000000..bea2d947
--- /dev/null
+++ b/ceph-dashboard/metadata.yaml
@@ -0,0 +1,38 @@
+# Copyright 2021 Canonical
+# See LICENSE file for licensing details.
+name: ceph-dashboard +display-name: Ceph Dashboard +maintainer: OpenStack Charmers +summary: Enable dashboard for Ceph +description: | + Enable the ceph dashboard on the ceph mon units +docs: https://discourse.charmhub.io/t/ceph-dashboard-docs-index/11007 +tags: +- openstack +- storage +- backup +extra-bindings: + public: +subordinate: true +series: +- focal +- jammy +requires: + dashboard: + interface: ceph-dashboard + scope: container + certificates: + interface: tls-certificates + loadbalancer: + interface: openstack-loadbalancer + alertmanager-service: + interface: http + prometheus: + interface: http + radosgw-dashboard: + interface: radosgw-user + iscsi-dashboard: + interface: ceph-iscsi-admin-access +provides: + grafana-dashboard: + interface: grafana-dashboard diff --git a/ceph-dashboard/osci.yaml b/ceph-dashboard/osci.yaml new file mode 100644 index 00000000..92e37937 --- /dev/null +++ b/ceph-dashboard/osci.yaml @@ -0,0 +1,10 @@ +- project: + templates: + - charm-unit-jobs-py38 + - charm-unit-jobs-py310 + - charm-functional-jobs + vars: + needs_charm_build: true + charm_build_name: ceph-dashboard + build_type: charmcraft + charmcraft_channel: 2.x/stable diff --git a/ceph-dashboard/rename.sh b/ceph-dashboard/rename.sh new file mode 100755 index 00000000..d0c35c97 --- /dev/null +++ b/ceph-dashboard/rename.sh @@ -0,0 +1,13 @@ +#!/bin/bash +charm=$(grep "charm_build_name" osci.yaml | awk '{print $2}') +echo "renaming ${charm}_*.charm to ${charm}.charm" +echo -n "pwd: " +pwd +ls -al +echo "Removing bad downloaded charm maybe?" +if [[ -e "${charm}.charm" ]]; +then + rm "${charm}.charm" +fi +echo "Renaming charm here." +mv ${charm}_*.charm ${charm}.charm diff --git a/ceph-dashboard/requirements-dev.txt b/ceph-dashboard/requirements-dev.txt new file mode 100644 index 00000000..4f2a3f5b --- /dev/null +++ b/ceph-dashboard/requirements-dev.txt @@ -0,0 +1,3 @@ +-r requirements.txt +coverage +flake8 diff --git a/ceph-dashboard/requirements.txt b/ceph-dashboard/requirements.txt new file mode 100644 index 00000000..2b5ec510 --- /dev/null +++ b/ceph-dashboard/requirements.txt @@ -0,0 +1,8 @@ +importlib-resources +ops >= 1.2.0, <= 1.5.2 +tenacity +git+https://github.com/openstack/charms.ceph#egg=charms_ceph +git+https://opendev.org/openstack/charm-ops-openstack#egg=ops_openstack +git+https://opendev.org/openstack/charm-ops-interface-tls-certificates#egg=interface_tls_certificates +git+https://github.com/openstack-charmers/ops-interface-ceph-iscsi-admin-access#egg=interface_ceph_iscsi_admin_access +git+https://github.com/openstack-charmers/ops-interface-openstack-loadbalancer#egg=interface_openstack_loadbalancer diff --git a/ceph-dashboard/src/ceph_dashboard_commands.py b/ceph-dashboard/src/ceph_dashboard_commands.py new file mode 100644 index 00000000..6dfbf494 --- /dev/null +++ b/ceph-dashboard/src/ceph_dashboard_commands.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python3 +# Copyright 2023 Canonical +# See LICENSE file for licensing details. 
+# +# Learn more at: https://juju.is/docs/sdk + +import json +import os +import socket +import tempfile +from typing import List, Tuple +from functools import partial + +import subprocess +import logging + +from charm_option import CharmCephOption + +logger = logging.getLogger(__name__) + + +def _run_cmd(cmd: List[str]): + """Run command in subprocess + + `cmd` The command to run + """ + return subprocess.check_output( + cmd, stderr=subprocess.STDOUT + ).decode('UTF-8') + + +def exec_option_ceph_cmd(option: CharmCephOption, value: str) -> None: + """Execute internal ceph command for the CharmCephOption""" + _run_cmd(option.ceph_command(value)) + + +def ceph_dashboard_delete_user(user: str) -> None: + """Delete Ceph dashboard user.""" + cmd = ['ceph', 'dashboard', 'ac-user-delete', user] + _run_cmd(cmd) + + +def ceph_dashboard_add_user(user: str, filename: str, role: str) -> str: + """Create Ceph dashboard user.""" + cmd = [ + 'ceph', 'dashboard', 'ac-user-create', '--enabled', + '-i', filename, user, role + ] + return _run_cmd(cmd) + + +def ceph_dashboard_config_saml( + base_url: str, idp_meta: str, + username_attr: str, idp_entity_id: str +) -> None: + """Configure SSO SAML2""" + cmd = [ + 'ceph', 'dashboard', 'sso', 'setup', 'saml2', + base_url, idp_meta + ] + if username_attr: + cmd.append(username_attr) + + if idp_entity_id: + cmd.append(idp_entity_id) + _run_cmd(cmd) + + +def ceph_config_get(key: str) -> str: + "Fetch Value for a particular ceph-config key." + cmd = [ + "ceph", "config-key", "get", key + ] + try: + return _run_cmd(cmd) + except subprocess.CalledProcessError: + logger.error("Failed to fetch key %s", key) + + +def ceph_config_list() -> list: + "Fetch list of ceph-config keys." + cmd = [ + "ceph", "config-key", "ls" + ] + + # CLI returns empty list if no config-key is configured. 
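+    # 'ceph config-key ls' prints a JSON array of key names, so callers
+    # receive a Python list of strings.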
+    return json.loads(_run_cmd(cmd))
+
+
+def ceph_config_set(key: str, value: str) -> None:
+    "Set the provided key/value pair"
+    cmd = ["ceph", "config-key", "set", key, value]
+
+    logging.debug("Setting config-key: %s", key)
+    _run_cmd(cmd)
+
+
+def ceph_config_reset(key: str) -> None:
+    "Remove the provided key/value pair"
+    cmd = ["ceph", "config-key", "rm", key]
+
+    logging.debug("Removing config-key: %s", key)
+    _run_cmd(cmd)
+
+
+def dashboard_set(prop: str, value: str) -> str:
+    "Configure ceph dashboard properties"
+    logger.debug("Setting Dashboard %s as %s", prop, value)
+    return _run_cmd(["ceph", "dashboard", prop, value])
+
+
+def apply_setting(ceph_setting: str, value: List[str]) -> str:
+    """Apply a dashboard setting"""
+    cmd = ["ceph", "dashboard", ceph_setting]
+    cmd.extend(value)
+    return _run_cmd(cmd)
+
+
+get_ceph_dashboard_ssl_key = partial(ceph_config_get, "mgr/dashboard/key")
+get_ceph_dashboard_ssl_crt = partial(ceph_config_get, "mgr/dashboard/crt")
+get_ceph_dashboard_host_ssl_key = partial(
+    ceph_config_get, f"mgr/dashboard/{socket.gethostname()}/key"
+)
+get_ceph_dashboard_host_ssl_crt = partial(
+    ceph_config_get, f"mgr/dashboard/{socket.gethostname()}/crt"
+)
+
+
+def check_ceph_dashboard_ssl_enabled() -> bool:
+    """Check if ssl config-key is set to true"""
+    ssl_status = ceph_config_get("config/mgr/mgr/dashboard/ssl")
+    return ssl_status == "true"
+
+
+def check_ceph_dashboard_ssl_configured(
+        is_check_host_key: bool = False) -> bool:
+    """Check if SSL key and certificate are configured on ceph dashboard."""
+    if is_check_host_key:
+        keys = [
+            f"mgr/dashboard/{socket.gethostname()}/crt",
+            f"mgr/dashboard/{socket.gethostname()}/key",
+        ]
+    else:
+        keys = [  # List of keys to check for ssl configuration
+            "mgr/dashboard/crt",
+            "mgr/dashboard/key"
+        ]
+
+    for key in keys:
+        value = ceph_config_get(key)
+        if value is None:
+            return False
+
+    return True
+
+
+def validate_ssl_keypair(cert: bytes, key: bytes) -> Tuple[bool, str]:
+    """Validate that a private key matches a certificate
+
+    Args:
+        cert, key (bytes): SSL material
+
+    Returns:
+        Tuple[bool, str]: bool for validity and an error message
+    """
+    try:
+        with tempfile.NamedTemporaryFile(mode="wb", delete=False) as cert_temp:
+            cert_temp.write(cert)
+            cert_path = cert_temp.name
+
+        with tempfile.NamedTemporaryFile(mode="wb", delete=False) as key_temp:
+            key_temp.write(key)
+            key_path = key_temp.name
+    except IOError as e:
+        return False, f"Failed to create temporary files: {str(e)}"
+
+    try:
+        # check if pubkeys from cert and key match
+        try:
+            cert_pubkey_cmd = subprocess.run(
+                ["openssl", "x509", "-in", cert_path, "-noout", "-pubkey"],
+                capture_output=True,
+                text=True,
+                check=True,
+            )
+            cert_pubkey = cert_pubkey_cmd.stdout.strip()
+        except subprocess.CalledProcessError as e:
+            return (
+                False,
+                f"Failed to extract pubkey from cert: {e.stderr.strip()}",
+            )
+
+        try:
+            key_pubkey_cmd = subprocess.run(
+                ["openssl", "rsa", "-in", key_path, "-pubout"],
+                capture_output=True,
+                text=True,
+                check=True,
+            )
+            key_pubkey = key_pubkey_cmd.stdout.strip()
+        except subprocess.CalledProcessError as e:
+            return (
+                False,
+                f"Failed to extract pubkey from priv key: {e.stderr.strip()}",
+            )
+
+        if cert_pubkey != key_pubkey:
+            return False, "Certificate and private key do not match"
+
+        return (
+            True,
+            "Certificate and private key match and certificate is valid",
+        )
+
+    finally:
+        # Best effort clean up
+        try:
+            os.unlink(cert_path)
+            os.unlink(key_path)
+        except Exception:
+            pass
diff --git
a/ceph-dashboard/src/charm.py b/ceph-dashboard/src/charm.py new file mode 100755 index 00000000..9313f5c9 --- /dev/null +++ b/ceph-dashboard/src/charm.py @@ -0,0 +1,777 @@ +#!/usr/bin/env python3 +# Copyright 2021 Canonical +# See LICENSE file for licensing details. +# +# Learn more at: https://juju.is/docs/sdk + +"""Charm for the Ceph Dashboard.""" + +import json +import base64 +import logging +import re +import secrets +import socket +import string +import subprocess +import tempfile +from pathlib import Path +from typing import List, Tuple, Union + +import charms_ceph.utils as ceph_utils +import cryptography.hazmat.primitives.serialization as serialization +import interface_ceph_iscsi_admin_access.admin_access as admin_access +import interface_dashboard +import interface_grafana_dashboard as grafana_interface +import interface_http +import interface_openstack_loadbalancer.loadbalancer as ops_lb_interface +import interface_radosgw_user +import interface_tls_certificates.ca_client as ca_client +import ops_openstack.plugins.classes +import tenacity + +from ops.charm import ActionEvent, CharmEvents +from ops.framework import EventBase, EventSource, StoredState +from ops.main import main +from ops.model import ActiveStatus, BlockedStatus, StatusBase + +# Charm Src +import ceph_dashboard_commands as cmds +from charm_option import CharmCephOptionList + +logger = logging.getLogger(__name__) + +TLS_Config = Tuple[Union[bytes, None], Union[bytes, None], Union[bytes, None]] + + +# Maintenance Events +class DisableSSL(EventBase): + """Charm Event to disable SSL and clean certificates.""" + + +class EnableSSLFromConfig(EventBase): + """Charm Event to configure SSL using Charm config values.""" + + +class CephCharmEvents(CharmEvents): + """Custom charm events.""" + + disable_ssl = EventSource(DisableSSL) + enable_ssl_from_config = EventSource(EnableSSLFromConfig) + + +class CephDashboardCharm(ops_openstack.core.OSBaseCharm): + """Ceph Dashboard charm.""" + + _stored = StoredState() + PACKAGES = ['ceph-mgr-dashboard', 'python3-onelogin-saml2'] + CEPH_CONFIG_PATH = Path('/etc/ceph') + TLS_KEY_PATH = CEPH_CONFIG_PATH / 'ceph-dashboard.key' + TLS_PUB_KEY_PATH = CEPH_CONFIG_PATH / 'ceph-dashboard-pub.key' + TLS_CERT_PATH = CEPH_CONFIG_PATH / 'ceph-dashboard.crt' + TLS_KEY_AND_CERT_PATH = CEPH_CONFIG_PATH / 'ceph-dashboard.pem' + TLS_CA_CERT_DIR = Path('/usr/local/share/ca-certificates') + TLS_VAULT_CA_CERT_PATH = TLS_CA_CERT_DIR / 'vault_juju_ca_cert.crt' + TLS_CHARM_CA_CERT_PATH = TLS_CA_CERT_DIR / 'charm_config_juju_ca_cert.crt' + TLS_PORT = 8443 + DASH_DIR = Path('src/dashboards') + LB_SERVICE_NAME = "ceph-dashboard" + + # Charm Events + on = CephCharmEvents() + + CHARM_TO_CEPH_OPTIONS = CharmCephOptionList().get() + + def __init__(self, *args) -> None: + """Setup adapters and observers.""" + super().__init__(*args) + super().register_status_check(self.check_dashboard) + self.framework.observe( + self.on.config_changed, self._configure_dashboard + ) + self.mon = interface_dashboard.CephDashboardRequires(self, "dashboard") + self.radosgw_user = interface_radosgw_user.RadosGWUserRequires( + self, "radosgw-dashboard", request_system_role=True + ) + self.iscsi_user = admin_access.CephISCSIAdminAccessRequires( + self, "iscsi-dashboard" + ) + self.framework.observe( + self.mon.on.mon_ready, self._configure_dashboard + ) + self.framework.observe( + self.radosgw_user.on.gw_user_ready, self._configure_dashboard + ) + self.framework.observe( + self.iscsi_user.on.admin_access_ready, self._configure_dashboard + ) 
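+        # Juju action handlers (add-user / delete-user, defined in
+        # actions.yaml).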
+ self.framework.observe(self.on.add_user_action, self._add_user_action) + self.framework.observe( + self.on.delete_user_action, self._delete_user_action + ) + self.ingress = ops_lb_interface.OSLoadbalancerRequires( + self, "loadbalancer" + ) + self.grafana_dashboard = grafana_interface.GrafanaDashboardProvides( + self, "grafana-dashboard" + ) + self.alertmanager = interface_http.HTTPRequires( + self, "alertmanager-service" + ) + self.prometheus = interface_http.HTTPRequires(self, "prometheus") + self.framework.observe( + self.grafana_dashboard.on.dash_ready, self._configure_dashboard + ) + self.framework.observe( + self.alertmanager.on.http_ready, self._configure_dashboard + ) + self.framework.observe( + self.prometheus.on.http_ready, self._configure_dashboard + ) + self.framework.observe( + self.ingress.on.lb_relation_ready, self._request_loadbalancer + ) + self.framework.observe( + self.ingress.on.lb_configured, self._configure_dashboard + ) + + # Certificates Relation + self.ca_client = ca_client.CAClient(self, "certificates") + self.framework.observe( + self.ca_client.on.ca_available, self._request_certificates + ) + self.framework.observe( + self.ca_client.on.tls_server_config_ready, + self._enable_ssl_from_relation + ) + self.framework.observe( + self.on["certificates"].relation_departed, + self._certificates_relation_departed, + ) + + # Charm Custom Events + self.framework.observe(self.on.disable_ssl, self._clean_ssl_conf) + self.framework.observe( + self.on.enable_ssl_from_config, self._enable_ssl_from_config + ) + + self._stored.set_default(is_started=False) + + def _request_loadbalancer(self, _event) -> None: + """Send request to create loadbalancer""" + self.ingress.request_loadbalancer( + self.LB_SERVICE_NAME, + self.TLS_PORT, + self.TLS_PORT, + self._get_bind_ip(), + 'http', + ) + + def _register_dashboards(self) -> None: + """Register all dashboards with grafana""" + if not self.unit.is_leader(): + return # Do nothing on non leader units. + + for dash_file in self.DASH_DIR.glob("*.json"): + self.grafana_dashboard.register_dashboard( + dash_file.stem, + json.loads(dash_file.read_text())) + logging.debug( + "register_grafana_dashboard: {}".format(dash_file)) + + def _update_radosgw_creds( + self, access_key: str, secret_key: str + ) -> None: + """Update dashboard db with access & secret key for rados gateways. + + This method uses the legacy format which only supports one gateway. + """ + self._apply_file_setting('set-rgw-api-access-key', access_key) + self._apply_file_setting('set-rgw-api-secret-key', secret_key) + + def _manage_radosgw(self) -> None: + """Register rados gateways in dashboard db""" + if self.unit.is_leader(): + creds = self.radosgw_user.get_user_creds() + cred_count = len(set([ + (c['access_key'], c['secret_key']) + for c in creds])) + if cred_count < 1: + logging.info("No object gateway creds found") + return + # Update the provided creds for radosgw. + # NOTE(utkarshbhatthere): Having multiple credentials is not + # required even where there are multiple radosgw applications + # in the juju model. Therefore, first available creds are + # populated in dashboard. 
+            self._update_radosgw_creds(
+                creds[0]['access_key'],
+                creds[0]['secret_key'])
+
+    def _request_certificates(self, event) -> None:
+        """Request TLS certificates."""
+        if not self.ca_client.is_joined:
+            logging.debug("Cannot request certificates, relation not present.")
+            return
+        addresses = set()
+        if self.ingress.relations:
+            lb_response = self.ingress.get_frontend_data()
+            if lb_response:
+                lb_config = lb_response[self.LB_SERVICE_NAME]
+                addresses.update(
+                    [i for d in lb_config.values() for i in d['ip']])
+            else:
+                logging.debug(
+                    ("Deferring certificate request until loadbalancer has "
+                     "responded."))
+                event.defer()
+                return
+        for binding_name in ['public']:
+            binding = self.model.get_binding(binding_name)
+            addresses.add(binding.network.ingress_address)
+            addresses.add(binding.network.bind_address)
+        sans = [str(s) for s in addresses]
+        sans.append(socket.gethostname())
+        if self.config.get('public-hostname'):
+            sans.append(self.config.get('public-hostname'))
+        self.ca_client.request_server_certificate(socket.getfqdn(), sans)
+
+    def _check_for_certs(self) -> bool:
+        """Check that charm has TLS data it needs"""
+        # Check charm config for TLS data
+        key, cert, _ = self._get_tls_from_config()
+        if key and cert:
+            return True
+        # Check relation for TLS data
+        try:
+            self.ca_client.server_key
+            return True
+        except ca_client.CAClientError:
+            return False
+
+    def _check_dashboard_responding(self) -> bool:
+        """Check the dashboard port is open"""
+
+        @tenacity.retry(wait=tenacity.wait_fixed(2),
+                        stop=tenacity.stop_after_attempt(30), reraise=True)
+        def _check_port(ip, port):
+            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+            result = sock.connect_ex((ip, port))
+            assert result == 0
+
+        try:
+            _check_port(self._get_bind_ip(), self.TLS_PORT)
+            return True
+        except AssertionError:
+            return False
+
+    def _check_grafana_config(self) -> bool:
+        """Check that grafana-api-url is set if grafana is in use."""
+        if self.grafana_dashboard.dashboard_relation:
+            return bool(self.config.get('grafana-api-url'))
+        else:
+            return True
+
+    def check_dashboard(self) -> StatusBase:
+        """Check status of dashboard"""
+        checks = [
+            (ceph_utils.is_dashboard_enabled, 'Dashboard is not enabled'),
+            (self._check_for_certs, ('No certificates found. Please add a '
+                                     'certificates relation or provide via '
+                                     'charm config')),
+            (self._check_grafana_config, 'Charm config option grafana-api-url '
+                                         'not set'),
+            (self._check_dashboard_responding, 'Dashboard not responding')
+        ]
+        for check_f, msg in checks:
+            if not check_f():
+                return BlockedStatus(msg)
+
+        # Check if both relation based and config based certs are supplied.
+ return self._status_check_conflicting_ssl_sources() + + def kick_dashboard(self) -> None: + """Disable and re-enable dashboard""" + ceph_utils.mgr_disable_dashboard() + ceph_utils.mgr_enable_dashboard() + + def _apply_file_setting( + self, ceph_setting: str, file_contents: str, + extra_args: List[str] = None + ) -> None: + """Apply a setting via a file""" + with tempfile.NamedTemporaryFile(mode="w", delete=True) as _file: + _file.write(file_contents) + _file.flush() + settings = ["-i", _file.name] + if extra_args: + settings.extend(extra_args) + cmds.apply_setting(ceph_setting, settings) + + def _apply_ceph_config_from_charm_config(self) -> None: + """Read charm config and apply settings to dashboard config""" + for option in self.CHARM_TO_CEPH_OPTIONS: + try: + value = self.config[option.charm_option_name] + except KeyError: + logging.error( + "Unknown charm option {}, skipping".format( + option.charm_option_name)) + continue + if option.is_supported(): + cmds.exec_option_ceph_cmd(option, value) + else: + logging.warning( + "Skipping charm option {}, not supported".format( + option.charm_option_name)) + + def _configure_service_apis(self) -> None: + """Configure related service APIs in ceph dashboard""" + if self.unit.is_leader(): + grafana_ep = self.config.get("grafana-api-url") + if grafana_ep: + cmds.dashboard_set("set-grafana-api-url", grafana_ep) + + alertmanager_conn = self.alertmanager.get_service_ep_data() + if alertmanager_conn: + cmds.dashboard_set( + "set-alertmanager-api-host", + "http://{}:{}".format( + alertmanager_conn["hostname"], + alertmanager_conn["port"] + ), + ) + + prometheus_conn = self.prometheus.get_service_ep_data() + if prometheus_conn: + cmds.dashboard_set( + "set-prometheus-api-host", + "http://{}:{}".format( + prometheus_conn["hostname"], prometheus_conn["port"] + ), + ) + + def _configure_dashboard(self, event) -> None: + """Configure dashboard""" + if not self.mon.mons_ready: + logging.info("Not configuring dashboard, mons not ready") + return + + if ceph_utils.is_dashboard_enabled(): + if not self.unit.is_leader(): + # leader already enabled the dashboard and also handles config, + # we don't need to do anything except set ourselves as ready + logging.debug("Dashboard already enabled, setting ready.") + self._stored.is_started = True + self.update_status() + return + else: + if self.unit.is_leader(): + # we're the leader, enable dashboard and continue + # configuration below + logging.debug("Enabling dashboard as leader.") + ceph_utils.mgr_enable_dashboard() + else: + # non-leader, defer event until leader has enabled and + # configured the dashboard + logging.info("Dashboard not enabled, deferring event on " + "non-leader") + event.defer() + return + + if self.unit.is_leader(): + # If charm config ssl is present. + if self._is_charm_ssl_from_config(): + if not cmds.check_ceph_dashboard_ssl_configured(): + # Configure SSL using charm config. + self.on.enable_ssl_from_config.emit() + else: # charm config is not present. + # Since certificates relation can provide unique certs to each + # unit, the below check should only be performed on leader as + # the central key/cert pair matches leader unit. 
+                key, cert, _ = self._get_tls_from_relation()
+                if not self.is_ceph_dashboard_ssl_key_cert_same(key, cert):
+                    # clean SSL if not configured using relation
+                    self.on.disable_ssl.emit()
+            # apply charm config
+            self._apply_ceph_config_from_charm_config()
+
+        self._configure_saml()
+
+        ceph_utils.mgr_config_set(
+            "mgr/dashboard/{hostname}/server_addr".format(
+                hostname=socket.gethostname()
+            ),
+            str(self._get_bind_ip()),
+        )
+
+        # configure grafana, prometheus and alertmanager API endpoints
+        self._configure_service_apis()
+
+        self._register_dashboards()
+        self._manage_radosgw()
+        self._manage_iscsigw()
+        self._stored.is_started = True
+        self.update_status()
+
+    def _get_bind_ip(self) -> str:
+        """Return the IP to bind the dashboard to"""
+        binding = self.model.get_binding('public')
+        return str(binding.network.ingress_address)
+
+    def _clean_ssl_conf(self, _event) -> None:
+        """Clean ssl conf for ceph-dashboard."""
+
+        # NOTE: Clearing up of SSL key/cert is done centrally so that it can
+        # be performed with consistency for all units at once.
+        if self.unit.is_leader():
+            # Disable ssl
+            cmds.ceph_config_set("config/mgr/mgr/dashboard/ssl", "false")
+
+            config_keys = cmds.ceph_config_list()
+            for config in config_keys:
+                # clear all certificates.
+                if re.match("mgr/dashboard.*/crt", config):
+                    cmds.ceph_config_reset(config)
+                # clear all keys.
+                if re.match("mgr/dashboard.*/key", config):
+                    cmds.ceph_config_reset(config)
+
+    def is_ceph_dashboard_ssl_key_cert_same(
+        self, key: str, cert: str, check_host: bool = False
+    ) -> Union[bool, None]:
+        """Check if the provided ssl key/cert match the configured key/cert.
+
+        Since this method can return None as well as True/False, take care
+        when using its result in plain truthiness checks.
+
+        :returns: None if ssl is not configured or the provided key/cert
+                  are empty.
+        """
+        if not cmds.check_ceph_dashboard_ssl_configured():
+            # Ceph Dashboard SSL not configured.
+            return None
+
+        # Provided key/crt from param
+        if key is None or cert is None:
+            logger.debug("Empty key/cert pair:\n"
+                         "Key is None: %s,\nCert is None: %s",
+                         key is None, cert is None)
+            return None
+
+        # Decode to ascii strings if bytes.
+        if isinstance(key, bytes):
+            key = key.decode()
+        if isinstance(cert, bytes):
+            cert = cert.decode()
+
+        # Configured key/crt from ceph-dashboard
+        if not check_host:
+            ssl_key = cmds.get_ceph_dashboard_ssl_key()
+            ssl_crt = cmds.get_ceph_dashboard_ssl_crt()
+        else:
+            ssl_key = cmds.get_ceph_dashboard_host_ssl_key()
+            ssl_crt = cmds.get_ceph_dashboard_host_ssl_crt()
+
+        return ssl_key == key and ssl_crt == cert
+
+    def _get_tls_from_config(self) -> TLS_Config:
+        """Extract TLS config from charm config."""
+        raw_key = self.config.get("ssl_key")
+        raw_cert = self.config.get("ssl_cert")
+        raw_ca_cert = self.config.get("ssl_ca")
+        if not (raw_key and raw_cert):
+            return None, None, None
+
+        key = base64.b64decode(raw_key)
+        cert = base64.b64decode(raw_cert)
+        if raw_ca_cert:
+            ca_cert = base64.b64decode(raw_ca_cert)
+        else:
+            ca_cert = None
+        return key, cert, ca_cert
+
+    def _is_relation_active(self, relation_name: str) -> bool:
+        """Check if any instance of the relation is present."""
+        return any(
+            relation.id for relation in self.model.relations[relation_name]
+        )
+
+    def _get_tls_from_relation(self) -> TLS_Config:
+        """Extract TLS config from certificates relation."""
+        # If the 'certificates' relation is not present, return None.
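+        # The (None, None, None) shape mirrors _get_tls_from_config so
+        # callers can unpack the result unconditionally.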
+        if not self._is_relation_active('certificates'):
+            return None, None, None
+
+        if not self.ca_client.is_server_cert_ready:
+            return None, None, None
+        key = self.ca_client.server_key.private_bytes(
+            encoding=serialization.Encoding.PEM,
+            format=serialization.PrivateFormat.TraditionalOpenSSL,
+            encryption_algorithm=serialization.NoEncryption())
+        cert = self.ca_client.server_certificate.public_bytes(
+            encoding=serialization.Encoding.PEM)
+        try:
+            root_ca_chain = self.ca_client.root_ca_chain.public_bytes(
+                encoding=serialization.Encoding.PEM
+            )
+        except ca_client.CAClientError:
+            # A root ca chain is not always available. If configured to just
+            # use vault with self-signed certificates, you will not get a ca
+            # chain. Instead, you will get a CAClientError being raised. For
+            # now, use a bytes() object for the root_ca_chain as it shouldn't
+            # cause problems and if a ca_cert_chain comes later, then it will
+            # get updated.
+            root_ca_chain = bytes()
+        ca_cert = (
+            self.ca_client.ca_certificate.public_bytes(
+                encoding=serialization.Encoding.PEM
+            ) + root_ca_chain)
+        return key, cert, ca_cert
+
+    def _update_iscsigw_creds(self, creds):
+        self._apply_file_setting(
+            'iscsi-gateway-add',
+            '{}://{}:{}@{}:{}'.format(
+                creds['scheme'],
+                creds['username'],
+                creds['password'],
+                creds['host'],
+                creds['port']),
+            [creds['name']])
+
+    def _manage_iscsigw(self) -> None:
+        """Register iscsi gateways in dashboard db"""
+        if self.unit.is_leader():
+            creds = self.iscsi_user.get_user_creds()
+            if len(creds) < 1:
+                logging.info("No iscsi gateway creds found")
+                return
+            else:
+                for c in creds:
+                    self._update_iscsigw_creds(c)
+
+    def _certificates_relation_departed(self, event) -> None:
+        """Certificates relation departed handler"""
+        if self.unit.is_leader():
+            # Clear SSL if not configured using charm config.
+            # NOTE: Since certificates relation has departed, check has to be
+            # done using the charm config key/certs.
+            key, cert, _ = self._get_tls_from_config()
+            if not self.is_ceph_dashboard_ssl_key_cert_same(key, cert):
+                self._clean_ssl_conf(event)
+
+            # Possible handover to charm-config SSL.
+            if self._is_charm_ssl_from_config():
+                self.on.enable_ssl_from_config.emit()
+
+    def _configure_tls(self, key, cert, ca_cert, ca_cert_path) -> None:
+        """Configure TLS using provided credentials"""
+        is_valid, msg = cmds.validate_ssl_keypair(cert, key)
+        if not is_valid:
+            logging.error("Invalid SSL key/cert: %s", msg)
+            return
+        self.TLS_KEY_PATH.write_bytes(key)
+        self.TLS_CERT_PATH.write_bytes(cert)
+        if ca_cert:
+            ca_cert_path.write_bytes(ca_cert)
+            subprocess.check_call(['update-ca-certificates'])
+
+        hostname = socket.gethostname()
+        ceph_utils.dashboard_set_ssl_certificate(
+            self.TLS_CERT_PATH,
+            hostname=hostname)
+        ceph_utils.dashboard_set_ssl_certificate_key(
+            self.TLS_KEY_PATH,
+            hostname=hostname)
+        if self.unit.is_leader():
+            ceph_utils.mgr_config_set(
+                'mgr/dashboard/standby_behaviour',
+                'redirect')
+            ceph_utils.mgr_config_set(
+                'mgr/dashboard/ssl',
+                'true')
+            # Also set the ssl artifacts without the hostname; this appears
+            # to be required even though they aren't used.
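+            # These hostname-less entries are also what
+            # is_ceph_dashboard_ssl_key_cert_same() compares against by
+            # default.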
+            ceph_utils.dashboard_set_ssl_certificate(
+                self.TLS_CERT_PATH)
+            ceph_utils.dashboard_set_ssl_certificate_key(
+                self.TLS_KEY_PATH)
+        self.kick_dashboard()
+
+    def _configure_saml(self) -> None:
+        if not self.unit.is_leader():
+            logger.debug("Unit not leader, skipping saml config")
+            return
+
+        base_url = self.config.get('saml-base-url')
+        idp_metadata = self.config.get('saml-idp-metadata')
+        username_attr = self.config.get('saml-username-attribute')
+        idp_entity_id = self.config.get('saml-idp-entity-id')
+        if not base_url or not idp_metadata:
+            return
+
+        cmds.ceph_dashboard_config_saml(
+            base_url, idp_metadata, username_attr, idp_entity_id
+        )
+
+    def _gen_user_password(self, length: int = 12) -> str:
+        """Generate a password"""
+        alphabet = (
+            string.ascii_lowercase + string.ascii_uppercase + string.digits)
+
+        return ''.join(secrets.choice(alphabet) for i in range(length))
+
+    def _add_user_action(self, event: ActionEvent) -> None:
+        """Create a user"""
+        username = event.params["username"]
+        role = event.params["role"]
+        if not all([username, role]):
+            event.fail("Config missing")
+        else:
+            password = self._gen_user_password()
+            with tempfile.NamedTemporaryFile(mode="w", delete=True) as fp:
+                fp.write(password)
+                fp.flush()
+                cmd_out = cmds.ceph_dashboard_add_user(username, fp.name, role)
+                if re.match('User.*already exists', cmd_out):
+                    event.fail("User already exists")
+                else:
+                    event.set_results({"password": password})
+
+    def _delete_user_action(self, event: ActionEvent) -> None:
+        """Delete a user"""
+        username = event.params["username"]
+        try:
+            cmds.ceph_dashboard_delete_user(username)
+            event.set_results({"message": "User {} deleted".format(username)})
+        except subprocess.CalledProcessError as exc:
+            event.fail(exc.output)
+
+    def _is_charm_ssl_from_relation(self) -> bool:
+        """Check if ssl cert/key are provided by certificates relation."""
+        key, cert, _ = self._get_tls_from_relation()
+        # True if both key and cert are present, False otherwise.
+        return bool(key and cert)
+
+    def _is_charm_ssl_from_config(self) -> bool:
+        """Check if ssl cert/key are configured in charm config."""
+        key, cert, _ = self._get_tls_from_config()
+        # True if both key and cert are present, False otherwise.
+        return bool(key and cert)
+
+    def _is_charm_ssl_multiple_sources(self) -> bool:
+        """Check if SSL key/cert are available from multiple sources."""
+        return self._is_charm_ssl_from_config() \
+            and self._is_charm_ssl_from_relation()
+
+    def _status_check_conflicting_ssl_sources(self):
+        """Generate status check message for multiple ssl key/cert scenario."""
+        # If conflicting SSL source is not present
+        if not self._is_charm_ssl_multiple_sources():
+            return ActiveStatus()
+
+        # SSL has not yet been configured from either source.
+        if not cmds.check_ceph_dashboard_ssl_configured():
+            return BlockedStatus(
+                "Conflict: SSL configuration available from 'certificates' "
+                "relation and Charm config, refusing to guess. "
+                "Remove conflicting source to proceed."
+            )
+
+        key, cert, _ = self._get_tls_from_config()
+        if self.is_ceph_dashboard_ssl_key_cert_same(key, cert):
+            # SSL currently configured from charm config.
+            return BlockedStatus(
+                "Conflict: Active SSL from Charm config, 'certificates' "
+                "relation is ignored. Remove conflicting source to proceed."
+            )
+
+        key, cert, _ = self._get_tls_from_relation()
+        # The 'certificates' relation provides a unique key/cert to each
+        # host, hence the cert check is performed for the host.
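+        # (check_host=True compares against the per-host key/cert entries
+        # rather than the central ones.)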
+        if self.is_ceph_dashboard_ssl_key_cert_same(
+            key, cert, check_host=True
+        ):
+            # SSL currently configured from relation.
+            return BlockedStatus(
+                "Conflict: Active SSL from 'certificates' relation, Charm "
+                "config is ignored. Remove conflicting source to proceed."
+            )
+
+        # Check for ssl material validity.
+        is_valid, msg = cmds.validate_ssl_keypair(cert, key)
+        if not is_valid:
+            return BlockedStatus(
+                "Invalid SSL key/cert: {}".format(msg)
+            )
+        return BlockedStatus("Unknown SSL source.")
+
+    def _configure_tls_from_charm_config(self) -> None:
+        """Configure TLS using charm config values."""
+        logging.debug("Attempting to collect TLS config from charm config")
+        key, cert, ca_cert = self._get_tls_from_config()
+        if not (key and cert):
+            logging.error("Not configuring, not all config data present")
+            return
+
+        # Configure TLS
+        self._configure_tls(key, cert, ca_cert, self.TLS_CHARM_CA_CERT_PATH)
+
+    def _configure_tls_from_relation(self) -> None:
+        """Configure TLS from certificates relation"""
+        logging.debug("Attempting to collect TLS config from relation")
+        key, cert, ca_cert = self._get_tls_from_relation()
+        if not (key and cert):
+            logging.error("Not configuring TLS, not all relation data present")
+            return
+
+        # Configure TLS
+        self._configure_tls(key, cert, ca_cert, self.TLS_VAULT_CA_CERT_PATH)
+
+    # Custom SSL event handlers
+    def _enable_ssl_from_config(self, event) -> None:
+        """Configure Ceph Dashboard SSL with available key/cert from charm."""
+        if not ceph_utils.is_dashboard_enabled():
+            if self.unit.is_leader():
+                ceph_utils.mgr_enable_dashboard()
+            else:
+                event.defer()
+                return
+
+        if all([
+            cmds.check_ceph_dashboard_ssl_configured(),
+            cmds.check_ceph_dashboard_ssl_configured(is_check_host_key=True)
+        ]):
+            # SSL is already configured for both central and host key/cert.
+            return
+
+        self._configure_tls_from_charm_config()
+        self.update_status()
+
+    # Certificates relation handler.
+    def _enable_ssl_from_relation(self, event) -> None:
+        """Configure Ceph Dashboard SSL using key/cert from relation."""
+        if not ceph_utils.is_dashboard_enabled():
+            if self.unit.is_leader():
+                ceph_utils.mgr_enable_dashboard()
+            else:
+                event.defer()
+                return
+
+        if cmds.check_ceph_dashboard_ssl_configured():
+            key, cert, _ = self._get_tls_from_config()
+            if self.is_ceph_dashboard_ssl_key_cert_same(key, cert):
+                # Charm relation event deferred until conflicting charm config
+                # ssl is removed. Operator is informed through unit status.
+                event.defer()
+                return
+            return  # SSL is already configured.
+
+        self._configure_tls_from_relation()
+        self.update_status()
+
+
+if __name__ == "__main__":
+    main(CephDashboardCharm)
diff --git a/ceph-dashboard/src/charm_option.py b/ceph-dashboard/src/charm_option.py
new file mode 100644
index 00000000..08f72af7
--- /dev/null
+++ b/ceph-dashboard/src/charm_option.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python3
+# Copyright 2023 Canonical
+# See LICENSE file for licensing details.
+#
+# Learn more at: https://juju.is/docs/sdk
+
+import charmhelpers.core.host as ch_host
+from typing import List, Union
+
+
+class CharmCephOption():
+    """Map a charm option to the ceph command that manages that option."""
+
+    def __init__(
+        self, charm_option_name, ceph_option_name, min_version=None
+    ):
+        self.charm_option_name = charm_option_name
+        self.ceph_option_name = ceph_option_name
+        self.min_version = min_version
+
+    def is_supported(self) -> bool:
+        """Is the option supported on this unit"""
+        if self.min_version:
+            return self.minimum_supported(self.min_version)
+        return True
+
+    def minimum_supported(self, supported_version: str) -> bool:
+        """Check if installed Ceph release is >= to supported_version"""
+        return ch_host.cmp_pkgrevno('ceph-common', supported_version) >= 0
+
+    def convert_option(self, value: Union[bool, str, int]) -> List[str]:
+        """Convert a value to the corresponding value part of the ceph
+        dashboard command"""
+        return [str(value)]
+
+    def ceph_command(self, value: Union[bool, str, int]) -> List[str]:
+        """Shell command to set option to desired value"""
+        cmd = ['ceph', 'dashboard', self.ceph_option_name]
+        cmd.extend(self.convert_option(value))
+        return cmd
+
+
+class DebugOption(CharmCephOption):
+
+    def convert_option(self, value):
+        """Convert charm True/False to enable/disable"""
+        if value:
+            return ['enable']
+        else:
+            return ['disable']
+
+
+class MOTDOption(CharmCephOption):
+
+    def convert_option(self, value):
+        """Split motd charm option into ['severity', 'time', 'message']"""
+        if value:
+            return value.split('|')
+        else:
+            return ['clear']
+
+
+class CharmCephOptionList():
+    def get(self) -> List:
+        """Get Charm options list"""
+        return [
+            DebugOption('debug', 'debug'),
+            CharmCephOption(
+                'enable-password-policy',
+                'set-pwd-policy-enabled'),
+            CharmCephOption(
+                'password-policy-check-length',
+                'set-pwd-policy-check-length-enabled'),
+            CharmCephOption(
+                'password-policy-check-oldpwd',
+                'set-pwd-policy-check-oldpwd-enabled'),
+            CharmCephOption(
+                'password-policy-check-username',
+                'set-pwd-policy-check-username-enabled'),
+            CharmCephOption(
+                'password-policy-check-exclusion-list',
+                'set-pwd-policy-check-exclusion-list-enabled'),
+            CharmCephOption(
+                'password-policy-check-complexity',
+                'set-pwd-policy-check-complexity-enabled'),
+            CharmCephOption(
+                'password-policy-check-sequential-chars',
+                'set-pwd-policy-check-sequential-chars-enabled'),
+            CharmCephOption(
+                'password-policy-check-repetitive-chars',
+                'set-pwd-policy-check-repetitive-chars-enabled'),
+            CharmCephOption(
+                'password-policy-min-length',
+                'set-pwd-policy-min-length'),
+            CharmCephOption(
+                'password-policy-min-complexity',
+                'set-pwd-policy-min-complexity'),
+            CharmCephOption(
+                'audit-api-enabled',
+                'set-audit-api-enabled'),
+            CharmCephOption(
+                'audit-api-log-payload',
+                'set-audit-api-log-payload'),
+            MOTDOption(
+                'motd',
+                'motd',
+                min_version='15.2.14')
+        ]
diff --git a/ceph-dashboard/src/dashboards/ceph-cluster.json b/ceph-dashboard/src/dashboards/ceph-cluster.json
new file mode 100644
index 00000000..d376e242
--- /dev/null
+++ b/ceph-dashboard/src/dashboards/ceph-cluster.json
@@ -0,0 +1,1624 @@
+{
+  "__requires": [
+    {
+      "type": "grafana",
+      "id": "grafana",
+      "name": "Grafana",
+      "version": "5.0.0"
+    },
+    {
+      "type": "panel",
+      "id": "graph",
+      "name": "Graph",
+      "version": "5.0.0"
+    },
+    {
+      "type": "panel",
+      "id": "heatmap",
+      "name": "Heatmap",
+      "version": "5.0.0"
+    },
+    {
+      "type": "panel",
+      "id": "singlestat",
+      "name": "Singlestat",
+      "version": "5.0.0"
+    },
+    {
+ "type": "panel", + "id": "vonage-status-panel", + "name": "Status Panel", + "version": "1.0.8" + } + ], + "annotations": { + "list": [] + }, + "description": "Ceph cluster overview", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1525415495309, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 128, 45, 0.9)", + "rgba(237, 129, 40, 0.9)", + "rgb(255, 0, 0)" + ], + "datasource": "$datasource", + "editable": false, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 0, + "y": 0 + }, + "hideTimeOverride": true, + "id": 21, + "interval": "1m", + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "ceph_health_status{job=~\"$job\"}", + "format": "time_series", + "instant": true, + "interval": "$interval", + "intervalFactor": 1, + "refId": "A", + "step": 60 + } + ], + "thresholds": "1,2", + "timeFrom": null, + "title": "Health Status", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "OK", + "value": "0" + }, + { + "op": "=", + "text": "WARN", + "value": "1" + }, + { + "op": "=", + "text": "ERR", + "value": "2" + } + ], + "valueName": "current" + }, + { + "colorMode": "Panel", + "colors": { + "crit": "rgb(255, 0, 0)", + "disable": "rgba(128, 128, 128, 0.9)", + "ok": "rgba(50, 128, 45, 0.9)", + "warn": "rgba(237, 129, 40, 0.9)" + }, + "cornerRadius": 0, + "datasource": "$datasource", + "displayName": "", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "flipCard": false, + "flipTime": 5, + "fontFormat": "Regular", + "gridPos": { + "h": 3, + "w": 2, + "x": 2, + "y": 0 + }, + "id": 43, + "isAutoScrollOnOverflow": false, + "isGrayOnNoData": false, + "isHideAlertsOnDisable": false, + "isIgnoreOKColors": false, + "links": [], + "targets": [ + { + "aggregation": "Last", + "alias": "All", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ceph_osd_metadata{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "All", + "refId": "A", + "units": "none", + "valueHandler": "Number Threshold" + }, + { + "aggregation": "Last", + "alias": "In", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "sum(ceph_osds_in{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "In", + "refId": "B", + "units": "none", + "valueHandler": "Number Threshold" + }, + { + "aggregation": "Last", + "alias": "Out", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "When 
Alias Displayed", + "expr": "sum(ceph_osd_in{job=~\"$job\"} == bool 0)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Out", + "refId": "C", + "units": "none", + "valueHandler": "Number Threshold", + "warn": 1 + }, + { + "aggregation": "Last", + "alias": "Up", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "sum(ceph_osd_up{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Up", + "refId": "D", + "units": "none", + "valueHandler": "Number Threshold" + }, + { + "aggregation": "Last", + "alias": "Down", + "crit": 2, + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "sum(ceph_osd_up{job=~\"$job\"} == bool 0)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Down", + "refId": "E", + "units": "none", + "valueHandler": "Number Threshold", + "warn": 1 + } + ], + "title": "OSDs", + "type": "vonage-status-panel" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "decimals": 2, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "percentunit", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 4, + "y": 0 + }, + "id": 47, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(ceph_osd_stat_bytes_used{job=~\"$job\"})/sum(ceph_osd_stat_bytes{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Used", + "refId": "A" + } + ], + "thresholds": "70,80", + "title": "Capacity used", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 0 + }, + "hiddenSeries": false, + "id": 53, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.1", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Active", + "color": "#508642", + "fill": 1, + "stack": "A" + }, + { + "alias": "Total", + "color": "#f9e2d2" + }, + { + "alias": "Degraded", + "color": "#eab839" + }, + { + "alias": "Undersized", + "color": 
"#f9934e" + }, + { + "alias": "Inconsistent", + "color": "#e24d42" + }, + { + "alias": "Down", + "color": "#bf1b00" + }, + { + "alias": "Inactive", + "color": "#bf1b00", + "fill": 4, + "linewidth": 0, + "stack": "A" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(ceph_pg_total{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Total", + "refId": "A" + }, + { + "expr": "sum(ceph_pg_active{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Active", + "refId": "B" + }, + { + "expr": "sum(ceph_pg_total{job=~\"$job\"} - ceph_pg_active{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Inactive", + "refId": "G" + }, + { + "expr": "sum(ceph_pg_undersized{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Undersized", + "refId": "F" + }, + { + "expr": "sum(ceph_pg_degraded{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Degraded", + "refId": "C" + }, + { + "expr": "sum(ceph_pg_inconsistent{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Inconsistent", + "refId": "D" + }, + { + "expr": "sum(ceph_pg_down{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Down", + "refId": "E" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "PG States", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 0 + }, + "hiddenSeries": false, + "id": 66, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.1", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Avg Apply Latency", + "color": "#7eb26d" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile(0.95, ceph_osd_apply_latency_ms{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Apply Latency P_95", + "refId": "A" + }, + { + "expr": "quantile(0.95, ceph_osd_commit_latency_ms{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Commit Latency P_95", + "refId": "B" + }, + { + "expr": "avg(ceph_osd_apply_latency_ms{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Avg Apply Latency", + "refId": "C" + }, + { + "expr": "avg(ceph_osd_commit_latency_ms{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Avg 
Commit Latency", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "OSD Latencies", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "clusterName": "", + "colorMode": "Panel", + "colors": { + "crit": "rgba(245, 54, 54, 0.9)", + "disable": "rgba(128, 128, 128, 0.9)", + "ok": "rgba(50, 128, 45, 0.9)", + "warn": "rgba(237, 129, 40, 0.9)" + }, + "cornerRadius": 1, + "datasource": "$datasource", + "displayName": "", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "flipCard": false, + "flipTime": 5, + "fontFormat": "Regular", + "gridPos": { + "h": 3, + "w": 2, + "x": 0, + "y": 3 + }, + "id": 41, + "isAutoScrollOnOverflow": false, + "isGrayOnNoData": false, + "isHideAlertsOnDisable": false, + "isIgnoreOKColors": false, + "links": [], + "targets": [ + { + "aggregation": "Last", + "alias": "In Quorum", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "sum(ceph_mon_quorum_status{job=~\"$job\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "In Quorum", + "refId": "A", + "units": "none", + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "Total", + "crit": 1, + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ceph_mon_quorum_status{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Total", + "refId": "B", + "units": "none", + "valueHandler": "Text Only", + "warn": 2 + }, + { + "aggregation": "Last", + "alias": "MONs out of Quorum", + "crit": 1.6, + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Annotation", + "displayValueWithAlias": "Never", + "expr": "count(ceph_mon_quorum_status{job=~\"$job\"}) / sum(ceph_mon_quorum_status{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "MONs out of Quorum", + "refId": "C", + "units": "none", + "valueHandler": "Number Threshold", + "warn": 1.1 + } + ], + "title": "Monitors", + "type": "vonage-status-panel" + }, + { + "colorMode": "Disabled", + "colors": { + "crit": "rgba(245, 54, 54, 0.9)", + "disable": "rgba(128, 128, 128, 0.9)", + "ok": "rgba(50, 128, 45, 0.9)", + "warn": "rgba(237, 129, 40, 0.9)" + }, + "cornerRadius": 0, + "datasource": "$datasource", + "displayName": "", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "flipCard": false, + "flipTime": 5, + "fontFormat": "Regular", + "gridPos": { + "h": 3, + "w": 2, + "x": 2, + "y": 3 + }, + "id": 68, + "isAutoScrollOnOverflow": false, + "isGrayOnNoData": false, + "isHideAlertsOnDisable": false, + "isIgnoreOKColors": false, + "links": [], + "targets": [ + { + "aggregation": "Last", + "alias": "Clients", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "ceph_mds_server_handle_client_session{job=~\"$job\"}", + 
"format": "time_series", + "intervalFactor": 1, + "legendFormat": "Clients", + "refId": "A", + "units": "none", + "valueHandler": "Number Threshold" + } + ], + "title": "Client connections", + "type": "vonage-status-panel" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 6 + }, + "hiddenSeries": false, + "id": 45, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.1", + "pointradius": 0.5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:271", + "alias": "Reads", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(ceph_osd_op_w_in_bytes{job=~\"$job\"}[1m]))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Writes", + "refId": "A" + }, + { + "expr": "sum(irate(ceph_osd_op_r_out_bytes{job=~\"$job\"}[1m]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Reads", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Cluster I/O", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:278", + "format": "Bps", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:279", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 6 + }, + "hiddenSeries": false, + "id": 62, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.1", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(deriv(ceph_pool_stored{job=~\"$job\"}[1m]))", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "In-/Egress", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": " Egress (-) / Ingress (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + 
"format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus - Juju generated source", + "decimals": 0, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 15 + }, + "hiddenSeries": false, + "id": 70, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(ceph_osd_op_w{job=~\"$job\"}[1m]))", + "interval": "", + "legendFormat": "Write", + "queryType": "randomWalk", + "refId": "A" + }, + { + "expr": "sum(irate(ceph_osd_op_r{job=~\"$job\"}[1m]))", + "hide": false, + "interval": "", + "legendFormat": "Read", + "queryType": "randomWalk", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:184", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:185", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Degraded": "orange", + "Misplaced": "yellow", + "Unfound": "red" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus - Juju generated source", + "decimals": 0, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 15 + }, + "hiddenSeries": false, + "id": 71, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(ceph_pool_objects{job=~\"$job\"})", + "interval": "", + "legendFormat": "Total", + "queryType": "randomWalk", + "refId": "A" + }, + { + "expr": "ceph_num_objects_degraded{job=~\"$job\"}", + "hide": false, + "interval": "", + "legendFormat": "Degraded", + "queryType": "randomWalk", + "refId": "B" + }, + { + "expr": "ceph_num_objects_misplaced{job=~\"$job\"}", + "hide": false, + "interval": "", + "legendFormat": "Misplaced", + "queryType": "randomWalk", + "refId": "C" + }, + { + "expr": "ceph_num_objects_unfound{job=~\"$job\"}", + "hide": false, + "interval": "", + 
"legendFormat": "Unfound", + "queryType": "randomWalk", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Objects in the cluster", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:184", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:185", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cards": { + "cardPadding": null, + "cardRound": 1 + }, + "color": { + "cardColor": "rgb(0, 254, 255)", + "colorScale": "sqrt", + "colorScheme": "interpolateBlues", + "exponent": 0.5, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "timeseries", + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 0, + "y": 26 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 55, + "legend": { + "show": true + }, + "links": [], + "reverseYBuckets": false, + "span": 12, + "targets": [ + { + "expr": "ceph_osd_stat_bytes_used{job=~\"$job\"} / ceph_osd_stat_bytes{job=~\"$job\"}", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Util (%)", + "refId": "A", + "step": 60 + } + ], + "timeFrom": null, + "title": "OSD Capacity Utilization", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": "", + "yAxis": { + "decimals": 2, + "format": "percentunit", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": 1 + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateBlues", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "timeseries", + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 6, + "y": 26 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 59, + "legend": { + "show": true + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "expr": "ceph_osd_numpg{job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "#PGs", + "refId": "A" + } + ], + "title": "PGs per OSD", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": "", + "yAxis": { + "decimals": null, + "format": "none", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 26 + }, + "hiddenSeries": false, + "id": 64, + "legend": { + "avg": false, + "current": false, 
+ "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.1", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(ceph_osd_recovery_ops{job=~\"$job\"}[1m]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Op/s", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Recovery Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": "Recovery Ops/s", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "30s", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "ceph", + "cluster" + ], + "templating": { + "list": [ + { + "hide": 0, + "label": null, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "auto": true, + "auto_count": 10, + "auto_min": "1m", + "current": { + "text": "auto", + "value": "$__auto_interval_interval" + }, + "datasource": null, + "hide": 0, + "includeAll": false, + "label": "Interval", + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "auto", + "value": "$__auto_interval_interval" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "type": "interval" + }, + { + "allValue": null, + "current": {}, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Job", + "multi": false, + "name": "job", + "options": [], + "query": "label_values(ceph_osd_metadata, job)", + "refresh": 1, + "regex": "(.*)", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Ceph - Cluster", + "version": 13 + } diff --git a/ceph-dashboard/src/dashboards/cephfs-overview.json b/ceph-dashboard/src/dashboards/cephfs-overview.json new file mode 100644 index 00000000..bb07e5ce --- /dev/null +++ 
b/ceph-dashboard/src/dashboards/cephfs-overview.json @@ -0,0 +1,330 @@ +{ + "__inputs": [], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.3.2" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1557392920097, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 10, + "panels": [], + "title": "MDS Performance", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Reads/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(ceph_objecter_op_r{ceph_daemon=~\"($mds_servers).*\", job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Read Ops", + "refId": "A" + }, + { + "expr": "sum(ceph_objecter_op_w{ceph_daemon=~\"($mds_servers).*\", job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Write Ops", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "MDS Workload - $mds_servers", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "Reads(-) / Writes (+)", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "ceph_mds_server_handle_client_request{ceph_daemon=~\"($mds_servers).*\", job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ceph_daemon}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client Request Load - $mds_servers", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + 
"name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "Client Requests", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "15s", + "schemaVersion": 16, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": null, + "current": {}, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "MDS Server", + "multi": false, + "name": "mds_servers", + "options": [], + "query": "label_values(ceph_mds_inodes, ceph_daemon)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Job", + "multi": false, + "name": "job", + "options": [], + "query": "label_values(ceph_osd_metadata, job)", + "refresh": 1, + "regex": "(.*)", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "MDS Performance", + "uid": "tbO9LAiZz", + "version": 2 +} diff --git a/ceph-dashboard/src/dashboards/host-details.json b/ceph-dashboard/src/dashboards/host-details.json new file mode 100644 index 00000000..91ba1d25 --- /dev/null +++ b/ceph-dashboard/src/dashboards/host-details.json @@ -0,0 +1,1290 @@ +{ + "__inputs": [], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.3.2" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1615564911000, + "links": [], + "panels": [ + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 16, + "title": "$ceph_hosts System Overview", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 0, + "y": 1 + }, + "height": "160", + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 
+ }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 4, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(sum by (ceph_daemon) (ceph_osd_metadata{hostname='$ceph_hosts', job=~\"$job\"}))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 40, + "textEditor": true + } + ], + "thresholds": "", + "title": "OSDs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": { + "interrupt": "#447EBC", + "steal": "#6D1F62", + "system": "#890F02", + "user": "#3F6833", + "wait": "#C15C17" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Shows the CPU breakdown. When multiple servers are selected, only the first host's cpu data is shown", + "fill": 1, + "gridPos": { + "h": 10, + "w": 6, + "x": 3, + "y": 1 + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "cpu_usage_user{cpu=\"cpu-total\", host='$ceph_hosts', job=~\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "user - {{ host }}", + "refId": "A", + "step": 2 + }, + { + "expr": "cpu_usage_iowait{cpu=\"cpu-total\", host='$ceph_hosts', job=~\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "iowait - {{ host }}", + "refId": "C", + "step": 2 + }, + { + "expr": "cpu_usage_nice{cpu=\"cpu-total\", host='$ceph_hosts', job=~\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "nice - {{ host }}", + "refId": "D", + "step": 2 + }, + { + "expr": "cpu_usage_softirq{cpu=\"cpu-total\", host='$ceph_hosts', job=~\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "softirq - {{ host }}", + "refId": "E", + "step": 2 + }, + { + "expr": "cpu_usage_irq{cpu=\"cpu-total\", host='$ceph_hosts', job=~\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "irq - {{ host }}", + "refId": "F", + "step": 2 + }, + { + "expr": "cpu_usage_system{cpu=\"cpu-total\", host='$ceph_hosts', job=~\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "system - {{ host }}", + "refId": "G", + "step": 2 + }, + { + "expr": "cpu_usage_idle{cpu=\"cpu-total\", host='$ceph_hosts', job=~\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "idle - {{ host }}", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilization", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": 
"individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "% Utilization", + "logBase": 1, + "max": "100", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Available": "#508642", + "Free": "#508642", + "Total": "#bf1b00", + "Used": "#bf1b00", + "total": "#bf1b00", + "used": "#0a50a1" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 10, + "w": 6, + "x": 9, + "y": 1 + }, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "total", + "color": "#bf1b00", + "fill": 0, + "linewidth": 2, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "mem_used{host='$ceph_hosts', job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "used", + "refId": "D" + }, + { + "expr": "mem_free{host='$ceph_hosts', job=~\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Free", + "refId": "A" + }, + { + "expr": "mem_buffered{host='$ceph_hosts', job=~\"$job\"} + mem_cached{host='$ceph_hosts', job=~\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "buffers/cache", + "refId": "C" + }, + { + "expr": "mem_total{host='$ceph_hosts', job=~\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "total", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "RAM Usage", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "RAM used", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Show the network load (rx,tx) across all interfaces (excluding loopback 'lo')", + "fill": 0, + "gridPos": { + "h": 10, + "w": 6, + "x": 15, + "y": 1 + }, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*tx/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (device) (\n irate(net_bytes_recv{host='$ceph_hosts',device!=\"lo\", 
job=~\"$job\"}[1m])\n)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}.rx", + "refId": "A", + "step": 10, + "textEditor": true + }, + { + "expr": "sum by (device) (\n irate(net_bytes_sent{host='$ceph_hosts',device!=\"lo\", job=~\"$job\"}[1m])\n)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}.tx", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network Load", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "Send (-) / Receive (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "gridPos": { + "h": 5, + "w": 3, + "x": 21, + "y": 1 + }, + "hideTimeOverride": true, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*tx/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(net_drop_in{host='$ceph_hosts', job=~\"$job\"}[1m])", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "{{device}}.rx", + "refId": "A" + }, + { + "expr": "irate(net_drop_out{host='$ceph_hosts', job=~\"$job\"}[1m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}.tx", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network drop rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "pps", + "label": "Send (-) / Receive (+)", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "$datasource", + "decimals": 0, + "description": "Each OSD consists of a Journal/WAL partition and a data partition. 
The RAW Capacity shown is the sum of the data partitions across all OSDs on the selected OSD hosts.", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 0, + "y": 6 + }, + "height": "160", + "id": 2, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 4, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(ceph_osd_stat_bytes{job=~\"$job\"} and on (ceph_daemon) ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\.:].*)?\", job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 40, + "textEditor": true + } + ], + "thresholds": "", + "title": "Raw Capacity", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "gridPos": { + "h": 5, + "w": 3, + "x": 21, + "y": 6 + }, + "hideTimeOverride": true, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*tx/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(net_err_in{host='$ceph_hosts', job=~\"$job\"}[1m])", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "{{device}}.rx", + "refId": "A" + }, + { + "expr": "irate(net_err_out{host='$ceph_hosts', job=~\"$job\"}[1m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}.tx", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network error rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "pps", + "label": "Send (-) / Receive (+)", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 12, + "panels": [], + "repeat": null, + "title": "OSD Disk Performance Statistics", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "For any OSD devices on the host, this chart shows the iops per physical device. 
Each device is shown by its name and corresponding OSD id value", + "fill": 1, + "gridPos": { + "h": 9, + "w": 11, + "x": 0, + "y": 12 + }, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*reads/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(label_replace((irate(diskio_writes{host='$ceph_hosts', job=~\"$job\"}[5m])), \"device\", \"$1\", \"name\", \"(.+)\"), \"exported_instance\", \"$1\", \"host\", \"(.+)\") * on(exported_instance, device) group_right(ceph_daemon) label_replace(ceph_disk_occupation{exported_instance='$ceph_hosts', job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\")", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}}) writes", + "refId": "A", + "step": 10, + "textEditor": true + }, + { + "expr": "label_replace(label_replace((irate(diskio_reads{host='$ceph_hosts', job=~\"$job\"}[5m])), \"device\", \"$1\", \"name\", \"(.+)\"), \"exported_instance\", \"$1\", \"host\", \"(.+)\") * on(exported_instance, device) group_right(ceph_daemon) label_replace(ceph_disk_occupation{exported_instance='$ceph_hosts', job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\")", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}}) reads", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$ceph_hosts Disk IOPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "For OSD hosts, this chart shows the disk bandwidth (read bytes/sec + write bytes/sec) of the physical OSD device. 
Each device is shown by device name, and corresponding OSD id", + "fill": 1, + "gridPos": { + "h": 9, + "w": 11, + "x": 12, + "y": 12 + }, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*read/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr" : "label_replace(label_replace((irate(diskio_write_bytes{job=~\"$job\"}[5m]) / 10 ), \"device\", \"$1\", \"name\", \"(.+)\"), \"exported_instance\", \"$1\", \"host\", \"(.+)\") * on(exported_instance, device) group_right(ceph_daemon) label_replace(ceph_disk_occupation{exported_instance='$ceph_hosts', job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\")", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}}) write", + "refId": "B" + }, + { + "expr" : "label_replace(label_replace((irate(diskio_read_bytes{job=~\"$job\"}[5m]) / 10 ), \"device\", \"$1\", \"name\", \"(.+)\"), \"exported_instance\", \"$1\", \"host\", \"(.+)\") * on(exported_instance, device) group_right(ceph_daemon) label_replace(ceph_disk_occupation{exported_instance='$ceph_hosts', job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\")", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}}) read", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$ceph_hosts Throughput by Disk", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "For OSD hosts, this chart shows the latency at the physical drive. 
Each drive is shown by device name, with its corresponding OSD id", + "fill": 1, + "gridPos": { + "h": 9, + "w": 11, + "x": 0, + "y": 21 + }, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(label_replace((irate(diskio_weighted_io_time{host='$ceph_hosts', job=~\"$job\"}[5m])), \"device\", \"$1\", \"name\", \"(.+)\"), \"exported_instance\", \"$1\", \"host\", \"(.+)\") * on(exported_instance, device) group_right(ceph_daemon) label_replace(ceph_disk_occupation{exported_instance='$ceph_hosts', job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\")", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}})", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$ceph_hosts I/O Queued RQs * RQ time Waiting", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": "Weighted I/O time", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Show disk utilization % (util) of any OSD devices on the host by the physical device name and associated OSD id.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 11, + "x": 12, + "y": 21 + }, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 2, + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(label_replace((irate(diskio_io_time{host='$ceph_hosts', job=~\"$job\"}[5m])), \"device\", \"$1\", \"name\", \"(.+)\"), \"exported_instance\", \"$1\", \"host\", \"(.+)\") * on(exported_instance, device) group_right(ceph_daemon) label_replace(ceph_disk_occupation{exported_instance='$ceph_hosts', job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\")", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}})", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "$ceph_hosts I/O Queue existence time", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": "I/O time", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + 
"format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "10s", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": null, + "current": {}, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Hostname", + "multi": false, + "name": "ceph_hosts", + "options": [], + "query": "label_values(node_scrape_collector_success, instance) ", + "refresh": 1, + "regex": "([^.:]*).*", + "skipUrlSync": false, + "sort": 3, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Job", + "multi": false, + "name": "job", + "options": [], + "query": "label_values(ceph_osd_metadata, job)", + "refresh": 1, + "regex": "(.*)", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Host Details", + "uid": "rtOg0AiWz", + "version": 4 +} diff --git a/ceph-dashboard/src/dashboards/hosts-overview.json b/ceph-dashboard/src/dashboards/hosts-overview.json new file mode 100644 index 00000000..85880eb1 --- /dev/null +++ b/ceph-dashboard/src/dashboards/hosts-overview.json @@ -0,0 +1,873 @@ +{ + "__inputs": [], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.3.2" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1557393917915, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + "id": 5, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 
189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(sum by (hostname) (ceph_osd_metadata{job=~\"$job\"}))", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "OSD Hosts", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "Average CPU busy across all hosts (OSD, RGW, MON etc) within the cluster", + "decimals": 2, + "format": "percentunit", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "id": 6, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "avg(\n 1 - (\n avg by(dns_name) \n (cpu_usage_idle{dns_name=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\", cpu='cpu-total', job=~\"$job\"} / 100)))", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "AVG CPU Busy", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "Average Memory Usage across all hosts in the cluster (excludes buffer/cache usage)", + "decimals": 2, + "format": "percentunit", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "avg (((mem_total{dns_name=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\", job=~\"$job\"}) - (\n (mem_free{dns_name=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) + \n (mem_cached{dns_name=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\", job=~\"$job\"}) + \n 
(mem_buffered{dns_name=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\", job=~\"$job\"}) +\n (mem_slab{dns_name=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\", job=~\"$job\"})\n )) /\n (mem_total{dns_name=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\", job=~\"$job\"}))", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "AVG RAM Utilization", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "IOPS Load at the device as reported by the OS on all OSD hosts", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 0 + }, + "id": 2, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum ((irate(diskio_reads{dns_name=~\"($osd_hosts|$mds_hosts).*\", job=~\"$job\"}[5m])) + \n(irate(diskio_writes{dns_name=~\"($osd_hosts|$mds_hosts).*\", job=~\"$job\"}[5m])))", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Physical IOPS", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "Average Disk utilization for all OSD data devices (i.e. 
excludes journal/WAL)", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 0 + }, + "id": 20, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr" : "avg (label_replace(label_replace((irate(diskio_io_time{job=~\"$job\"}[5m]) / 10 ), \"device\", \"$1\", \"name\", \"(.+)\"), \"exported_instance\", \"$1\", \"host\", \"(.+)\") * on(exported_instance, device) group_right(ceph_daemon) label_replace(ceph_disk_occupation{dns_name=~\"($osd_hosts|$mds_hosts).*\", job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"))", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "AVG Disk Utilization", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "decimals": 0, + "description": "Total send/receive network load across all hosts in the ceph cluster", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 0 + }, + "id": 18, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (\n irate(net_bytes_recv{dns_name=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\", job=~\"$job\"}[1m])\n) +\nsum (\n irate(net_bytes_sent{dns_name=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\", job=~\"$job\"}[1m]))", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Network Load", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Show the top 10 busiest hosts by cpu", + "fill": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 5 + }, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": 
false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10,100 * (\n 1 - (\n avg by(dns_name) \n (cpu_usage_idle{dns_name=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\", cpu='cpu-total', job=~\"$job\"} / 100))))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{dns_name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Busy - Top 10 Hosts", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "percent", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Top 10 hosts by network load", + "fill": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 5 + }, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, (sum by(dns_name) (\n (\n irate(net_bytes_recv{dns_name=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\", job=~\"$job\"}[1m])\n ) +\n (\n irate(net_bytes_sent{dns_name=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\", job=~\"$job\"}[1m])\n ))\n )\n)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{dns_name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network Load - Top 10 Hosts", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "10s", + "schemaVersion": 16, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": "", + "current": {}, + "datasource": "$datasource", + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "osd_hosts", + "options": [], + "query": "label_values(ceph_disk_occupation, 
exported_instance)", + "refresh": 1, + "regex": "([^.]*).*", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "ceph", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "$datasource", + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "mon_hosts", + "options": [], + "query": "label_values(ceph_mon_metadata, ceph_daemon)", + "refresh": 1, + "regex": "mon.(.*)", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "$datasource", + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "mds_hosts", + "options": [], + "query": "label_values(ceph_mds_inodes, ceph_daemon)", + "refresh": 1, + "regex": "mds.(.*)", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "$datasource", + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "rgw_hosts", + "options": [], + "query": "label_values(ceph_rgw_qlen, ceph_daemon)", + "refresh": 1, + "regex": "rgw.(.*)", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Job", + "multi": false, + "name": "job", + "options": [], + "query": "label_values(ceph_osd_metadata, job)", + "refresh": 1, + "regex": "(.*)", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Host Overview", + "uid": "y0KGL0iZz", + "version": 3 +} diff --git a/ceph-dashboard/src/dashboards/osd-device-details.json b/ceph-dashboard/src/dashboards/osd-device-details.json new file mode 100644 index 00000000..e2ee7e28 --- /dev/null +++ b/ceph-dashboard/src/dashboards/osd-device-details.json @@ -0,0 +1,1326 @@ +{ + "__inputs": [], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.3.2" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1557395861896, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 16, + "panels": [], + "title": "Status $osd", + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "prometheus - Juju generated source", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": {}, + "mappings": [ + { + "from": "0", + "id": 1, + "text": "DOWN", + "to": "0.99", + "type": 2, + "value": "0" + }, + { + "from": 
"0.99", + "id": 2, + "text": "UP", + "to": "1", + "type": 2, + "value": "1" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0 + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 0, + "y": 1 + }, + "id": 18, + "interval": null, + "links": [], + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "text": { + "valueSize": 30 + }, + "textMode": "value" + }, + "pluginVersion": "7.4.1", + "targets": [ + { + "$$hashKey": "object:484", + "aggregation": "Last", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", + "expr": "ceph_osd_up{job=~\"$job\", ceph_daemon=~\"$osd\"}", + "format": "table", + "instant": false, + "interval": "", + "legendFormat": "", + "queryType": "randomWalk", + "refId": "A", + "units": "none", + "valueHandler": "Number Threshold" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "", + "transparent": true, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "prometheus - Juju generated source", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": {}, + "mappings": [ + { + "from": "0", + "id": 1, + "text": "OUT", + "to": "0.99", + "type": 2, + "value": "0" + }, + { + "from": "0.99", + "id": 2, + "text": "IN", + "to": "1", + "type": 2, + "value": "1" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0 + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 2, + "y": 1 + }, + "id": 19, + "interval": null, + "links": [], + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "text": { + "valueSize": 30 + }, + "textMode": "value" + }, + "pluginVersion": "7.4.1", + "targets": [ + { + "$$hashKey": "object:484", + "aggregation": "Last", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", + "expr": "ceph_osd_in{job=~\"$job\", ceph_daemon=~\"$osd\"}", + "format": "table", + "instant": false, + "interval": "", + "legendFormat": "", + "queryType": "randomWalk", + "refId": "A", + "units": "none", + "valueHandler": "Number Threshold" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "", + "transparent": true, + "type": "stat" + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 21, + "panels": [], + "title": "Utilization $osd", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus - Juju generated source", + "fieldConfig": { + "defaults": { + "color": {}, + "custom": {}, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 18, + "x": 0, + "y": 5 + }, + "hiddenSeries": false, + "id": 23, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, 
+ "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "ceph_osd_numpg{job=~\"$job\", ceph_daemon=~\"$osd\"}", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": "Num PGs in $osd", + "queryType": "randomWalk", + "refId": "A" + }, + { + "expr": "avg(ceph_osd_numpg)", + "hide": false, + "interval": "", + "legendFormat": "Average Number of PGs in the Cluster", + "refId": "B" + } + ], + "thresholds": [ + { + "$$hashKey": "object:702", + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": null, + "yaxis": "right" + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "PGs", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:94", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:95", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "datasource": "prometheus - Juju generated source", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": {}, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 60 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 6, + "x": 18, + "y": 5 + }, + "id": 25, + "options": { + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "7.4.1", + "targets": [ + { + "expr": "(ceph_osd_stat_bytes_used{job=~\"$job\", ceph_daemon=~\"$osd\"}/ceph_osd_stat_bytes{job=~\"$job\", ceph_daemon=~\"$osd\"})*100", + "interval": "", + "legendFormat": "", + "queryType": "randomWalk", + "refId": "A" + } + ], + "title": "Utilization", + "type": "gauge" + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 14, + "panels": [], + "title": "OSD Performance", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 6, + "x": 0, + "y": 16 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.1", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "read", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": 
false, + "targets": [ + { + "expr": "irate(ceph_osd_op_r_latency_sum{job=~\"$job\", ceph_daemon=~\"$osd\"}[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count{job=~\"$job\", ceph_daemon=~\"$osd\"}[1m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "read", + "refId": "A" + }, + { + "expr": "irate(ceph_osd_op_w_latency_sum{job=~\"$job\", ceph_daemon=~\"$osd\"}[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count{job=~\"$job\", ceph_daemon=~\"$osd\"}[1m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "write", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "$osd Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 6, + "x": 6, + "y": 16 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.1", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Reads", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(ceph_osd_op_r{job=~\"$job\", ceph_daemon=~\"$osd\"}[1m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Reads", + "refId": "A" + }, + { + "expr": "irate(ceph_osd_op_w{job=~\"$job\", ceph_daemon=~\"$osd\"}[1m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Writes", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "$osd R/W IOPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 6, + "x": 12, + "y": 16 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + 
"links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.1", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Read Bytes", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(ceph_osd_op_r_out_bytes{job=~\"$job\", ceph_daemon=~\"$osd\"}[1m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Read Bytes", + "refId": "A" + }, + { + "expr": "irate(ceph_osd_op_w_in_bytes{job=~\"$job\", ceph_daemon=~\"$osd\"}[1m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Write Bytes", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "$osd R/W Bytes", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 25 + }, + "id": 12, + "panels": [], + "title": "Physical Device Performance", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 6, + "x": 0, + "y": 26 + }, + "hiddenSeries": false, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.1", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Reads/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(label_replace(irate(node_disk_read_time_seconds_total[1m]) / irate(node_disk_reads_completed_total[1m]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device) label_replace(label_replace(ceph_disk_occupation{job=~\"$job\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}}/{{device}} Reads", + "refId": "A" + }, + { + "expr": "(label_replace(irate(node_disk_write_time_seconds_total[1m]) / irate(node_disk_writes_completed_total[1m]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device) label_replace(label_replace(ceph_disk_occupation{job=~\"$job\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}}/{{device}} Writes", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, 
+ "title": "Physical Device Latency for $osd", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 6, + "x": 6, + "y": 26 + }, + "hiddenSeries": false, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.1", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Reads/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(irate(node_disk_writes_completed_total[1m]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device) label_replace(label_replace(ceph_disk_occupation{job=~\"$job\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} on {{instance}} Writes", + "refId": "A" + }, + { + "expr": "label_replace(irate(node_disk_reads_completed_total[1m]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device) label_replace(label_replace(ceph_disk_occupation{job=~\"$job\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} on {{instance}} Reads", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Physical Device R/W IOPS for $osd", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 6, + "x": 12, + "y": 26 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + 
"alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.1", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Reads/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(irate(node_disk_read_bytes_total[1m]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device) label_replace(label_replace(ceph_disk_occupation{job=~\"$job\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}} {{device}} Reads", + "refId": "A" + }, + { + "expr": "label_replace(irate(node_disk_written_bytes_total[1m]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device) label_replace(label_replace(ceph_disk_occupation{job=~\"$job\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}} {{device}} Writes", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Physical Device R/W Bytes for $osd", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 6, + "x": 18, + "y": 26 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.1", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(irate(node_disk_io_time_seconds_total[1m]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device) label_replace(label_replace(ceph_disk_occupation{job=~\"$job\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} on {{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Physical Device Util% for $osd", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + 
"min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "schemaVersion": 16, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": null, + "current": {}, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Job", + "multi": false, + "name": "job", + "options": [], + "query": "label_values(ceph_osd_metadata, job)", + "refresh": 1, + "regex": "(.*)", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "OSD", + "multi": false, + "name": "osd", + "options": [], + "query": "label_values(ceph_osd_metadata{job=~\"$job\"}, ceph_daemon)", + "refresh": 1, + "regex": "(.*)", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "OSD device details", + "uid": "CrAHE0iZz", + "version": 3 +} diff --git a/ceph-dashboard/src/dashboards/osds-overview.json b/ceph-dashboard/src/dashboards/osds-overview.json new file mode 100644 index 00000000..95bac3c4 --- /dev/null +++ b/ceph-dashboard/src/dashboards/osds-overview.json @@ -0,0 +1,897 @@ +{ + + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "grafana-piechart-panel", + "name": "Pie Chart", + "version": "1.3.3" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1538083987689, + "links": [], + "panels": [ + { + "aliasColors": { + "@95%ile": "#e0752d" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 0 + }, + "id": 12, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg (irate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count{job=~\"$job\"}[1m]) * 1000)", + "format": 
"time_series", + "intervalFactor": 1, + "legendFormat": "AVG read", + "refId": "A" + }, + { + "expr": "max (irate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count{job=~\"$job\"}[1m]) * 1000)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "MAX read", + "refId": "B" + }, + { + "expr": "quantile(0.95,\n (irate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count{job=~\"$job\"}[1m]) * 1000)\n)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "@95%ile", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "OSD Read Latencies", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "columns": [], + "datasource": "$datasource", + "description": "This table shows the osd's that are delivering the 10 highest read latencies within the cluster", + "fontSize": "100%", + "gridPos": { + "h": 8, + "w": 4, + "x": 8, + "y": 0 + }, + "id": 15, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 2, + "desc": true + }, + "styles": [ + { + "alias": "OSD ID", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "ceph_daemon", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Latency (ms)", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk(10,\n (sort(\n (irate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count{job=~\"$job\"}[1m]) * 1000)\n ))\n)\n\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Highest READ Latencies", + "transform": "table", + "type": "table" + }, + { + "aliasColors": { + "@95%ile write": "#e0752d" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 8, + "w": 8, + "x": 12, + "y": 0 + }, + "id": 13, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg (irate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[1m]) / on (ceph_daemon) 
irate(ceph_osd_op_w_latency_count{job=~\"$job\"}[1m]) * 1000)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "AVG write", + "refId": "A" + }, + { + "expr": "max (irate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count{job=~\"$job\"}[1m]) * 1000)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "MAX write", + "refId": "B" + }, + { + "expr": "quantile(0.95,\n (irate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count{job=~\"$job\"}[1m]) * 1000)\n)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "@95%ile write", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "OSD Write Latencies", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "columns": [], + "datasource": "$datasource", + "description": "This table shows the OSDs that are delivering the 10 highest write latencies within the cluster", + "fontSize": "100%", + "gridPos": { + "h": 8, + "w": 4, + "x": 20, + "y": 0 + }, + "id": 16, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 2, + "desc": true + }, + "styles": [ + { + "alias": "OSD ID", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "ceph_daemon", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Latency (ms)", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk(10,\n (sort(\n (irate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count{job=~\"$job\"}[1m]) * 1000)\n ))\n)\n\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Highest WRITE Latencies", + "transform": "table", + "type": "table" + }, + { + "aliasColors": {}, + "breakPoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "$datasource", + "fontSize": "80%", + "format": "none", + "gridPos": { + "h": 8, + "w": 4, + "x": 0, + "y": 8 + }, + "id": 2, + "interval": null, + "legend": { + "show": true, + "values": true + }, + "legendType": "Under graph", + "links": [], + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "pie", + "strokeWidth": 1, + "targets": [ + { + "expr": "count by (device_class) (ceph_osd_metadata{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat":
"{{device_class}}", + "refId": "A" + } + ], + "title": "OSD Types Summary", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": { + "Non-Encrypted": "#E5AC0E" + }, + "breakPoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "$datasource", + "fontSize": "80%", + "format": "none", + "gridPos": { + "h": 8, + "w": 4, + "x": 4, + "y": 8 + }, + "height": "200px", + "hideTimeOverride": true, + "id": 4, + "interval": null, + "legend": { + "percentage": false, + "show": true, + "values": true + }, + "legendType": "Under graph", + "links": [], + "maxDataPoints": "1", + "minSpan": 4, + "nullPointMode": "connected", + "pieType": "pie", + "strokeWidth": 1, + "targets": [ + { + "expr": "count(ceph_bluefs_wal_total_bytes{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "bluestore", + "refId": "A", + "step": 240 + }, + { + "expr": "count(ceph_osd_metadata{job=~\"$job\"}) - count(ceph_bluefs_wal_total_bytes{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "filestore", + "refId": "B", + "step": 240 + }, + { + "expr": "absent(ceph_bluefs_wal_total_bytes{job=~\"$job\"})*count(ceph_osd_metadata{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "filestore", + "refId": "C", + "step": 240 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "OSD Objectstore Types", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": {}, + "breakPoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": "0.05" + }, + "datasource": "$datasource", + "description": "The pie chart shows the various OSD sizes used within the cluster", + "fontSize": "80%", + "format": "none", + "gridPos": { + "h": 8, + "w": 4, + "x": 8, + "y": 8 + }, + "height": "220", + "hideTimeOverride": true, + "id": 8, + "interval": null, + "legend": { + "header": "", + "percentage": false, + "show": true, + "sideWidth": null, + "sortDesc": true, + "values": true + }, + "legendType": "Under graph", + "links": [], + "maxDataPoints": "", + "minSpan": 6, + "nullPointMode": "connected", + "pieType": "pie", + "strokeWidth": "1", + "targets": [ + { + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} < 1099511627776)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<1 TB", + "refId": "A", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 1099511627776 < 2199023255552)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<2 TB", + "refId": "B", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 2199023255552 < 3298534883328)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<3TB", + "refId": "C", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 3298534883328 < 4398046511104)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<4TB", + "refId": "D", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 4398046511104 < 6597069766656)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<6TB", + "refId": "E", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 6597069766656 < 8796093022208)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<8TB", + "refId": "F", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 8796093022208 < 
10995116277760)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<10TB", + "refId": "G", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 10995116277760 < 13194139533312)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<12TB", + "refId": "H", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 13194139533312)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "12TB+", + "refId": "I", + "step": 2 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "OSD Size Summary", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Each bar indicates the number of OSD's that have a PG count in a specific range as shown on the x axis.", + "fill": 1, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "ceph_osd_numpg{job=~\"$job\"}\n", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "PGs per OSD", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Distribution of PGs per OSD", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": 20, + "mode": "histogram", + "name": null, + "show": true, + "values": [ + "total" + ] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": "# of OSDs", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 20, + "panels": [], + "title": "R/W Profile", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Show the read/write workload profile overtime", + "fill": 1, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "round(sum(irate(ceph_pool_rd{job=~\"$job\"}[30s])))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Reads", + "refId": "A" + }, + { + "expr": "round(sum(irate(ceph_pool_wr{job=~\"$job\"}[30s])))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Writes", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Read/Write Profile", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": 
"individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "refresh": "10s", + "schemaVersion": 16, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "tags": [], + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": {}, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Job", + "multi": false, + "name": "job", + "options": [], + "query": "label_values(ceph_osd_metadata, job)", + "refresh": 1, + "regex": "(.*)", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "OSD Overview", + "uid": "lo02I1Aiz", + "version": 3 +} diff --git a/ceph-dashboard/src/dashboards/pool-detail.json b/ceph-dashboard/src/dashboards/pool-detail.json new file mode 100644 index 00000000..056592f8 --- /dev/null +++ b/ceph-dashboard/src/dashboards/pool-detail.json @@ -0,0 +1,686 @@ +{ + "__inputs": [], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.3.2" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1551858875941, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "decimals": 2, + "format": "percentunit", + "gauge": { + "maxValue": 1, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 0, + "y": 0 + }, + "id": 12, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "(ceph_pool_stored{job=~\"$job\"} / (ceph_pool_stored{job=~\"$job\"} + 
ceph_pool_max_avail{job=~\"$job\"})) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\", job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": ".7,.8", + "title": "Capacity used", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "Time till pool is full assuming the average fill rate of the last 6 hours", + "format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 7, + "y": 0 + }, + "id": 14, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "(ceph_pool_max_avail{job=~\"$job\"} / deriv(ceph_pool_stored{job=~\"$job\"}[6h])) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\", job=~\"$job\"} > 0", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Time till full", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "inf", + "value": "null" + }, + { + "op": "=", + "text": "inf", + "value": "N/A" + } + ], + "valueName": "current" + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "deriv(ceph_pool_objects{job=~\"$job\"}[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\", job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Objects per second", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$pool_name Object Ingress/Egress", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": "Objects out(-) / in(+) ", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": 
false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 7 + }, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "reads", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(ceph_pool_rd{job=~\"$job\"}[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\", job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "reads", + "refId": "B" + }, + { + "expr": "irate(ceph_pool_wr{job=~\"$job\"}[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\", job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "writes", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$pool_name Client IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "iops", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 7 + }, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "reads", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(ceph_pool_rd_bytes{job=~\"$job\"}[1m]) + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\", job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "reads", + "refId": "A" + }, + { + "expr": "irate(ceph_pool_wr_bytes{job=~\"$job\"}[1m]) + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\", job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "writes", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$pool_name Client Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + 
"label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 14 + }, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "ceph_pool_objects{job=~\"$job\"} * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\", job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Number of Objects", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$pool_name Objects", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "Objects", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "15s", + "schemaVersion": 16, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus admin.virt1.home.fajerski.name:9090", + "value": "Prometheus admin.virt1.home.fajerski.name:9090" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": null, + "current": {}, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Pool Name", + "multi": false, + "name": "pool_name", + "options": [], + "query": "label_values(ceph_pool_metadata,name)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Job", + "multi": false, + "name": "job", + "options": [], + "query": "label_values(ceph_osd_metadata, job)", + "refresh": 1, + "regex": "(.*)", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Ceph Pool Details", + "uid": "-xyV8KCiz", + "version": 1 +} diff --git a/ceph-dashboard/src/dashboards/pool-overview.json 
b/ceph-dashboard/src/dashboards/pool-overview.json new file mode 100644 index 00000000..1751b8a0 --- /dev/null +++ b/ceph-dashboard/src/dashboards/pool-overview.json @@ -0,0 +1,1585 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1617656284287, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 0, + "y": 0 + }, + "id": 21, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(ceph_pool_metadata{job=~\"$job\"})", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Pools", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "Count of the pools that have compression enabled", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 3, + "y": 0 + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "pluginVersion": "6.7.4", + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(ceph_pool_metadata{compression_mode!=\"none\", job=~\"$job\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Pools with Compression", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + 
"colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "decimals": 1, + "description": "Total raw capacity available to the cluster", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 6, + "y": 0 + }, + "id": 27, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(ceph_osd_stat_bytes{job=~\"$job\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Total Raw Capacity", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "Total raw capacity consumed by user data and associated overheads (metadata + redundancy)", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 9, + "y": 0 + }, + "id": 25, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(ceph_pool_bytes_used{job=~\"$job\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Raw Capacity Consumed", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current", + "decimals": 2 + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "decimals": 1, + "description": "Total of client data stored in the cluster", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 12, + "y": 0 + }, + "id": 23, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to 
text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(ceph_pool_stored{job=~\"$job\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Logical Stored ", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "decimals": 1, + "description": "A compression saving is determined as the data eligible to be compressed minus the capacity used to store the data after compression", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 15, + "y": 0 + }, + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(ceph_pool_compress_under_bytes{job=~\"$job\"} - ceph_pool_compress_bytes_used{job=~\"$job\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Compression Savings", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "Indicates how suitable the data is within the pools that are/have been enabled for compression - averaged across all pools holding compressed data\n", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 18, + "y": 0 + }, + "id": 17, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + 
"fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "(sum(ceph_pool_compress_under_bytes{job=~\"$job\"} > 0) / sum(ceph_pool_stored_raw{job=~\"$job\"} and ceph_pool_compress_under_bytes{job=~\"$job\"} > 0)) * 100", + "format": "table", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Compression Eligibility", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "This factor describes the average ratio of data eligible to be compressed divided by the data actually stored. It does not account for data written that was ineligible for compression (too small, or compression yield too low)", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 21, + "y": 0 + }, + "id": 15, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "80%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(ceph_pool_compress_under_bytes{job=~\"$job\"} > 0) / sum(ceph_pool_compress_bytes_used{job=~\"$job\"} > 0)", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Compression Factor", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "columns": [], + "datasource": "$datasource", + "fontSize": "100%", + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 5, + "links": [], + "maxPerRow": 3, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 5, + "desc": true + }, + "styles": [ + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Time", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "instance", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + 
"decimals": 2, + "pattern": "job", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Pool Name", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "name", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Pool ID", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "pool_id", + "thresholds": [], + "type": "hidden", + "unit": "none" + }, + { + "alias": "Compression Factor", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 1, + "mappingType": 1, + "pattern": "Value #A", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "alias": "% Used", + "align": "auto", + "colorMode": "value", + "colors": [ + "rgb(0, 0, 0)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #D", + "thresholds": [ + "70", + "85" + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Usable Free", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #B", + "thresholds": [], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Compression Eligibility", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "mappingType": 1, + "pattern": "Value #C", + "thresholds": [], + "type": "number", + "unit": "percent" + }, + { + "alias": "Compression Savings", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 1, + "mappingType": 1, + "pattern": "Value #E", + "thresholds": [], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Growth (5d)", + "align": "auto", + "colorMode": "value", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #F", + "thresholds": [ + "0", + "0" + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "IOPS", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "mappingType": 1, + "pattern": "Value #G", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "alias": "Bandwidth", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "mappingType": 1, + "pattern": "Value #H", + "thresholds": [], + "type": "number", + "unit": "Bps" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ 
+ "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "__name__", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "type", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "compression_mode", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Type", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "description", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Stored", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 1, + "mappingType": 1, + "pattern": "Value #J", + "thresholds": [], + "type": "number", + "unit": "bytes" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #I", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Compression", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #K", + "thresholds": [], + "type": "string", + "unit": "short", + "valueMaps": [ + { + "text": "ON", + "value": "1" + } + ] + } + ], + "targets": [ + { + "expr": "(ceph_pool_percent_used{job=~\"$job\"} * on(pool_id) group_left(name) ceph_pool_metadata{job=~\"$job\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "D" + }, + { + "expr": "ceph_pool_stored{job=~\"$job\"} * on(pool_id) group_left ceph_pool_metadata{job=~\"$job\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "J" + }, + { + "expr": "ceph_pool_max_avail{job=~\"$job\"} * on(pool_id) group_left(name) ceph_pool_metadata{job=~\"$job\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "B" + }, + { + "expr": "delta(ceph_pool_stored{job=~\"$job\"}[5d])", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "F" + }, + { + "expr": "ceph_pool_metadata{job=~\"$job\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "I" + }, + { + "expr": "ceph_pool_metadata{compression_mode!=\"none\", job=~\"$job\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "K" + }, + { + "expr": 
"(ceph_pool_compress_under_bytes{job=~\"$job\"} / ceph_pool_compress_bytes_used{job=~\"$job\"} > 0) and on(pool_id) (((ceph_pool_compress_under_bytes{job=~\"$job\"} > 0) / ceph_pool_stored_raw{job=~\"$job\"}) * 100 > 0.5)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + }, + { + "expr": "((ceph_pool_compress_under_bytes{job=~\"$job\"} > 0) / ceph_pool_stored_raw{job=~\"$job\"}) * 100", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "C" + }, + { + "expr": "(ceph_pool_compress_under_bytes{job=~\"$job\"} - ceph_pool_compress_bytes_used{job=~\"$job\"} > 0)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "E" + }, + { + "expr": "rate(ceph_pool_rd{job=~\"$job\"}[30s]) + rate(ceph_pool_wr{job=~\"$job\"}[30s])", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "G" + }, + { + "expr": "rate(ceph_pool_rd_bytes{job=~\"$job\"}[30s]) + rate(ceph_pool_wr_bytes{job=~\"$job\"}[30s])", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "H" + }, + { + "expr": "", + "interval": "", + "legendFormat": "", + "refId": "L" + } + ], + "title": "Pool Overview", + "transform": "table", + "type": "table" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "This chart shows the sum of read and write IOPS from all clients by pool", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "hiddenSeries": false, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 2, + "nullPointMode": "null as zero", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk($topk,round((rate(ceph_pool_rd{job=~\"$job\"}[30s]) + rate(ceph_pool_wr{job=~\"$job\"}[30s])),1) * on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\"}) ", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{name}} ", + "refId": "F" + }, + { + "expr": "topk($topk,rate(ceph_pool_wr{job=~\"$job\"}[30s]) + on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\"}) ", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{name}} - write", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Top $topk Client IOPS by Pool", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": "IOPS", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "The 
chart shows the sum of read and write bytes from all clients, by pool", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 9 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "maxPerRow": 2, + "nullPointMode": "null as zero", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk($topk,(rate(ceph_pool_rd_bytes{job=~\"$job\"}[30s]) + rate(ceph_pool_wr_bytes{job=~\"$job\"}[30s])) * on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{name}}", + "refId": "A", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Top $topk Client Bandwidth by Pool", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": "Throughput", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Historical view of capacity usage, to help identify growth and trends in pool consumption", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 17 + }, + "hiddenSeries": false, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "ceph_pool_bytes_used{job=~\"$job\"} * on(pool_id) group_right ceph_pool_metadata{job=~\"$job\"}", + "interval": "", + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": "14d", + "timeRegions": [ + { + "colorMode": "background6", + "fill": true, + "fillColor": "rgba(234, 112, 112, 0.12)", + "line": false, + "lineColor": "rgba(237, 46, 24, 0.60)", + "op": "time" + } + ], + "timeShift": null, + "title": "Pool Capacity Usage (RAW)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "bytes", + "label": "Capacity Used", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "15s", + "schemaVersion": 22, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": 
{ + "selected": false, + "text": "Dashboard1", + "value": "Dashboard1" + }, + "hide": 0, + "includeAll": false, + "label": "Data Source", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "text": "15", + "value": "15" + }, + "hide": 0, + "label": "Top K", + "name": "topk", + "options": [ + { + "text": "15", + "value": "15" + } + ], + "query": "15", + "skipUrlSync": false, + "type": "textbox" + }, + { + "allValue": null, + "current": {}, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Job", + "multi": false, + "name": "job", + "options": [], + "query": "label_values(ceph_osd_metadata, job)", + "refresh": 1, + "regex": "(.*)", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Ceph Pools Overview", + "uid": "z99hzWtmk", + "variables": { + "list": [] + }, + "version": 10 +} diff --git a/ceph-dashboard/src/dashboards/radosgw-detail.json b/ceph-dashboard/src/dashboards/radosgw-detail.json new file mode 100644 index 00000000..5a5c64ae --- /dev/null +++ b/ceph-dashboard/src/dashboards/radosgw-detail.json @@ -0,0 +1,512 @@ +{ + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "grafana-piechart-panel", + "name": "Pie Chart", + "version": "1.3.3" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1534386250869, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 12, + "panels": [], + "repeat": null, + "title": "RGW Host Detail : $rgw_servers", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 1 + }, + "id": 34, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (ceph_daemon) (rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\", ceph_daemon=~\"($rgw_servers)\"}[30s]) / rate(ceph_rgw_get_initial_lat_count{job=~\"$job\", ceph_daemon=~\"($rgw_servers)\"}[30s]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GET {{ceph_daemon}}", + "refId": "A" + }, + { + "expr": "sum by (ceph_daemon)(rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\", ceph_daemon=~\"($rgw_servers)\"}[30s]) / 
rate(ceph_rgw_put_initial_lat_count{job=~\"$job\", ceph_daemon=~\"($rgw_servers)\"}[30s]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUT {{ceph_daemon}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "$rgw_servers GET/PUT Latencies", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 7, + "x": 6, + "y": 1 + }, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_rgw_get_b{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GETs {{ceph_daemon}}", + "refId": "B" + }, + { + "expr": "rate(ceph_rgw_put_b{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUTs {{ceph_daemon}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Bandwidth by HTTP Operation", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "GETs": "#7eb26d", + "Other": "#447ebc", + "PUTs": "#eab839", + "Requests": "#3f2b5b", + "Requests Failed": "#bf1b00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 7, + "x": 13, + "y": 1 + }, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_rgw_failed_req{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Requests Failed {{ceph_daemon}}", + "refId": "B" + }, + { + "expr": "rate(ceph_rgw_get{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GETs {{ceph_daemon}}", + "refId": "C" + }, + { + "expr": 
"rate(ceph_rgw_put{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUTs {{ceph_daemon}}", + "refId": "D" + }, + { + "expr": "rate(ceph_rgw_req{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}[30s]) -\n (rate(ceph_rgw_get{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}[30s]) +\n rate(ceph_rgw_put{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}[30s]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Other {{ceph_daemon}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "HTTP Request Breakdown", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "Failures": "#bf1b00", + "GETs": "#7eb26d", + "Other (HEAD,POST,DELETE)": "#447ebc", + "PUTs": "#eab839" + }, + "breakPoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "$datasource", + "fontSize": "80%", + "format": "none", + "gridPos": { + "h": 8, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 23, + "interval": null, + "legend": { + "show": true, + "values": true + }, + "legendType": "Under graph", + "links": [], + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "pie", + "strokeWidth": 1, + "targets": [ + { + "expr": "rate(ceph_rgw_failed_req{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Failures {{ceph_daemon}}", + "refId": "A" + }, + { + "expr": "rate(ceph_rgw_get{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GETs {{ceph_daemon}}", + "refId": "B" + }, + { + "expr": "rate(ceph_rgw_put{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUTs {{ceph_daemon}}", + "refId": "C" + }, + { + "expr": "rate(ceph_rgw_req{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}[30s]) -\n (rate(ceph_rgw_get{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}[30s]) +\n rate(ceph_rgw_put{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}[30s]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Other (DELETE,LIST) {{ceph_daemon}}", + "refId": "D" + } + ], + "title": "Workload Breakdown", + "type": "grafana-piechart-panel", + "valueName": "current" + } + ], + "refresh": "15s", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "current": { + "tags": [], + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": {}, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "rgw_servers", + "options": [], + "query": "label_values(ceph_rgw_req, ceph_daemon)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": 
null, + "current": {}, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Job", + "multi": false, + "name": "job", + "options": [], + "query": "label_values(ceph_osd_metadata, job)", + "refresh": 1, + "regex": "(.*)", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "RGW Instance Detail", + "uid": "x5ARzZtmk", + "version": 2 +} diff --git a/ceph-dashboard/src/dashboards/radosgw-overview.json b/ceph-dashboard/src/dashboards/radosgw-overview.json new file mode 100644 index 00000000..e1f7cf22 --- /dev/null +++ b/ceph-dashboard/src/dashboards/radosgw-overview.json @@ -0,0 +1,651 @@ +{ + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1534386107523, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [], + "title": "RGW Overview - All Gateways", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 1 + }, + "id": 29, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[30s]) / rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GET AVG", + "refId": "A" + }, + { + "expr": "rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[30s]) / rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUT AVG", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Average GET/PUT Latencies", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 7, + "w": 7, + "x": 8, + "y": 1 + }, + 
"id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(rgw_host) (label_replace(rate(ceph_rgw_req{job=~\"$job\"}[30s]), \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{rgw_host}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total Requests/sec by RGW Instance", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Latencies are shown stacked, without a yaxis to provide a visual indication of GET latency imbalance across RGW hosts", + "fill": 1, + "gridPos": { + "h": 7, + "w": 6, + "x": 15, + "y": 1 + }, + "id": 31, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\") / \nlabel_replace(rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\")", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{rgw_host}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "GET Latencies by RGW Instance", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": false + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Total bytes transferred in/out of all radosgw instances within the cluster", + "fill": 1, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 8 + }, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + 
"steppedLine": false, + "targets": [ + { + "expr": "sum(rate(ceph_rgw_get_b{job=~\"$job\"}[30s]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GETs", + "refId": "A" + }, + { + "expr": "sum(rate(ceph_rgw_put_b{job=~\"$job\"}[30s]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUTs", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Bandwidth Consumed by Type", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Total bytes transferred in/out through get/put operations, by radosgw instance", + "fill": 1, + "gridPos": { + "h": 6, + "w": 7, + "x": 8, + "y": 8 + }, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(rgw_host) (\n (label_replace(rate(ceph_rgw_get_b{job=~\"$job\"}[30s]), \"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\")) + \n (label_replace(rate(ceph_rgw_put_b{job=~\"$job\"}[30s]), \"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\"))\n)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{rgw_host}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Bandwidth by RGW Instance", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Latencies are shown stacked, without a yaxis to provide a visual indication of PUT latency imbalance across RGW hosts", + "fill": 1, + "gridPos": { + "h": 6, + "w": 6, + "x": 15, + "y": 8 + }, + "id": 32, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\") / \nlabel_replace(rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\")", + "format": "time_series", + "intervalFactor": 1, + 
"legendFormat": "{{rgw_host}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "PUT Latencies by RGW Instance", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": false + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "refresh": "15s", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": "$datasource", + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "rgw_servers", + "options": [], + "query": "label_values(ceph_rgw_req, ceph_daemon)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "tags": [], + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": {}, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Job", + "multi": false, + "name": "job", + "options": [], + "query": "label_values(ceph_osd_metadata, job)", + "refresh": 1, + "regex": "(.*)", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "RGW Overview", + "uid": "WAkugZpiz", + "version": 2 +} diff --git a/ceph-dashboard/src/dashboards/radosgw-sync-overview.json b/ceph-dashboard/src/dashboards/radosgw-sync-overview.json new file mode 100644 index 00000000..66775da6 --- /dev/null +++ b/ceph-dashboard/src/dashboards/radosgw-sync-overview.json @@ -0,0 +1,461 @@ +{ + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1534386107523, + "links": [], + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 0 + }, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + 
"steppedLine": false, + "targets": [ + { + "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_sum{job=~\"$job\"}[30s]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{source_zone}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Replication (throughput) from Source Zone", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "unit": "bytes", + "format": "Bps", + "decimals": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 7, + "w": 7.4, + "x": 8.3, + "y": 0 + }, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_count{job=~\"$job\"}[30s]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{source_zone}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Replication (objects) from Source Zone", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "decimals": null, + "label": "Objects/s", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_poll_latency_sum{job=~\"$job\"}[30s]) * 1000)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{source_zone}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Polling Request Latency from Source Zone", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "unit": "s", + "format": "ms", + "decimals": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + 
"label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 7 + }, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_errors{job=~\"$job\"}[30s]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{source_zone}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Unsuccessful Object Replications from Source Zone", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "decimals": null, + "label": "Count/s", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "refresh": "15s", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": "$datasource", + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "rgw_servers", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "tags": [], + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": {}, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Job", + "multi": false, + "name": "job", + "options": [], + "query": "label_values(ceph_osd_metadata, job)", + "refresh": 1, + "regex": "(.*)", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "RGW Sync Overview", + "uid": "rgw-sync-overview", + "version": 2 +} diff --git a/ceph-dashboard/src/dashboards/rbd-details.json b/ceph-dashboard/src/dashboards/rbd-details.json new file mode 100644 index 00000000..86851c5f --- /dev/null +++ b/ceph-dashboard/src/dashboards/rbd-details.json @@ -0,0 +1,430 @@ +{ + "__inputs": [], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.3.3" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- 
Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Detailed Performance of RBD Images (IOPS/Throughput/Latency)", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1584428820779, + "links": [], + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 0 + }, + "id": 6, + "legend": { + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(ceph_rbd_write_ops{job=~\"$job\", pool=\"$Pool\", image=\"$Image\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Write", + "refId": "A" + }, + { + "expr": "irate(ceph_rbd_read_ops{job=~\"$job\", pool=\"$Pool\", image=\"$Image\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Read", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "iops", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "iops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": true, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 9, + "w": 8, + "x": 8, + "y": 0 + }, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(ceph_rbd_write_bytes{job=~\"$job\", pool=\"$Pool\", image=\"$Image\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Write", + "refId": "A" + }, + { + "expr": "irate(ceph_rbd_read_bytes{job=~\"$job\", pool=\"$Pool\", image=\"$Image\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Read", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": true, + "alignLevel": null + } + }, 
+ { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 9, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(ceph_rbd_write_latency_sum{job=~\"$job\", pool=\"$Pool\", image=\"$Image\"}[30s]) / irate(ceph_rbd_write_latency_count{job=~\"$job\", pool=\"$Pool\", image=\"$Image\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Write", + "refId": "A" + }, + { + "expr": "irate(ceph_rbd_read_latency_sum{job=~\"$job\", pool=\"$Pool\", image=\"$Image\"}[30s]) / irate(ceph_rbd_read_latency_count{job=~\"$job\", pool=\"$Pool\", image=\"$Image\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Read", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Average Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ns", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "ns", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": true, + "alignLevel": null + } + } + ], + "refresh": false, + "schemaVersion": 16, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": {}, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": null, + "current": {}, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "Pool", + "options": [], + "query": "label_values(pool)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "Image", + "options": [], + "query": "label_values(image)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Job", + "multi": false, + "name": "job", + "options": [], + "query": "label_values(ceph_osd_metadata, job)", + "refresh": 1, + "regex": "(.*)", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + 
"timezone": "", + "title": "RBD Details", + "uid": "YhCYGcuZz", + "version": 7 +} diff --git a/ceph-dashboard/src/dashboards/rbd-overview.json b/ceph-dashboard/src/dashboards/rbd-overview.json new file mode 100644 index 00000000..a415e1d5 --- /dev/null +++ b/ceph-dashboard/src/dashboards/rbd-overview.json @@ -0,0 +1,706 @@ +{ + "__inputs": [], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.4.2" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1547242766440, + "links": [], + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 0 + }, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "round(sum(irate(ceph_rbd_write_ops{job=~\"$job\"}[30s])))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Writes", + "refId": "A" + }, + { + "expr": "round(sum(irate(ceph_rbd_read_ops{job=~\"$job\"}[30s])))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Reads", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "IOPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 0 + }, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "round(sum(irate(ceph_rbd_write_bytes{job=~\"$job\"}[30s])))", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "Write", + "refId": "A" + }, + { + "expr": "round(sum(irate(ceph_rbd_read_bytes{job=~\"$job\"}[30s])))", + "format": "time_series", + 
"instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Read", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Throughput", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "round(sum(irate(ceph_rbd_write_latency_sum{job=~\"$job\"}[30s])) / sum(irate(ceph_rbd_write_latency_count{job=~\"$job\"}[30s])))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Write", + "refId": "A" + }, + { + "expr": "round(sum(irate(ceph_rbd_read_latency_sum{job=~\"$job\"}[30s])) / sum(irate(ceph_rbd_read_latency_count{job=~\"$job\"}[30s])))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Read", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Average Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ns", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "columns": [], + "datasource": "$datasource", + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 7 + }, + "hideTimeOverride": false, + "id": 12, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 3, + "desc": true + }, + "styles": [ + { + "alias": "Pool", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "pool", + "thresholds": [], + "type": "string", + "unit": "short", + "valueMaps": [] + }, + { + "alias": "Image", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "image", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "IOPS", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + 
"decimals": 2, + "mappingType": 1, + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "iops" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk(10, (sort((irate(ceph_rbd_write_ops{job=~\"$job\"}[30s]) + on (image, pool, namespace) irate(ceph_rbd_read_ops{job=~\"$job\"}[30s])))))", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Highest IOPS", + "transform": "table", + "type": "table" + }, + { + "columns": [], + "datasource": "$datasource", + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 7 + }, + "id": 10, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 3, + "desc": true + }, + "styles": [ + { + "alias": "Pool", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "pool", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Image", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "image", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Throughput", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "Bps" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk(10, sort(sum(irate(ceph_rbd_read_bytes{job=~\"$job\"}[30s]) + irate(ceph_rbd_write_bytes{job=~\"$job\"}[30s])) by (pool, image, namespace)))", + "format": "table", + "instant": true, + "intervalFactor": 1, + "refId": "A" + } + ], + "title": "Highest Throughput", + "transform": "table", + "type": "table" + }, + { + "columns": [], + "datasource": "$datasource", + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 7 + }, + "id": 14, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 3, + "desc": true + }, + "styles": [ + { + "alias": "Pool", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "pool", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Image", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "image", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Latency", + "colorMode": null, + "colors": [ + 
"rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "ns" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk(10,\n sum(\n irate(ceph_rbd_write_latency_sum{job=~\"$job\"}[30s]) / clamp_min(irate(ceph_rbd_write_latency_count{job=~\"$job\"}[30s]), 1) +\n irate(ceph_rbd_read_latency_sum{job=~\"$job\"}[30s]) / clamp_min(irate(ceph_rbd_read_latency_count{job=~\"$job\"}[30s]), 1)\n ) by (pool, image, namespace)\n)", + "format": "table", + "instant": true, + "intervalFactor": 1, + "refId": "A" + } + ], + "title": "Highest Latency", + "transform": "table", + "type": "table" + } + ], + "refresh": "30s", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": null, + "current": {}, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Job", + "multi": false, + "name": "job", + "options": [], + "query": "label_values(ceph_osd_metadata, job)", + "refresh": 1, + "regex": "(.*)", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "RBD Overview", + "uid": "41FrpeUiz", + "version": 8 +} diff --git a/ceph-dashboard/src/interface_dashboard.py b/ceph-dashboard/src/interface_dashboard.py new file mode 100644 index 00000000..8381aa1d --- /dev/null +++ b/ceph-dashboard/src/interface_dashboard.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 + +import logging + +from ops.framework import ( + StoredState, + EventBase, + ObjectEvents, + EventSource, + Object) + + +class MonReadyEvent(EventBase): + pass + + +class CephDashboardEvents(ObjectEvents): + mon_ready = EventSource(MonReadyEvent) + + +class CephDashboardRequires(Object): + + on = CephDashboardEvents() + _stored = StoredState() + READY_KEY = 'mon-ready' + + def __init__(self, charm, relation_name): + super().__init__(charm, relation_name) + self.relation_name = relation_name + self.framework.observe( + charm.on[relation_name].relation_changed, + self.on_changed) + + @property + def mons_ready(self) -> bool: + """Check that all mons have reported ready.""" + ready = False + if self.dashboard_relation: + # There will only be one unit as this is a subordinate relation. 
+            for unit in self.dashboard_relation.units:
+                unit_data = self.dashboard_relation.data[unit]
+                if unit_data.get(self.READY_KEY) == 'True':
+                    ready = True
+        return ready
+
+    def on_changed(self, event):
+        """Emit mon_ready if mons are ready."""
+        logging.debug("CephDashboardRequires on_changed")
+        if self.mons_ready:
+            self.on.mon_ready.emit()
+
+    @property
+    def dashboard_relation(self):
+        return self.framework.model.get_relation(self.relation_name)
diff --git a/ceph-dashboard/src/interface_grafana_dashboard.py b/ceph-dashboard/src/interface_grafana_dashboard.py
new file mode 100644
index 00000000..e93c024e
--- /dev/null
+++ b/ceph-dashboard/src/interface_grafana_dashboard.py
@@ -0,0 +1,126 @@
+#!/usr/bin/env python3
+
+import copy
+import json
+import hashlib
+import logging
+from uuid import uuid4
+from typing import List
+
+from ops.charm import RelationChangedEvent
+from ops.framework import (
+    StoredState,
+    EventBase,
+    ObjectEvents,
+    EventSource,
+    Object)
+
+
+class GrafanaDashboardEvent(EventBase):
+    pass
+
+
+class GrafanaDashboardEvents(ObjectEvents):
+    dash_ready = EventSource(GrafanaDashboardEvent)
+
+
+class GrafanaDashboardProvides(Object):
+
+    on = GrafanaDashboardEvents()
+    _stored = StoredState()
+
+    def __init__(self, charm: str, relation_name: str) -> None:
+        super().__init__(charm, relation_name)
+        self.relation_name = relation_name
+        self.framework.observe(
+            charm.on[self.relation_name].relation_changed,
+            self._on_relation_changed)
+
+    def _on_relation_changed(self, event: RelationChangedEvent) -> None:
+        """Handle the relation-changed event."""
+        self.on.dash_ready.emit()
+
+    def get_requests_by_name(self, name: str, relation: str) -> List[dict]:
+        """Get a list of requests on relation matching the given name.
+
+        Check the relation data this unit has set on the given relation
+        for requests with a matching name and return them.
+        """
+        requests = []
+        for k, v in relation.data[self.model.unit].items():
+            if k.startswith('request'):
+                request = json.loads(v)
+                if request.get('name') == name:
+                    requests.append(request)
+        return requests
+
+    def get_request_key(self, request_id: str) -> str:
+        """Return the juju relation key for a given request_id."""
+        return 'request_{}'.format(request_id)
+
+    def get_request_id(self, name: str, relation: str, digest: str) -> str:
+        """Return the request id for a request with given name and digest.
+
+        Look for an existing request which has a matching name and digest;
+        if there is one, return the request id of that request. If no
+        matching request is found then generate a new request id.
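+
+        The net effect is that request ids are stable for unchanged
+        content: re-registering a dashboard whose digest has not changed
+        reuses the existing id, while a content change produces a new id.
+        A sketch of the expected behaviour (names illustrative):
+
+            rq1 = self.get_request_id('my-dash', relation, digest)
+            rq2 = self.get_request_id('my-dash', relation, digest)
+            assert rq1 == rq2   # same name and digest -> same request id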
+ """ + logging.debug("Checking for existing request for {}".format(name)) + for request in self.get_requests_by_name(name, relation): + if request.get('dashboard', {}).get('digest') == digest: + logging.debug("Found existing dashboard request") + request_id = request.get('request_id') + break + else: + logging.debug("Generating new request_id") + request_id = str(uuid4()) + return request_id + + def clear_old_requests(self, name: str, relation: str, + digest: str) -> None: + """Remove requests with matching name but different digest""" + old_requests = [] + for request in self.get_requests_by_name(name, relation): + if request.get('dashboard', {}).get('digest') != digest: + old_requests.append(request.get('request_id')) + for request_id in old_requests: + logging.debug("Actually Removing {}".format(request_id)) + rq_key = self.get_request_key(request_id) + relation.data[self.model.unit][rq_key] = '' + + def register_dashboard(self, name: str, dashboard: str): + """ + Request a dashboard to be imported. + + :param name: Name of dashboard. Informational only, so that you can + tell which dashboard request this was, e.g. to check for success or + failure. + :param dashboard: Data structure defining the dashboard. Must be JSON + serializable. (Note: This should *not* be pre-serialized JSON.) + """ + + _dashboard = copy.deepcopy(dashboard) + # In this interface the request id for a job name is preserved. + if self.dashboard_relation: + digest = hashlib.md5( + json.dumps(_dashboard).encode("utf8")).hexdigest() + _dashboard["digest"] = digest + _dashboard["source_model"] = self.model.name + request_id = self.get_request_id(name, self.dashboard_relation, + _dashboard.get('digest')) + rq_key = self.get_request_key(request_id) + self.dashboard_relation.data[self.model.unit][rq_key] = json.dumps( + { + 'request_id': request_id, + 'name': name, + 'dashboard': _dashboard, + }, + sort_keys=True) + self.clear_old_requests( + name, + self.dashboard_relation, + _dashboard.get('digest')) + + @property + def dashboard_relation(self): + return self.model.get_relation(self.relation_name) diff --git a/ceph-dashboard/src/interface_http.py b/ceph-dashboard/src/interface_http.py new file mode 100644 index 00000000..7ec2ed4c --- /dev/null +++ b/ceph-dashboard/src/interface_http.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 + +import logging +from typing import Dict, Union + +from ops.charm import RelationChangedEvent +from ops.model import Relation + +from ops.framework import ( + StoredState, + EventBase, + ObjectEvents, + EventSource, + Object) + + +class HTTPEvent(EventBase): + pass + + +class HTTPEvents(ObjectEvents): + http_ready = EventSource(HTTPEvent) + + +class HTTPRequires(Object): + + on = HTTPEvents() + _stored = StoredState() + required_keys = {'hostname', 'port'} + + def __init__(self, charm: str, relation_name: str) -> None: + super().__init__(charm, relation_name) + self.relation_name = relation_name + self.framework.observe( + charm.on[relation_name].relation_changed, + self.on_changed) + + def on_changed(self, event: RelationChangedEvent) -> None: + """Handle the relation-changed event + + When the relation changes check the relation data from the remote + units to see if all the keys needed are present.""" + logging.debug("http on_changed") + if self.http_relation: + for u in self.http_relation.units: + rel_data = self.http_relation.data[u] + if self.required_keys.issubset(set(rel_data.keys())): + self.on.http_ready.emit() + + def get_service_ep_data(self) -> Union[Dict[str, str], None]: + 
"""Return endpoint data for accessing the remote service. + + Return endpoint data for accessing the remote service. If the relation + or required keys are missing then return None""" + logging.debug("http on_changed") + if self.http_relation: + for u in self.http_relation.units: + rel_data = self.http_relation.data[u] + if self.required_keys.issubset( + set(self.http_relation.data[u].keys())): + return {'hostname': rel_data['hostname'], + 'port': rel_data['port']} + + @property + def http_relation(self) -> Union[Relation, None]: + """The relation matching self.relation_name if it exists""" + return self.model.get_relation(self.relation_name) diff --git a/ceph-dashboard/src/interface_radosgw_user.py b/ceph-dashboard/src/interface_radosgw_user.py new file mode 100644 index 00000000..16278666 --- /dev/null +++ b/ceph-dashboard/src/interface_radosgw_user.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python3 + +# Copyright 2021 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json + +from ops.framework import ( + StoredState, + EventBase, + ObjectEvents, + EventSource, + Object) + + +class RadosGWUserEvent(EventBase): + pass + + +class RadosGWUserEvents(ObjectEvents): + gw_user_ready = EventSource(RadosGWUserEvent) + + +class RadosGWUserRequires(Object): + + on = RadosGWUserEvents() + _stored = StoredState() + + def __init__(self, charm, relation_name, request_system_role=False): + super().__init__(charm, relation_name) + self.relation_name = relation_name + self.request_system_role = request_system_role + self.framework.observe( + charm.on[self.relation_name].relation_joined, + self.request_user) + self.framework.observe( + charm.on[self.relation_name].relation_changed, + self._on_relation_changed) + + def request_user(self, event): + if self.model.unit.is_leader(): + for relation in self.framework.model.relations[self.relation_name]: + relation.data[self.model.app]['system-role'] = json.dumps( + self.request_system_role) + + def get_user_creds(self): + creds = [] + for relation in self.framework.model.relations[self.relation_name]: + app_data = relation.data[relation.app] + for unit in relation.units: + unit_data = relation.data[unit] + cred_data = { + 'access_key': app_data.get('access-key'), + 'secret_key': app_data.get('secret-key'), + 'uid': app_data.get('uid'), + 'daemon_id': unit_data.get('daemon-id')} + if all(cred_data.values()): + creds.append(cred_data) + creds = sorted(creds, key=lambda k: k['daemon_id']) + return creds + + def _on_relation_changed(self, event): + """Handle the relation-changed event.""" + if self.get_user_creds(): + self.on.gw_user_ready.emit() diff --git a/ceph-dashboard/test-requirements.txt b/ceph-dashboard/test-requirements.txt new file mode 100644 index 00000000..740ef811 --- /dev/null +++ b/ceph-dashboard/test-requirements.txt @@ -0,0 +1,13 @@ +coverage>=3.6 +flake8 +pyflakes==2.1.1 +stestr>=2.2.0 +requests>=2.18.4 +psutil +# oslo.i18n dropped py35 support +oslo.i18n<4.0.0 +git+https://github.com/openstack-charmers/zaza.git#egg=zaza 
+git+https://github.com/openstack-charmers/zaza-openstack-tests.git#egg=zaza.openstack +pytz # workaround for 14.04 pip/tox +pyudev # for ceph-* charm unit tests (not mocked?) +cffi==1.14.6; python_version < '3.6' # cffi 1.15.0 drops support for py35. diff --git a/ceph-dashboard/tests/README.md b/ceph-dashboard/tests/README.md new file mode 100644 index 00000000..3e0ccefb --- /dev/null +++ b/ceph-dashboard/tests/README.md @@ -0,0 +1,18 @@ +# Overview + +This directory provides Zaza test definitions and bundles to verify basic +deployment functionality from the perspective of this charm, its requirements +and its features, as exercised in a subset of the full OpenStack deployment +test bundle topology. + +Run the smoke tests with: + +```bash +cd ../ +tox -e build +tox -e func-smoke +``` + +For full details on functional testing of OpenStack charms please refer to +the [testing](https://docs.openstack.org/charm-guide/latest/community/software-contrib/testing.html) +section of the OpenStack Charm Guide. diff --git a/ceph-dashboard/tests/__init__.py b/ceph-dashboard/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ceph-dashboard/tests/bundles/jammy-caracal.yaml b/ceph-dashboard/tests/bundles/jammy-caracal.yaml new file mode 100644 index 00000000..1920ac4b --- /dev/null +++ b/ceph-dashboard/tests/bundles/jammy-caracal.yaml @@ -0,0 +1,73 @@ +local_overlay_enabled: False +series: jammy +variables: + openstack-origin: &openstack-origin cloud:jammy-caracal + source: &source cloud:jammy-caracal + +machines: + '0': + constraints: cores=2 mem=6G root-disk=40G virt-type=virtual-machine + '1': + constraints: cores=2 mem=6G root-disk=40G virt-type=virtual-machine + '2': + constraints: cores=2 mem=6G root-disk=40G virt-type=virtual-machine + +applications: + ceph-osd: + charm: ch:ceph-osd + num_units: 3 + storage: + osd-devices: 'loop,10G' + options: + source: *openstack-origin + osd-devices: '/dev/test-non-existent' + channel: latest/edge + to: + - '0' + - '1' + - '2' + ceph-mon: + charm: ch:ceph-mon + num_units: 3 + options: + source: *openstack-origin + monitor-count: '3' + channel: latest/edge + vault: + num_units: 1 + charm: ch:vault + channel: latest/edge + mysql-innodb-cluster: + charm: ch:mysql-innodb-cluster + constraints: mem=3072M + num_units: 3 + options: + source: *openstack-origin + channel: latest/edge + vault-mysql-router: + charm: ch:mysql-router + channel: latest/edge + ceph-dashboard: + charm: ch:ceph-dashboard + channel: latest/edge + options: + public-hostname: 'ceph-dashboard.zaza.local' + prometheus: + charm: ch:prometheus2 + num_units: 1 + series: jammy +relations: + - - 'ceph-osd:mon' + - 'ceph-mon:osd' + - - 'vault:shared-db' + - 'vault-mysql-router:shared-db' + - - 'vault-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + - - 'ceph-dashboard:dashboard' + - 'ceph-mon:dashboard' + - - 'ceph-dashboard:certificates' + - 'vault:certificates' + - - 'ceph-mon:prometheus' + - 'prometheus:target' + - - 'ceph-dashboard:prometheus' + - 'prometheus:website' diff --git a/ceph-dashboard/tests/target.py b/ceph-dashboard/tests/target.py new file mode 100644 index 00000000..63b05daf --- /dev/null +++ b/ceph-dashboard/tests/target.py @@ -0,0 +1,479 @@ +# Copyright 2021 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Encapsulating `ceph-dashboard` testing."""
+
+import json
+import uuid
+import logging
+import collections
+from base64 import b64encode
+import requests
+import tenacity
+import trustme
+
+import zaza
+import zaza.openstack.charm_tests.test_utils as test_utils
+import zaza.openstack.utilities.openstack as openstack_utils
+import zaza.utilities.networking as network_utils
+
+
+X509_CERT = '''
+MIICZDCCAg6gAwIBAgICBr8wDQYJKoZIhvcNAQEEBQAwgZIxCzAJBgNVBAYTAlVTMRMwEQYDVQQI
+EwpDYWxpZm9ybmlhMRQwEgYDVQQHEwtTYW50YSBDbGFyYTEeMBwGA1UEChMVU3VuIE1pY3Jvc3lz
+dGVtcyBJbmMuMRowGAYDVQQLExFJZGVudGl0eSBTZXJ2aWNlczEcMBoGA1UEAxMTQ2VydGlmaWNh
+dGUgTWFuYWdlcjAeFw0wNzAzMDcyMTUwMDVaFw0xMDEyMDEyMTUwMDVaMDsxFDASBgNVBAoTC2V4
+YW1wbGUuY29tMSMwIQYDVQQDExpMb2FkQmFsYW5jZXItMy5leGFtcGxlLmNvbTCBnzANBgkqhkiG
+9w0BAQEFAAOBjQAwgYkCgYEAlOhN9HddLMpE3kCjkPSOFpCkDxTNuhMhcgBkYmSEF/iJcQsLX/ga
+pO+W1SIpwqfsjzR5ZvEdtc/8hGumRHqcX3r6XrU0dESM6MW5AbNNJsBnwIV6xZ5QozB4wL4zREhw
+zwwYejDVQ/x+8NRESI3ym17tDLEuAKyQBueubgjfic0CAwEAAaNgMF4wEQYJYIZIAYb4QgEBBAQD
+AgZAMA4GA1UdDwEB/wQEAwIE8DAfBgNVHSMEGDAWgBQ7oCE35Uwn7FsjS01w5e3DA1CrrjAYBgNV
+HREEETAPgQ1tYWxsYUBzdW4uY29tMA0GCSqGSIb3DQEBBAUAA0EAGhJhep7X2hqWJWQoXFcdU7eQ
+'''
+
+X509_DATA = '''
+EwpDYWxpZm9ybmlhMRQwEgYDVQQHEwtTYW50YSBDbGFyYTEeMBwGA1UEChMVU3VuIE1pY3Jvc3lz
+dGVtcyBJbmMuMRowGAYDVQQLExFJZGVudGl0eSBTZXJ2aWNlczEcMBoGA1UEAxMTQ2VydGlmaWNh
+dGUgTWFuYWdlcjAeFw0wNzAzMDcyMjAxMTVaFw0xMDEyMDEyMjAxMTVaMDsxFDASBgNVBAoTC2V4
+YW1wbGUuY29tMSMwIQYDVQQDExpMb2FkQmFsYW5jZXItMy5leGFtcGxlLmNvbTCBnzANBgkqhkiG
+HREEETAPgQ1tYWxsYUBzdW4uY29tMA0GCSqGSIb3DQEBBAUAA0EAEgbmnOz2Rvpj9bludb9lEeVa
+OA46zRiyt4BPlbgIaFyG6P7GWSddMi/14EimQjjDbr4ZfvlEdPJmimHExZY3KQ==
+'''
+
+SAML_IDP_METADATA = '''
+
+
+
+
+
+    {cert}
+
+
+
+
+
+
+
+    {data}
+
+
+
+
+
+    urn:oasis:names:tc:SAML:2.0:nameid-format:persistent
+
+
+    urn:oasis:names:tc:SAML:2.0:nameid-format:transient
+
+
+
+
+'''
+
+
+def check_dashboard_cert(model_name=None):
+    """Wait for Dashboard to be ready.
+
+    :param model_name: Name of model to query.
+    :type model_name: str
+    """
+    logging.info("Checking dashboard: waiting for CA cert")
+    openstack_utils.block_until_ca_exists(
+        'ceph-dashboard',
+        'CERTIFICATE',
+        model_name=model_name)
+    zaza.model.block_until_all_units_idle(model_name=model_name)
+
+
+def set_grafana_url(model_name=None):
+    """Set the url for the grafana api.
+
+    :param model_name: Name of model to query.
+ :type model_name: str + """ + try: + unit = zaza.model.get_units('grafana')[0] + except KeyError: + return + zaza.model.set_application_config( + 'ceph-dashboard', + { + 'grafana-api-url': "https://{}:3000".format( + zaza.model.get_unit_public_address(unit)) + }) + + +class CephDashboardTest(test_utils.BaseCharmTest): + """Class for `ceph-dashboard` tests.""" + + REMOTE_CERT_FILE = ('/usr/local/share/ca-certificates/' + 'vault_ca_cert_dashboard.crt') + + @classmethod + def setUpClass(cls): + """Run class setup for running ceph dashboard tests.""" + super().setUpClass() + cls.application_name = 'ceph-dashboard' + cls.local_ca_cert = openstack_utils.get_remote_ca_cert_file( + cls.application_name) + + @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, + min=5, max=10), + retry=tenacity.retry_if_exception_type( + requests.exceptions.ConnectionError), + reraise=True) + def _run_request_get(self, url, verify, allow_redirects): + """Run a GET request against `url` with tenacity retries. + + :param url: url to access + :type url: str + :param verify: Path to a CA_BUNDLE file or directory with certificates + of trusted CAs or False to ignore verifying the SSL + certificate. + :type verify: Union[str, bool] + :param allow_redirects: Set to True if redirect following is allowed. + :type allow_redirects: bool + :returns: Request response + :rtype: requests.models.Response + """ + return requests.get( + url, + verify=verify, + allow_redirects=allow_redirects, + timeout=120) + + @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, + min=5, max=10), + retry=tenacity.retry_if_exception_type( + requests.exceptions.ConnectionError), + reraise=True) + def _run_request_post(self, url, verify, data, headers): + """Run a POST request against `url` with tenacity retries. + + :param url: url to access + :type url: str + :param verify: Path to a CA_BUNDLE file or directory with certificates + of trusted CAs or False to ignore verifying the SSL + certificate. + :type verify: Union[str, bool] + :param data: Data to post to url + :type data: str + :param headers: Headers to set when posting + :type headers: dict + :returns: Request response + :rtype: requests.models.Response + """ + return requests.post( + url, + data=data, + headers=headers, + verify=verify, + timeout=120) + + @tenacity.retry(wait=tenacity.wait_fixed(2), reraise=True, + stop=tenacity.stop_after_attempt(90)) + def get_master_dashboard_url(self): + """Get the url of the dashboard servicing requests. + + Only one unit serves requests at any one time, the other units + redirect to that unit. + + :returns: URL of dashboard on unit + :rtype: Union[str, None] + """ + output = zaza.model.run_on_leader( + 'ceph-mon', + 'ceph mgr services')['Stdout'] + url = json.loads(output).get('dashboard') + if url is None: + raise tenacity.RetryError(None) + return url.strip('/') + + def test_001_dashboard_units(self): + """Check dashboard units are configured correctly.""" + self.verify_ssl_config(self.local_ca_cert) + + def create_user(self, username, role='administrator'): + """Create a dashboard user. + + :param username: Username to create. + :type username: str + :param role: Role to grant to user. + :type role: str + :returns: Results from action. + :rtype: juju.action.Action + """ + action = zaza.model.run_action_on_leader( + 'ceph-dashboard', + 'add-user', + action_params={ + 'username': username, + 'role': role}) + return action + + def get_random_username(self): + """Generate a username to use in tests. 
+
+        :returns: Username
+        :rtype: str
+        """
+        return "zazauser-{}".format(uuid.uuid1())
+
+    def test_002_create_user(self):
+        """Test create user action."""
+        test_user = self.get_random_username()
+        action = self.create_user(test_user)
+        self.assertEqual(action.status, "completed")
+        self.assertTrue(action.data['results']['password'])
+        action = self.create_user(test_user)
+        # Action should fail as the user already exists
+        self.assertEqual(action.status, "failed")
+
+    def access_dashboard(self, dashboard_url):
+        """Test logging in via a dashboard url.
+
+        :param dashboard_url: Base url to log in to
+        :type dashboard_url: str
+        """
+        user = self.get_random_username()
+        action = self.create_user(username=user)
+        self.assertEqual(action.status, "completed")
+        password = action.data['results']['password']
+        path = "api/auth"
+        headers = {
+            'Content-type': 'application/json',
+            'Accept': 'application/vnd.ceph.api.v1.0+json'}
+        payload = {"username": user, "password": password}
+        verify = self.local_ca_cert
+        r = self._run_request_post(
+            "{}/{}".format(dashboard_url, path),
+            verify=verify,
+            data=json.dumps(payload),
+            headers=headers)
+        self.assertEqual(r.status_code, requests.codes.created)
+
+    def test_003_access_dashboard(self):
+        """Test logging in to the dashboard."""
+        self.access_dashboard(self.get_master_dashboard_url())
+
+    def test_004_ceph_keys(self):
+        """Check that ceph services are properly registered."""
+        status = zaza.model.get_status()
+        applications = status.applications.keys()
+        dashboard_keys = []
+        ceph_keys = []
+        if 'ceph-radosgw' in applications:
+            dashboard_keys.extend(
+                ['RGW_API_ACCESS_KEY', 'RGW_API_SECRET_KEY'])
+        if 'grafana' in applications:
+            dashboard_keys.append('GRAFANA_API_URL')
+        if 'prometheus' in applications:
+            dashboard_keys.append('PROMETHEUS_API_HOST')
+        ceph_keys.extend(
+            ['config/mgr/mgr/dashboard/{}'.format(k) for k in dashboard_keys])
+        if 'ceph-iscsi' in applications:
+            ceph_keys.append('mgr/dashboard/_iscsi_config')
+        for key in ceph_keys:
+            logging.info("Checking key {} exists".format(key))
+            check_out = zaza.model.run_on_leader(
+                'ceph-dashboard',
+                'ceph config-key exists {}'.format(key))
+            self.assertEqual(check_out['Code'], '0')
+
+    @tenacity.retry(wait=tenacity.wait_fixed(2), reraise=True,
+                    stop=tenacity.stop_after_attempt(20))
+    def wait_for_saml_dashboard(self):
+        """Wait until SSO is enabled on the Ceph dashboard."""
+        output = zaza.model.run_on_leader(
+            'ceph-mon',
+            'ceph dashboard sso status')['Stdout']
+        if 'enabled' in output:
+            return
+        raise tenacity.RetryError(None)
+
+    def test_005_saml(self):
+        """Check that the dashboard is accessible with SAML enabled."""
+        url = self.get_master_dashboard_url()
+        idp_meta = SAML_IDP_METADATA.format(
+            cert=X509_CERT,
+            data=X509_DATA,
+            host=url)
+
+        zaza.model.set_application_config(
+            'ceph-dashboard',
+            {'saml-base-url': url, 'saml-idp-metadata': idp_meta}
+        )
+
+        self.wait_for_saml_dashboard()
+
+        # Check that both login and metadata are accessible.
+        # The login endpoint may redirect to the IdP, so accept any
+        # non-error status here.
+        resp = self._run_request_get(
+            url + '/auth/saml2/login',
+            verify=self.local_ca_cert,
+            allow_redirects=False)
+        self.assertLess(resp.status_code, requests.codes.bad_request)
+
+        resp = self._run_request_get(
+            url + '/auth/saml2/metadata',
+            verify=self.local_ca_cert,
+            allow_redirects=False)
+        self.assertEqual(resp.status_code, requests.codes.ok)
+
+    def is_app_deployed(self, app_name) -> bool:
+        """Check if the provided app is deployed in the zaza model."""
+        try:
+            zaza.model.get_application(app_name)
+            return True
+        except KeyError:
+            return False
+
+    def _get_wait_for_dashboard_assert_state(
+            self, state, message_prefix) -> dict:
+        """Generate an expected workload state for the ceph-dashboard charm."""
+        assert_state = {
+            'ceph-dashboard': {
+                "workload-status": state,
+                "workload-status-message-prefix": message_prefix
+            }
+        }
+        # Telegraf has a non-standard active state message.
+        if self.is_app_deployed('telegraf'):
+            assert_state['telegraf'] = {
+                "workload-status": "active",
+                "workload-status-message-prefix": "Monitoring ceph"
+            }
+
+        return assert_state
+
+    def verify_ssl_config(self, ca_file):
+        """Check that requests validate against the configured SSL cert."""
+        units = zaza.model.get_units('ceph-mon')
+
+        for attempt in tenacity.Retrying(
+            wait=tenacity.wait_exponential(max=60),
+            reraise=True, stop=tenacity.stop_after_attempt(10)
+        ):
+            with attempt:
+                rcs = collections.defaultdict(list)
+                for unit in units:
+                    ipaddr = network_utils.format_addr(
+                        zaza.model.get_unit_public_address(unit)
+                    )
+                    req = self._run_request_get(
+                        'https://{}:8443'.format(
+                            ipaddr),
+                        verify=ca_file,
+                        allow_redirects=False)
+                    rcs[req.status_code].append(
+                        zaza.model.get_unit_public_address(unit)
+                    )
+                self.assertEqual(len(rcs[requests.codes.ok]), 1)
+                self.assertEqual(
+                    len(rcs[requests.codes.see_other]),
+                    len(units) - 1)
+
+    def _get_dashboard_hostnames_sans(self):
+        """Get a generator for Dashboard unit public addresses."""
+        yield 'ceph-dashboard'  # Include the hostname in the SANs as well.
+        # Since ceph-dashboard is a subordinate application,
+        # we use the principal application to get public addresses.
+        for unit in zaza.model.get_units('ceph-mon'):
+            addr = zaza.model.get_unit_public_address(unit)
+            if addr:
+                yield addr
+
+    def test_006_charm_config_ssl(self):
+        """Configure charm SSL certs to test the Ceph dashboard application."""
+        # Use RSA keys, not ECDSA
+        local_ca = trustme.CA(key_type=trustme.KeyType.RSA)
+        server_cert = local_ca.issue_cert(
+            *self._get_dashboard_hostnames_sans(),
+            key_type=trustme.KeyType.RSA
+        )
+
+        ssl_cert = b64encode(server_cert.cert_chain_pems[0].bytes()).decode()
+        ssl_key = b64encode(server_cert.private_key_pem.bytes()).decode()
+        ssl_ca = b64encode(local_ca.cert_pem.bytes()).decode()
+
+        # Configure local certs in charm config
+        zaza.model.set_application_config(
+            'ceph-dashboard',
+            {
+                'ssl_cert': ssl_cert, 'ssl_key': ssl_key,
+                'ssl_ca': ssl_ca
+            }
+        )
+
+        # Check application status message.
+        assert_state = self._get_wait_for_dashboard_assert_state(
+            "blocked", "Conflict: Active SSL from 'certificates' relation"
+        )
+        zaza.model.wait_for_application_states(
+            states=assert_state, timeout=500
+        )
+
+        # Remove certificates relation to trigger configured certs.
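+        # (With the relation gone the charm should fall back to the certs
+        # supplied via charm config and return to 'active'.)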
+        zaza.model.remove_relation(
+            'ceph-dashboard', 'ceph-dashboard:certificates',
+            'vault:certificates'
+        )
+
+        # Wait for status to clear
+        assert_state = self._get_wait_for_dashboard_assert_state(
+            "active", "Unit is ready"
+        )
+        zaza.model.wait_for_application_states(
+            states=assert_state, timeout=500
+        )
+
+        # Verify certificates.
+        with local_ca.cert_pem.tempfile() as ca_temp_file:
+            self.verify_ssl_config(ca_temp_file)
+
+        # Re-add certificates relation
+        zaza.model.add_relation(
+            'ceph-dashboard', 'ceph-dashboard:certificates',
+            'vault:certificates'
+        )
+
+        # Check blocked status message
+        assert_state = self._get_wait_for_dashboard_assert_state(
+            "blocked", "Conflict: Active SSL from Charm config"
+        )
+        zaza.model.wait_for_application_states(
+            states=assert_state, timeout=500
+        )
+
+        # Remove SSL config
+        zaza.model.set_application_config(
+            'ceph-dashboard',
+            {'ssl_cert': "", 'ssl_key': "", 'ssl_ca': ""}
+        )
+
+        # Wait for status to clear
+        assert_state = self._get_wait_for_dashboard_assert_state(
+            "active", "Unit is ready"
+        )
+        zaza.model.wait_for_application_states(
+            states=assert_state, timeout=500
+        )
+
+        # Verify relation SSL certs.
+        self.verify_ssl_config(self.local_ca_cert)
diff --git a/ceph-dashboard/tests/tests.yaml b/ceph-dashboard/tests/tests.yaml
new file mode 100644
index 00000000..9a2b09f2
--- /dev/null
+++ b/ceph-dashboard/tests/tests.yaml
@@ -0,0 +1,24 @@
+charm_name: ceph-dashboard
+gate_bundles:
+  - jammy-caracal
+smoke_bundles:
+  - jammy-caracal
+dev_bundles:
+  - jammy-caracal
+
+configure:
+  - zaza.openstack.charm_tests.vault.setup.auto_initialize_no_validation
+  - tests.target.check_dashboard_cert
+tests:
+  - zaza.charm_tests.lifecycle.refresh.CharmRefreshAll
+  - tests.target.CephDashboardTest
+target_deploy_status:
+  ceph-dashboard:
+    workload-status: blocked
+    workload-status-message-regex: "No certificates found|Charm config option|Unit is ready"
+  vault:
+    workload-status: blocked
+    workload-status-message-prefix: Vault needs to be initialized
+  prometheus2:
+    workload-status: active
+    workload-status-message-prefix: Ready
diff --git a/ceph-dashboard/tox.ini b/ceph-dashboard/tox.ini
new file mode 100644
index 00000000..57b83c31
--- /dev/null
+++ b/ceph-dashboard/tox.ini
@@ -0,0 +1,163 @@
+# Operator charm (with zaza): tox.ini
+
+[tox]
+envlist = pep8,py3
+skipsdist = True
+# NOTE: Avoid build/test env pollution by not enabling sitepackages.
+sitepackages = False
+# NOTE: Avoid false positives by not skipping missing interpreters.
+skip_missing_interpreters = False
+# NOTES:
+# * We avoid the new dependency resolver by pinning pip < 20.3, see
+#   https://github.com/pypa/pip/issues/9187
+# * Pinning dependencies requires tox >= 3.2.0, see
+#   https://tox.readthedocs.io/en/latest/config.html#conf-requires
+# * It is also necessary to pin virtualenv as a newer virtualenv would still
+#   lead to fetching the latest pip in the func* tox targets, see
+#   https://stackoverflow.com/a/38133283
+# * It is necessary to declare setuptools as a dependency otherwise tox will
+#   fail very early at not being able to load it. The version pinning is in
+#   line with `pip.sh`.
+requires = pip
+           virtualenv
+           setuptools
+# NOTE: https://wiki.canonical.com/engineering/OpenStack/InstallLatestToxOnOsci
+minversion = 3.2.0
+
+[testenv]
+setenv = VIRTUAL_ENV={envdir}
+         PYTHONHASHSEED=0
+         TEST_JUJU3=1
+         CHARM_DIR={envdir}
+         CHARMS_ARTIFACT_DIR={toxinidir}/..
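+# NOTE: CHARMS_ARTIFACT_DIR points one level up so the functional tests can
+# locate charms built in sibling directories of this repository.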
+ +install_command = + pip install {opts} {packages} +commands = stestr run --slowest {posargs} +allowlist_externals = + git + add-to-archive.py + bash + charmcraft + {toxinidir}/rename.sh + ls + pwd +passenv = + HOME + TERM + CS_* + OS_* + TEST_* +deps = -r{toxinidir}/test-requirements.txt + +[testenv:py35] +basepython = python3.5 +# python3.5 is irrelevant on a focal+ charm. +commands = /bin/true + +[testenv:py36] +basepython = python3.6 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py37] +basepython = python3.7 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py38] +basepython = python3.8 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py39] +basepython = python3.9 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py310] +basepython = python3.10 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py3] +basepython = python3 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:pep8] +basepython = python3 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt +commands = flake8 {posargs} src unit_tests tests + +[testenv:cover] +# Technique based heavily upon +# https://github.com/openstack/nova/blob/master/tox.ini +basepython = python3 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt +setenv = + {[testenv]setenv} + PYTHON=coverage run +commands = + coverage erase + stestr run --slowest {posargs} + coverage combine + coverage html -d cover + coverage xml -o cover/coverage.xml + coverage report + +[coverage:run] +branch = True +concurrency = multiprocessing +parallel = True +source = + . +omit = + .tox/* + */charmhelpers/* + unit_tests/* + +[testenv:venv] +basepython = python3 +commands = {posargs} + +[testenv:build] +basepython = python3 +deps = -r{toxinidir}/build-requirements.txt +commands = + charmcraft clean + charmcraft -v pack + {toxinidir}/rename.sh + +[testenv:func-noop] +basepython = python3 +commands = + functest-run-suite --help + +[testenv:func] +basepython = python3 +commands = + functest-run-suite --keep-model + +[testenv:func-dev] +basepython = python3 +commands = + functest-run-suite --keep-model --dev + +[testenv:func-smoke] +basepython = python3 +commands = + functest-run-suite --keep-model --smoke + +[testenv:func-target] +basepython = python3 +commands = + pwd + ls -l + functest-run-suite --keep-model --bundle {posargs} + +[flake8] +# Ignore E902 because the unit_tests directory is missing in the built charm. +ignore = E402,E226,E902 diff --git a/ceph-dashboard/unit_tests/__init__.py b/ceph-dashboard/unit_tests/__init__.py new file mode 100644 index 00000000..59620d3a --- /dev/null +++ b/ceph-dashboard/unit_tests/__init__.py @@ -0,0 +1,24 @@ +# Copyright 2020 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
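+
+# Package bootstrap for the unit tests: modules that are unavailable in the
+# unit test environment are stubbed out below before the charm code imports
+# them.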
+ +import sys +import unittest.mock as mock + +# Mock out secrets to make py35 happy. +sys.modules['secrets'] = mock.MagicMock() + +# Tenacity decorators need to be mocked before import +tenacity = mock.MagicMock() +tenacity.retry.side_effect = lambda *args, **kwargs: lambda x: x +sys.modules['tenacity'] = tenacity diff --git a/ceph-dashboard/unit_tests/test_ceph_dashboard_charm.py b/ceph-dashboard/unit_tests/test_ceph_dashboard_charm.py new file mode 100644 index 00000000..0c62fb79 --- /dev/null +++ b/ceph-dashboard/unit_tests/test_ceph_dashboard_charm.py @@ -0,0 +1,722 @@ +#!/usr/bin/env python3 + +# Copyright 2020 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import base64 +import json +import unittest +import sys + +sys.path.append('lib') # noqa +sys.path.append('src') # noqa + +from unittest.mock import ANY, call, patch, MagicMock + +from ops.testing import Harness, _TestingModelBackend +from ops.model import ( + ActiveStatus, + BlockedStatus, +) +from ops import framework, model +import charm + +TEST_CA = '''-----BEGIN CERTIFICATE----- +MIIC8TCCAdmgAwIBAgIUAK1dgpjTc850TgQx6y3W1brByOwwDQYJKoZIhvcNAQEL +BQAwGjEYMBYGA1UEAwwPRGl2aW5lQXV0aG9yaXR5MB4XDTIxMDYyMTExNTg1OFoX +DTIxMDcyMTExNTg1OVowGjEYMBYGA1UEAwwPRGl2aW5lQXV0aG9yaXR5MIIBIjAN +BgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA08gO8TDPARVhfVLOkYYRvCU1Rviv +RYmy+ptA82XIHO1HvAuLQ8x/4bGxE+IMKSNIl+DIF9TMdmOCvKOBgRKsoOibZNfW +MJIeQwff/8LMFWReAjOxcf9Bu2EqOqkLmUV72FU+Weta8r2kuFhgryqvz1rZeZzQ +jP6OsscoY2FVt/TnvUL5cCOSTpKuQLSr8pDms3OuFIyhFkUinpGbgJQ83xQO1tRh +MGiA87lahsLECTKXsLPyFMMPZ/QQuoDmuUHNkR2deOLcYRSWIBy23PctuV893gbM +2sFTprWo1PKXSmFUd3lg6G5wSM2XRQAP81CTA3Hp8Fj5XCpOHa4HFQLxDwIDAQAB +oy8wLTAaBgNVHREEEzARgg9EaXZpbmVBdXRob3JpdHkwDwYDVR0TAQH/BAUwAwEB +/zANBgkqhkiG9w0BAQsFAAOCAQEAKsrUnYBJyyEIPXkWaemR5vmp0G+V6Xz3KvPB +hLYKRONMba8xFwrjRv7b0DNAws8TcXXOKtRtJWbnSIMGhfVESF6ohqEdn+J1crXs +2RpJgyF2u+l6gg9Sg2ngYMQYBkzjAHYTroO/itI4AWLPLHpgygzz8ho6ykWpDoxJ +QfrrtHCl90zweYDhl4g2joIOJSZdd36+Nx9f2guItRMN87EZy1mOrKs94HlW9jwj +mAfiGaYhgFn4JH2jVcZu4wVJErh4Z0A3UNNyOq4zlAq8pHa/54jerHTDB49UQbaI +vZ5PsZhTZLy3FImSbe25xMUZNTt/2MMjsQwSjwiQuxLSuicJAA== +-----END CERTIFICATE-----''' + +TEST_CERT = '''-----BEGIN CERTIFICATE----- +MIIEdjCCA16gAwIBAgIUPmsr+BnLb6Yy22Zg6hkXn1B6KZcwDQYJKoZIhvcNAQEL +BQAwRTFDMEEGA1UEAxM6VmF1bHQgSW50ZXJtZWRpYXRlIENlcnRpZmljYXRlIEF1 +dGhvcml0eSAoY2hhcm0tcGtpLWxvY2FsKTAeFw0yMTA2MjExMTU4MzNaFw0yMjA2 +MjExMDU5MDJaMD4xPDA6BgNVBAMTM2p1anUtOGMzOTI5LXphemEtZWZjMDU2ZjE2 +NmNkLTAucHJvamVjdC5zZXJ2ZXJzdGFjazCCASIwDQYJKoZIhvcNAQEBBQADggEP +ADCCAQoCggEBANW0NkSLH53M2Aok6lxN4qSSUDTnIWeuKsemLp7FwZn6zN7fRa4V +utuGWbeYahdSIY6AG3w5opCyijM/+L4+HWoY5BWGFPj/U5V4CDF9jOerNDcoxKDy ++h+CbJ324xJrCBOjMyW8wqK/lzCadQzy6DymOtK0RBJNHXsXiGWta7UMFo2AZcqM +8OkOd0HkBeDM90dzTRSuy3pvqNBKmpwG4Hmg/ESh7VuobuHTtkD2/sGEVMGoXm7Q +qk6Yf8POzNqdPoHzvY40uZWqL3OwedGWDrnNbH4sTYb1xB7fwBthvs+LNPUDzRXA +NOYlKsfRrsiH9ELyMWUfarKXxg+7JelBIdECAwEAAaOCAWMwggFfMA4GA1UdDwEB +/wQEAwIDqDAdBgNVHSUEFjAUBggrBgEFBQcDAQYIKwYBBQUHAwIwHQYDVR0OBBYE 
+FEpYZVtgGevbnUrzWsjAXZix5zgzMEoGCCsGAQUFBwEBBD4wPDA6BggrBgEFBQcw +AoYuaHR0cDovLzE3Mi4yMC4wLjExOTo4MjAwL3YxL2NoYXJtLXBraS1sb2NhbC9j +YTCBgAYDVR0RBHkwd4IZY2VwaC1kYXNoYm9hcmQuemF6YS5sb2NhbIIfanVqdS04 +YzM5MjktemF6YS1lZmMwNTZmMTY2Y2QtMIIzanVqdS04YzM5MjktemF6YS1lZmMw +NTZmMTY2Y2QtMC5wcm9qZWN0LnNlcnZlcnN0YWNrhwSsFAD9MEAGA1UdHwQ5MDcw +NaAzoDGGL2h0dHA6Ly8xNzIuMjAuMC4xMTk6ODIwMC92MS9jaGFybS1wa2ktbG9j +YWwvY3JsMA0GCSqGSIb3DQEBCwUAA4IBAQBRUsmnc5fnNh1TSO1hVdpYBo6SRqdN +VPuG3EV6QYPGnqadzGTr3uREUyZdkOUu4nhqDONMTdlfCwg744AIlY+eo2tpiNEp +GOeFV0qZOiGRq7q2kllCTYCnh7hKCTCSN17o9QDTCL6w46cmH5OXo84BHkozdBiO +cHPQ+uJ/VZaRCuOIlVS4Y4vTDB0LpNX2nHC/tMYL0zA5+pu+N6e8OWcCgKwObdh5 +38iuimYbbwv2QWBD+4eQUbxY0+TXlhdg42Um41N8BVdPapNAQRXIHrZJC5P6fXqX +uoZ6TvbI2U0GSfpjScPP5D2F6tWK7/3nbA8bPLUJ1MKDofBVtrlA4PIH +-----END CERTIFICATE-----''' + +TEST_KEY = '''-----BEGIN RSA PRIVATE KEY----- +MIIEowIBAAKCAQEA1bQ2RIsfnczYCiTqXE3ipJJQNOchZ64qx6YunsXBmfrM3t9F +rhW624ZZt5hqF1IhjoAbfDmikLKKMz/4vj4dahjkFYYU+P9TlXgIMX2M56s0NyjE +oPL6H4JsnfbjEmsIE6MzJbzCor+XMJp1DPLoPKY60rREEk0dexeIZa1rtQwWjYBl +yozw6Q53QeQF4Mz3R3NNFK7Lem+o0EqanAbgeaD8RKHtW6hu4dO2QPb+wYRUwahe +btCqTph/w87M2p0+gfO9jjS5laovc7B50ZYOuc1sfixNhvXEHt/AG2G+z4s09QPN +FcA05iUqx9GuyIf0QvIxZR9qspfGD7sl6UEh0QIDAQABAoIBAAHqAk5s3JSiQBEf +MYYwIGaO9O70XwU5tyJgp6w+YzSI3Yrlfw9HHIxY0LbnQ5P/5VMMbLKZJY6cOsao +vQafMc5AeNKEh+2PA+Wj1Jb04+0zSF1yHQjABGOB3I0xp+kDUmgynwOohCnHA4io +6YF7L39TkdVPTgjH7gqrNEqM2hkeBWg1LY5QARDtz6Nj10LRtpQXjx/zwfGfzV2c +TGpO8ArfPLS+a7LAJ+E+iSgDUX272Fd7DYAv7xRcRe8991umpqFzbY8FDigLWEdd +3muWnRsJjricYM+2OO0QO8fyKhWCE31Dvc0xMLgrSTWoZAl8t7/WxyowevuVAm5o +oclYFU0CgYEA4M6seEB/neaqAWMIshwIcwZWaLy7oQAQagjXbKohSAXNlYqgTuv7 +glk0P6uzeQOu0ejipwga6mQIc093WSzpG1sdT4bBysHS0b44Gx/6Cv0Jf6hmJGcU +wNo3XV8b0rHZ+KWDCfr1dUjxCA9rR2fOTJniCh9Ng28cyhrFyZ6HaUcCgYEA81sj +Z3ATs2uMxZePmGMWxOJqbQ+bHaoE+UG1dTQIVO//MmanJm3+o4ciH46D2QRWkYha +4Eqb5wnPKCQjun8JDpwgkLkd0EGGG4uJ6E6YqL3I0+cs5lwMWJ9M3oOaFGGoFAoP +V9lgz5f3yVdSChoubklS4KLeCiAojW/qX1rrKCcCgYEAuALz0YqZ6xm/1lrF52Ri +1iQ93oV934854FFUZDHuBBIb8WgDSBaJTGzQA737rfaBxngl7isIPQucjyZgvrGw +LSArocjgH6L/eYeGTU2jUhNFDyU8Vle5+RGld9w93fyOOqTf2e99s379LGfSnCQw +DSt4hmiQ/iCZJCU9+Ia2uEkCgYAGsPjWPUStaEWkoTg3jnHv0/HtMcKoHCaq292r +bVTVUQwJTL1H1zprMKoFiBuj+fSPZ9pn1GVZAvIJPoUk+Z08I5rZn91r/oE7fKi8 +FH0qFp3RBcg8RUepoCey7pdr/AttEaG+XqHE037isF33HSUtryJyPsgwKxYyXWNq +X8ubfQKBgBwIpk7N754lN0i6V08Dadz0BlpfFYGO/ZfTmvVrPUxwehogtvpGnjhO +xPs1epK65/vHbBtaUDExayOEIvVhVWcnaXdx3z1aw/Hr29NlOi62x4g/RRSloLZH +08UCW9F5C8Ian6kglB5bPrZiJxcmssj7vSA+O6k9BjsO+ebaSRgk +-----END RSA PRIVATE KEY-----''' + +TEST_CHAIN = '''-----BEGIN CERTIFICATE----- +MIIDADCCAeigAwIBAgIUN93XI0mOu3wkX5YureWnMImedUMwDQYJKoZIhvcNAQEL +BQAwGjEYMBYGA1UEAwwPRGl2aW5lQXV0aG9yaXR5MB4XDTIxMDYyMzEwMzcwMFoX +DTMyMDYwNjEwMzcwMFowRTFDMEEGA1UEAxM6VmF1bHQgSW50ZXJtZWRpYXRlIENl +cnRpZmljYXRlIEF1dGhvcml0eSAoY2hhcm0tcGtpLWxvY2FsKTCCASIwDQYJKoZI +hvcNAQEBBQADggEPADCCAQoCggEBAL1t5WYd7IVsfT5d4uztBhOPBA0EtrKw81Fe +Rp2TNdPUkkKSQxOYKV6F1ndyD88Nxx1mcxwi8U28b1azTNVaPRjSLxyDCOD0L5qk +LaFqppTWv8vLcjjlp6Ed3BLXoVMThWwMxJm/VSPuEXnWN5GrMR97Ae8vmnlrYDTF +re67j0zjDPhkyevVQ5+pLeZ/saQtNNeal1qzfWMPDQK0COfXolXmlmZGzhap742e +x4gE6alyYYrpTPA6CL9NbGhNovuz/LJvHN8fIdfw3jX+GW+yy312xDG+67PCW342 +VDrPcG+Vq/BhEPwL3blYgbmtNPDQ1plWJqoPqoJzbCxLesXZHP8CAwEAAaMTMBEw +DwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEARv1bBEgwlDG3PuhF +Zt5kIeDLEnjFH2STz4LLERZXdKhTzuaV08QvYr+cL8XHi4Sop5BDkAuQq8mVC/xj +7DoW/Lb9SnxfsCIu6ugwKLfJ2El6r23kDzTauIaovDYNSEo21yBYALsFZjzMJotJ +XLpLklASTAdMmLP703hcgKgY8yxzS3WEXA9jekmn6z0y3+UZjIF5W9dW9gaQk0Eg 
+vsLN7xzG9TmQfk1OHUj7y+cEbYr0M3Jdif/gG8Kl2SuaYUmvU6leA5+oZVF/Inle +jdSckxCCd1rbvGd60AY5azD1pAuazijwW9Y9Icv2tS5oZI/4MN7YJEssj/ZLjEA7 +Alm0ZQ== +-----END CERTIFICATE-----''' + + +class CharmTestCase(unittest.TestCase): + + def setUp(self, obj, patches): + super().setUp() + self.patches = patches + self.obj = obj + self.patch_all() + + def patch(self, method): + _m = patch.object(self.obj, method) + mock = _m.start() + self.addCleanup(_m.stop) + return mock + + def patch_all(self): + for method in self.patches: + setattr(self, method, self.patch(method)) + + +class _CephDashboardCharm(charm.CephDashboardCharm): + + def _get_bind_ip(self): + return '10.0.0.10' + + def _clean_ssl_conf(self, _event): + return # empty stub + + def _is_relation_active(self, _event): + return True + + +class TestCephDashboardCharmBase(CharmTestCase): + + PATCHES = [ + 'ceph_utils', + 'socket', + 'subprocess', # charm's subprocess import + ] + + def setUp(self): + super().setUp(charm, self.PATCHES) + self.harness = self.get_harness() + + self.socket.gethostname.return_value = 'server1' + self.socket.getfqdn.return_value = 'server1.local' + + def get_harness(self): + initial_config = {'grafana-api-url': None} + _harness = Harness( + _CephDashboardCharm, + ) + + # BEGIN: Workaround until network_get is implemented + class _TestingOPSModelBackend(_TestingModelBackend): + + def network_get(self, endpoint_name, relation_id=None): + network_data = { + 'bind-addresses': [{ + 'interface-name': 'eth0', + 'addresses': [{ + 'cidr': '10.0.0.0/24', + 'value': '10.0.0.10'}]}], + 'ingress-addresses': ['10.0.0.10'], + 'egress-subnets': ['10.0.0.0/24']} + return network_data + + _harness._backend = _TestingOPSModelBackend( + _harness._unit_name, _harness._meta) + _harness._model = model.Model( + _harness._meta, + _harness._backend) + _harness._framework = framework.Framework( + ":memory:", + _harness._charm_dir, + _harness._meta, + _harness._model) + # END Workaround + _harness.update_config(initial_config) + return _harness + + def test_init(self): + self.harness.begin() + self.assertFalse(self.harness.charm._stored.is_started) + + @patch('ceph_dashboard_commands.subprocess') + @patch('charm_option.ch_host') + def test_charm_config(self, option_ch_host, subprocess): + self.ceph_utils.is_dashboard_enabled.return_value = True + option_ch_host.cmp_pkgrevno.return_value = 0 + basic_boolean = [ + ('enable-password-policy', 'set-pwd-policy-enabled'), + ('password-policy-check-length', + 'set-pwd-policy-check-length-enabled'), + ('password-policy-check-oldpwd', + 'set-pwd-policy-check-oldpwd-enabled'), + ('password-policy-check-username', + 'set-pwd-policy-check-username-enabled'), + ('password-policy-check-exclusion-list', + 'set-pwd-policy-check-exclusion-list-enabled'), + ('password-policy-check-complexity', + 'set-pwd-policy-check-complexity-enabled'), + ('password-policy-check-sequential-chars', + 'set-pwd-policy-check-sequential-chars-enabled'), + ('password-policy-check-repetitive-chars', + 'set-pwd-policy-check-repetitive-chars-enabled'), + ('audit-api-enabled', + 'set-audit-api-enabled'), + ('audit-api-log-payload', + 'set-audit-api-log-payload')] + expect = [] + for charm_option, ceph_option in basic_boolean: + expect.append((charm_option, True, [ceph_option, 'True'])) + expect.append((charm_option, False, [ceph_option, 'False'])) + expect.extend([ + ('debug', True, ['debug', 'enable']), + ('debug', False, ['debug', 'disable'])]) + expect.extend([ + ('motd', 'warning|5w|enough is enough', ['motd', 'warning', '5w', + 'enough is 
enough']), + ('motd', '', ['motd', 'clear'])]) + base_cmd = ['ceph', 'dashboard'] + for charm_option, charm_value, expected_options in expect: + _harness = self.get_harness() + rel_id = _harness.add_relation('dashboard', 'ceph-mon') + _harness.add_relation_unit( + rel_id, + 'ceph-mon/0') + _harness.update_relation_data( + rel_id, + 'ceph-mon/0', + { + 'mon-ready': 'True'}) + _harness.begin() + _harness.set_leader(True) + _harness.charm.is_ceph_dashboard_ssl_key_cert_same \ + = lambda *_: True + expected_cmd = base_cmd + expected_options + subprocess.check_output.reset_mock() + _harness.update_config( + key_values={charm_option: charm_value}) + subprocess.check_output.assert_called_once_with( + expected_cmd, + stderr=subprocess.STDOUT) + + def test__on_ca_available(self): + rel_id = self.harness.add_relation('certificates', 'vault') + self.harness.begin() + self.harness.add_relation_unit( + rel_id, + 'vault/0') + self.harness.update_relation_data( + rel_id, + 'vault/0', + {'ingress-address': '10.0.0.3'}) + rel_data = self.harness.get_relation_data(rel_id, 'ceph-dashboard/0') + self.assertEqual( + rel_data['cert_requests'], + '{"server1.local": {"sans": ["10.0.0.10", "server1"]}}') + + def test_check_dashboard(self): + socket_mock = MagicMock() + self.socket.socket.return_value = socket_mock + socket_mock.connect_ex.return_value = 0 + self.ceph_utils.is_dashboard_enabled.return_value = True + self.harness.begin() + self.assertEqual( + self.harness.charm.check_dashboard(), + BlockedStatus('No certificates found. Please add a certifcates ' + 'relation or provide via charm config')) + self.harness.update_config( + key_values={ + 'ssl_key': base64.b64encode(TEST_KEY.encode("utf-8")), + 'ssl_cert': base64.b64encode(TEST_CERT.encode("utf-8")), + 'ssl_ca': base64.b64encode(TEST_CA.encode("utf-8"))}) + self.assertEqual( + self.harness.charm.check_dashboard(), + ActiveStatus()) + + socket_mock.connect_ex.return_value = 1 + self.assertEqual( + self.harness.charm.check_dashboard(), + BlockedStatus('Dashboard not responding')) + + socket_mock.connect_ex.return_value = 0 + self.ceph_utils.is_dashboard_enabled.return_value = False + self.assertEqual( + self.harness.charm.check_dashboard(), + BlockedStatus('Dashboard is not enabled')) + + def test_check_dashboard_grafana(self): + socket_mock = MagicMock() + self.socket.socket.return_value = socket_mock + socket_mock.connect_ex.return_value = 0 + self.ceph_utils.is_dashboard_enabled.return_value = True + rel_id = self.harness.add_relation('grafana-dashboard', 'grafana') + self.harness.begin() + self.harness.add_relation_unit( + rel_id, + 'grafana/0') + self.harness.update_config( + key_values={ + 'ssl_key': base64.b64encode(TEST_KEY.encode("utf-8")), + 'ssl_cert': base64.b64encode(TEST_CERT.encode("utf-8")), + 'ssl_ca': base64.b64encode(TEST_CA.encode("utf-8"))}) + self.assertEqual( + self.harness.charm.check_dashboard(), + BlockedStatus('Charm config option grafana-api-url not set')) + + def test_kick_dashboard(self): + self.harness.begin() + self.harness.charm.kick_dashboard() + self.ceph_utils.mgr_disable_dashboard.assert_called_once_with() + self.ceph_utils.mgr_enable_dashboard.assert_called_once_with() + + @patch('ceph_dashboard_commands.subprocess') + def test_configure_dashboard(self, subprocess): + self.ceph_utils.is_dashboard_enabled.return_value = True + rel_id = self.harness.add_relation('dashboard', 'ceph-mon') + self.harness.begin() + self.harness.add_relation_unit( + rel_id, + 'ceph-mon/0') + self.harness.update_relation_data( + rel_id, + 
'ceph-mon/0', + { + 'mon-ready': 'True'}) + self.ceph_utils.mgr_config_set.reset_mock() + self.ceph_utils.is_dashboard_enabled.return_value = True + self.harness.set_leader() + self.harness.charm._configure_dashboard(None) + self.assertFalse(self.ceph_utils.mgr_enable_dashboard.called) + self.ceph_utils.mgr_config_set.assert_called_once_with( + 'mgr/dashboard/server1/server_addr', + '10.0.0.10') + + self.ceph_utils.mgr_config_set.reset_mock() + self.ceph_utils.is_dashboard_enabled.return_value = False + self.harness.set_leader() + self.harness.charm._configure_dashboard(None) + self.ceph_utils.mgr_enable_dashboard.assert_called_once_with() + self.ceph_utils.mgr_config_set.assert_called_once_with( + 'mgr/dashboard/server1/server_addr', + '10.0.0.10') + + def test__get_bind_ip(self): + self.harness.begin() + self.assertEqual( + self.harness.charm._get_bind_ip(), + '10.0.0.10') + + @patch('ceph_dashboard_commands.check_ceph_dashboard_ssl_configured') + @patch('socket.gethostname') + def test_certificates_relation(self, _gethostname, ssl_configured): + self.ceph_utils.is_dashboard_enabled.return_value = True + ssl_configured.return_value = False + mock_TLS_KEY_PATH = MagicMock() + mock_TLS_CERT_PATH = MagicMock() + mock_TLS_VAULT_CA_CERT_PATH = MagicMock() + _gethostname.return_value = 'server1' + cert_rel_id = self.harness.add_relation('certificates', 'vault') + dash_rel_id = self.harness.add_relation('dashboard', 'ceph-mon') + lb_rel_id = self.harness.add_relation( + 'loadbalancer', + 'openstack-loadbalancer') + self.harness.begin() + self.harness.set_leader() + self.harness.charm.TLS_CERT_PATH = mock_TLS_CERT_PATH + self.harness.charm.TLS_VAULT_CA_CERT_PATH = mock_TLS_VAULT_CA_CERT_PATH + self.harness.charm.TLS_KEY_PATH = mock_TLS_KEY_PATH + self.harness.add_relation_unit( + dash_rel_id, + 'ceph-mon/0') + self.harness.update_relation_data( + dash_rel_id, + 'ceph-mon/0', + { + 'mon-ready': 'True'}) + self.harness.add_relation_unit( + cert_rel_id, + 'vault/0') + self.harness.add_relation_unit( + lb_rel_id, + 'openstack-loadbalancer/0') + # If lb relation is present but has not responded then certs should + # not have been requested yet. + self.assertEqual( + self.harness.get_relation_data( + cert_rel_id, + 'ceph-dashboard/0'), + {}) + self.harness.update_relation_data( + lb_rel_id, + 'openstack-loadbalancer', + { + 'frontends': json.dumps( + { + 'ceph-dashboard': { + 'admin': { + 'ip': ['10.20.0.101'], + 'port': 8443, + 'protocol': 'http'}, + 'internal': { + 'ip': ['10.30.0.101'], + 'port': 8443, + 'protocol': 'http'}, + 'public': { + 'ip': ['10.10.0.101'], + 'port': 8443, + 'protocol': 'http'}}})}) + # Reemit deferred events. 
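+        # (The certificate request was deferred until the loadbalancer
+        # published its frontends; reemitting lets the deferred request
+        # run now.)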
+ self.harness.framework.reemit() + self.assertNotEqual( + self.harness.get_relation_data( + cert_rel_id, + 'ceph-dashboard/0'), + {}) + self.harness.update_relation_data( + cert_rel_id, + 'vault/0', + { + 'ceph-dashboard_0.server.cert': TEST_CERT, + 'ceph-dashboard_0.server.key': TEST_KEY, + 'chain': TEST_CHAIN, + 'ca': TEST_CA}) + mock_TLS_CERT_PATH.write_bytes.assert_called_once() + mock_TLS_VAULT_CA_CERT_PATH.write_bytes.assert_called_once() + mock_TLS_KEY_PATH.write_bytes.assert_called_once() + self.subprocess.check_call.assert_called_once_with( + ['update-ca-certificates']) + self.ceph_utils.dashboard_set_ssl_certificate.assert_has_calls([ + call(mock_TLS_CERT_PATH, hostname='server1'), + call(mock_TLS_CERT_PATH)]) + self.ceph_utils.dashboard_set_ssl_certificate_key.assert_has_calls([ + call(mock_TLS_KEY_PATH, hostname='server1'), + call(mock_TLS_KEY_PATH)]) + self.ceph_utils.mgr_config_set.assert_has_calls([ + call('mgr/dashboard/standby_behaviour', 'redirect'), + call('mgr/dashboard/ssl', 'true')]) + self.ceph_utils.mgr_disable_dashboard.assert_called_once_with() + self.ceph_utils.mgr_enable_dashboard.assert_called_once_with() + + @patch('ceph_dashboard_commands.check_ceph_dashboard_ssl_configured') + def test_certificates_from_config(self, ssl_configured): + self.ceph_utils.is_dashboard_enabled.return_value = True + ssl_configured.return_value = False + mock_TLS_KEY_PATH = MagicMock() + mock_TLS_CERT_PATH = MagicMock() + mock_TLS_CHARM_CA_CERT_PATH = MagicMock() + dash_rel_id = self.harness.add_relation('dashboard', 'ceph-mon') + self.harness.begin() + self.harness.set_leader() + self.harness.add_relation_unit( + dash_rel_id, + 'ceph-mon/0') + self.harness.update_relation_data( + dash_rel_id, + 'ceph-mon/0', + { + 'mon-ready': 'True'}) + self.harness.charm.TLS_CERT_PATH = mock_TLS_CERT_PATH + self.harness.charm.TLS_CHARM_CA_CERT_PATH = mock_TLS_CHARM_CA_CERT_PATH + self.harness.charm.TLS_KEY_PATH = mock_TLS_KEY_PATH + self.subprocess.check_call.reset_mock() + self.harness.update_config( + key_values={ + 'ssl_key': base64.b64encode(TEST_KEY.encode("utf-8")), + 'ssl_cert': base64.b64encode(TEST_CERT.encode("utf-8")), + 'ssl_ca': base64.b64encode(TEST_CA.encode("utf-8"))}) + self.subprocess.check_call.assert_called_once_with( + ['update-ca-certificates']) + self.ceph_utils.dashboard_set_ssl_certificate.assert_has_calls([ + call(mock_TLS_CERT_PATH, hostname='server1'), + call(mock_TLS_CERT_PATH)]) + self.ceph_utils.dashboard_set_ssl_certificate_key.assert_has_calls([ + call(mock_TLS_KEY_PATH, hostname='server1'), + call(mock_TLS_KEY_PATH)]) + self.ceph_utils.mgr_config_set.assert_has_calls([ + call('mgr/dashboard/standby_behaviour', 'redirect'), + call('mgr/dashboard/ssl', 'true')]) + self.ceph_utils.mgr_disable_dashboard.assert_called_once_with() + self.ceph_utils.mgr_enable_dashboard.assert_called_once_with() + + @patch('ceph_dashboard_commands.subprocess') + def test_rados_gateway(self, subprocess): + self.ceph_utils.is_dashboard_enabled.return_value = True + mon_rel_id = self.harness.add_relation('dashboard', 'ceph-mon') + rel_id = self.harness.add_relation('radosgw-dashboard', 'ceph-radosgw') + self.harness.begin() + self.harness.set_leader() + self.harness.add_relation_unit( + mon_rel_id, + 'ceph-mon/0') + self.harness.update_relation_data( + mon_rel_id, + 'ceph-mon/0', + { + 'mon-ready': 'True'}) + self.harness.add_relation_unit( + rel_id, + 'ceph-radosgw/0') + self.harness.add_relation_unit( + rel_id, + 'ceph-radosgw/1') + self.harness.update_relation_data( + rel_id, + 
'ceph-radosgw/0', + { + 'daemon-id': 'juju-80416c-zaza-7af97ef8a776-3'}) + self.harness.update_relation_data( + rel_id, + 'ceph-radosgw/1', + { + 'daemon-id': 'juju-80416c-zaza-7af97ef8a776-4'}) + self.harness.update_relation_data( + rel_id, + 'ceph-radosgw', + { + 'access-key': 'XNUZVPL364U0BL1OXWJZ', + 'secret-key': 'SgBo115xJcW90nkQ5EaNQ6fPeyeUUT0GxhwQbLFo', + 'uid': 'radosgw-user-9'}) + subprocess.check_output.assert_has_calls([ + call(['ceph', 'dashboard', 'set-rgw-api-access-key', '-i', ANY], + stderr=subprocess.STDOUT), + call().decode('UTF-8'), + call(['ceph', 'dashboard', 'set-rgw-api-secret-key', '-i', ANY], + stderr=subprocess.STDOUT), + call().decode('UTF-8'), + ]) + + @patch('ceph_dashboard_commands.subprocess') + def test_rados_gateway_multi_relations_pacific(self, subprocess): + self.ceph_utils.is_dashboard_enabled.return_value = True + rel_id1 = self.harness.add_relation('radosgw-dashboard', 'ceph-eu') + rel_id2 = self.harness.add_relation('radosgw-dashboard', 'ceph-us') + mon_rel_id = self.harness.add_relation('dashboard', 'ceph-mon') + self.harness.begin() + self.harness.set_leader() + self.harness.add_relation_unit( + mon_rel_id, + 'ceph-mon/0') + self.harness.update_relation_data( + mon_rel_id, + 'ceph-mon/0', + { + 'mon-ready': 'True'}) + self.harness.add_relation_unit( + rel_id1, + 'ceph-eu/0') + self.harness.add_relation_unit( + rel_id2, + 'ceph-us/0') + self.harness.update_relation_data( + rel_id1, + 'ceph-eu/0', + { + 'daemon-id': 'juju-80416c-zaza-7af97ef8a776-3'}) + self.harness.update_relation_data( + rel_id2, + 'ceph-us/0', + { + 'daemon-id': 'juju-dddddd-zaza-sdfsfsfs-4'}) + self.harness.update_relation_data( + rel_id1, + 'ceph-eu', + { + 'access-key': 'XNUZVPL364U0BL1OXWJZ', + 'secret-key': 'SgBo115xJcW90nkQ5EaNQ6fPeyeUUT0GxhwQbLFo', + 'uid': 'radosgw-user-9'}) + subprocess.check_output.reset_mock() + self.harness.update_relation_data( + rel_id2, + 'ceph-us', + { + 'access-key': 'JGHKJGDKJGJGJHGYYYYM', + 'secret-key': 'iljkdfhHKHKd88LKxNLSKDiijfjfjfldjfjlf44', + 'uid': 'radosgw-user-10'}) + subprocess.check_output.assert_has_calls([ + call(['ceph', 'dashboard', 'set-rgw-api-access-key', '-i', ANY], + stderr=subprocess.STDOUT), + call().decode('UTF-8'), + call(['ceph', 'dashboard', 'set-rgw-api-secret-key', '-i', ANY], + stderr=subprocess.STDOUT), + call().decode('UTF-8'), + ]) + + @patch('ceph_dashboard_commands.subprocess') + def test_rados_gateway_multi_relations_octopus(self, subprocess): + rel_id1 = self.harness.add_relation('radosgw-dashboard', 'ceph-eu') + rel_id2 = self.harness.add_relation('radosgw-dashboard', 'ceph-us') + self.harness.begin() + self.harness.set_leader() + self.harness.add_relation_unit( + rel_id1, + 'ceph-eu/0') + self.harness.add_relation_unit( + rel_id2, + 'ceph-us/0') + self.harness.update_relation_data( + rel_id1, + 'ceph-eu/0', + { + 'daemon-id': 'juju-80416c-zaza-7af97ef8a776-3'}) + self.harness.update_relation_data( + rel_id2, + 'ceph-us/0', + { + 'daemon-id': 'juju-dddddd-zaza-sdfsfsfs-4'}) + self.harness.update_relation_data( + rel_id1, + 'ceph-eu', + { + 'access-key': 'XNUZVPL364U0BL1OXWJZ', + 'secret-key': 'SgBo115xJcW90nkQ5EaNQ6fPeyeUUT0GxhwQbLFo', + 'uid': 'radosgw-user-9'}) + subprocess.check_output.reset_mock() + self.harness.update_relation_data( + rel_id2, + 'ceph-us', + { + 'access-key': 'JGHKJGDKJGJGJHGYYYYM', + 'secret-key': 'iljkdfhHKHKd88LKxNLSKDiijfjfjfldjfjlf44', + 'uid': 'radosgw-user-10'}) + self.assertFalse(subprocess.check_output.called) + + @patch.object(charm.secrets, 'choice') + def 
test__gen_user_password(self, _choice): + self.harness.begin() + _choice.return_value = 'r' + self.assertEqual( + self.harness.charm._gen_user_password(), + 'rrrrrrrrrrrr') + + @patch('ceph_dashboard_commands.subprocess') + @patch.object(charm.tempfile, 'NamedTemporaryFile') + @patch.object(charm.secrets, 'choice') + def test_add_user_action(self, _choice, _NTFile, subprocess): + subprocess.check_output.return_value = b'Byte String' + _NTFile.return_value.__enter__.return_value.name = 'tempfilename' + _choice.return_value = 'r' + self.harness.begin() + action_event = MagicMock() + action_event.params = { + 'username': 'auser', + 'role': 'administrator'} + self.harness.charm._add_user_action(action_event) + subprocess.check_output.assert_called_once_with( + ['ceph', 'dashboard', 'ac-user-create', '--enabled', '-i', + 'tempfilename', 'auser', 'administrator'], + stderr=subprocess.STDOUT + ) + + @patch('ceph_dashboard_commands.subprocess') + def test__delete_user_action(self, subprocess): + subprocess.check_output.return_value = b'' + self.harness.begin() + action_event = MagicMock() + action_event.params = { + 'username': 'auser'} + self.harness.charm._delete_user_action(action_event) + subprocess.check_output.assert_called_once_with( + ['ceph', 'dashboard', 'ac-user-delete', 'auser'], + stderr=subprocess.STDOUT) + + @patch('ceph_dashboard_commands.subprocess') + def test_saml(self, subprocess): + subprocess.check_output.return_value = b'' + self.harness.begin() + self.harness.charm.PACKAGES.append('python3-onelogin-saml2') + self.harness.charm._configure_saml() + subprocess.check_output.assert_not_called() + + base_url = 'https://saml-base' + idp_meta = 'file://idp.xml' + username_attr = 'uid' + entity_id = 'some_id' + + self.harness.update_config( + key_values={ + 'saml-base-url': base_url, + 'saml-idp-metadata': idp_meta, + 'saml-username-attribute': username_attr, + 'saml-idp-entity-id': entity_id, + } + ) + + self.harness.set_leader() + self.harness.charm._configure_saml() + subprocess.check_output.assert_called_with( + ['ceph', 'dashboard', 'sso', 'setup', 'saml2', + base_url, idp_meta, username_attr, entity_id], + stderr=ANY + ) diff --git a/ceph-dashboard/unit_tests/test_ceph_dashboard_commands.py b/ceph-dashboard/unit_tests/test_ceph_dashboard_commands.py new file mode 100644 index 00000000..aa595a31 --- /dev/null +++ b/ceph-dashboard/unit_tests/test_ceph_dashboard_commands.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python3 +# Copyright 2024 Canonical +# See LICENSE file for licensing details. 
+#
+# Learn more at: https://juju.is/docs/sdk
+import unittest
+import subprocess
+import tempfile
+import os
+
+from ceph_dashboard_commands import validate_ssl_keypair
+
+
+class TestSSLValidation(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        """Generate test certificates and keys for all test cases."""
+        cls.valid_cert, cls.valid_key = cls._generate_cert_key_pair()
+        cls.another_cert, cls.another_key = cls._generate_cert_key_pair()
+        cls.malformed_cert = (
+            b"-----BEGIN CERTIFICATE-----\nMalform\n-----END CERTIFICATE-----"
+        )
+        cls.malformed_key = (
+            b"-----BEGIN PRIVATE KEY-----\nMalform\n-----END PRIVATE KEY-----"
+        )
+
+    @staticmethod
+    def _generate_cert_key_pair(days=1):
+        """Generate a test certificate and private key pair."""
+        # create a key tmpfile
+        with tempfile.NamedTemporaryFile(mode="wb", delete=False) as key_file:
+            subprocess.run(
+                [
+                    "openssl",
+                    "genpkey",
+                    "-algorithm",
+                    "RSA",
+                    "-out",
+                    key_file.name,
+                ],
+                check=True,
+                capture_output=True,
+            )
+            # openssl config file
+            with tempfile.NamedTemporaryFile(
+                mode="w", delete=False
+            ) as config_file:
+                config_content = """
+                [req]
+                default_bits = 2048
+                prompt = no
+                default_md = sha256
+                distinguished_name = dn
+                x509_extensions = v3_req
+
+                [dn]
+                CN = test.local
+
+                [v3_req]
+                basicConstraints = CA:FALSE
+                keyUsage = nonRepudiation, digitalSignature, keyEncipherment
+                subjectAltName = @alt_names
+
+                [alt_names]
+                DNS.1 = test.local
+                """
+                config_file.write(config_content)
+                config_file.flush()
+
+                # create certificate with config file, honouring the
+                # requested validity period
+                with tempfile.NamedTemporaryFile(
+                    delete=False, mode="wb"
+                ) as cert_file:
+                    subprocess.run(
+                        [
+                            "openssl",
+                            "req",
+                            "-new",
+                            "-x509",
+                            "-days",
+                            str(days),
+                            "-key",
+                            key_file.name,
+                            "-out",
+                            cert_file.name,
+                            "-config",
+                            config_file.name,
+                        ],
+                        check=True,
+                        capture_output=True,
+                    )
+                    with open(cert_file.name, "rb") as cert_f:
+                        cert_content = cert_f.read()
+                    with open(key_file.name, "rb") as key_f:
+                        key_content = key_f.read()
+
+                    os.unlink(cert_file.name)
+                    os.unlink(config_file.name)
+                    os.unlink(key_file.name)
+
+        return cert_content, key_content
+
+    def test_valid_cert_key_pair(self):
+        """Test validation of a valid certificate and key pair."""
+        is_valid, message = validate_ssl_keypair(
+            self.valid_cert, self.valid_key
+        )
+        self.assertTrue(is_valid)
+
+    def test_mismatched_pair(self):
+        """Test validation with mismatched certificate and key."""
+        is_valid, message = validate_ssl_keypair(
+            self.valid_cert, self.another_key
+        )
+        self.assertFalse(is_valid)
+
+    def test_malformed_cert(self):
+        """Test validation with malformed certificate."""
+        is_valid, message = validate_ssl_keypair(
+            self.malformed_cert, self.valid_key
+        )
+        self.assertFalse(is_valid)
+
+    def test_malformed_key(self):
+        """Test validation with malformed key."""
+        is_valid, message = validate_ssl_keypair(
+            self.valid_cert, self.malformed_key
+        )
+        self.assertFalse(is_valid)
+
+    def test_empty_inputs(self):
+        """Test validation with empty inputs."""
+        is_valid, message = validate_ssl_keypair(b"", b"")
+        self.assertFalse(is_valid)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/ceph-dashboard/unit_tests/test_interface_dashboard.py b/ceph-dashboard/unit_tests/test_interface_dashboard.py
new file mode 100644
index 00000000..0d7b4e03
--- /dev/null
+++ b/ceph-dashboard/unit_tests/test_interface_dashboard.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python3
+
+# Copyright 2020 Canonical Ltd.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import sys +sys.path.append('lib') # noqa +sys.path.append('src') # noqa +from ops.testing import Harness +from ops.charm import CharmBase, CharmMeta +import interface_dashboard + + +class MyCharm(CharmBase): + + def __init__(self, *args): + super().__init__(*args) + self.framework.meta = CharmMeta.from_yaml(metadata=''' +name: my-charm +requires: + dashboard: + interface: ceph-dashboard + scope: container +''') + + self.seen_events = [] + self.mon = interface_dashboard.CephDashboardRequires( + self, + 'dashboard') + + self.framework.observe( + self.mon.on.mon_ready, + self._log_event) + + def _log_event(self, event): + self.seen_events.append(type(event).__name__) + + +class TestCephDashboardRequires(unittest.TestCase): + + def setUp(self): + super().setUp() + self.harness = Harness( + MyCharm, + ) + + def add_dashboard_relation(self): + rel_id = self.harness.add_relation('dashboard', 'ceph-mon') + self.harness.add_relation_unit( + rel_id, + 'ceph-mon/0') + return rel_id + + def test_relation_name(self): + self.harness.begin() + self.assertEqual( + self.harness.charm.mon.relation_name, + 'dashboard') + + def test_dashboard_relation(self): + self.harness.begin() + self.assertIsNone( + self.harness.charm.mon.dashboard_relation) + rel_id = self.add_dashboard_relation() + self.assertEqual( + self.harness.charm.mon.dashboard_relation.id, + rel_id) + + def test_on_changed(self): + self.harness.begin() + # No MonReadyEvent as relation is absent + self.assertEqual( + self.harness.charm.seen_events, + []) + rel_id = self.add_dashboard_relation() + # No MonReadyEvent as ceph-mon has not declared it is ready. + self.assertEqual( + self.harness.charm.seen_events, + []) + self.harness.update_relation_data( + rel_id, + 'ceph-mon/0', + {'mon-ready': 'True'}) + self.assertEqual( + self.harness.charm.seen_events, + ['MonReadyEvent']) + self.assertTrue( + self.harness.charm.mon.mons_ready) + + def test_on_changed_not_ready_unit(self): + self.harness.begin() + # No MonReadyEvent as relation is absent + self.assertEqual( + self.harness.charm.seen_events, + []) + rel_id = self.add_dashboard_relation() + # No MonReadyEvent as ceph-mon has not declared it is ready. + self.assertEqual( + self.harness.charm.seen_events, + []) + self.harness.update_relation_data( + rel_id, + 'ceph-mon/0', + {}) + self.assertEqual( + self.harness.charm.seen_events, + []) + self.assertFalse( + self.harness.charm.mon.mons_ready) diff --git a/ceph-dashboard/unit_tests/test_interface_grafana_dashboard.py b/ceph-dashboard/unit_tests/test_interface_grafana_dashboard.py new file mode 100644 index 00000000..89bf0d7c --- /dev/null +++ b/ceph-dashboard/unit_tests/test_interface_grafana_dashboard.py @@ -0,0 +1,182 @@ +#!/usr/bin/env python3 + +# Copyright 2021 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import hashlib +import json +import unittest +import sys +sys.path.append('lib') # noqa +sys.path.append('src') # noqa +from ops.testing import Harness +from ops.charm import CharmBase +import interface_grafana_dashboard + + +class TestGrafanaDashboardProvides(unittest.TestCase): + + class MyCharm(CharmBase): + + def __init__(self, *args): + super().__init__(*args) + self.seen_events = [] + self.grafana_dashboard = \ + interface_grafana_dashboard.GrafanaDashboardProvides( + self, + 'grafana-dashboard') + self.seen_events = [] + + self.framework.observe( + self.grafana_dashboard.on.dash_ready, + self._log_event) + + def _log_event(self, event): + self.seen_events.append(type(event).__name__) + + def setUp(self): + super().setUp() + self.harness = Harness( + self.MyCharm, + meta=''' +name: my-charm +provides: + grafana-dashboard: + interface: grafana-dashboard +''' + ) + + def add_grafana_dashboard_relation(self): + rel_id = self.harness.add_relation( + 'grafana-dashboard', + 'grafana') + self.harness.add_relation_unit( + rel_id, + 'grafana/0') + self.harness.update_relation_data( + rel_id, + 'grafana/0', + {'ingress-address': '10.0.0.3'}) + return rel_id + + def test_init(self): + self.harness.begin() + self.assertEqual( + self.harness.charm.grafana_dashboard.relation_name, + 'grafana-dashboard') + + def test_on_changed(self): + self.harness.begin() + # No GrafanaDashboardEvent as relation is absent + self.assertEqual( + self.harness.charm.seen_events, + []) + self.add_grafana_dashboard_relation() + self.assertEqual( + self.harness.charm.seen_events, + ['GrafanaDashboardEvent']) + + def get_requests_on_relation(self, rel_data): + requests = {k: v for k, v in rel_data.items() + if k.startswith('request')} + return requests + + def test_register_dashboard(self): + self.harness.begin() + rel_id = self.add_grafana_dashboard_relation() + dashboard = { + 'uid': '123', + 'foo': 'ba1'} + digest = hashlib.md5(json.dumps(dashboard).encode("utf8")).hexdigest() + self.harness.charm.grafana_dashboard.register_dashboard( + 'my-dash.json', + dashboard) + rel_data = self.harness.get_relation_data( + rel_id, + 'my-charm/0') + requests = self.get_requests_on_relation(rel_data) + self.assertEqual( + len(requests), + 1) + key = list(requests.keys())[0] + expect = { + "dashboard": { + "digest": digest, + "foo": "ba1", + "source_model": None, # Model name appears as None in testing + # harness + "uid": "123"}, + "name": "my-dash.json", + "request_id": key.replace("request_", "")} + self.assertEqual( + requests[key], + json.dumps(expect)) + # Register the same dashboard again + self.harness.charm.grafana_dashboard.register_dashboard( + 'my-dash.json', + dashboard) + # Check the relation data is unchanged + requests = self.get_requests_on_relation(rel_data) + self.assertEqual( + len(requests), + 1) + new_key = list(requests.keys())[0] + # A duplicate was registered so the key should be unchanged. 
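+        # (register_dashboard keys requests by an md5 digest of the
+        # dashboard, so re-registering identical content reuses the
+        # existing request_id.)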
+ self.assertEqual( + new_key, + key) + expect = { + "dashboard": { + "digest": digest, + "foo": "ba1", + "source_model": None, # Model name appears as None in testing + # harness + "uid": "123"}, + "name": "my-dash.json", + "request_id": new_key.replace("request_", "")} + self.assertEqual( + requests[new_key], + json.dumps(expect)) + # Update an existing dashboard with a new version. This should create + # a new request and remove the old one. + updated_dashboard = { + 'uid': '123', + 'foo': 'ba2'} + updated_digest = hashlib.md5( + json.dumps(updated_dashboard).encode("utf8")).hexdigest() + self.harness.charm.grafana_dashboard.register_dashboard( + 'my-dash.json', + updated_dashboard) + rel_data = self.harness.get_relation_data( + rel_id, + 'my-charm/0') + requests = self.get_requests_on_relation(rel_data) + # The old request should have been removed so there is still just one + # key. + self.assertEqual( + len(requests), + 1) + updated_key = list(requests.keys())[0] + expect = { + "dashboard": { + "digest": updated_digest, + "foo": "ba2", + "source_model": None, # Model name appears as None in testing + # harness + "uid": "123"}, + "name": "my-dash.json", + "request_id": updated_key.replace("request_", "")} + self.assertEqual( + requests[updated_key], + json.dumps(expect)) diff --git a/ceph-dashboard/unit_tests/test_interface_http.py b/ceph-dashboard/unit_tests/test_interface_http.py new file mode 100644 index 00000000..9f9e4a3b --- /dev/null +++ b/ceph-dashboard/unit_tests/test_interface_http.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 + +# Copyright 2020 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
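+#
+# Contract exercised below: a remote unit publishes 'hostname' and
+# 'port' on the relation; HTTPRequires then emits http_ready and
+# exposes the endpoint via get_service_ep_data().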
+ +import unittest +import sys +sys.path.append('lib') # noqa +sys.path.append('src') # noqa +from ops.testing import Harness +from ops.charm import CharmBase, CharmMeta +import interface_http + + +class MyCharm(CharmBase): + + def __init__(self, *args): + super().__init__(*args) + self.framework.meta = CharmMeta.from_yaml(metadata=''' +name: my-charm +requires: + prometheus: + interface: http +''') + + self.seen_events = [] + self.prometheus = interface_http.HTTPRequires( + self, + 'prometheus') + self.framework.observe( + self.prometheus.on.http_ready, + self._log_event) + + def _log_event(self, event): + self.seen_events.append(type(event).__name__) + + +class TestHTTPRequires(unittest.TestCase): + + def setUp(self): + super().setUp() + self.harness = Harness( + MyCharm, + ) + + def add_http_relation(self): + rel_id = self.harness.add_relation('prometheus', 'prometheus') + self.harness.add_relation_unit( + rel_id, + 'prometheus/0') + return rel_id + + def test_relation_name(self): + self.harness.begin() + self.assertEqual( + self.harness.charm.prometheus.relation_name, + 'prometheus') + + def test_http_ready_event(self): + self.harness.begin() + rel_id = self.add_http_relation() + self.assertEqual( + self.harness.charm.seen_events, + []) + self.harness.update_relation_data( + rel_id, + 'prometheus/0', + { + 'hostname': 'promhost', + 'port': 3000}) + self.assertEqual( + self.harness.charm.seen_events, + ['HTTPEvent']) + + def test_get_service_ep_data(self): + self.harness.begin() + rel_id = self.add_http_relation() + self.harness.update_relation_data( + rel_id, + 'prometheus/0', + { + 'hostname': 'promhost', + 'port': 3000}) + self.assertEqual( + self.harness.charm.prometheus.get_service_ep_data(), + {'hostname': 'promhost', 'port': 3000}) diff --git a/ceph-dashboard/unit_tests/test_interface_radosgw_user.py b/ceph-dashboard/unit_tests/test_interface_radosgw_user.py new file mode 100644 index 00000000..06645a19 --- /dev/null +++ b/ceph-dashboard/unit_tests/test_interface_radosgw_user.py @@ -0,0 +1,169 @@ +#!/usr/bin/env python3 + +# Copyright 2021 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
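+#
+# Contract exercised below: each remote ceph unit publishes a
+# 'daemon-id' in its unit databag, while the application databag
+# carries the shared 'access-key', 'secret-key' and 'uid'. Credentials
+# are only reported once all of these pieces are present.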
+ +import unittest +import sys +sys.path.append('lib') # noqa +sys.path.append('src') # noqa +from ops.testing import Harness +from ops.charm import CharmBase +import interface_radosgw_user + + +class TestRadosGWUserRequires(unittest.TestCase): + + class MyCharm(CharmBase): + + def __init__(self, *args): + super().__init__(*args) + self.seen_events = [] + self.radosgw_user = interface_radosgw_user.RadosGWUserRequires( + self, + 'radosgw-dashboard') + + self.framework.observe( + self.radosgw_user.on.gw_user_ready, + self._log_event) + + def _log_event(self, event): + self.seen_events.append(type(event).__name__) + + def setUp(self): + super().setUp() + self.harness = Harness( + self.MyCharm, + meta=''' +name: my-charm +requires: + radosgw-dashboard: + interface: radosgw-user +''' + ) + + def test_init(self): + self.harness.begin() + self.assertEqual( + self.harness.charm.radosgw_user.relation_name, + 'radosgw-dashboard') + + def test_add_radosgw_dashboard_relation(self): + rel_id1 = self.harness.add_relation('radosgw-dashboard', 'ceph-eu') + rel_id2 = self.harness.add_relation('radosgw-dashboard', 'ceph-us') + self.harness.begin() + self.assertEqual( + self.harness.charm.seen_events, + []) + self.harness.set_leader() + self.harness.add_relation_unit( + rel_id1, + 'ceph-eu/0') + self.harness.add_relation_unit( + rel_id1, + 'ceph-eu/1') + self.harness.add_relation_unit( + rel_id2, + 'ceph-us/0') + self.harness.add_relation_unit( + rel_id2, + 'ceph-us/1') + self.harness.update_relation_data( + rel_id1, + 'ceph-eu/0', + { + 'daemon-id': 'juju-80416c-zaza-7af97ef8a776-3'}) + self.harness.update_relation_data( + rel_id1, + 'ceph-eu/1', + { + 'daemon-id': 'juju-80416c-zaza-7af97ef8a776-4'}) + self.harness.update_relation_data( + rel_id2, + 'ceph-us/0', + { + 'daemon-id': 'juju-dddddd-zaza-sdfsfsfs-4'}) + self.harness.update_relation_data( + rel_id2, + 'ceph-us/1', + { + 'daemon-id': 'juju-dddddd-zaza-sdfsfsfs-5'}) + self.harness.update_relation_data( + rel_id1, + 'ceph-eu', + { + 'access-key': 'XNUZVPL364U0BL1OXWJZ', + 'secret-key': 'SgBo115xJcW90nkQ5EaNQ6fPeyeUUT0GxhwQbLFo', + 'uid': 'radosgw-user-9'}) + self.assertEqual( + self.harness.charm.seen_events, + ['RadosGWUserEvent']) + self.harness.update_relation_data( + rel_id2, + 'ceph-us', + { + 'access-key': 'JGHKJGDKJGJGJHGYYYYM', + 'secret-key': 'iljkdfhHKHKd88LKxNLSKDiijfjfjfldjfjlf44', + 'uid': 'radosgw-user-10'}) + self.assertEqual( + self.harness.charm.radosgw_user.get_user_creds(), + [ + { + 'access_key': 'XNUZVPL364U0BL1OXWJZ', + 'daemon_id': 'juju-80416c-zaza-7af97ef8a776-3', + 'secret_key': 'SgBo115xJcW90nkQ5EaNQ6fPeyeUUT0GxhwQbLFo', + 'uid': 'radosgw-user-9'}, + { + 'access_key': 'XNUZVPL364U0BL1OXWJZ', + 'daemon_id': 'juju-80416c-zaza-7af97ef8a776-4', + 'secret_key': 'SgBo115xJcW90nkQ5EaNQ6fPeyeUUT0GxhwQbLFo', + 'uid': 'radosgw-user-9'}, + { + 'access_key': 'JGHKJGDKJGJGJHGYYYYM', + 'daemon_id': 'juju-dddddd-zaza-sdfsfsfs-4', + 'secret_key': 'iljkdfhHKHKd88LKxNLSKDiijfjfjfldjfjlf44', + 'uid': 'radosgw-user-10'}, + { + 'access_key': 'JGHKJGDKJGJGJHGYYYYM', + 'daemon_id': 'juju-dddddd-zaza-sdfsfsfs-5', + 'secret_key': 'iljkdfhHKHKd88LKxNLSKDiijfjfjfldjfjlf44', + 'uid': 'radosgw-user-10'}]) + + def test_add_radosgw_dashboard_relation_missing_data(self): + rel_id1 = self.harness.add_relation('radosgw-dashboard', 'ceph-eu') + self.harness.begin() + self.assertEqual( + self.harness.charm.seen_events, + []) + self.harness.set_leader() + self.harness.add_relation_unit( + rel_id1, + 'ceph-eu/0') + self.harness.update_relation_data( + rel_id1, 
+ 'ceph-eu/0', + { + 'daemon-id': 'juju-80416c-zaza-7af97ef8a776-3'}) + self.harness.update_relation_data( + rel_id1, + 'ceph-eu', + { + 'secret-key': 'SgBo115xJcW90nkQ5EaNQ6fPeyeUUT0GxhwQbLFo', + 'uid': 'radosgw-user-9'}) + self.assertEqual( + self.harness.charm.radosgw_user.get_user_creds(), + []) + self.assertEqual( + self.harness.charm.seen_events, + []) diff --git a/ceph-fs/.gitignore b/ceph-fs/.gitignore new file mode 100644 index 00000000..3c71ec16 --- /dev/null +++ b/ceph-fs/.gitignore @@ -0,0 +1,10 @@ +build +.tox +layers +.testrepository +__pycache__ +*.pyc +.idea +.stestr +*.charm +.vscode diff --git a/ceph-fs/.gitreview b/ceph-fs/.gitreview new file mode 100644 index 00000000..d8e1269c --- /dev/null +++ b/ceph-fs/.gitreview @@ -0,0 +1,6 @@ +[gerrit] +host=review.opendev.org +port=29418 +project=openstack/charm-ceph-fs.git + +defaultbranch=stable/squid-jammy diff --git a/ceph-fs/.stestr.conf b/ceph-fs/.stestr.conf new file mode 100644 index 00000000..5fcccaca --- /dev/null +++ b/ceph-fs/.stestr.conf @@ -0,0 +1,3 @@ +[DEFAULT] +test_path=./unit_tests +top_dir=./ diff --git a/ceph-fs/.zuul.yaml b/ceph-fs/.zuul.yaml new file mode 100644 index 00000000..7ffc71cb --- /dev/null +++ b/ceph-fs/.zuul.yaml @@ -0,0 +1,4 @@ +- project: + templates: + - openstack-python3-charm-yoga-jobs + - openstack-cover-jobs diff --git a/ceph-fs/bindep.txt b/ceph-fs/bindep.txt new file mode 100644 index 00000000..17575d9f --- /dev/null +++ b/ceph-fs/bindep.txt @@ -0,0 +1,4 @@ +libffi-dev [platform:dpkg] +libpq-dev [platform:dpkg] +libxml2-dev [platform:dpkg] +libxslt1-dev [platform:dpkg] diff --git a/ceph-fs/build-requirements.txt b/ceph-fs/build-requirements.txt new file mode 100644 index 00000000..b6d2452f --- /dev/null +++ b/ceph-fs/build-requirements.txt @@ -0,0 +1,7 @@ +# NOTES(lourot): +# * We don't install charmcraft via pip anymore because it anyway spins up a +# container and scp the system's charmcraft snap inside it. So the charmcraft +# snap is necessary on the system anyway. +# * `tox -e build` successfully validated with charmcraft 1.2.1 + +cffi==1.14.6; python_version < '3.6' # cffi 1.15.0 drops support for py35. 
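The two tests above pin down the credential-assembly behaviour of the
radosgw-user interface: per-unit `daemon-id` values are joined with
per-application credentials, and applications with incomplete data
contribute nothing. A minimal sketch of that join in plain Python (the
dict-based relation structures are stand-ins, not the charm's actual
classes):

    def get_user_creds(relations):
        """Join per-unit daemon ids with per-app credentials (sketch)."""
        creds = []
        for rel in relations:
            app = rel['app_data']
            # Apps that have not yet published all three keys are
            # skipped, matching the missing-data test above.
            if not all(app.get(k) for k in ('access-key', 'secret-key', 'uid')):
                continue
            for unit in rel['units_data']:
                if unit.get('daemon-id'):
                    creds.append({
                        'access_key': app['access-key'],
                        'daemon_id': unit['daemon-id'],
                        'secret_key': app['secret-key'],
                        'uid': app['uid'],
                    })
        return creds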
diff --git a/ceph-fs/charmcraft.yaml b/ceph-fs/charmcraft.yaml new file mode 100644 index 00000000..326d045b --- /dev/null +++ b/ceph-fs/charmcraft.yaml @@ -0,0 +1,35 @@ +type: charm + +parts: + charm: + plugin: reactive + reactive-charm-build-arguments: + - --binary-wheels-from-source + - -v + build-packages: + - tox + - git + - python3-dev + - libffi-dev + source: src/ + build-snaps: + - charm/latest/edge + build-environment: + - CHARM_INTERFACES_DIR: $CRAFT_PROJECT_DIR/interfaces/ + - CHARM_LAYERS_DIR: $CRAFT_PROJECT_DIR/layers/ + +base: ubuntu@22.04 +build-base: ubuntu@22.04 +platforms: + amd64: + build-on: amd64 + build-for: amd64 + arm64: + build-on: arm64 + build-for: arm64 + s390x: + build-on: s390x + build-for: s390x + ppc64el: + build-on: ppc64el + build-for: ppc64el diff --git a/ceph-fs/interfaces/cephfs_share/interface.yaml b/ceph-fs/interfaces/cephfs_share/interface.yaml new file mode 100644 index 00000000..f02fe2df --- /dev/null +++ b/ceph-fs/interfaces/cephfs_share/interface.yaml @@ -0,0 +1,3 @@ +name: cephfs_share +summary: CephFS Share provider interface +version: 1 \ No newline at end of file diff --git a/ceph-fs/interfaces/cephfs_share/provides.py b/ceph-fs/interfaces/cephfs_share/provides.py new file mode 100644 index 00000000..3ae7d8fc --- /dev/null +++ b/ceph-fs/interfaces/cephfs_share/provides.py @@ -0,0 +1,99 @@ +from charms.reactive import scopes, when, set_flag, clear_flag +from charms.reactive.endpoints import Endpoint + +from charmhelpers.core import hookenv + +from typing import Iterable, Dict, Set + +import json + +class _Transaction: + """Store transaction information between data mappings.""" + + def __init__(self, added: Set, changed: Set, deleted: Set): + self.added: Set = added + self.changed: Set = changed + self.deleted: Set = deleted + +def _eval(relation) -> _Transaction: + """Evaluate the difference between data in an integration changed databag. + + Args: + relation: Relation with the written data. + + Returns: + _Transaction: + Transaction info containing the added, deleted, and changed + keys from the relation databag. + """ + # Retrieve the old data from the data key in the unit databag. + old_data = json.loads(relation.to_publish_raw.get("cache", "{}")) + # Retrieve the new data from the relation integration databag. + new_data = { + key: value for key, value in relation.received_app.items() if key != "cache" + } + # These are the keys that were added to the databag and triggered this event. + added = new_data.keys() - old_data.keys() + # These are the keys that were removed from the databag and triggered this event. + deleted = old_data.keys() - new_data.keys() + # These are the keys that were added or already existed in the databag, but had their values changed. + changed = added.union( + {key for key in old_data.keys() & new_data.keys() if old_data[key] != new_data[key]} + ) + # Convert the new_data to a serializable format and save it for a next diff check. + relation.to_publish_raw.update({ + "cache": json.dumps(new_data) + }) + + # Return the transaction with all possible changes. 
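+    # Example: old == {"a": 1, "b": 2} and new == {"b": 3, "c": 4}
+    # gives added == {"c"}, deleted == {"a"}, changed == {"b", "c"}.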
+        return _Transaction(added, changed, deleted)
+
+class CephFSProvides(Endpoint):
+
+    @when('endpoint.{endpoint_name}.changed')
+    def changed(self):
+        if hookenv.is_leader():
+            for relation in self.relations:
+                transaction = _eval(relation)
+                if "name" in transaction.added:
+                    set_flag(self.expand_name('{endpoint_name}.available'))
+
+    def manage_flags(self):
+        if not self.is_joined:
+            clear_flag(
+                self.expand_name('{endpoint_name}.available')
+            )
+
+    def set_share(self, share_info: Dict, auth_info: Dict) -> None:
+        """Set info for mounting a CephFS share.
+
+        Args:
+            share_info: Dictionary with the information required to mount
+                the CephFS share.
+                - fsid: ID of the Ceph cluster.
+                - name: Name of the exported Ceph filesystem.
+                - path: Exported path of the Ceph filesystem.
+                - monitor_hosts: Address list of the available Ceph MON nodes.
+            auth_info: Dictionary with the information required to
+                authenticate against the Ceph cluster.
+                - username: Name of the user authorized to access the Ceph
+                  filesystem.
+                - key: Cephx key for the authorized user.
+
+        Notes:
+            Only the application leader unit can set the CephFS share data.
+        """
+        if hookenv.is_leader():
+            share_info = json.dumps({
+                'fsid': share_info['fsid'],
+                'name': share_info['name'],
+                'path': share_info['path'],
+                'monitor_hosts': share_info['monitor_hosts']
+            })
+            auth_info = json.dumps({
+                'username': auth_info['username'],
+                'key': auth_info['key']
+            })
+            for relation in self.relations:
+                relation.to_publish_app_raw.update({
+                    "share_info": share_info,
+                    "auth": f"plain:{auth_info}",
+                })
diff --git a/ceph-fs/metadata.yaml b/ceph-fs/metadata.yaml
new file mode 120000
index 00000000..07686838
--- /dev/null
+++ b/ceph-fs/metadata.yaml
@@ -0,0 +1 @@
+src/metadata.yaml
\ No newline at end of file
diff --git a/ceph-fs/osci.yaml b/ceph-fs/osci.yaml
new file mode 100644
index 00000000..ec739f37
--- /dev/null
+++ b/ceph-fs/osci.yaml
@@ -0,0 +1,11 @@
+- project:
+    templates:
+      - charm-unit-jobs-py38
+      - charm-unit-jobs-py39
+      - charm-yoga-functional-jobs
+      - charm-functional-jobs
+    vars:
+      needs_charm_build: true
+      charm_build_name: ceph-fs
+      build_type: charmcraft
+      charmcraft_channel: 2.x/stable
diff --git a/ceph-fs/pip.sh b/ceph-fs/pip.sh
new file mode 100755
index 00000000..9a7e6b09
--- /dev/null
+++ b/ceph-fs/pip.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+#
+# This file is managed centrally by release-tools and should not be modified
+# within individual charm repos. See the 'global' dir contents for available
+# choices of tox.ini for OpenStack Charms:
+# https://github.com/openstack-charmers/release-tools
+#
+# setuptools 58.0 dropped the support for use_2to3=true which is needed to
+# install blessings (an indirect dependency of charm-tools).
+#
+# More details on the behavior of tox and virtualenv creation can be found at
+# https://github.com/tox-dev/tox/issues/448
+#
+# This script is a wrapper to force the use of the pinned versions early in
+# the process when the virtualenv was created and upgraded before installing
+# the dependencies declared in the target.
+pip install 'pip<20.3' 'setuptools<50.0.0'
+pip "$@"
diff --git a/ceph-fs/rebuild b/ceph-fs/rebuild
new file mode 100644
index 00000000..f7d381af
--- /dev/null
+++ b/ceph-fs/rebuild
@@ -0,0 +1,5 @@
+# This file is used to trigger rebuilds
+# when dependencies of the charm change,
+# but nothing in the charm needs to.
+# simply change the uuid to something new
+53a974a8-1178-11ec-a86e-07dd4090d760
diff --git a/ceph-fs/rename.sh b/ceph-fs/rename.sh
new file mode 100755
index 00000000..d0c35c97
--- /dev/null
+++ b/ceph-fs/rename.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+charm=$(grep "charm_build_name" osci.yaml | awk '{print $2}')
+echo "renaming ${charm}_*.charm to ${charm}.charm"
+echo -n "pwd: "
+pwd
+ls -al
+echo "Removing bad downloaded charm maybe?"
+if [[ -e "${charm}.charm" ]];
+then
+    rm "${charm}.charm"
+fi
+echo "Renaming charm here."
+mv ${charm}_*.charm ${charm}.charm
diff --git a/ceph-fs/requirements.txt b/ceph-fs/requirements.txt
new file mode 100644
index 00000000..29081289
--- /dev/null
+++ b/ceph-fs/requirements.txt
@@ -0,0 +1,10 @@
+# NOTE: newer versions of cryptography require a Rust compiler to build,
+# see
+# * https://github.com/openstack-charmers/zaza/issues/421
+# * https://mail.python.org/pipermail/cryptography-dev/2021-January/001003.html
+#
+cryptography<3.4
+
+git+https://github.com/juju/charm-tools.git
+
+simplejson
diff --git a/ceph-fs/src/README.md b/ceph-fs/src/README.md
new file mode 100644
index 00000000..5c94b7e9
--- /dev/null
+++ b/ceph-fs/src/README.md
@@ -0,0 +1,147 @@
+# Overview
+
+[Ceph][ceph-upstream] is a unified, distributed storage system designed for
+excellent performance, reliability, and scalability.
+
+The ceph-fs charm deploys the metadata server daemon (MDS) for the Ceph
+distributed file system (CephFS). The deployment is done within the context of
+an existing Ceph cluster.
+
+# Usage
+
+## Configuration
+
+This section covers common and/or important configuration options. See file
+`config.yaml` for the full list of options, along with their descriptions and
+default values. A YAML file (e.g. `ceph-fs.yaml`) is often used to store
+configuration options. See the [Juju documentation][juju-docs-config-apps] for
+details on configuring applications.
+
+#### `pool-type`
+
+The `pool-type` option dictates the storage pool type. See section 'Ceph pool
+type' for more information.
+
+#### `source`
+
+The `source` option sets the software sources. A common value is an OpenStack
+UCA release (e.g. 'cloud:xenial-queens' or 'cloud:bionic-ussuri'). See [Ceph
+and the UCA][cloud-archive-ceph]. The underlying host's existing apt sources
+will be used if this option is not specified (this behaviour can be explicitly
+chosen by using the value of 'distro').
+
+## Ceph pool type
+
+Ceph storage pools can be configured to ensure data resiliency either through
+replication or by erasure coding. This charm supports both types via the
+`pool-type` configuration option, which can take on the values of 'replicated'
+and 'erasure-coded'. The default value is 'replicated'.
+
+For this charm, the pool type will be associated with CephFS volumes.
+
+> **Note**: Erasure-coded pools are supported starting with Ceph Luminous.
+
+### Replicated pools
+
+Replicated pools use a simple replication strategy in which each written object
+is copied, in full, to multiple OSDs within the cluster.
+
+The `ceph-osd-replication-count` option sets the replica count for any object
+stored within the 'ceph-fs-data' cephfs pool. Increasing this value increases
+data resilience at the cost of consuming more real storage in the Ceph cluster.
+The default value is '3'.
+
+> **Important**: The `ceph-osd-replication-count` option must be set prior to
+  adding the relation to the ceph-mon application. Otherwise, the pool's
+  configuration will need to be set by interfacing with the cluster directly.
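+
+For example, to deploy with a replica count of five (value illustrative):
+
+    juju deploy --config ceph-osd-replication-count=5 ceph-fs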
+ +### Erasure coded pools + +Erasure coded pools use a technique that allows for the same resiliency as +replicated pools, yet reduces the amount of space required. Written data is +split into data chunks and error correction chunks, which are both distributed +throughout the cluster. + +> **Note**: Erasure coded pools require more memory and CPU cycles than + replicated pools do. + +When using erasure coded pools for CephFS file systems two pools will be +created: a replicated pool (for storing MDS metadata) and an erasure coded pool +(for storing the data written into a CephFS volume). The +`ceph-osd-replication-count` configuration option only applies to the metadata +(replicated) pool. + +Note that the replicated pool will be the default pool for all data. +The user must manually configure the secondary erasure coded pool for use, +for example by using [file layouts][file-layouts]. + +It's not recommended to use an erasure coded data pool as the default data pool; +see [createfs docs][createfs] for more explanation. +Thus, the charm does not support this case. + +Erasure coded pools can be configured via options whose names begin with the +`ec-` prefix. + +> **Important**: It is strongly recommended to tailor the `ec-profile-k` and + `ec-profile-m` options to the needs of the given environment. These latter + options have default values of '1' and '2' respectively, which result in the + same space requirements as those of a replicated pool. + +See [Ceph Erasure Coding][cdg-ceph-erasure-coding] in the [OpenStack Charms +Deployment Guide][cdg] for more information. + +## Ceph BlueStore compression + +This charm supports [BlueStore inline compression][ceph-bluestore-compression] +for its associated Ceph storage pool(s). The feature is enabled by assigning a +compression mode via the `bluestore-compression-mode` configuration option. The +default behaviour is to disable compression. + +The efficiency of compression depends heavily on what type of data is stored +in the pool and the charm provides a set of configuration options to fine tune +the compression behaviour. + +> **Note**: BlueStore compression is supported starting with Ceph Mimic. + +## Deployment + +To deploy a single MDS node within an existing Ceph cluster: + + juju deploy ceph-fs + juju add-relation ceph-fs:ceph-mds ceph-mon:mds + +## High availability + +Highly available CephFS is achieved by deploying multiple MDS servers (i.e. +multiple ceph-fs units). + +## Actions + +This section lists Juju [actions][juju-docs-actions] supported by the charm. +Actions allow specific operations to be performed on a per-unit basis. To +display action descriptions run `juju actions ceph-fs`. If the charm is not +deployed then see file `actions.yaml`. + +* `get-quota` +* `remove-quota` +* `set-quota` + +# Bugs + +Please report bugs on [Launchpad][lp-bugs-charm-ceph-fs]. + +For general charm questions refer to the OpenStack [Charm Guide][cg]. 
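+
+As a usage illustration for the quota actions listed above (the unit
+name and path are examples only):
+
+    juju run-action --wait ceph-fs/0 set-quota max-files=1024 directory=/some/dir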
+ + + +[cg]: https://docs.openstack.org/charm-guide +[cdg]: https://docs.openstack.org/project-deploy-guide/charm-deployment-guide +[ceph-upstream]: https://ceph.io +[juju-docs-actions]: https://jaas.ai/docs/actions +[juju-docs-config-apps]: https://juju.is/docs/configuring-applications +[lp-bugs-charm-ceph-fs]: https://bugs.launchpad.net/charm-ceph-fs/+filebug +[cloud-archive-ceph]: https://wiki.ubuntu.com/OpenStack/CloudArchive#Ceph_and_the_UCA +[cdg-ceph-erasure-coding]: https://docs.openstack.org/project-deploy-guide/charm-deployment-guide/latest/app-erasure-coding.html +[ceph-bluestore-compression]: https://docs.ceph.com/en/latest/rados/configuration/bluestore-config-ref/#inline-compression +[createfs]: https://docs.ceph.com/en/latest/cephfs/createfs/ +[file-layouts]: https://docs.ceph.com/en/latest/cephfs/file-layouts/ diff --git a/ceph-fs/src/actions.yaml b/ceph-fs/src/actions.yaml new file mode 100644 index 00000000..c3dd0f57 --- /dev/null +++ b/ceph-fs/src/actions.yaml @@ -0,0 +1,63 @@ +get-quota: + description: View quota settings on a directory + params: + max-files: + type: integer + description: | + The limit of how many files can be written. Use either this or + max-bytes but not both. The action tries max-files first and then + falls back on max-bytes if both are set + max-bytes: + type: integer + description: | + The maximum number of bytes that are allowed to be written. Use + either this or max-files but not both. The action tries max-files + first and then falls back on max-bytes if both are set + directory: + type: string + description: | + The directory to query for quota information. + required: [directory] + additionalProperties: false +remove-quota: + description: Remove a quota on a directory + params: + max-files: + type: integer + description: | + The limit of how many files can be written. Use either this or + max-bytes but not both. The action tries max-files first and then + falls back on max-bytes if both are set + max-bytes: + type: integer + description: | + The maximum number of bytes that are allowed to be written. Use + either this or max-files but not both. The action tries max-files + first and then falls back on max-bytes if both are set + directory: + type: string + description: | + The directory to remove the quota from. + required: [directory] + additionalProperties: false +set-quota: + description: Create a new quota + params: + max-files: + type: integer + description: | + The limit of how many files can be written. Use either this or + max-bytes but not both. The action tries max-files + first and then falls back on max-bytes if both are set + max-bytes: + type: integer + description: | + The maximum number of bytes that are allowed to be written. Use + either this or max-files but not both. The action tries max-files + first and then falls back on max-bytes if both are set + directory: + type: string + description: | + The directory to apply this quota to. 
+ required: [directory] + additionalProperties: false diff --git a/ceph-fs/src/actions/__init__.py b/ceph-fs/src/actions/__init__.py new file mode 100644 index 00000000..bd8bf091 --- /dev/null +++ b/ceph-fs/src/actions/__init__.py @@ -0,0 +1 @@ +__author__ = 'Chris Holcombe ' diff --git a/ceph-fs/src/actions/get-quota b/ceph-fs/src/actions/get-quota new file mode 120000 index 00000000..075a7983 --- /dev/null +++ b/ceph-fs/src/actions/get-quota @@ -0,0 +1 @@ +get_quota.py \ No newline at end of file diff --git a/ceph-fs/src/actions/get_quota.py b/ceph-fs/src/actions/get_quota.py new file mode 100755 index 00000000..683395ad --- /dev/null +++ b/ceph-fs/src/actions/get_quota.py @@ -0,0 +1,45 @@ +#!/usr/bin/python3 +# +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +from charmhelpers.core.hookenv import action_get, action_fail, action_set +import xattr + +__author__ = 'Chris Holcombe ' + + +def get_quota(): + max_files = action_get('max-files') + max_bytes = action_get('max-bytes') + directory = action_get('directory') + + if not os.path.exists(directory): + action_fail("Directory must exist before setting quota") + attr = "ceph.quota.{}" + if max_files: + attr = attr.format("max_files") + elif max_bytes: + attr = attr.format("max_bytes") + + try: + quota_value = xattr.getxattr(directory, attr) + action_set({'{} quota'.format(directory): quota_value}) + except IOError as err: + action_fail( + "Unable to get xattr on {}. Error: {}".format(directory, err)) + + +if __name__ == '__main__': + get_quota() diff --git a/ceph-fs/src/actions/remove-quota b/ceph-fs/src/actions/remove-quota new file mode 120000 index 00000000..0e3dad46 --- /dev/null +++ b/ceph-fs/src/actions/remove-quota @@ -0,0 +1 @@ +remove_quota.py \ No newline at end of file diff --git a/ceph-fs/src/actions/remove_quota.py b/ceph-fs/src/actions/remove_quota.py new file mode 100755 index 00000000..c647bdb1 --- /dev/null +++ b/ceph-fs/src/actions/remove_quota.py @@ -0,0 +1,45 @@ +#!/usr/bin/python3 +# +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
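+#
+# CephFS exposes directory quotas as virtual extended attributes
+# ('ceph.quota.max_files' and 'ceph.quota.max_bytes'); writing a value
+# of 0, as this action does, clears the corresponding quota.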
+import os + +from charmhelpers.core.hookenv import action_get, action_fail +import xattr + +__author__ = 'Chris Holcombe ' + + +def remove_quota(): + max_files = action_get('max-files') + max_bytes = action_get('max-bytes') + directory = action_get('directory') + + if not os.path.exists(directory): + action_fail("Directory must exist before setting quota") + attr = "ceph.quota.{}" + if max_files: + attr = attr.format("max_files") + elif max_bytes: + attr = attr.format("max_bytes") + + try: + xattr.setxattr(directory, attr, str(0)) + except IOError as err: + action_fail( + "Unable to set xattr on {}. Error: {}".format(directory, err)) + + +if __name__ == '__main__': + remove_quota() diff --git a/ceph-fs/src/actions/set-quota b/ceph-fs/src/actions/set-quota new file mode 120000 index 00000000..9ab54db1 --- /dev/null +++ b/ceph-fs/src/actions/set-quota @@ -0,0 +1 @@ +set_quota.py \ No newline at end of file diff --git a/ceph-fs/src/actions/set_quota.py b/ceph-fs/src/actions/set_quota.py new file mode 100755 index 00000000..83b4429e --- /dev/null +++ b/ceph-fs/src/actions/set_quota.py @@ -0,0 +1,47 @@ +#!/usr/bin/python3 +# +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__author__ = 'Chris Holcombe ' +import os +from charmhelpers.core.hookenv import action_get, action_fail +import xattr + + +def set_quota(): + max_files = action_get('max-files') + max_bytes = action_get('max-bytes') + directory = action_get('directory') + + if not os.path.exists(directory): + action_fail("Directory must exist before setting quota") + attr = "ceph.quota.{}" + value = None + if max_files: + attr = attr.format("max_files") + value = str(max_files) + elif max_bytes: + attr = attr.format("max_bytes") + value = str(max_bytes) + + try: + xattr.setxattr(directory, attr, value) + except IOError as err: + action_fail( + "Unable to set xattr on {}. 
Error: {}".format(directory, err)) + + +if __name__ == '__main__': + set_quota() diff --git a/ceph-fs/src/build.lock b/ceph-fs/src/build.lock new file mode 100644 index 00000000..a41bbd2a --- /dev/null +++ b/ceph-fs/src/build.lock @@ -0,0 +1,268 @@ +{ + "locks": [ + { + "type": "layer", + "item": "layer:options", + "url": "https://github.com/juju-solutions/layer-options.git", + "vcs": null, + "branch": "fcdcea4e5de3e1556c24e6704607862d0ba00a56", + "commit": "fcdcea4e5de3e1556c24e6704607862d0ba00a56" + }, + { + "type": "layer", + "item": "layer:basic", + "url": "https://github.com/juju-solutions/layer-basic.git", + "vcs": null, + "branch": "33526bd6aaa01ffe717a5c66ed62bc4790344ef2", + "commit": "33526bd6aaa01ffe717a5c66ed62bc4790344ef2" + }, + { + "type": "layer", + "item": "layer:openstack", + "url": "https://github.com/openstack/charm-layer-openstack", + "vcs": null, + "branch": "7c671b0696977f455616565d956895b2f890464b", + "commit": "7c671b0696977f455616565d956895b2f890464b" + }, + { + "type": "layer", + "item": "layer:ceph", + "url": "https://github.com/openstack/charm-layer-ceph.git", + "vcs": null, + "branch": "17d40abd8d9ec3b8c32756ca981c80c4733c016f", + "commit": "17d40abd8d9ec3b8c32756ca981c80c4733c016f" + }, + { + "type": "layer", + "item": "ceph-fs", + "url": null, + "vcs": null, + "branch": "e6c6f13cde785174cee1a48a8df1c581e394fc3b", + "commit": "e6c6f13cde785174cee1a48a8df1c581e394fc3b" + }, + { + "type": "layer", + "item": "interface:tls-certificates", + "url": "https://github.com/juju-solutions/interface-tls-certificates", + "vcs": null, + "branch": "da891c403864482688ec767a964218e5857f0e49", + "commit": "da891c403864482688ec767a964218e5857f0e49" + }, + { + "type": "layer", + "item": "interface:ceph-mds", + "url": "https://opendev.org/openstack/charm-interface-ceph-client.git", + "vcs": null, + "branch": "d9f3b53ca0cf30e47347a68beab59da5c03ce4c7", + "commit": "d9f3b53ca0cf30e47347a68beab59da5c03ce4c7" + }, + { + "type": "layer", + "item": "interface:cephfs_share", + "url": null, + "vcs": null, + "branch": "e6c6f13cde785174cee1a48a8df1c581e394fc3b", + "commit": "e6c6f13cde785174cee1a48a8df1c581e394fc3b" + }, + { + "type": "python_module", + "package": "dnspython3", + "vcs": null, + "version": "1.12.0" + }, + { + "type": "python_module", + "package": "netifaces", + "vcs": null, + "version": "0.11.0" + }, + { + "type": "python_module", + "package": "packaging", + "vcs": null, + "version": "24.1" + }, + { + "type": "python_module", + "package": "setuptools", + "vcs": null, + "version": "71.1.0" + }, + { + "type": "python_module", + "package": "pyaml", + "vcs": null, + "version": "21.10.1" + }, + { + "type": "python_module", + "package": "flit_scm", + "vcs": null, + "version": "1.7.0" + }, + { + "type": "python_module", + "package": "charms.reactive", + "url": "git+https://github.com/canonical/charms.reactive.git", + "branch": "0dc82abb7ac01f288042ee44b56a9d428c8fc46c", + "version": "0dc82abb7ac01f288042ee44b56a9d428c8fc46c", + "vcs": "git" + }, + { + "type": "python_module", + "package": "psutil", + "vcs": null, + "version": "6.0.0" + }, + { + "type": "python_module", + "package": "pyxattr", + "vcs": null, + "version": "0.8.1" + }, + { + "type": "python_module", + "package": "MarkupSafe", + "vcs": null, + "version": "2.1.5" + }, + { + "type": "python_module", + "package": "trove_classifiers", + "vcs": null, + "version": "2024.7.2" + }, + { + "type": "python_module", + "package": "flit_core", + "vcs": null, + "version": "3.9.0" + }, + { + "type": "python_module", + "package": 
"PyYAML", + "vcs": null, + "version": "6.0.1" + }, + { + "type": "python_module", + "package": "charmhelpers", + "url": "git+https://github.com/juju/charm-helpers.git", + "branch": "1b2d4dc8f8effd79d782241a32a0485af1f01e73", + "version": "1b2d4dc8f8effd79d782241a32a0485af1f01e73", + "vcs": "git" + }, + { + "type": "python_module", + "package": "pip", + "vcs": null, + "version": "22.0.4" + }, + { + "type": "python_module", + "package": "calver", + "vcs": null, + "version": "2022.6.26" + }, + { + "type": "python_module", + "package": "pluggy", + "vcs": null, + "version": "1.5.0" + }, + { + "type": "python_module", + "package": "pyudev", + "vcs": null, + "version": "0.24.3" + }, + { + "type": "python_module", + "package": "six", + "vcs": null, + "version": "1.16.0" + }, + { + "type": "python_module", + "package": "pathspec", + "vcs": null, + "version": "0.12.1" + }, + { + "type": "python_module", + "package": "jinja2", + "vcs": null, + "version": "3.1.4" + }, + { + "type": "python_module", + "package": "pbr", + "vcs": null, + "version": "6.0.0" + }, + { + "type": "python_module", + "package": "charms.ceph", + "url": "git+https://github.com/openstack/charms.ceph.git", + "branch": "64f3c1b12b14545a76321469478fb456b379832d", + "version": "64f3c1b12b14545a76321469478fb456b379832d", + "vcs": "git" + }, + { + "type": "python_module", + "package": "looseversion", + "vcs": null, + "version": "1.3.0" + }, + { + "type": "python_module", + "package": "hatchling", + "vcs": null, + "version": "1.25.0" + }, + { + "type": "python_module", + "package": "netaddr", + "vcs": null, + "version": "0.7.19" + }, + { + "type": "python_module", + "package": "Cython", + "vcs": null, + "version": "0.29.37" + }, + { + "type": "python_module", + "package": "charms.openstack", + "url": "git+https://github.com/openstack/charms.openstack.git", + "branch": "355d65f64cc1dac133d885aa7cfc58b1804a0c30", + "version": "355d65f64cc1dac133d885aa7cfc58b1804a0c30", + "vcs": "git" + }, + { + "type": "python_module", + "package": "wheel", + "vcs": null, + "version": "0.43.0" + }, + { + "type": "python_module", + "package": "dnspython", + "vcs": null, + "version": "2.6.1" + }, + { + "type": "python_module", + "package": "ceph_api", + "vcs": null, + "version": "0.4.0" + }, + { + "type": "python_module", + "package": "setuptools_scm", + "vcs": null, + "version": "8.1.0" + } + ] +} \ No newline at end of file diff --git a/ceph-fs/src/config.yaml b/ceph-fs/src/config.yaml new file mode 100644 index 00000000..b503b9c4 --- /dev/null +++ b/ceph-fs/src/config.yaml @@ -0,0 +1,281 @@ +options: + loglevel: + default: 1 + type: int + description: Mon and OSD debug level. Max is 20. + source: + type: string + default: caracal + description: | + Optional configuration to support use of additional sources such as: + . + - ppa:myteam/ppa + - cloud:bionic-ussuri + - cloud:xenial-proposed/queens + - http://my.archive.com/ubuntu main + . + The last option should be used in conjunction with the key configuration + option. + key: + type: string + default: + description: | + Key ID to import to the apt keyring to support use with arbitary source + configuration from outside of Launchpad archives or PPA's. + use-syslog: + type: boolean + default: False + description: | + If set to True, supporting services will log to syslog. + ceph-public-network: + type: string + default: + description: | + The IP address and netmask of the public (front-side) network (e.g., + 192.168.0.0/24). 
+ If multiple networks are to be used, a space-delimited list of a.b.c.d/x + can be provided. + prefer-ipv6: + type: boolean + default: False + description: | + If True enables IPv6 support. The charm will expect network interfaces + to be configured with an IPv6 address. If set to False (default) IPv4 + is expected. + + NOTE: these charms do not currently support IPv6 privacy extension. In + order for this charm to function correctly, the privacy extension must be + disabled and a non-temporary address must be configured/available on + your network interface. + ceph-osd-replication-count: + type: int + default: 3 + description: | + This value dictates the number of replicas ceph must make of any + object it stores within the images rbd pool. Of course, this only + applies if using Ceph as a backend store. Note that once the images + rbd pool has been created, changing this value will not have any + effect (although it can be changed in ceph by manually configuring + your ceph cluster). + ceph-pool-weight: + type: int + default: 5 + description: | + Defines a relative weighting of the pool as a percentage of the total + amount of data in the Ceph cluster. This effectively weights the number + of placement groups for the pool created to be appropriately portioned + to the amount of data expected. For example, if the compute images + for the OpenStack compute instances are expected to take up 20% of the + overall configuration then this value would be specified as 20. Note - + it is important to choose an appropriate value for the pool weight as + this directly affects the number of placement groups which will be + created for the pool. The number of placement groups for a pool can + only be increased, never decreased - so it is important to identify the + percent of data that will likely reside in the pool. + rbd-pool-name: + default: + type: string + description: | + Name of the data pool to be created/used. If not defined a data pool name + will be generated based on the name of the application. + metadata-pool: + type: string + default: + description: | + Name of the metadata pool to be created/used. If not defined a metadata + pool name will be generated based on the name of the application. + The metadata pool is always replicated, not erasure coded. + pool-type: + type: string + default: replicated + description: | + Ceph pool type to use for storage - valid values include ‘replicated’ + and ‘erasure-coded’. Note that if erasure-coded is enabled then data + pools for both replicated and erasure-coded are created - use of the + erasure-coded pool is controlled using xattrs on directories and files. + ec-pool-weight: + type: int + default: 5 + description: | + Defines a relative weighting of the EC pool as a percentage of the total + amount of data in the Ceph cluster. This effectively weights the number + of placement groups for the pool created to be appropriately portioned + to the amount of data expected. For example, if the compute images + for the OpenStack compute instances are expected to take up 20% of the + overall configuration then this value would be specified as 20. Note - + it is important to choose an appropriate value for the pool weight as + this directly affects the number of placement groups which will be + created for the pool. The number of placement groups for a pool can + only be increased, never decreased - so it is important to identify the + percent of data that will likely reside in the pool. 
+ ec-profile-name: + type: string + default: + description: | + Name for the EC profile to be created for the EC pools. If not defined + a profile name will be generated based on the name of the pool used by + the application. + ec-profile-k: + type: int + default: 1 + description: | + Number of data chunks that will be used for EC data pool. K+M factors + should never be greater than the number of available zones (or hosts) + for balancing. + ec-profile-m: + type: int + default: 2 + description: | + Number of coding chunks that will be used for EC data pool. K+M factors + should never be greater than the number of available zones (or hosts) + for balancing. + ec-profile-locality: + type: int + default: + description: | + (lrc plugin - l) Group the coding and data chunks into sets of size l. + For instance, for k=4 and m=2, when l=3 two groups of three are created. + Each set can be recovered without reading chunks from another set. Note + that using the lrc plugin does incur more raw storage usage than isa or + jerasure in order to reduce the cost of recovery operations. + ec-profile-crush-locality: + type: string + default: + description: | + (lrc plugin) The type of the crush bucket in which each set of chunks + defined by l will be stored. For instance, if it is set to rack, each + group of l chunks will be placed in a different rack. It is used to + create a CRUSH rule step such as step choose rack. If it is not set, + no such grouping is done. + ec-profile-durability-estimator: + type: int + default: + description: | + (shec plugin - c) The number of parity chunks each of which includes + each data chunk in its calculation range. The number is used as a + durability estimator. For instance, if c=2, 2 OSDs can be down + without losing data. + ec-profile-helper-chunks: + type: int + default: + description: | + (clay plugin - d) Number of OSDs requested to send data during + recovery of a single chunk. d needs to be chosen such that + k+1 <= d <= k+m-1. Larger the d, the better the savings. + ec-profile-scalar-mds: + type: string + default: + description: | + (clay plugin) specifies the plugin that is used as a building + block in the layered construction. It can be one of jerasure, + isa, shec (defaults to jerasure). + ec-profile-plugin: + type: string + default: jerasure + description: | + EC plugin to use for this applications pool. The following list of + plugins acceptable - jerasure, lrc, isa, shec, clay. + ec-profile-technique: + type: string + default: + description: | + EC profile technique used for this applications pool - will be + validated based on the plugin configured via ec-profile-plugin. + Supported techniques are ‘reed_sol_van’, ‘reed_sol_r6_op’, + ‘cauchy_orig’, ‘cauchy_good’, ‘liber8tion’ for jerasure, + ‘reed_sol_van’, ‘cauchy’ for isa and ‘single’, ‘multiple’ + for shec. + ec-profile-device-class: + type: string + default: + description: | + Device class from CRUSH map to use for placement groups for + erasure profile - valid values: ssd, hdd or nvme (or leave + unset to not use a device class). + bluestore-compression-algorithm: + type: string + default: + description: | + Compressor to use (if any) for pools requested by this charm. + . + NOTE: The ceph-osd charm sets a global default for this value (defaults + to 'lz4' unless configured by the end user) which will be used unless + specified for individual pools. + bluestore-compression-mode: + type: string + default: + description: | + Policy for using compression on pools requested by this charm. + . 
+ 'none' means never use compression. + 'passive' means use compression when clients hint that data is + compressible. + 'aggressive' means use compression unless clients hint that + data is not compressible. + 'force' means use compression under all circumstances even if the clients + hint that the data is not compressible. + bluestore-compression-required-ratio: + type: float + default: + description: | + The ratio of the size of the data chunk after compression relative to the + original size must be at least this small in order to store the + compressed version on pools requested by this charm. + bluestore-compression-min-blob-size: + type: int + default: + description: | + Chunks smaller than this are never compressed on pools requested by + this charm. + bluestore-compression-min-blob-size-hdd: + type: int + default: + description: | + Value of bluestore compression min blob size for rotational media on + pools requested by this charm. + bluestore-compression-min-blob-size-ssd: + type: int + default: + description: | + Value of bluestore compression min blob size for solid state media on + pools requested by this charm. + bluestore-compression-max-blob-size: + type: int + default: + description: | + Chunks larger than this are broken into smaller blobs sizing bluestore + compression max blob size before being compressed on pools requested by + this charm. + bluestore-compression-max-blob-size-hdd: + type: int + default: + description: | + Value of bluestore compression max blob size for rotational media on + pools requested by this charm. + bluestore-compression-max-blob-size-ssd: + type: int + default: + description: | + Value of bluestore compression max blob size for solid state media on + pools requested by this charm. + mds-cache-memory-limit: + type: string + default: 4Gi + description: | + Set the maximum size of Metadata Server (MDS) cache, in bytes. The MDS + will try to stay under this value by (1 - mds_cache_reservation) as a + percent. This is not a hard limit. + mds-cache-reservation: + type: float + default: 0.05 + description: | + The cache reservation for the MDS cache to maintain. The MDS will try + to stay under this value as a percent by (1 - mds_cache_reservation) + as a percent. + mds-health-cache-threshold: + type: float + default: 1.5 + description: | + If the MDS exceeds the cache size specified in mds-cache-memory-limit, + this parameter sets the memory limit, as a percentage of + mds_cache_reservation, that triggers a health warning. 
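Two pieces of arithmetic sit behind the pool options above; a short
sketch in plain Python (the 20% metadata split mirrors the
pool-creation code later in this diff, and the values are
illustrative):

    def pool_weights(ceph_pool_weight=5):
        # The CephFS metadata pool is sized at roughly 20% of the
        # configured data weight; the data pool gets the remainder.
        metadata = ceph_pool_weight * 0.20
        return ceph_pool_weight - metadata, metadata

    print(pool_weights())  # (4.0, 1.0) with the default weight of 5

    # Raw-space overhead of an erasure-coded pool is (k + m) / k. With
    # the defaults ec-profile-k=1 and ec-profile-m=2 this is 3.0x, the
    # same footprint as a three-replica pool, hence the README's advice
    # to tune k and m per environment.
    k, m = 1, 2
    print((k + m) / k)  # 3.0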
diff --git a/ceph-fs/src/copyright b/ceph-fs/src/copyright new file mode 100644 index 00000000..17795a5e --- /dev/null +++ b/ceph-fs/src/copyright @@ -0,0 +1,6 @@ +Format: http://dep.debian.net/deps/dep5/ + +Files: * +Copyright: Copyright 2016, Canonical Ltd +License: Apache-2.0 + diff --git a/ceph-fs/src/files/.gitkeep b/ceph-fs/src/files/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/ceph-fs/src/icon.svg b/ceph-fs/src/icon.svg new file mode 100644 index 00000000..e9383990 --- /dev/null +++ b/ceph-fs/src/icon.svg @@ -0,0 +1,311 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + diff --git a/ceph-fs/src/layer.yaml b/ceph-fs/src/layer.yaml new file mode 100644 index 00000000..146b2947 --- /dev/null +++ b/ceph-fs/src/layer.yaml @@ -0,0 +1,14 @@ +includes: ['layer:ceph', 'interface:ceph-mds', 'interface:cephfs_share'] +options: + basic: + use_venv: True + include_system_packages: False +repo: https://git.openstack.org/openstack/charm-ceph-fs +config: + deletes: + - debug + - ssl_ca + - ssl_cert + - ssl_key + - use-internal-endpoints + - verbose diff --git a/ceph-fs/src/lib/__init__.py b/ceph-fs/src/lib/__init__.py new file mode 100644 index 00000000..17dd8e7f --- /dev/null +++ b/ceph-fs/src/lib/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2020 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-fs/src/lib/charm/__init__.py b/ceph-fs/src/lib/charm/__init__.py new file mode 100644 index 00000000..17dd8e7f --- /dev/null +++ b/ceph-fs/src/lib/charm/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2020 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-fs/src/lib/charm/openstack/__init__.py b/ceph-fs/src/lib/charm/openstack/__init__.py new file mode 100644 index 00000000..17dd8e7f --- /dev/null +++ b/ceph-fs/src/lib/charm/openstack/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2020 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-fs/src/lib/charm/openstack/ceph_fs.py b/ceph-fs/src/lib/charm/openstack/ceph_fs.py new file mode 100644 index 00000000..99e891ff --- /dev/null +++ b/ceph-fs/src/lib/charm/openstack/ceph_fs.py @@ -0,0 +1,183 @@ +# Copyright 2020 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import socket + +import dns.resolver + +import charms_openstack.adapters +import charms_openstack.charm +import charms_openstack.plugins + +import charmhelpers.core as ch_core + +# NOTE(fnordahl) theese out of style imports are here to help keeping helpers +# moved from reactive module as-is to make the diff managable. At some point +# in time we should replace them in favor of common helpers that would do the +# same job. +from charmhelpers.core.hookenv import ( + config, log, cached, DEBUG, unit_get, + network_get_primary_address, + status_set) +from charmhelpers.contrib.network.ip import ( + get_address_in_network, + get_ipv6_addr) + + +charms_openstack.charm.use_defaults('charm.default-select-release') + + +class CephFSCharmConfigurationAdapter( + charms_openstack.adapters.ConfigurationAdapter): + + @property + def hostname(self): + return self.charm_instance.hostname + + @property + def mds_name(self): + return self.charm_instance.hostname + + @property + def networks(self): + return self.charm_instance.get_networks('ceph-public-network') + + @property + def mds_cache(self): + return self.charm_instance.get_mds_cache() + + @property + def public_addr(self): + if ch_core.hookenv.config('prefer-ipv6'): + return get_ipv6_addr()[0] + else: + return self.charm_instance.get_public_addr() + + +class CephFSCharmRelationAdapters( + charms_openstack.adapters.OpenStackRelationAdapters): + relation_adapters = { + 'ceph-mds': charms_openstack.plugins.CephRelationAdapter, + } + + +class BaseCephFSCharm(charms_openstack.plugins.CephCharm): + abstract_class = True + name = 'ceph-fs' + python_version = 3 + required_relations = ['ceph-mds'] + user = 'ceph' + group = 'ceph' + adapters_class = CephFSCharmRelationAdapters + configuration_class = CephFSCharmConfigurationAdapter + ceph_service_type = charms_openstack.plugins.CephCharm.CephServiceType.mds + ceph_service_name_override = 'mds' + ceph_key_per_unit_name = True + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.services = [ + 'ceph-mds@{}'.format(self.hostname), + ] + self.restart_map = { + '/etc/ceph/ceph.conf': self.services, + } + + # NOTE(fnordahl) moved from reactive handler module, otherwise keeping + # these as-is to make the diff managable. At some point in time we should + # replace them in favor of common helpers that would do the same job. + @staticmethod + def get_networks(config_opt='ceph-public-network'): + """Get all configured networks from provided config option. + + If public network(s) are provided, go through them and return those for + which we have an address configured. 
+ """ + networks = config(config_opt) + if networks: + networks = networks.split() + return [n for n in networks if get_address_in_network(n)] + + return [] + + @cached + def get_public_addr(self): + if config('ceph-public-network'): + return self.get_network_addrs('ceph-public-network')[0] + + try: + return network_get_primary_address('public') + except NotImplementedError: + log("network-get not supported", DEBUG) + + return self.get_host_ip() + + def get_mds_cache(self): + return {'mds-cache-memory-limit': config('mds-cache-memory-limit'), + 'mds-cache-reservation': config('mds-cache-reservation'), + 'mds-health-cache-threshold': + config('mds-health-cache-threshold') + } + + @cached + @staticmethod + def get_host_ip(hostname=None): + if config('prefer-ipv6'): + return get_ipv6_addr()[0] + + hostname = hostname or unit_get('private-address') + try: + # Test to see if already an IPv4 address + socket.inet_aton(hostname) + return hostname + except socket.error: + # This may throw an NXDOMAIN exception; in which case + # things are badly broken so just let it kill the hook + answers = dns.resolver.query(hostname, 'A') + if answers: + return answers[0].address + + def get_network_addrs(self, config_opt): + """Get all configured public networks addresses. + + If public network(s) are provided, go through them and return the + addresses we have configured on any of those networks. + """ + addrs = [] + networks = config(config_opt) + if networks: + networks = networks.split() + addrs = [get_address_in_network(n) for n in networks] + addrs = [a for a in addrs if a] + + if not addrs: + if networks: + msg = ("Could not find an address on any of '%s' - resolve " + "this error to retry" % networks) + status_set('blocked', msg) + raise Exception(msg) + else: + return [self.get_host_ip()] + + return addrs + + +class MitakaCephFSCharm(BaseCephFSCharm): + release = 'mitaka' + packages = ['ceph-mds', 'gdisk', 'btrfs-tools', 'xfsprogs'] + + +class UssuriCephFSCharm(BaseCephFSCharm): + release = 'ussuri' + packages = ['ceph-mds', 'gdisk', 'btrfs-progs', 'xfsprogs'] diff --git a/ceph-fs/src/metadata.yaml b/ceph-fs/src/metadata.yaml new file mode 100644 index 00000000..cc812f9c --- /dev/null +++ b/ceph-fs/src/metadata.yaml @@ -0,0 +1,25 @@ +name: ceph-fs +summary: Highly scalable distributed storage +maintainer: OpenStack Charmers +description: | + Ceph is a distributed storage and network file system designed to provide + excellent performance, reliability, and scalability. +docs: https://discourse.charmhub.io/t/ceph-fs-docs-index/11223 +tags: +- openstack +- storage +- file-servers +- misc +series: +- focal +- jammy +subordinate: false +requires: + ceph-mds: + interface: ceph-mds +provides: + cephfs-share: + interface: cephfs_share + +extra-bindings: + public: diff --git a/ceph-fs/src/reactive/ceph_fs.py b/ceph-fs/src/reactive/ceph_fs.py new file mode 100644 index 00000000..e58b3e45 --- /dev/null +++ b/ceph-fs/src/reactive/ceph_fs.py @@ -0,0 +1,244 @@ +# Copyright 2024 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from charms import reactive
+
+import charmhelpers.core as ch_core
+
+from charmhelpers.core.hookenv import (
+    service_name, application_name,
+    is_leader,
+    config)
+from charmhelpers.contrib.storage.linux import ceph
+
+import charms_openstack.bus
+import charms_openstack.charm as charm
+
+import os
+import subprocess
+
+
+charms_openstack.bus.discover()
+
+
+charm.use_defaults(
+    'charm.installed',
+    'config.changed',
+    'config.rendered',
+    'upgrade-charm',
+    'update-status',
+)
+
+
+@reactive.when_none('charm.paused', 'is-update-status-hook')
+@reactive.when('ceph-mds.pools.available')
+def config_changed():
+    ceph_mds = reactive.endpoint_from_flag('ceph-mds.pools.available')
+    with charm.provide_charm_instance() as cephfs_charm:
+        host = cephfs_charm.hostname
+        exists = os.path.exists('/var/lib/ceph/mds/ceph-%s/keyring' % host)
+
+        cephfs_charm.configure_ceph_keyring(ceph_mds.mds_key())
+        cephfs_charm.render_with_interfaces([ceph_mds])
+        if reactive.is_flag_set('config.changed.source'):
+            # update system source configuration and check for upgrade
+            cephfs_charm.install()
+            cephfs_charm.upgrade_if_available([ceph_mds])
+            reactive.clear_flag('config.changed.source')
+        reactive.set_flag('cephfs.configured')
+        reactive.set_flag('config.rendered')
+        cephfs_charm.assess_status()
+
+        # If the keyring file existed before this call, then the new
+        # provided key implies a rotation.
+        if exists:
+            svc = 'ceph-mds@%s.service' % host
+            try:
+                # Reset the failure count first, as the service may fail
+                # to come up due to the way the restart-map is handled.
+                subprocess.check_call(['sudo', 'systemctl',
+                                       'reset-failed', svc])
+                subprocess.check_call(['sudo', 'systemctl', 'restart', svc])
+            except subprocess.CalledProcessError as exc:
+                # The service can be temporarily masked when booting, so
+                # skip that class of errors.
+                ch_core.hookenv.log('Failed to restart MDS service: %s' %
+                                    str(exc))
+
+
+@reactive.when_none('charm.paused', 'is-update-status-hook')
+@reactive.when('ceph-mds.connected')
+def storage_ceph_connected(ceph):
+    ceph_mds = reactive.endpoint_from_flag('ceph-mds.connected')
+    ceph_mds.announce_mds_name()
+    service = service_name()
+    weight = config('ceph-pool-weight')
+    replicas = config('ceph-osd-replication-count')
+
+    if config('rbd-pool-name'):
+        pool_name = config('rbd-pool-name')
+    else:
+        pool_name = "{}_data".format(service)
+
+    # The '_' rather than '-' in the default pool name
+    # maintains consistency with previous versions of the
+    # charm, but is inconsistent with the ceph-client charms.
+    metadata_pool_name = (
+        config('metadata-pool') or
+        "{}_metadata".format(service)
+    )
+    # Metadata sizing is approximately 20% of overall data weight
+    # https://ceph.io/planet/cephfs-ideal-pg-ratio-between-metadata-and-data-pools/
+    metadata_weight = weight * 0.20
+    # Resize data pool weight to accommodate metadata weight
+    weight = weight - metadata_weight
+    extra_pools = []
+
+    bluestore_compression = None
+    with charm.provide_charm_instance() as cephfs_charm:
+        # TODO: move this whole method into the charm class and add to the
+        # common pool creation logic in charms.openstack. For now we reuse
+        # the common bluestore compression wrapper here.
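+        # Editor's note: the wrapper referenced above,
+        # _get_bluestore_compression(), validates the bluestore-compression-*
+        # charm options and returns them as keyword arguments for the pool
+        # creation requests below; invalid combinations surface as the
+        # ValueError handled here.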
+ try: + bluestore_compression = cephfs_charm._get_bluestore_compression() + except ValueError as e: + ch_core.hookenv.log('Invalid value(s) provided for Ceph BlueStore ' + 'compression: "{}"' + .format(str(e))) + + if config('pool-type') == 'erasure-coded': + # General EC plugin config + plugin = config('ec-profile-plugin') + technique = config('ec-profile-technique') + device_class = config('ec-profile-device-class') + bdm_k = config('ec-profile-k') + bdm_m = config('ec-profile-m') + # LRC plugin config + bdm_l = config('ec-profile-locality') + crush_locality = config('ec-profile-crush-locality') + # SHEC plugin config + bdm_c = config('ec-profile-durability-estimator') + # CLAY plugin config + bdm_d = config('ec-profile-helper-chunks') + scalar_mds = config('ec-profile-scalar-mds') + # Weight for EC pool + ec_pool_weight = config('ec-pool-weight') + # Profile name + profile_name = ( + config('ec-profile-name') or "{}-profile".format(service) + ) + # Create erasure profile + ceph_mds.create_erasure_profile( + name=profile_name, + k=bdm_k, m=bdm_m, + lrc_locality=bdm_l, + lrc_crush_locality=crush_locality, + shec_durability_estimator=bdm_c, + clay_helper_chunks=bdm_d, + clay_scalar_mds=scalar_mds, + device_class=device_class, + erasure_type=plugin, + erasure_technique=technique + ) + + # Create EC data pool + ec_pool_name = 'ec_{}'.format(pool_name) + + # NOTE(fnordahl): once we deprecate Python 3.5 support we can do + # the unpacking of the BlueStore compression arguments as part of + # the function arguments. Until then we need to build the dict + # prior to the function call. + kwargs = { + 'name': ec_pool_name, + 'erasure_profile': profile_name, + 'weight': ec_pool_weight, + 'app_name': ceph_mds.ceph_pool_app_name, + 'allow_ec_overwrites': True, + } + if bluestore_compression: + kwargs.update(bluestore_compression) + ceph_mds.create_erasure_pool(**kwargs) + + # NOTE(fnordahl): once we deprecate Python 3.5 support we can do + # the unpacking of the BlueStore compression arguments as part of + # the function arguments. Until then we need to build the dict + # prior to the function call. + kwargs = { + 'name': pool_name, + 'weight': weight, + 'app_name': ceph_mds.ceph_pool_app_name, + } + if bluestore_compression: + kwargs.update(bluestore_compression) + ceph_mds.create_replicated_pool(**kwargs) + ceph_mds.create_replicated_pool( + name=metadata_pool_name, + weight=metadata_weight, + app_name=ceph_mds.ceph_pool_app_name + ) + extra_pools = [ec_pool_name] + else: + # NOTE(fnordahl): once we deprecate Python 3.5 support we can do + # the unpacking of the BlueStore compression arguments as part of + # the function arguments. Until then we need to build the dict + # prior to the function call. 
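+        # Editor's note: this is the default (replicated) pool-type branch;
+        # the data and metadata pools are both created as replicated pools
+        # with the same replica count, using the weight split computed above.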
+ kwargs = { + 'name': pool_name, + 'replicas': replicas, + 'weight': weight, + 'app_name': ceph_mds.ceph_pool_app_name, + } + if bluestore_compression: + kwargs.update(bluestore_compression) + ceph_mds.create_replicated_pool(**kwargs) + ceph_mds.create_replicated_pool( + name=metadata_pool_name, + replicas=replicas, + weight=metadata_weight, + app_name=ceph_mds.ceph_pool_app_name) + ceph_mds.request_cephfs(service, extra_pools=extra_pools) + # Must have a current request thanks to the call above + rq = ceph_mds.get_current_request() + rq.add_op({ + 'op': 'create-cephfs-client', + 'fs_name': service, + 'client_id': '{}-client'.format(service), + 'path': "/", + 'perms': 'rw', + }) + ceph_mds.send_request_if_needed(rq) + + +@reactive.when_none('charm.paused', 'is-update-status-hook') +@reactive.when('cephfs.configured', 'ceph-mds.pools.available', + 'cephfs-share.available') +def cephfs_share_available(): + cephfs_share = reactive.endpoint_from_flag('cephfs-share.available') + ceph_mds = reactive.endpoint_from_flag('ceph-mds.pools.available') + service = application_name() + if is_leader(): + response_key = ceph.get_broker_rsp_key() + # After the `create-cephfs-client` request completes, the + # databag must contain the generated key for that user. + key = ceph_mds.all_joined_units.received[response_key]["key"] + + cephfs_share.set_share(share_info={ + "fsid": ceph_mds.fsid, + "name": service, + "path": "/", + "monitor_hosts": ceph_mds.mon_hosts(), + }, auth_info={ + "username": '{}-client'.format(service), + "key": key + }) diff --git a/ceph-fs/src/templates/ceph.conf b/ceph-fs/src/templates/ceph.conf new file mode 100644 index 00000000..cd2725cc --- /dev/null +++ b/ceph-fs/src/templates/ceph.conf @@ -0,0 +1,36 @@ + +[global] +auth cluster required = {{ ceph_mds.auth }} +auth service required = {{ ceph_mds.auth }} +auth client required = {{ ceph_mds.auth }} +keyring = /etc/ceph/$cluster.$name.keyring +mon host = {{ ceph_mds.monitors }} +fsid = {{ ceph_mds.fsid }} + +log to syslog = {{ options.use_syslog }} +err to syslog = {{ options.use_syslog }} +clog to syslog = {{ options.use_syslog }} +mon cluster log to syslog = {{ options.use_syslog }} +debug mon = {{ options.loglevel }}/5 +debug osd = {{ options.loglevel }}/5 + +{% if options.networks %} +public network = {{ options.networks|join(',') }} +{%- endif %} +{%- if options.public_addr %} +public addr = {{ options.public_addr }} +{%- endif %} + +[client] +log file = /var/log/ceph.log + +[mds] +keyring = /var/lib/ceph/mds/$cluster-$id/keyring +mds cache memory limit = {{ options.mds_cache_memory_limit }} +mds cache reservation = {{ options.mds_cache_reservation }} +mds health cache threshold = {{ options.mds_health_cache_threshold }} + +[mds.{{ options.mds_name }}] +host = {{ options.hostname }} + + diff --git a/ceph-fs/src/test-requirements.txt b/ceph-fs/src/test-requirements.txt new file mode 100644 index 00000000..43248e4c --- /dev/null +++ b/ceph-fs/src/test-requirements.txt @@ -0,0 +1,50 @@ +# This file is managed centrally by release-tools and should not be modified +# within individual charm repos. 
See the 'global' dir contents for available +# choices of *requirements.txt files for OpenStack Charms: +# https://github.com/openstack-charmers/release-tools + +# NOTE: newer versions of cryptography require a Rust compiler to build, +# see +# * https://github.com/openstack-charmers/zaza/issues/421 +# * https://mail.python.org/pipermail/cryptography-dev/2021-January/001003.html +# +cryptography<3.4 + +requests>=2.18.4 + +stestr>=2.2.0 + +# Dependency of stestr. Workaround for +# https://github.com/mtreinish/stestr/issues/145 +cliff<3.0.0 + +# Dependencies of stestr. Newer versions use keywords that didn't exist in +# python 3.5 yet (e.g. "ModuleNotFoundError") +importlib-metadata<3.0.0; python_version < '3.6' +importlib-resources<3.0.0; python_version < '3.6' + +# Some Zuul nodes sometimes pull newer versions of these dependencies which +# dropped support for python 3.5: +osprofiler<2.7.0;python_version<'3.6' +stevedore<1.31.0;python_version<'3.6' +debtcollector<1.22.0;python_version<'3.6' +oslo.utils<=3.41.0;python_version<'3.6' + +coverage>=4.5.2 +pyudev # for ceph-* charm unit tests (need to fix the ceph-* charm unit tests/mocking) +git+https://github.com/openstack-charmers/zaza.git#egg=zaza +git+https://github.com/openstack-charmers/zaza-openstack-tests.git#egg=zaza.openstack + +# Needed for charm-glance: +git+https://opendev.org/openstack/tempest.git#egg=tempest;python_version>='3.8' +tempest<30.0.0;python_version<'3.8' and python_version >= '3.6' +tempest<24.0.0;python_version<'3.6' + +croniter # needed for charm-rabbitmq-server unit tests + +# icey: pyopenssl 22 introduces a requirement on newer OpenSSL which causes test +# failures. Pin pyopenssl to resolve the failure. +pyopenssl<=22.0.0 + +pydantic < 2 +cosl diff --git a/ceph-fs/src/tests/bundles/focal-yoga.yaml b/ceph-fs/src/tests/bundles/focal-yoga.yaml new file mode 100644 index 00000000..718084ba --- /dev/null +++ b/ceph-fs/src/tests/bundles/focal-yoga.yaml @@ -0,0 +1,44 @@ +variables: + openstack-origin: &openstack-origin cloud:focal-yoga + +local_overlay_enabled: False + +series: &series focal + +applications: + ubuntu: # used to test mounts + charm: ch:ubuntu + num_units: 2 + ceph-fs: + charm: ../../../ceph-fs.charm + num_units: 1 + options: + source: *openstack-origin + pool-type: erasure-coded + ec-profile-k: 4 + ec-profile-m: 2 + + ceph-osd: + charm: ch:ceph-osd + num_units: 6 + storage: + osd-devices: 'cinder,10G' + options: + osd-devices: '/dev/test-non-existent' + source: *openstack-origin + channel: latest/edge + + ceph-mon: + charm: ch:ceph-mon + num_units: 3 + options: + monitor-count: '3' + source: *openstack-origin + channel: latest/edge + +relations: + - - 'ceph-mon:mds' + - 'ceph-fs:ceph-mds' + + - - 'ceph-osd:mon' + - 'ceph-mon:osd' diff --git a/ceph-fs/src/tests/bundles/jammy-antelope.yaml b/ceph-fs/src/tests/bundles/jammy-antelope.yaml new file mode 100644 index 00000000..dfab58f3 --- /dev/null +++ b/ceph-fs/src/tests/bundles/jammy-antelope.yaml @@ -0,0 +1,44 @@ +variables: + openstack-origin: &openstack-origin cloud:jammy-antelope + +local_overlay_enabled: False + +series: &series jammy + +applications: + ubuntu: # used to test mounts + charm: ch:ubuntu + num_units: 2 + ceph-fs: + charm: ../../../ceph-fs.charm + num_units: 1 + options: + source: *openstack-origin + pool-type: erasure-coded + ec-profile-k: 4 + ec-profile-m: 2 + + ceph-osd: + charm: ch:ceph-osd + num_units: 6 + storage: + osd-devices: 'cinder,10G' + options: + osd-devices: '/dev/test-non-existent' + source: *openstack-origin + channel: 
reef/edge + + ceph-mon: + charm: ch:ceph-mon + num_units: 3 + options: + monitor-count: '3' + source: *openstack-origin + channel: reef/edge + +relations: + - - 'ceph-mon:mds' + - 'ceph-fs:ceph-mds' + + - - 'ceph-osd:mon' + - 'ceph-mon:osd' diff --git a/ceph-fs/src/tests/bundles/jammy-bobcat.yaml b/ceph-fs/src/tests/bundles/jammy-bobcat.yaml new file mode 100644 index 00000000..09ab5319 --- /dev/null +++ b/ceph-fs/src/tests/bundles/jammy-bobcat.yaml @@ -0,0 +1,44 @@ +variables: + openstack-origin: &openstack-origin cloud:jammy-bobcat + +local_overlay_enabled: False + +series: &series jammy + +applications: + ubuntu: # used to test mounts + charm: ch:ubuntu + num_units: 2 + ceph-fs: + charm: ../../../ceph-fs.charm + num_units: 1 + options: + source: *openstack-origin + pool-type: erasure-coded + ec-profile-k: 4 + ec-profile-m: 2 + + ceph-osd: + charm: ch:ceph-osd + num_units: 6 + storage: + osd-devices: 'cinder,10G' + options: + osd-devices: '/dev/test-non-existent' + source: *openstack-origin + channel: latest/edge + + ceph-mon: + charm: ch:ceph-mon + num_units: 3 + options: + monitor-count: '3' + source: *openstack-origin + channel: latest/edge + +relations: + - - 'ceph-mon:mds' + - 'ceph-fs:ceph-mds' + + - - 'ceph-osd:mon' + - 'ceph-mon:osd' diff --git a/ceph-fs/src/tests/bundles/jammy-caracal.yaml b/ceph-fs/src/tests/bundles/jammy-caracal.yaml new file mode 100644 index 00000000..a1bbd92c --- /dev/null +++ b/ceph-fs/src/tests/bundles/jammy-caracal.yaml @@ -0,0 +1,61 @@ +variables: + openstack-origin: &openstack-origin cloud:jammy-caracal + +series: &series jammy + +machines: + '0': + constraints: cores=2 mem=4G root-disk=25G virt-type=virtual-machine + '1': + constraints: cores=2 mem=4G root-disk=25G virt-type=virtual-machine + '2': + constraints: cores=2 mem=4G root-disk=25G virt-type=virtual-machine + '3': + '4': + '5': + +applications: + ceph-fs: + charm: ch:ceph-fs + channel: latest/edge + num_units: 1 + options: + source: *openstack-origin + pool-type: erasure-coded + ec-profile-k: 4 + ec-profile-m: 2 + to: + - '2' + + ceph-osd: + charm: ch:ceph-osd + num_units: 3 + storage: + osd-devices: 'loop,10G' + options: + osd-devices: '/dev/test-non-existent' + source: *openstack-origin + channel: latest/edge + to: + - '0' + - '1' + - '2' + + ceph-mon: + charm: ch:ceph-mon + num_units: 3 + options: + monitor-count: '3' + source: *openstack-origin + channel: latest/edge + to: + - '3' + - '4' + - '5' + +relations: + - - 'ceph-mon:mds' + - 'ceph-fs:ceph-mds' + + - - 'ceph-osd:mon' + - 'ceph-mon:osd' diff --git a/ceph-fs/src/tests/bundles/jammy-yoga.yaml b/ceph-fs/src/tests/bundles/jammy-yoga.yaml new file mode 100644 index 00000000..2a428f1d --- /dev/null +++ b/ceph-fs/src/tests/bundles/jammy-yoga.yaml @@ -0,0 +1,44 @@ +variables: + openstack-origin: &openstack-origin distro + +local_overlay_enabled: False + +series: &series jammy + +applications: + ubuntu: # used to test mounts + charm: ch:ubuntu + num_units: 2 + ceph-fs: + charm: ../../../ceph-fs.charm + num_units: 1 + options: + source: *openstack-origin + pool-type: erasure-coded + ec-profile-k: 4 + ec-profile-m: 2 + + ceph-osd: + charm: ch:ceph-osd + num_units: 6 + storage: + osd-devices: 'cinder,10G' + options: + osd-devices: '/dev/test-non-existent' + source: *openstack-origin + channel: latest/edge + + ceph-mon: + charm: ch:ceph-mon + num_units: 3 + options: + monitor-count: '3' + source: *openstack-origin + channel: latest/edge + +relations: + - - 'ceph-mon:mds' + - 'ceph-fs:ceph-mds' + + - - 'ceph-osd:mon' + - 'ceph-mon:osd' diff 
--git a/ceph-fs/src/tests/target.py b/ceph-fs/src/tests/target.py new file mode 100644 index 00000000..933f3c50 --- /dev/null +++ b/ceph-fs/src/tests/target.py @@ -0,0 +1,275 @@ +# Copyright 2020 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Encapsulate CephFS testing.""" + +import logging +import json +import subprocess +from tenacity import ( + retry, Retrying, stop_after_attempt, wait_exponential, + retry_if_exception_type, retry_if_result) +import unittest +import zaza +import zaza.model as model +import zaza.openstack.charm_tests.test_utils as test_utils +import zaza.openstack.utilities.generic as zaza_utils + + +class CephFSTests(unittest.TestCase): + """Encapsulate CephFS tests.""" + + mounts_share = False + mount_dir = '/mnt/cephfs' + CEPH_MON = 'ceph-mon' + + def tearDown(self): + """Cleanup after running tests.""" + if self.mounts_share: + for unit in ['ceph-osd/0', 'ceph-osd/1']: + try: + zaza.utilities.generic.run_via_ssh( + unit_name=unit, + cmd='sudo fusermount -u {0} && sudo rmdir {0}'.format( + self.mount_dir)) + except subprocess.CalledProcessError: + logging.warning( + "Failed to cleanup mounts on {}".format(unit)) + + def _mount_share(self, unit_name: str, perform_retry: bool = True): + self._install_dependencies(unit_name) + self._install_keyring(unit_name) + cmd = 'sudo mkdir -p {0} && sudo ceph-fuse {0}'.format( + self.mount_dir) + + if perform_retry: + @retry( + stop=stop_after_attempt(5), + wait=wait_exponential(multiplier=3, min=2, max=10), + retry=retry_if_result(lambda res: res.get('Code') != '0') + ) + def _do_mount(): + logging.info(f"Mounting CephFS on {unit_name}") + res = model.run_on_unit(unit_name, cmd) + logging.info(f"Mount result: {res}") + return res + + _do_mount() + else: + model.run_on_unit(unit_name, cmd) + + self.mounts_share = True + + def _install_keyring(self, unit_name: str): + + keyring = model.run_on_leader( + self.CEPH_MON, 'cat /etc/ceph/ceph.client.admin.keyring')['Stdout'] + config = model.run_on_leader( + self.CEPH_MON, 'cat /etc/ceph/ceph.conf')['Stdout'] + commands = [ + 'sudo mkdir -p /etc/ceph', + "echo '{}' | sudo tee /etc/ceph/ceph.conf".format(config), + "echo '{}' | " + 'sudo tee /etc/ceph/ceph.client.admin.keyring'.format(keyring) + ] + for cmd in commands: + zaza.utilities.generic.run_via_ssh( + unit_name=unit_name, + cmd=cmd) + + def _install_dependencies(self, unit: str): + zaza.utilities.generic.run_via_ssh( + unit_name=unit, + cmd='sudo apt-get install -yq ceph-fuse') + + @classmethod + def setUpClass(cls): + """Run class setup for running tests.""" + super(CephFSTests, cls).setUpClass() + + @retry( + stop=stop_after_attempt(5), + wait=wait_exponential(multiplier=3, min=2, max=10)) + def _write_testing_file_on_instance(self, instance_name: str): + zaza.utilities.generic.run_via_ssh( + unit_name=instance_name, + cmd='echo -n "test" | sudo tee {}/test'.format(self.mount_dir)) + + @retry( + stop=stop_after_attempt(5), + wait=wait_exponential(multiplier=3, min=2, max=10)) + def 
_verify_testing_file_on_instance(self, instance_name: str): + output = zaza.model.run_on_unit( + instance_name, 'sudo cat {}/test'.format(self.mount_dir))['Stdout'] + self.assertEqual('test', output.strip()) + + def test_cephfs_share(self): + """Test that CephFS shares can be accessed on two instances. + + 1. Spawn two servers + 2. mount it on both + 3. write a file on one + 4. read it on the other + 5. profit + """ + self._mount_share('ceph-osd/0') + self._mount_share('ceph-osd/1') + + self._write_testing_file_on_instance('ceph-osd/0') + self._verify_testing_file_on_instance('ceph-osd/1') + + def test_conf(self): + """Test ceph to ensure juju config options are properly set.""" + self.TESTED_UNIT = 'ceph-fs/0' + + def _get_conf(): + """get/parse ceph daemon response into dict. + + :returns dict: Current configuration of the Ceph MDS daemon + :rtype: dict + """ + cmd = "sudo ceph daemon mds.$HOSTNAME config show" + conf = model.run_on_unit(self.TESTED_UNIT, cmd) + return json.loads(conf['Stdout']) + + @retry(wait=wait_exponential(multiplier=1, min=4, max=10), + stop=stop_after_attempt(10)) + def _change_conf_check(mds_config): + """Change configs, then assert to ensure config was set. + + Doesn't return a value. + """ + model.set_application_config('ceph-fs', mds_config) + results = _get_conf() + self.assertEqual( + results['mds_cache_memory_limit'], + mds_config['mds-cache-memory-limit']) + self.assertAlmostEqual( + float(results['mds_cache_reservation']), + float(mds_config['mds-cache-reservation'])) + self.assertAlmostEqual( + float(results['mds_health_cache_threshold']), + float(mds_config['mds-health-cache-threshold'])) + + # ensure defaults are set + mds_config = {'mds-cache-memory-limit': '4294967296', + 'mds-cache-reservation': '0.05', + 'mds-health-cache-threshold': '1.5'} + _change_conf_check(mds_config) + + # change defaults + mds_config = {'mds-cache-memory-limit': '8589934592', + 'mds-cache-reservation': '0.10', + 'mds-health-cache-threshold': '2'} + _change_conf_check(mds_config) + + # Restore config to keep tests idempotent + mds_config = {'mds-cache-memory-limit': '4294967296', + 'mds-cache-reservation': '0.05', + 'mds-health-cache-threshold': '1.5'} + _change_conf_check(mds_config) + + +class CharmOperationTest(test_utils.BaseCharmTest): + """CephFS Charm operation tests.""" + + def test_pause_resume(self): + """Run pause and resume tests. + + Pause service and check services are stopped, then resume and check + they are started. + """ + services = ['ceph-mds'] + with self.pause_resume(services): + logging.info('Testing pause resume (services="{}")' + .format(services)) + + +class CephKeyRotationTests(test_utils.BaseCharmTest): + """Tests for the rotate-key action.""" + + def _get_all_keys(self, unit, entity_filter): + cmd = 'sudo ceph auth ls' + result = model.run_on_unit(unit, cmd) + # Don't use json formatting, as it's buggy upstream. + data = result['Stdout'].split() + ret = set() + + for ix, line in enumerate(data): + # Structure: + # $ENTITY + # key: + # key contents + # That's why we need to move one position ahead. 
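+            # Editor's note: ``data`` is the whitespace-split output of
+            # 'ceph auth ls', so data[ix - 1] is the entity name and
+            # data[ix + 1] is the key value itself.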
+ if 'key:' in line and entity_filter(data[ix - 1]): + ret.add((data[ix - 1], data[ix + 1])) + return ret + + def _check_key_rotation(self, entity, unit): + def entity_filter(name): + return name.startswith(entity) + + old_keys = self._get_all_keys(unit, entity_filter) + action_obj = model.run_action( + unit_name=unit, + action_name='rotate-key', + action_params={'entity': entity} + ) + zaza_utils.assertActionRanOK(action_obj) + # NOTE(lmlg): There's a nasty race going on here. Essentially, + # since this action involves 2 different applications, what + # happens is as follows: + # (1) (2) (3) (4) + # ceph-mon rotates key | (idle) | remote-unit rotates key | (idle) + # Between (2) and (3), there's a window where all units are + # idle, _but_ the key hasn't been rotated in the other unit. + # As such, we retry a few times instead of using the + # `wait_for_application_states` interface. + + for attempt in Retrying( + wait=wait_exponential(multiplier=2, max=32), + reraise=True, stop=stop_after_attempt(20), + retry=retry_if_exception_type(AssertionError) + ): + with attempt: + new_keys = self._get_all_keys(unit, entity_filter) + self.assertNotEqual(old_keys, new_keys) + + diff = new_keys - old_keys + self.assertEqual(len(diff), 1) + first = next(iter(diff)) + # Check that the entity matches. The 'entity_filter' + # callable will return a true-like value if it + # matches the type of entity we're after (i.e: 'mgr') + self.assertTrue(entity_filter(first[0])) + + def _get_fs_client(self, unit): + def _filter_fs(name): + return (name.startswith('mds.') and + name not in ('mds.ceph-fs', 'mds.None')) + + ret = self._get_all_keys(unit, _filter_fs) + if not ret: + return None + return next(iter(ret))[0] + + def test_key_rotate(self): + """Test that rotating the keys actually changes them.""" + unit = 'ceph-mon/0' + fs_svc = self._get_fs_client(unit) + + if fs_svc is not None: + self._check_key_rotation(fs_svc, unit) + else: + logging.info('ceph-fs units present, but no MDS service') diff --git a/ceph-fs/src/tests/tests.yaml b/ceph-fs/src/tests/tests.yaml new file mode 100644 index 00000000..0742bfde --- /dev/null +++ b/ceph-fs/src/tests/tests.yaml @@ -0,0 +1,20 @@ +charm_name: ceph-fs + +gate_bundles: + - jammy-caracal + +smoke_bundles: + - jammy-caracal + +dev_bundles: + - jammy-caracal + +tests: + - zaza.charm_tests.lifecycle.refresh.CharmRefreshAll + - tests.target.CephFSTests + - tests.target.CharmOperationTest + - tests.target.CephKeyRotationTests +target_deploy_status: + ubuntu: + workload-status: active + workload-status-message-prefix: '' diff --git a/ceph-fs/src/tox.ini b/ceph-fs/src/tox.ini new file mode 100644 index 00000000..6f3c7f9c --- /dev/null +++ b/ceph-fs/src/tox.ini @@ -0,0 +1,63 @@ +# Source charm (with zaza): ./src/tox.ini +# This file is managed centrally by release-tools and should not be modified +# within individual charm repos. See the 'global' dir contents for available +# choices of tox.ini for OpenStack Charms: +# https://github.com/openstack-charmers/release-tools + +[tox] +envlist = pep8 +# NOTE: Avoid build/test env pollution by not enabling sitepackages. +sitepackages = False +# NOTE: Avoid false positives by not skipping missing interpreters. +skip_missing_interpreters = False + +[testenv] +# We use tox mainly for virtual environment management for test requirements +# and do not install the charm code as a Python package into that environment. 
+# Ref: https://tox.wiki/en/latest/config.html#skip_install +skip_install = True +setenv = VIRTUAL_ENV={envdir} + PYTHONHASHSEED=0 + TEST_JUJU3=1 + CHARMS_ARTIFACT_DIR={toxinidir}/../.. +allowlist_externals = juju +passenv = + HOME + TERM + CS_* + OS_* + TEST_* +deps = + -r{toxinidir}/test-requirements.txt + +[testenv:pep8] +basepython = python3 +commands = charm-proof + +[testenv:func-noop] +basepython = python3 +commands = + functest-run-suite --help + +[testenv:func] +basepython = python3 +commands = + functest-run-suite --keep-model + +[testenv:func-smoke] +basepython = python3 +commands = + functest-run-suite --keep-model --smoke + +[testenv:func-dev] +basepython = python3 +commands = + functest-run-suite --keep-model --dev + +[testenv:func-target] +basepython = python3 +commands = + functest-run-suite --keep-model --bundle {posargs} + +[testenv:venv] +commands = {posargs} diff --git a/ceph-fs/src/wheelhouse.txt b/ceph-fs/src/wheelhouse.txt new file mode 100644 index 00000000..303c3962 --- /dev/null +++ b/ceph-fs/src/wheelhouse.txt @@ -0,0 +1,11 @@ +netifaces +dnspython3 +ceph_api +pyxattr +psutil + +git+https://github.com/openstack/charms.openstack.git#egg=charms.openstack + +git+https://github.com/openstack/charms.ceph.git#egg=charms.ceph + +git+https://github.com/juju/charm-helpers.git#egg=charmhelpers diff --git a/ceph-fs/test-requirements.txt b/ceph-fs/test-requirements.txt new file mode 100644 index 00000000..35d38c5b --- /dev/null +++ b/ceph-fs/test-requirements.txt @@ -0,0 +1,45 @@ +# This file is managed centrally by release-tools and should not be modified +# within individual charm repos. See the 'global' dir contents for available +# choices of *requirements.txt files for OpenStack Charms: +# https://github.com/openstack-charmers/release-tools + +# NOTE: newer versions of cryptography require a Rust compiler to build, +# see +# * https://github.com/openstack-charmers/zaza/issues/421 +# * https://mail.python.org/pipermail/cryptography-dev/2021-January/001003.html +# +cryptography<3.4 + +requests>=2.18.4 + +stestr>=2.2.0 + +# Dependency of stestr. Workaround for +# https://github.com/mtreinish/stestr/issues/145 +cliff<3.0.0 + +# Dependencies of stestr. Newer versions use keywords that didn't exist in +# python 3.5 yet (e.g. "ModuleNotFoundError") +importlib-metadata<3.0.0; python_version < '3.6' +importlib-resources<3.0.0; python_version < '3.6' + +# Some Zuul nodes sometimes pull newer versions of these dependencies which +# dropped support for python 3.5: +osprofiler<2.7.0;python_version<'3.6' +stevedore<1.31.0;python_version<'3.6' +debtcollector<1.22.0;python_version<'3.6' +oslo.utils<=3.41.0;python_version<'3.6' + +coverage>=4.5.2 +pyudev # for ceph-* charm unit tests (need to fix the ceph-* charm unit tests/mocking) + +# icey: pyopenssl 22 introduces a requirement on newer OpenSSL which causes test +# failures. Pin pyopenssl to resolve the failure. +pyopenssl<=22.0.0 + +pydantic < 2 +cosl + +netifaces +git+https://github.com/openstack/charms.openstack.git#egg=charms.openstack +charms.reactive diff --git a/ceph-fs/tox.ini b/ceph-fs/tox.ini new file mode 100644 index 00000000..291604e4 --- /dev/null +++ b/ceph-fs/tox.ini @@ -0,0 +1,116 @@ +# Source charm: ./tox.ini +# This file is managed centrally by release-tools and should not be modified +# within individual charm repos. 
See the 'global' dir contents for available +# choices of tox.ini for OpenStack Charms: +# https://github.com/openstack-charmers/release-tools + +[tox] +envlist = pep8,py3 +# NOTE: Avoid build/test env pollution by not enabling sitepackages. +sitepackages = False +# NOTE: Avoid false positives by not skipping missing interpreters. +skip_missing_interpreters = False + +[testenv] +setenv = VIRTUAL_ENV={envdir} + PYTHONHASHSEED=0 + TERM=linux + CHARM_LAYERS_DIR={toxinidir}/layers + CHARM_INTERFACES_DIR={toxinidir}/interfaces + JUJU_REPOSITORY={toxinidir}/build + TEST_JUJU3=1 +passenv = + no_proxy + http_proxy + https_proxy + CHARM_INTERFACES_DIR + CHARM_LAYERS_DIR + JUJU_REPOSITORY +allowlist_externals = + charmcraft + bash + tox + {toxinidir}/rename.sh +deps = + -r{toxinidir}/requirements.txt + +[testenv:build] +basepython = python3 +commands = + charmcraft clean + charmcraft -v pack + {toxinidir}/rename.sh + +[testenv:build-reactive] +basepython = python3 +commands = + charm-build --log-level DEBUG --use-lock-file-branches --binary-wheels-from-source -o {toxinidir}/build/builds src {posargs} + +[testenv:add-build-lock-file] +basepython = python3 +commands = + charm-build --log-level DEBUG --write-lock-file -o {toxinidir}/build/builds src {posargs} + +[testenv:py3] +basepython = python3 +deps = + -r{toxinidir}/test-requirements.txt +commands = stestr run --slowest {posargs} + +[testenv:py310] +basepython = python3.10 +deps = + -r{toxinidir}/test-requirements.txt +commands = stestr run --slowest {posargs} + +[testenv:pep8] +basepython = python3 +deps = flake8 +commands = flake8 {posargs} src unit_tests + +[testenv:func-target] +# Hack to get functional tests working in the charmcraft +# world. We should fix this. +basepython = python3 +deps = -r{toxinidir}/src/test-requirements.txt +changedir = {toxinidir}/src +commands = + bash -c "if [ ! -f ../*.charm ]; then echo 'Charm does not exist. Run tox -e build'; exit 1; fi" + tox --version + tox -e func-target {posargs} + +[testenv:cover] +# Technique based heavily upon +# https://github.com/openstack/nova/blob/master/tox.ini +basepython = python3 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt +setenv = + {[testenv]setenv} + PYTHON=coverage run +commands = + coverage erase + stestr run --slowest {posargs} + coverage combine + coverage html -d cover + coverage xml -o cover/coverage.xml + coverage report + +[coverage:run] +branch = True +concurrency = multiprocessing +parallel = True +source = + . +omit = + .tox/* + */charmhelpers/* + unit_tests/* + +[testenv:venv] +basepython = python3 +commands = {posargs} + +[flake8] +# E402 ignore necessary for path append before sys module import in actions +ignore = E402,W503,W504 diff --git a/ceph-fs/unit_tests/__init__.py b/ceph-fs/unit_tests/__init__.py new file mode 100644 index 00000000..3265b909 --- /dev/null +++ b/ceph-fs/unit_tests/__init__.py @@ -0,0 +1,26 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import unittest.mock as mock + +sys.path.append('src') +sys.path.append('src/lib') + +# Mock out charmhelpers so that we can test without it. +import charms_openstack.test_mocks # noqa +charms_openstack.test_mocks.mock_charmhelpers() + +sys.modules['dns'] = mock.MagicMock() +sys.modules['dns.resolver'] = mock.MagicMock() diff --git a/ceph-fs/unit_tests/test_actions.py b/ceph-fs/unit_tests/test_actions.py new file mode 100644 index 00000000..21649054 --- /dev/null +++ b/ceph-fs/unit_tests/test_actions.py @@ -0,0 +1,79 @@ +import sys + +sys.path.append('src/actions') +import unittest +from unittest.mock import patch, call, Mock + +__author__ = 'Chris Holcombe ' + +sys.modules['action_set'] = Mock() +sys.modules['action_get'] = Mock() +sys.modules['action_fail'] = Mock() +sys.modules['xattr'] = Mock() +from get_quota import get_quota +from remove_quota import remove_quota +from set_quota import set_quota + + +def action_get_side_effect(*args): + if args[0] == 'max-files': + return 1024 + elif args[0] == 'max-bytes': + return 1024 + elif args[0] == 'directory': + return 'foo' + + +class CephActionsTestCase(unittest.TestCase): + @patch('get_quota.action_fail') + @patch('get_quota.action_set') + @patch('get_quota.action_get') + @patch('get_quota.os') + @patch('get_quota.xattr') + def test_get_quota(self, xattr, os, action_get, action_set, action_fail): + action_get.side_effect = action_get_side_effect + os.path.exists.return_value = True + xattr.getxattr.return_value = "1024" + get_quota() + action_get.assert_has_calls( + [call('max-files'), + call('max-bytes'), + call('directory')]) + action_fail.assert_not_called() + xattr.getxattr.assert_called_with('foo', + 'ceph.quota.max_files') + action_set.assert_called_with({'foo quota': "1024"}) + + @patch('set_quota.action_fail') + @patch('set_quota.action_get') + @patch('set_quota.os') + @patch('set_quota.xattr') + def test_set_quota(self, xattr, os, action_get, action_fail): + action_get.side_effect = action_get_side_effect + os.path.exists.return_value = True + set_quota() + xattr.setxattr.assert_called_with('foo', + 'ceph.quota.max_files', + '1024') + action_get.assert_has_calls( + [call('max-files'), + call('max-bytes'), + call('directory')]) + action_fail.assert_not_called() + + @patch('remove_quota.action_fail') + @patch('remove_quota.action_get') + @patch('remove_quota.os') + @patch('remove_quota.xattr') + def test_remove_quota(self, xattr, os, action_get, action_fail): + action_get.side_effect = action_get_side_effect + os.path.exists.return_value = True + remove_quota() + xattr.setxattr.assert_called_with('foo', + 'ceph.quota.max_files', + '0') + action_get.assert_has_calls( + [call('max-files'), + call('max-bytes'), + call('directory')]) + action_fail.assert_not_called() diff --git a/ceph-fs/unit_tests/test_lib_charm_openstack_ceph_fs.py b/ceph-fs/unit_tests/test_lib_charm_openstack_ceph_fs.py new file mode 100644 index 00000000..a60cbf2e --- /dev/null +++ b/ceph-fs/unit_tests/test_lib_charm_openstack_ceph_fs.py @@ -0,0 +1,91 @@ +# Copyright 2020 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest.mock as mock + +import charms_openstack.test_utils as test_utils + +import charm.openstack.ceph_fs as ceph_fs + + +class TestMitakaCephFsCharm(test_utils.PatchHelper): + + def setUp(self): + super().setUp() + self.patch_release('mitaka') + self.patch('socket.gethostname', name='gethostname') + self.gethostname.return_value = 'somehost' + self.target = ceph_fs.MitakaCephFSCharm() + + def test_packages(self): + # Package list is the only difference between the past version and + # future versions of this charm, see ``TestCephFsCharm`` for the rest + # of the tests + self.assertEqual(self.target.packages, [ + 'ceph-mds', 'gdisk', 'btrfs-tools', 'xfsprogs']) + + +class TestCephFsCharm(test_utils.PatchHelper): + + def setUp(self): + super().setUp() + self.patch_release('ussuri') + self.patch('socket.gethostname', name='gethostname') + self.gethostname.return_value = 'somehost' + self.target = ceph_fs.UssuriCephFSCharm() + + def patch_target(self, attr, return_value=None): + mocked = mock.patch.object(self.target, attr) + self._patches[attr] = mocked + started = mocked.start() + started.return_value = return_value + self._patches_start[attr] = started + setattr(self, attr, started) + + def test___init__(self): + self.assertEqual(self.target.services, [ + 'ceph-mds@somehost']) + self.assertDictEqual(self.target.restart_map, { + '/etc/ceph/ceph.conf': ['ceph-mds@somehost']}) + self.assertEqual(self.target.packages, [ + 'ceph-mds', 'gdisk', 'btrfs-progs', 'xfsprogs']) + + def test_configuration_class(self): + self.assertEqual(self.target.options.hostname, 'somehost') + self.assertEqual(self.target.options.mds_name, 'somehost') + self.patch_target('get_networks') + self.get_networks.return_value = ['fakeaddress'] + self.assertEqual(self.target.options.networks, ['fakeaddress']) + self.patch_object(ceph_fs.ch_core.hookenv, 'config') + self.config.side_effect = lambda x: {'prefer-ipv6': False}.get(x) + self.patch_object(ceph_fs, 'get_ipv6_addr') + self.get_ipv6_addr.return_value = ['2001:db8::fake'] + self.patch_target('get_public_addr') + self.get_public_addr.return_value = '192.0.2.42' + self.assertEqual( + self.target.options.public_addr, + '192.0.2.42') + self.config.side_effect = lambda x: {'prefer-ipv6': True}.get(x) + self.assertEqual( + self.target.options.public_addr, + '2001:db8::fake') + self.patch_target('get_mds_cache') + self.get_mds_cache.return_value = { + 'mds-cache-memory-limit': '4Gi', + 'mds-cache-reservation': 0.05, + 'mds-health-cache-threshold': 1.5} + self.assertEqual(self.target.options.mds_cache, { + 'mds-cache-memory-limit': '4Gi', + 'mds-cache-reservation': 0.05, + 'mds-health-cache-threshold': 1.5}) diff --git a/ceph-fs/unit_tests/test_reactive_ceph_fs.py b/ceph-fs/unit_tests/test_reactive_ceph_fs.py new file mode 100644 index 00000000..e165d8f8 --- /dev/null +++ b/ceph-fs/unit_tests/test_reactive_ceph_fs.py @@ -0,0 +1,156 @@ +# Copyright 2020 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest.mock as mock + +import charm.openstack.ceph_fs as ceph_fs +import reactive.ceph_fs as handlers + +import charms_openstack.test_utils as test_utils + + +class TestRegisteredHooks(test_utils.TestRegisteredHooks): + + def test_hooks(self): + defaults = [ + 'charm.installed', + 'config.changed', + 'config.rendered', + 'upgrade-charm', + 'update-status', + ] + hook_set = { + 'when': { + 'config_changed': ( + 'ceph-mds.pools.available', + ), + 'storage_ceph_connected': ( + 'ceph-mds.connected', + ), + 'cephfs_share_available': ( + 'cephfs.configured', + 'ceph-mds.pools.available', + 'cephfs-share.available', + ), + }, + 'when_none': { + 'config_changed': ( + 'charm.paused', + 'is-update-status-hook', + ), + 'storage_ceph_connected': ( + 'charm.paused', + 'is-update-status-hook', + ), + 'cephfs_share_available': ( + 'charm.paused', + 'is-update-status-hook', + ), + }, + } + # test that the hooks were registered via the reactive.ceph_fs module + self.registered_hooks_test_helper(handlers, hook_set, defaults) + + +class TestCephFSHandlers(test_utils.PatchHelper): + + def setUp(self): + super().setUp() + self.patch_release(ceph_fs.UssuriCephFSCharm.release) + self.target = mock.MagicMock() + self.patch_object(handlers.charm, 'provide_charm_instance', + new=mock.MagicMock()) + self.provide_charm_instance().__enter__.return_value = \ + self.target + self.provide_charm_instance().__exit__.return_value = None + + def test_config_changed(self): + self.patch_object(handlers.reactive, 'endpoint_from_flag') + self.patch_object(handlers.reactive, 'is_flag_set') + self.patch_object(handlers.reactive, 'clear_flag') + self.patch_object(handlers.reactive, 'set_flag') + self.patch_object(handlers.os.path, 'exists') + handlers.os.path.exists.return_value = False + ceph_mds = mock.MagicMock() + ceph_mds.mds_key.return_value = 'fakekey' + self.endpoint_from_flag.return_value = ceph_mds + self.is_flag_set.return_value = False + handlers.config_changed() + self.endpoint_from_flag.assert_called_once_with( + 'ceph-mds.pools.available') + self.target.configure_ceph_keyring.assert_called_once_with('fakekey') + self.target.render_with_interfaces.assert_called_once_with([ceph_mds]) + self.is_flag_set.assert_called_once_with('config.changed.source') + self.set_flag.assert_has_calls([ + mock.call('cephfs.configured'), + mock.call('config.rendered'), + ]) + self.target.install.assert_not_called() + self.target.upgrade_if_available.assert_not_called() + self.is_flag_set.return_value = True + handlers.config_changed() + self.target.install.assert_called_once_with() + self.target.upgrade_if_available.assert_called_once_with([ceph_mds]) + + def test_cephfs_share_available(self): + self.patch_object(handlers.reactive, 'endpoint_from_flag') + handlers.ch_core.hookenv.application_name.return_value = "ceph-fs" + handlers.ceph.get_broker_rsp_key.return_value = 'broker-rsp-ceph-fs-0' + + ceph_mds = mock.MagicMock() + ceph_mds.fsid = "354ca7c4-f10d-11ee-93f8-1f85f87b7845" + ceph_mds.mon_hosts.return_value = [ + "10.5.0.80:6789", "10.5.2.23:6789", "10.5.2.17:6789"] + ceph_mds.all_joined_units.received = { + "auth": "cephx", + "broker-rsp-ceph-fs-0": { + "exit-code": 0, + "key": "AQDvOE5mUfBIKxAAYT73/v7NzwWx2ovLB4nnOg==", + "request-id": "22dd9c7d8c7d392d44866b35219a654006fd90f0"}, + "ceph-public-address": "10.143.60.15", + "fsid": "354ca7c4-f10d-11ee-93f8-1f85f87b7845", + "juju-2ffa43-1_mds_key": + 
"AQDwOE5mmkQ1LBAAVrx4OXWwWM+XmK/KjnJcdA==", + } + + cephfs_share = mock.MagicMock() + + def mock_eff(flag): + if flag == "ceph-mds.pools.available": + return ceph_mds + elif flag == "cephfs-share.available": + return cephfs_share + else: + raise Exception("invalid input") + + self.endpoint_from_flag.side_effect = mock_eff + + handlers.cephfs_share_available() + + cephfs_share.set_share.assert_called_once_with( + share_info={ + "fsid": "354ca7c4-f10d-11ee-93f8-1f85f87b7845", + "name": "ceph-fs", + "path": "/", + "monitor_hosts": [ + "10.5.0.80:6789", + "10.5.2.23:6789", + "10.5.2.17:6789" + ], + }, + auth_info={ + "username": "ceph-fs-client", + "key": "AQDvOE5mUfBIKxAAYT73/v7NzwWx2ovLB4nnOg==" + } + ) diff --git a/ceph-fs/unit_tests/test_utils.py b/ceph-fs/unit_tests/test_utils.py new file mode 100644 index 00000000..06d0b072 --- /dev/null +++ b/ceph-fs/unit_tests/test_utils.py @@ -0,0 +1,116 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import unittest +import os +import yaml + +from unittest.mock import patch + + +def load_config(): + ''' + Walk backwords from __file__ looking for config.yaml, load and return the + 'options' section' + ''' + config = None + f = __file__ + while config is None: + d = os.path.dirname(f) + if os.path.isfile(os.path.join(d, 'config.yaml')): + config = os.path.join(d, 'config.yaml') + break + f = d + + if not config: + logging.error('Could not find config.yaml in any parent directory ' + 'of %s. ' % f) + raise Exception + + return yaml.safe_load(open(config).read())['options'] + + +def get_default_config(): + ''' + Load default charm config from config.yaml return as a dict. + If no default is set in config.yaml, its value is None. 
+ ''' + default_config = {} + config = load_config() + for k, v in config.iteritems(): + if 'default' in v: + default_config[k] = v['default'] + else: + default_config[k] = None + return default_config + + +class CharmTestCase(unittest.TestCase): + + def setUp(self, obj, patches): + super(CharmTestCase, self).setUp() + self.patches = patches + self.obj = obj + self.test_config = TestConfig() + self.test_relation = TestRelation() + self.patch_all() + + def patch(self, method): + _m = patch.object(self.obj, method) + mock = _m.start() + self.addCleanup(_m.stop) + return mock + + def patch_all(self): + for method in self.patches: + setattr(self, method, self.patch(method)) + + +class TestConfig(object): + + def __init__(self): + self.config = get_default_config() + + def get(self, attr=None): + if not attr: + return self.get_all() + try: + return self.config[attr] + except KeyError: + return None + + def get_all(self): + return self.config + + def set(self, attr, value): + if attr not in self.config: + raise KeyError + self.config[attr] = value + + +class TestRelation(object): + + def __init__(self, relation_data={}): + self.relation_data = relation_data + + def set(self, relation_data): + self.relation_data = relation_data + + def get(self, attr=None, unit=None, rid=None): + if attr is None: + return self.relation_data + elif attr in self.relation_data: + return self.relation_data[attr] + return None diff --git a/ceph-iscsi/.gitignore b/ceph-iscsi/.gitignore new file mode 100644 index 00000000..f3c3e4d8 --- /dev/null +++ b/ceph-iscsi/.gitignore @@ -0,0 +1,8 @@ +.tox +**/*.swp +__pycache__ +.stestr/ +lib/* +!lib/README.txt +build +ceph-iscsi.charm diff --git a/ceph-iscsi/.gitmodules b/ceph-iscsi/.gitmodules new file mode 100644 index 00000000..e69de29b diff --git a/ceph-iscsi/.gitreview b/ceph-iscsi/.gitreview new file mode 100644 index 00000000..3525ef61 --- /dev/null +++ b/ceph-iscsi/.gitreview @@ -0,0 +1,6 @@ +[gerrit] +host=review.opendev.org +port=29418 +project=openstack/charm-ceph-iscsi.git + +defaultbranch=stable/squid-jammy diff --git a/ceph-iscsi/.jujuignore b/ceph-iscsi/.jujuignore new file mode 100644 index 00000000..ec8dde9b --- /dev/null +++ b/ceph-iscsi/.jujuignore @@ -0,0 +1,8 @@ +.stestr.conf +.gitmodules +.gitreview +.gitignore +ceph-iscsi.charm +.zuul.yaml +.stestr +unit_tests diff --git a/ceph-iscsi/.stestr.conf b/ceph-iscsi/.stestr.conf new file mode 100644 index 00000000..5fcccaca --- /dev/null +++ b/ceph-iscsi/.stestr.conf @@ -0,0 +1,3 @@ +[DEFAULT] +test_path=./unit_tests +top_dir=./ diff --git a/ceph-iscsi/.zuul.yaml b/ceph-iscsi/.zuul.yaml new file mode 100644 index 00000000..69974080 --- /dev/null +++ b/ceph-iscsi/.zuul.yaml @@ -0,0 +1,5 @@ +- project: + templates: + - openstack-python3-charm-yoga-jobs + - openstack-python3-charm-jobs + - openstack-cover-jobs diff --git a/ceph-iscsi/README.md b/ceph-iscsi/README.md new file mode 100644 index 00000000..b2970374 --- /dev/null +++ b/ceph-iscsi/README.md @@ -0,0 +1,131 @@ +# Overview + +The ceph-iscsi charm deploys the [Ceph iSCSI gateway +service][ceph-iscsi-upstream]. The charm is intended to be used in conjunction +with the [ceph-osd][ceph-osd-charm] and [ceph-mon][ceph-mon-charm] charms. + +# Notice for developers +The functional tests are expected to fail for this charm. This is because the +kernel module needed for the charm to function properly (iscsi_tcp) seems to +no longer be available on the virtual machines that are deployed by default +on LXD. 
+ +In order to debug this issue, developers should install the `linux-modules` and +`linux-modules-extra` packages that are native on the running kernel, and then +load the module and test that it's running (`modprobe iscsi_tcp` and `lsmod | grep iscsi`). + +# Usage + +## Configuration + +See file `config.yaml` for the full list of options, along with their +descriptions and default values. + +## Ceph BlueStore compression + +This charm supports [BlueStore inline compression][ceph-bluestore-compression] +for its associated Ceph storage pool(s). The feature is enabled by assigning a +compression mode via the `bluestore-compression-mode` configuration option. The +default behaviour is to disable compression. + +The efficiency of compression depends heavily on what type of data is stored +in the pool and the charm provides a set of configuration options to fine tune +the compression behaviour. + +> **Note**: BlueStore compression is supported starting with Ceph Mimic. + +## Deployment + +We are assuming a pre-existing Ceph cluster. + +To provide multiple data paths to clients deploy exactly two ceph-iscsi units: + + juju deploy -n 2 ceph-iscsi + +Then add a relation to the ceph-mon application: + + juju add-relation ceph-iscsi:ceph-client ceph-mon:client + +**Notes**: + +* Deploying four ceph-iscsi units is theoretically possible but it is not an + officially supported configuration. + +* The ceph-iscsi application cannot be containerised. + +* Co-locating ceph-iscsi with another application is only supported with + ceph-osd, although doing so with other applications may still work. + +## Actions + +This section covers Juju [actions][juju-docs-actions] supported by the charm. +Actions allow specific operations to be performed on a per-unit basis. To +display action descriptions run `juju actions ceph-iscsi`. If the charm is not +deployed then see file `actions.yaml`. + +* `add-trusted-ip` +* `create-target` +* `pause` +* `resume` +* `security-checklist` + +To display action descriptions run `juju actions ceph-iscsi`. If the charm is +not deployed then see file `actions.yaml`. + +## iSCSI target management + +### Create an iSCSI target + +An iSCSI target can be created easily with the charm's `create-target` action: + + juju run-action --wait ceph-iscsi/0 create-target \ + client-initiatorname=iqn.1993-08.org.debian:01:aaa2299be916 \ + client-username=myiscsiusername \ + client-password=myiscsipassword \ + image-size=5G \ + image-name=small \ + pool-name=images + +In the above, all option values are generally user-defined with the exception +of the initiator name (`client-initiatorname`). An iSCSI initiator is +essentially an iSCSI client and so its name is client-dependent. Some +initiators may impose policy on credentials (`client-username` and +`client-password`). + +> **Important**: The underlying machines for the ceph-iscsi units must have + internal name resolution working (i.e. the machines must be able to resolve + each other's hostnames). + +### The `gwcli` utility + +The management of targets, beyond the target-creation action described above, +can be accomplished via the `gwcli` utility. This CLI tool has its own shell, +and is available from any ceph-iscsi unit: + + juju ssh ceph-iscsi/1 + sudo gwcli + /> help + +## VMware integration + +Ceph can be used to back iSCSI targets for VMware initiators. This is +documented under [VMware integration][ceph-docs-vmware-integration] in the +[Charmed Ceph documentation][ceph-docs]. 
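+
+## Example: connecting a Linux initiator
+
+As an illustrative sketch only (the exact steps depend on the initiator's
+operating system and policy), a Linux client with the `open-iscsi` package
+installed could discover and log in to a target created by the
+`create-target` action, using the CHAP credentials supplied to that action.
+Here `<gateway-address>` and `<iqn>` are placeholders:
+
+    sudo iscsiadm -m discovery -t sendtargets -p <gateway-address>
+    sudo iscsiadm -m node -T <iqn> -p <gateway-address> \
+        --op update -n node.session.auth.authmethod -v CHAP
+    sudo iscsiadm -m node -T <iqn> -p <gateway-address> \
+        --op update -n node.session.auth.username -v myiscsiusername
+    sudo iscsiadm -m node -T <iqn> -p <gateway-address> \
+        --op update -n node.session.auth.password -v myiscsipassword
+    sudo iscsiadm -m node -T <iqn> -p <gateway-address> --login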
+
+# Bugs
+
+Please report bugs on [Launchpad][lp-bugs-charm-ceph-iscsi].
+
+For general charm questions refer to the [OpenStack Charm Guide][cg].
+
+[ceph-mon-charm]: https://jaas.ai/ceph-mon
+[ceph-osd-charm]: https://jaas.ai/ceph-osd
+[cg]: https://docs.openstack.org/charm-guide
+[cdg]: https://docs.openstack.org/project-deploy-guide/charm-deployment-guide
+[ceph-docs-vmware-integration]: https://ubuntu.com/ceph/docs/integration-vmware
+[ceph-docs]: https://ubuntu.com/ceph/docs
+[juju-docs-actions]: https://jaas.ai/docs/actions
+[ceph-iscsi-upstream]: https://docs.ceph.com/docs/master/rbd/iscsi-overview/
+[lp-bugs-charm-ceph-iscsi]: https://bugs.launchpad.net/charm-ceph-iscsi/+filebug
diff --git a/ceph-iscsi/actions.yaml b/ceph-iscsi/actions.yaml
new file mode 100644
index 00000000..87205b90
--- /dev/null
+++ b/ceph-iscsi/actions.yaml
@@ -0,0 +1,65 @@
+pause:
+  description: |
+    Pause ceph-iscsi services.
+    If the ceph-iscsi deployment is clustered using the hacluster charm, the
+    corresponding hacluster unit on the node must first be paused as well.
+    Not doing so may lead to an interruption of service.
+resume:
+  description: |
+    Resume ceph-iscsi services.
+    If the ceph-iscsi deployment is clustered using the hacluster charm, the
+    corresponding hacluster unit on the node must be resumed as well.
+security-checklist:
+  description: Validate the running configuration against the OpenStack security guides checklist
+add-trusted-ip:
+  description: "Add IP addresses that are permitted to talk to the API"
+  params:
+    ips:
+      type: string
+      default: ''
+      description: "Space-separated list of trusted IPs"
+    overwrite:
+      type: boolean
+      default: False
+      description: "If False, append the IPs to the existing list"
+  required:
+    - ips
+create-target:
+  description: "Create a new iSCSI target"
+  params:
+    gateway-units:
+      type: string
+      description: "Space-separated list of gateway units, e.g. 'ceph-iscsi/0 ceph-iscsi/1'"
+    iqn:
+      type: string
+      description: "iSCSI Qualified Name"
+    image-size:
+      type: string
+      description: "Target size"
+    image-name:
+      type: string
+      default: disk_1
+      description: "Image name"
+    rbd-pool-name:
+      type: string
+      default: ""
+      description: "Name of the Ceph pool to use to back the target"
+    ec-rbd-metadata-pool:
+      type: string
+      default: ""
+      description: "Name of the metadata pool to use with rbd-pool-name if rbd-pool-name is erasure coded"
+    client-initiatorname:
+      type: string
+      description: "The initiator name of the client that will mount the target"
+    client-username:
+      type: string
+      description: "The CHAP username to be created for the client"
+    client-password:
+      type: string
+      description: "The CHAP password to be created for the client"
+  required:
+    - image-size
+    - image-name
+    - client-initiatorname
+    - client-username
+    - client-password
diff --git a/ceph-iscsi/build-requirements.txt b/ceph-iscsi/build-requirements.txt
new file mode 100644
index 00000000..b6d2452f
--- /dev/null
+++ b/ceph-iscsi/build-requirements.txt
@@ -0,0 +1,7 @@
+# NOTES(lourot):
+# * We don't install charmcraft via pip anymore because it spins up a
+#   container and copies the system's charmcraft snap into it anyway, so the
+#   charmcraft snap is necessary on the system regardless.
+# * `tox -e build` successfully validated with charmcraft 1.2.1
+
+cffi==1.14.6; python_version < '3.6'  # cffi 1.15.0 drops support for py35.
diff --git a/ceph-iscsi/charmcraft.yaml b/ceph-iscsi/charmcraft.yaml new file mode 100644 index 00000000..6eca8b72 --- /dev/null +++ b/ceph-iscsi/charmcraft.yaml @@ -0,0 +1,36 @@ +type: charm + +parts: + charm: + after: + - update-certificates + charm-python-packages: + # NOTE(lourot): see + # * https://github.com/canonical/charmcraft/issues/551 + # * https://github.com/canonical/charmcraft/issues/632 + - setuptools < 58 + build-packages: + - git + + update-certificates: + plugin: nil + # See https://github.com/canonical/charmcraft/issues/658 + override-build: | + apt update + apt install -y ca-certificates + update-ca-certificates + +base: ubuntu@22.04 +platforms: + amd64: + build-on: amd64 + build-for: amd64 + arm64: + build-on: arm64 + build-for: arm64 + ppc64el: + build-on: ppc64el + build-for: ppc64el + s390x: + build-on: s390x + build-for: s390x diff --git a/ceph-iscsi/config.yaml b/ceph-iscsi/config.yaml new file mode 100644 index 00000000..4274c8b6 --- /dev/null +++ b/ceph-iscsi/config.yaml @@ -0,0 +1,242 @@ +options: + loglevel: + default: 1 + type: int + description: Mon and OSD debug level. Max is 20. + source: + type: string + default: caracal + description: | + Optional configuration to support use of additional sources such as: + - ppa:myteam/ppa + - cloud:trusty-proposed/kilo + - http://my.archive.com/ubuntu main + The last option should be used in conjunction with the key configuration + option. + Note that a minimum ceph version of 0.48.2 is required for use with this + charm which is NOT provided by the packages in the main Ubuntu archive + for precise but is provided in the Ubuntu cloud archive. + key: + type: string + default: + description: | + Key ID to import to the apt keyring to support use with arbitary source + configuration from outside of Launchpad archives or PPA's. + use-syslog: + type: boolean + default: False + description: | + If set to True, supporting services will log to syslog. + ceph-public-network: + type: string + default: + description: | + The IP address and netmask of the public (front-side) network (e.g., + 192.168.0.0/24). + If multiple networks are to be used, a space-delimited list of a.b.c.d/x + can be provided. + gateway-metadata-pool: + type: string + default: iscsi + description: | + RBD pool to use to store gateway configuration. + prefer-ipv6: + type: boolean + default: False + description: | + If True enables IPv6 support. The charm will expect network interfaces + to be configured with an IPv6 address. If set to False (default) IPv4 + is expected. + + NOTE: these charms do not currently support IPv6 privacy extension. In + order for this charm to function correctly, the privacy extension must be + disabled and a non-temporary address must be configured/available on + your network interface. + ceph-osd-replication-count: + type: int + default: 3 + description: | + This value dictates the number of replicas ceph must make of any + object it stores within the images rbd pool. Of course, this only + applies if using Ceph as a backend store. Note that once the images + rbd pool has been created, changing this value will not have any + effect (although it can be changed in ceph by manually configuring + your ceph cluster). + ceph-pool-weight: + type: int + default: 5 + description: | + Defines a relative weighting of the pool as a percentage of the total + amount of data in the Ceph cluster. This effectively weights the number + of placement groups for the pool created to be appropriately portioned + to the amount of data expected. 
+      For example, if the compute images
+      for the OpenStack compute instances are expected to take up 20% of the
+      overall configuration then this value would be specified as 20. Note -
+      it is important to choose an appropriate value for the pool weight as
+      this directly affects the number of placement groups which will be
+      created for the pool. The number of placement groups for a pool can
+      only be increased, never decreased - so it is important to identify the
+      percent of data that will likely reside in the pool.
+  rbd-pool-name:
+    default:
+    type: string
+    description: |
+      Optionally specify an existing pool that the gateway should map to.
+  pool-type:
+    type: string
+    default: replicated
+    description: |
+      Ceph pool type to use for storage - valid values include 'replicated'
+      and 'erasure-coded'.
+  ec-profile-name:
+    type: string
+    default:
+    description: |
+      Name for the EC profile to be created for the EC pools. If not defined
+      a profile name will be generated based on the name of the pool used by
+      the application.
+  ec-rbd-metadata-pool:
+    type: string
+    default:
+    description: |
+      Name of the metadata pool to be created (for RBD use-cases). If not
+      defined a metadata pool name will be generated based on the name of
+      the data pool used by the application. The metadata pool is always
+      replicated, not erasure coded.
+  ec-profile-k:
+    type: int
+    default: 1
+    description: |
+      Number of data chunks that will be used for EC data pool. K+M factors
+      should never be greater than the number of available zones (or hosts)
+      for balancing.
+  ec-profile-m:
+    type: int
+    default: 2
+    description: |
+      Number of coding chunks that will be used for EC data pool. K+M factors
+      should never be greater than the number of available zones (or hosts)
+      for balancing.
+  ec-profile-locality:
+    type: int
+    default:
+    description: |
+      (lrc plugin - l) Group the coding and data chunks into sets of size l.
+      For instance, for k=4 and m=2, when l=3 two groups of three are created.
+      Each set can be recovered without reading chunks from another set. Note
+      that using the lrc plugin does incur more raw storage usage than isa or
+      jerasure in order to reduce the cost of recovery operations.
+  ec-profile-crush-locality:
+    type: string
+    default:
+    description: |
+      (lrc plugin) The type of the crush bucket in which each set of chunks
+      defined by l will be stored. For instance, if it is set to rack, each
+      group of l chunks will be placed in a different rack. It is used to
+      create a CRUSH rule step such as step choose rack. If it is not set,
+      no such grouping is done.
+  ec-profile-durability-estimator:
+    type: int
+    default:
+    description: |
+      (shec plugin - c) The number of parity chunks each of which includes
+      each data chunk in its calculation range. The number is used as a
+      durability estimator. For instance, if c=2, 2 OSDs can be down
+      without losing data.
+  ec-profile-helper-chunks:
+    type: int
+    default:
+    description: |
+      (clay plugin - d) Number of OSDs requested to send data during
+      recovery of a single chunk. d needs to be chosen such that
+      k+1 <= d <= k+m-1. The larger the d, the better the savings.
+  ec-profile-scalar-mds:
+    type: string
+    default:
+    description: |
+      (clay plugin) specifies the plugin that is used as a building
+      block in the layered construction. It can be one of jerasure,
+      isa, shec (defaults to jerasure).
+  ec-profile-plugin:
+    type: string
+    default: jerasure
+    description: |
+      EC plugin to use for this application's pool.
+      The following plugins are
+      acceptable - jerasure, lrc, isa, shec, clay.
+  ec-profile-technique:
+    type: string
+    default:
+    description: |
+      EC profile technique used for this application's pool - will be
+      validated based on the plugin configured via ec-profile-plugin.
+      Supported techniques are 'reed_sol_van', 'reed_sol_r6_op',
+      'cauchy_orig', 'cauchy_good', 'liber8tion' for jerasure,
+      'reed_sol_van', 'cauchy' for isa and 'single', 'multiple'
+      for shec.
+  ec-profile-device-class:
+    type: string
+    default:
+    description: |
+      Device class from CRUSH map to use for placement groups for
+      erasure profile - valid values: ssd, hdd or nvme (or leave
+      unset to not use a device class).
+  bluestore-compression-algorithm:
+    type: string
+    default:
+    description: |
+      Compressor to use (if any) for pools requested by this charm.
+      .
+      NOTE: The ceph-osd charm sets a global default for this value (defaults
+      to 'lz4' unless configured by the end user) which will be used unless
+      specified for individual pools.
+  bluestore-compression-mode:
+    type: string
+    default:
+    description: |
+      Policy for using compression on pools requested by this charm.
+      .
+      'none' means never use compression.
+      'passive' means use compression when clients hint that data is
+      compressible.
+      'aggressive' means use compression unless clients hint that
+      data is not compressible.
+      'force' means use compression under all circumstances even if the clients
+      hint that the data is not compressible.
+  bluestore-compression-required-ratio:
+    type: float
+    default:
+    description: |
+      The ratio of the size of the data chunk after compression relative to the
+      original size must be at least this small in order to store the
+      compressed version on pools requested by this charm.
+  bluestore-compression-min-blob-size:
+    type: int
+    default:
+    description: |
+      Chunks smaller than this are never compressed on pools requested by
+      this charm.
+  bluestore-compression-min-blob-size-hdd:
+    type: int
+    default:
+    description: |
+      Value of bluestore compression min blob size for rotational media on
+      pools requested by this charm.
+  bluestore-compression-min-blob-size-ssd:
+    type: int
+    default:
+    description: |
+      Value of bluestore compression min blob size for solid state media on
+      pools requested by this charm.
+  bluestore-compression-max-blob-size:
+    type: int
+    default:
+    description: |
+      Chunks larger than this are broken into smaller blobs of at most
+      bluestore compression max blob size before being compressed on pools
+      requested by this charm.
+  bluestore-compression-max-blob-size-hdd:
+    type: int
+    default:
+    description: |
+      Value of bluestore compression max blob size for rotational media on
+      pools requested by this charm.
diff --git a/ceph-iscsi/copyright b/ceph-iscsi/copyright
new file mode 100644
index 00000000..d0b7f44f
--- /dev/null
+++ b/ceph-iscsi/copyright
@@ -0,0 +1,16 @@
+Format: http://dep.debian.net/deps/dep5/
+
+Files: *
+Copyright: Copyright 2015-2020, Canonical Ltd., All Rights Reserved.
+License: Apache License 2.0
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ .
+ http://www.apache.org/licenses/LICENSE-2.0
+ .
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
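Tying the erasure-coding options above together, a minimal sketch of switching the backing pool to a k=4/m=2 jerasure profile (the application name and values are illustrative):

```bash
juju config ceph-iscsi \
    pool-type=erasure-coded \
    ec-profile-plugin=jerasure \
    ec-profile-k=4 \
    ec-profile-m=2
```

Per the option descriptions above, k+m (here 6) should not exceed the number of available zones or hosts for balancing.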
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/ceph-iscsi/metadata.yaml b/ceph-iscsi/metadata.yaml
new file mode 100644
index 00000000..e98e9c21
--- /dev/null
+++ b/ceph-iscsi/metadata.yaml
@@ -0,0 +1,30 @@
+name: ceph-iscsi
+summary: Gateway for provisioning iSCSI devices backed by Ceph.
+maintainer: OpenStack Charmers
+description: |
+  The iSCSI gateway integrates Ceph Storage with the iSCSI standard to
+  provide a Highly Available (HA) iSCSI target that exports RADOS Block Device
+  (RBD) images as SCSI disks.
+docs: https://discourse.charmhub.io/t/ceph-iscsi-docs-index/11222
+tags:
+  - openstack
+  - storage
+  - misc
+series:
+- focal
+- jammy
+subordinate: false
+min-juju-version: 2.7.6
+extra-bindings:
+  public:
+requires:
+  ceph-client:
+    interface: ceph-client
+  certificates:
+    interface: tls-certificates
+provides:
+  admin-access:
+    interface: ceph-iscsi-admin-access
+peers:
+  cluster:
+    interface: ceph-iscsi-peer
diff --git a/ceph-iscsi/osci.yaml b/ceph-iscsi/osci.yaml
new file mode 100644
index 00000000..54ddc196
--- /dev/null
+++ b/ceph-iscsi/osci.yaml
@@ -0,0 +1,55 @@
+- project:
+    templates:
+      - charm-unit-jobs-py310
+    check:
+      jobs:
+        - ceph-iscsi-focal-quincy
+        - ceph-iscsi-focal-quincy-ec
+        - ceph-iscsi-jammy-reef
+        - ceph-iscsi-jammy-reef-ec
+        - ceph-iscsi-jammy-caracal
+        - ceph-iscsi-jammy-caracal-ec
+    vars:
+      needs_charm_build: true
+      charm_build_name: ceph-iscsi
+      build_type: charmcraft
+      charmcraft_channel: 2.2/stable
+- job:
+    name: ceph-iscsi-focal-quincy
+    parent: func-target
+    vars:
+      tox_extra_args: -- focal
+- job:
+    name: ceph-iscsi-focal-quincy-ec
+    parent: func-target
+    vars:
+      tox_extra_args: -- focal-ec
+- job:
+    name: ceph-iscsi-jammy-reef
+    parent: func-target
+    dependencies:
+      - charm-build
+      - osci-lint
+      - name: tox-py310
+        soft: true
+    vars:
+      tox_extra_args: -- jammy-reef
+- job:
+    name: ceph-iscsi-jammy-reef-ec
+    parent: func-target
+    dependencies:
+      - ceph-iscsi-jammy-reef
+    vars:
+      tox_extra_args: -- jammy-reef-ec
+- job:
+    name: ceph-iscsi-jammy-caracal
+    parent: func-target
+    vars:
+      tox_extra_args: -- jammy-caracal
+- job:
+    name: ceph-iscsi-jammy-caracal-ec
+    parent: func-target
+    dependencies:
+      - ceph-iscsi-jammy-caracal
+    vars:
+      tox_extra_args: -- jammy-caracal-ec
diff --git a/ceph-iscsi/rename.sh b/ceph-iscsi/rename.sh
new file mode 100755
index 00000000..528d20f8
--- /dev/null
+++ b/ceph-iscsi/rename.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+charm=$(grep "charm_build_name" osci.yaml | awk '{print $2}')
+echo "renaming ${charm}_*.charm to ${charm}.charm"
+echo -n "pwd: "
+pwd
+ls -al
+echo "Removing any stale downloaded charm."
+if [[ -e "${charm}.charm" ]];
+then
+  rm "${charm}.charm"
+fi
+echo "Renaming charm here."
+mv ${charm}_*.charm ${charm}.charm +cp ${charm}.charm ../ diff --git a/ceph-iscsi/requirements.txt b/ceph-iscsi/requirements.txt new file mode 100644 index 00000000..abcff2cc --- /dev/null +++ b/ceph-iscsi/requirements.txt @@ -0,0 +1,7 @@ +# requirements +ops <= 1.6.0 +git+https://github.com/juju/charm-helpers.git#egg=charmhelpers +git+https://opendev.org/openstack/charm-ops-interface-ceph-client#egg=interface_ceph_client +git+https://opendev.org/openstack/charm-ops-openstack#egg=ops_openstack +git+https://opendev.org/openstack/charm-ops-interface-tls-certificates#egg=interface_tls_certificates +git+https://github.com/openstack-charmers/ops-interface-ceph-iscsi-admin-access#egg=interface_ceph_iscsi_admin_access diff --git a/ceph-iscsi/src/charm.py b/ceph-iscsi/src/charm.py new file mode 100755 index 00000000..2352fa17 --- /dev/null +++ b/ceph-iscsi/src/charm.py @@ -0,0 +1,625 @@ +#!/usr/bin/env python3 + +# Copyright 2020 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Charm for deploying and maintaining the Ceph iSCSI service.""" + +import copy +import logging +import os +import subprocess +import sys +import string +import socket +import secrets +from pathlib import Path + +sys.path.append('lib') + +from ops.framework import ( + StoredState, +) +from ops.main import main +import ops.model +import charmhelpers.core.host as ch_host +import charmhelpers.core.templating as ch_templating +import interface_ceph_client.ceph_client as ceph_client +import interface_ceph_iscsi_admin_access.admin_access as admin_access +import interface_ceph_iscsi_peer +import interface_tls_certificates.ca_client as ca_client + +import ops_openstack.adapters +import ops_openstack.core +import ops_openstack.plugins.classes +import gwcli_client +import cryptography.hazmat.primitives.serialization as serialization +logger = logging.getLogger(__name__) + + +class CephClientAdapter(ops_openstack.adapters.OpenStackOperRelationAdapter): + """Adapter for ceph client interface.""" + + @property + def mon_hosts(self): + """Sorted list of ceph mon addresses. + + :returns: Ceph MON addresses. + :rtype: str + """ + hosts = self.relation.get_relation_data()['mon_hosts'] + return ' '.join(sorted(hosts)) + + @property + def auth_supported(self): + """Authentication type. + + :returns: Authentication type + :rtype: str + """ + return self.relation.get_relation_data()['auth'] + + @property + def key(self): + """Key client should use when communicating with Ceph cluster. + + :returns: Key + :rtype: str + """ + return self.relation.get_relation_data()['key'] + + +class GatewayClientPeerAdapter( + ops_openstack.adapters.OpenStackOperRelationAdapter): + """Adapter for Ceph iSCSI peer interface.""" + + @property + def gw_hosts(self): + """List of peer addresses. + + :returns: Ceph iSCSI peer addresses. + :rtype: str + """ + hosts = self.relation.peer_addresses + return ' '.join(sorted(hosts)) + + @property + def trusted_ips(self): + """List of IP addresses permitted to use API. + + :returns: Ceph iSCSI trusted ips. 
+ :rtype: str + """ + ips = copy.deepcopy(self.allowed_ips) + ips.extend(self.relation.peer_addresses) + return ','.join(sorted(ips)) + + +class AdminAccessAdapter( + ops_openstack.adapters.OpenStackOperRelationAdapter): + + @property + def trusted_ips(self): + """List of IP addresses permitted to use API. + + :returns: Ceph iSCSI clients + :rtype: str + """ + return ','.join(sorted(self.relation.client_addresses)) + + +class TLSCertificatesAdapter( + ops_openstack.adapters.OpenStackOperRelationAdapter): + """Adapter for Ceph TLS Certificates interface.""" + + @property + def enable_tls(self): + """Whether to enable TLS. + + :returns: Whether TLS should be enabled + :rtype: bool + """ + try: + return bool(self.relation.application_certificate) + except ca_client.CAClientError: + return False + + +class CephISCSIGatewayAdapters( + ops_openstack.adapters.OpenStackRelationAdapters): + """Collection of relation adapters.""" + + relation_adapters = { + 'ceph-client': CephClientAdapter, + 'cluster': GatewayClientPeerAdapter, + 'certificates': TLSCertificatesAdapter, + 'admin-access': AdminAccessAdapter, + } + + +class CephISCSIGatewayCharmBase( + ops_openstack.plugins.classes.BaseCephClientCharm): + """Ceph iSCSI Base Charm.""" + + _stored = StoredState() + PACKAGES = ['ceph-iscsi', 'tcmu-runner', 'ceph-common'] + CEPH_CAPABILITIES = [ + "osd", "allow *", + "mon", "allow *", + "mgr", "allow r"] + + DEFAULT_TARGET = "iqn.2003-01.com.ubuntu.iscsi-gw:iscsi-igw" + REQUIRED_RELATIONS = ['ceph-client', 'cluster'] + + ALLOWED_UNIT_COUNTS = [2, 4] + + CEPH_CONFIG_PATH = Path('/etc/ceph') + CEPH_ISCSI_CONFIG_PATH = CEPH_CONFIG_PATH / 'iscsi' + GW_CONF = CEPH_CONFIG_PATH / 'iscsi-gateway.cfg' + CEPH_CONF = CEPH_ISCSI_CONFIG_PATH / 'ceph.conf' + GW_KEYRING = CEPH_ISCSI_CONFIG_PATH / 'ceph.client.ceph-iscsi.keyring' + TLS_KEY_PATH = CEPH_CONFIG_PATH / 'iscsi-gateway.key' + TLS_PUB_KEY_PATH = CEPH_CONFIG_PATH / 'iscsi-gateway-pub.key' + TLS_CERT_PATH = CEPH_CONFIG_PATH / 'iscsi-gateway.crt' + TLS_KEY_AND_CERT_PATH = CEPH_CONFIG_PATH / 'iscsi-gateway.pem' + TLS_CA_CERT_PATH = Path( + '/usr/local/share/ca-certificates/vault_ca_cert.crt') + + GW_SERVICES = ['rbd-target-api', 'rbd-target-gw'] + + RESTART_MAP = { + str(GW_CONF): GW_SERVICES, + str(CEPH_CONF): GW_SERVICES, + str(GW_KEYRING): GW_SERVICES} + + release = 'default' + + def __init__(self, framework): + """Setup adapters and observers.""" + super().__init__(framework) + super().register_status_check(self.custom_status_check) + logging.info("Using %s class", self.release) + self._stored.set_default( + target_created=False, + enable_tls=False) + self.ceph_client = ceph_client.CephClientRequires( + self, + 'ceph-client') + self.peers = interface_ceph_iscsi_peer.CephISCSIGatewayPeers( + self, + 'cluster') + self.admin_access = \ + admin_access.CephISCSIAdminAccessProvides( + self, + 'admin-access') + self.ca_client = ca_client.CAClient( + self, + 'certificates') + self.adapters = CephISCSIGatewayAdapters( + (self.ceph_client, self.peers, self.ca_client, self.admin_access), + self) + self.framework.observe( + self.admin_access.on.admin_access_request, + self.publish_admin_access_info) + self.framework.observe( + self.ceph_client.on.broker_available, + self.request_ceph_pool) + self.framework.observe( + self.ceph_client.on.pools_available, + self.render_config) + self.framework.observe( + self.peers.on.has_peers, + self.on_has_peers) + self.framework.observe( + self.peers.on.allowed_ips_changed, + self.render_config) + self.framework.observe( + 
+            self.ca_client.on.tls_app_config_ready,
+            self.on_tls_app_config_ready)
+        self.framework.observe(
+            self.ca_client.on.ca_available,
+            self.on_ca_available)
+        self.framework.observe(
+            self.on.config_changed,
+            self.render_config)
+        self.framework.observe(
+            self.on.config_changed,
+            self.request_ceph_pool)
+        self.framework.observe(
+            self.on.upgrade_charm,
+            self.render_config)
+        self.framework.observe(
+            self.on.create_target_action,
+            self.on_create_target_action)
+        self.framework.observe(
+            self.on.add_trusted_ip_action,
+            self.on_add_trusted_ip_action)
+
+    def on_install(self, event):
+        """Install packages and check substrate is supported."""
+        if ch_host.is_container():
+            logging.info("Installing into a container is not supported")
+            self.update_status()
+        else:
+            self.install_pkgs()
+
+    def on_has_peers(self, event):
+        """Set up and share the admin password."""
+        logging.info("Unit has peers")
+        if self.unit.is_leader() and not self.peers.admin_password:
+            logging.info("Setting admin password")
+            alphabet = string.ascii_letters + string.digits
+            password = ''.join(secrets.choice(alphabet) for i in range(8))
+            self.peers.set_admin_password(password)
+        self.publish_admin_access_info(event)
+
+    def config_get(self, key):
+        """Retrieve config option.
+
+        :returns: Value of the corresponding config option or None.
+        :rtype: Any
+        """
+        return self.model.config.get(key)
+
+    @property
+    def data_pool_name(self):
+        """The name of the default rbd data pool to be used by targets.
+
+        :returns: Data pool name.
+        :rtype: str
+        """
+        if self.config_get('rbd-pool-name'):
+            pool_name = self.config_get('rbd-pool-name')
+        else:
+            pool_name = self.app.name
+        return pool_name
+
+    @property
+    def metadata_pool_name(self):
+        """The name of the default rbd metadata pool to be used by targets.
+
+        :returns: Metadata pool name.
+        :rtype: str
+        """
+        return (self.config_get('ec-rbd-metadata-pool') or
+                "{}-metadata".format(self.app.name))
+
+    def request_ceph_pool(self, event):
+        """Request pools from Ceph cluster."""
+        if not self.ceph_client.broker_available:
+            logging.info("Cannot request ceph setup at this time")
+            return
+        logging.info("Requesting replicated pool")
+        try:
+            bcomp_kwargs = self.get_bluestore_compression()
+        except ValueError as e:
+            # The end user has most likely provided an invalid value for
+            # a configuration option. Just log the traceback here, the
+            # end user will be notified by assess_status() called at
+            # the end of the hook execution.
+            logging.warning('Caught ValueError, invalid value provided '
+                            'for configuration?: "{}"'.format(str(e)))
+            return
+        self.ceph_client.create_replicated_pool(
+            self.config_get('gateway-metadata-pool'),
+            app_name='rados')
+        weight = self.config_get('ceph-pool-weight')
+        replicas = self.config_get('ceph-osd-replication-count')
+        if self.config_get('pool-type') == 'erasure-coded':
+            # General EC plugin config
+            plugin = self.config_get('ec-profile-plugin')
+            technique = self.config_get('ec-profile-technique')
+            device_class = self.config_get('ec-profile-device-class')
+            bdm_k = self.config_get('ec-profile-k')
+            bdm_m = self.config_get('ec-profile-m')
+            # LRC plugin config
+            bdm_l = self.config_get('ec-profile-locality')
+            crush_locality = self.config_get('ec-profile-crush-locality')
+            # SHEC plugin config
+            bdm_c = self.config_get('ec-profile-durability-estimator')
+            # CLAY plugin config
+            bdm_d = self.config_get('ec-profile-helper-chunks')
+            scalar_mds = self.config_get('ec-profile-scalar-mds')
+            # Profile name
+            profile_name = (
+                self.config_get('ec-profile-name') or
+                "{}-profile".format(self.app.name)
+            )
+            # Metadata sizing is approximately 1% of overall data weight
+            # but is in effect driven by the number of RBDs rather than
+            # their size - so it can be very lightweight.
+            metadata_weight = weight * 0.01
+            # Resize data pool weight to accommodate metadata weight
+            weight = weight - metadata_weight
+            # Create erasure profile
+            self.ceph_client.create_erasure_profile(
+                name=profile_name,
+                k=bdm_k, m=bdm_m,
+                lrc_locality=bdm_l,
+                lrc_crush_locality=crush_locality,
+                shec_durability_estimator=bdm_c,
+                clay_helper_chunks=bdm_d,
+                clay_scalar_mds=scalar_mds,
+                device_class=device_class,
+                erasure_type=plugin,
+                erasure_technique=technique
+            )
+
+            # Create EC data pool
+            self.ceph_client.create_erasure_pool(
+                name=self.data_pool_name,
+                erasure_profile=profile_name,
+                weight=weight,
+                allow_ec_overwrites=True,
+                app_name='rbd',
+                **bcomp_kwargs
+            )
+            self.ceph_client.create_replicated_pool(
+                name=self.metadata_pool_name,
+                app_name='rbd',
+                weight=metadata_weight
+            )
+        else:
+            self.ceph_client.create_replicated_pool(
+                name=self.data_pool_name,
+                replicas=replicas,
+                weight=weight,
+                app_name='rbd',
+                **bcomp_kwargs)
+        logging.info("Requesting permissions")
+        self.ceph_client.request_ceph_permissions(
+            'ceph-iscsi',
+            self.CEPH_CAPABILITIES)
+        self.ceph_client.request_osd_settings({
+            'osd heartbeat grace': 20,
+            'osd heartbeat interval': 5})
+
+    def refresh_request(self, event):
+        """Re-request Ceph pools and render config."""
+        self.render_config(event)
+        self.request_ceph_pool(event)
+
+    def render_config(self, event):
+        """Render config and restart services if config files change."""
+        if not self.peers.admin_password:
+            logging.info("Deferring setup")
+            event.defer()
+            return
+        if not self.ceph_client.pools_available:
+            logging.info("Deferring setup")
+            event.defer()
+            return
+
+        self.CEPH_ISCSI_CONFIG_PATH.mkdir(
+            exist_ok=True,
+            mode=0o750)
+
+        def daemon_reload_and_restart(service_name):
+            subprocess.check_call(['systemctl', 'daemon-reload'])
+            subprocess.check_call(['systemctl', 'restart', service_name])
+
+        rfuncs = {
+            'rbd-target-api': daemon_reload_and_restart}
+
+        @ch_host.restart_on_change(self.RESTART_MAP, restart_functions=rfuncs)
+        def _render_configs():
+            for config_file in self.RESTART_MAP.keys():
+                ch_templating.render(
+                    os.path.basename(config_file),
+                    config_file,
+                    self.adapters)
+        logging.info("Rendering config")
+        _render_configs()
+        # Make sure the gateway
+        # services are enabled after the configuration files have been
+        # rendered and the services restarted. They are disabled by
+        # default in the package. LP: #2045828
+        for service_name in self.GW_SERVICES:
+            ch_host.service_enable(service_name)
+        logging.info("Setting started state")
+        self.peers.announce_ready()
+        self._stored.is_started = True
+        self.update_status()
+        logging.info("on_pools_available: status updated")
+
+    def on_ca_available(self, event):
+        """Request TLS certificates."""
+        addresses = set()
+        for binding_name in ['public', 'cluster']:
+            binding = self.model.get_binding(binding_name)
+            addresses.add(binding.network.ingress_address)
+            addresses.add(binding.network.bind_address)
+        sans = [str(s) for s in addresses]
+        sans.append(socket.gethostname())
+        self.ca_client.request_application_certificate(socket.getfqdn(), sans)
+
+    def on_tls_app_config_ready(self, event):
+        """Configure TLS."""
+        self.TLS_KEY_PATH.write_bytes(
+            self.ca_client.application_key.private_bytes(
+                encoding=serialization.Encoding.PEM,
+                format=serialization.PrivateFormat.TraditionalOpenSSL,
+                encryption_algorithm=serialization.NoEncryption()))
+        self.TLS_CERT_PATH.write_bytes(
+            self.ca_client.application_certificate.public_bytes(
+                encoding=serialization.Encoding.PEM))
+        self.TLS_CA_CERT_PATH.write_bytes(
+            self.ca_client.ca_certificate.public_bytes(
+                encoding=serialization.Encoding.PEM))
+        self.TLS_KEY_AND_CERT_PATH.write_bytes(
+            self.ca_client.application_certificate.public_bytes(
+                encoding=serialization.Encoding.PEM) +
+            b'\n' +
+            self.ca_client.application_key.private_bytes(
+                encoding=serialization.Encoding.PEM,
+                format=serialization.PrivateFormat.TraditionalOpenSSL,
+                encryption_algorithm=serialization.NoEncryption())
+        )
+        self.TLS_PUB_KEY_PATH.write_bytes(
+            self.ca_client.application_key.public_key().public_bytes(
+                format=serialization.PublicFormat.SubjectPublicKeyInfo,
+                encoding=serialization.Encoding.PEM))
+        subprocess.check_call(['update-ca-certificates'])
+        self._stored.enable_tls = True
+        # Endpoint has switched to TLS; need to inform users.
+        self.publish_admin_access_info(event)
+        self.render_config(event)
+
+    def publish_admin_access_info(self, event):
+        """Publish credentials and endpoint to related charms."""
+        if not self.peers.admin_password:
+            logging.info("Deferring setup")
+            event.defer()
+            return
+        if self._stored.enable_tls:
+            scheme = 'https'
+        else:
+            scheme = 'http'
+        self.admin_access.publish_gateway(
+            socket.getfqdn(),
+            'admin',
+            self.peers.admin_password,
+            scheme)
+
+    def custom_status_check(self):
+        """Custom update status checks."""
+        if ch_host.is_container():
+            return ops.model.BlockedStatus(
+                'Charm cannot be deployed into a container')
+        if self.peers.unit_count not in self.ALLOWED_UNIT_COUNTS:
+            return ops.model.BlockedStatus(
+                '{} is an invalid unit count'.format(self.peers.unit_count))
+        return ops.model.ActiveStatus()
+
+    # Actions
+
+    def on_add_trusted_ip_action(self, event):
+        """Add an IP to the allowed list for API access."""
+        if self.unit.is_leader():
+            ips = event.params.get('ips').split()
+            self.peers.set_allowed_ips(
+                ips,
+                append=not event.params['overwrite'])
+            self.render_config(event)
+        else:
+            event.fail("Action must be run on leader")
+
+    def calculate_target_pools(self, event):
+        if event.params['ec-rbd-metadata-pool']:
+            ec_rbd_metadata_pool = event.params['ec-rbd-metadata-pool']
+            rbd_pool_name = event.params['rbd-pool-name']
+        elif event.params['rbd-pool-name']:
+            ec_rbd_metadata_pool = None
+            rbd_pool_name = event.params['rbd-pool-name']
+        # Action did not specify pools, so derive them from charm config.
+        elif self.model.config['pool-type'] == 'erasure-coded':
+            ec_rbd_metadata_pool = self.metadata_pool_name
+            rbd_pool_name = self.data_pool_name
+        else:
+            ec_rbd_metadata_pool = None
+            rbd_pool_name = self.data_pool_name
+        return rbd_pool_name, ec_rbd_metadata_pool
+
+    def _validate_str(self, value, allowed, min_len, max_len, typ):
+        if any(s for s in value if s not in allowed):
+            raise ValueError('%s can only contain: %s' % (typ, allowed))
+        elif len(value) < min_len or len(value) > max_len:
+            raise ValueError('%s must be between %d and %d characters long' %
+                             (typ, min_len, max_len))
+
+    def _validate_username(self, value):
+        self._validate_str(value, string.ascii_letters + string.digits +
+                           '.@-_:', 8, 64, 'username')
+
+    def _validate_password(self, value):
+        self._validate_str(value, string.ascii_letters + string.digits +
+                           '@-_/', 12, 16, 'password')
+
+    def on_create_target_action(self, event):
+        """Create an iSCSI target."""
+        gw_client = gwcli_client.GatewayClient()
+        target = event.params.get('iqn', self.DEFAULT_TARGET)
+        username = event.params['client-username']
+        passwd = event.params['client-password']
+        try:
+            self._validate_username(username)
+            self._validate_password(passwd)
+        except ValueError as exc:
+            logging.error(str(exc))
+            fail_str = 'invalid username or password: %s' % str(exc)
+            event.fail(fail_str)
+            event.set_results({'err': fail_str})
+            return
+
+        gateway_units = event.params.get(
+            'gateway-units',
+            [u for u in self.peers.ready_peer_details.keys()])
+        rbd_pool_name, ec_rbd_metadata_pool = self.calculate_target_pools(
+            event)
+        if ec_rbd_metadata_pool:
+            # When using erasure-coded pools the image needs to be pre-created
+            # as the gwcli does not currently handle the creation.
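+            # For reference, the command list built below is roughly
+            # equivalent to running (with the action's parameter values
+            # substituted):
+            #   rbd --id ceph-iscsi --conf /etc/ceph/iscsi/ceph.conf \
+            #       create --size <image-size> \
+            #       <ec-rbd-metadata-pool>/<image-name> --data-pool <rbd-pool>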
+ cmd = [ + 'rbd', + '--id', 'ceph-iscsi', + '--conf', str(self.CEPH_CONF), + 'create', + '--size', event.params['image-size'], + '{}/{}'.format( + ec_rbd_metadata_pool, + event.params['image-name']), + '--data-pool', rbd_pool_name] + logging.info(cmd) + subprocess.check_call(cmd) + target_pool = ec_rbd_metadata_pool + else: + target_pool = rbd_pool_name + gw_client.create_target(target) + for gw_unit, gw_config in self.peers.ready_peer_details.items(): + added_gateways = [] + if gw_unit in gateway_units: + gw_client.add_gateway_to_target( + target, + gw_config['ip'], + gw_config['fqdn']) + added_gateways.append(gw_unit) + gw_client.create_pool( + target_pool, + event.params['image-name'], + event.params['image-size']) + gw_client.add_client_to_target( + target, + event.params['client-initiatorname']) + gw_client.add_client_auth( + target, + event.params['client-initiatorname'], + username, + passwd) + gw_client.add_disk_to_client( + target, + event.params['client-initiatorname'], + target_pool, + event.params['image-name']) + event.set_results({'iqn': target}) + + +@ops_openstack.core.charm_class +class CephISCSIGatewayCharmOcto(CephISCSIGatewayCharmBase): + """Ceph iSCSI Charm for Octopus.""" + + _stored = StoredState() + release = 'octopus' + + +if __name__ == '__main__': + main(ops_openstack.core.get_charm_class_for_release()) diff --git a/ceph-iscsi/src/gwcli_client.py b/ceph-iscsi/src/gwcli_client.py new file mode 100644 index 00000000..6dcae253 --- /dev/null +++ b/ceph-iscsi/src/gwcli_client.py @@ -0,0 +1,60 @@ +import logging +import subprocess + +logger = logging.getLogger() + + +class GatewayClient(): + + def run(self, path, cmd): + _cmd = ['gwcli', path] + # NOTE(lourot): we don't print the full command here as it might + # contain secrets. 
+ logging.info(' '.join(_cmd) + ' ...') + _cmd.extend(cmd.split()) + + error_msg = None + try: + subprocess.check_output(_cmd, stderr=subprocess.PIPE) + except subprocess.CalledProcessError as e: + error_msg = 'gwcli failed with {}'.format(e.returncode) + logging.error(error_msg) + logging.error('stdout: {}'.format(e.stdout)) + logging.error('stderr: {}'.format(e.stderr)) + + if error_msg: + # NOTE(lourot): we re-raise another free-of-secrets exception: + raise RuntimeError(error_msg) + + def create_target(self, iqn): + self.run( + "/iscsi-targets/", + "create {}".format(iqn)) + + def add_gateway_to_target(self, iqn, gateway_ip, gateway_fqdn): + self.run( + "/iscsi-targets/{}/gateways/".format(iqn), + "create {} {}".format(gateway_fqdn, gateway_ip)) + + def create_pool(self, pool_name, image_name, image_size): + self.run( + "/disks", + "create pool={} image={} size={}".format( + pool_name, + image_name, + image_size)) + + def add_client_to_target(self, iqn, initiatorname): + self.run( + "/iscsi-targets/{}/hosts/".format(iqn), + "create {}".format(initiatorname)) + + def add_client_auth(self, iqn, initiatorname, username, password): + self.run( + "/iscsi-targets/{}/hosts/{}".format(iqn, initiatorname), + "auth username={} password={}".format(username, password)) + + def add_disk_to_client(self, iqn, initiatorname, pool_name, image_name): + self.run( + "/iscsi-targets/{}/hosts/{}".format(iqn, initiatorname), + "disk add {}/{}".format(pool_name, image_name)) diff --git a/ceph-iscsi/src/interface_ceph_iscsi_peer.py b/ceph-iscsi/src/interface_ceph_iscsi_peer.py new file mode 100644 index 00000000..a7f7ecbe --- /dev/null +++ b/ceph-iscsi/src/interface_ceph_iscsi_peer.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 + +import json +import logging +import socket + +from ops.framework import ( + StoredState, + EventBase, + ObjectEvents, + EventSource, + Object) + + +class HasPeersEvent(EventBase): + pass + + +class ReadyPeersEvent(EventBase): + pass + + +class AllowedIpsChangedEvent(EventBase): + pass + + +class CephISCSIGatewayPeerEvents(ObjectEvents): + has_peers = EventSource(HasPeersEvent) + ready_peers = EventSource(ReadyPeersEvent) + allowed_ips_changed = EventSource(AllowedIpsChangedEvent) + + +class CephISCSIGatewayPeers(Object): + + on = CephISCSIGatewayPeerEvents() + _stored = StoredState() + PASSWORD_KEY = 'admin_password' + READY_KEY = 'gateway_ready' + FQDN_KEY = 'gateway_fqdn' + ALLOWED_IPS_KEY = 'allowed_ips' + + def __init__(self, charm, relation_name): + super().__init__(charm, relation_name) + self.relation_name = relation_name + self.this_unit = self.framework.model.unit + self._stored.set_default( + allowed_ips=[]) + self.framework.observe( + charm.on[relation_name].relation_changed, + self.on_changed) + + def on_changed(self, event): + logging.info("CephISCSIGatewayPeers on_changed") + self.on.has_peers.emit() + if self.ready_peer_details: + self.on.ready_peers.emit() + if self.allowed_ips != self._stored.allowed_ips: + self.on.allowed_ips_changed.emit() + self._stored.allowed_ips = self.allowed_ips + + def set_admin_password(self, password): + logging.info("Setting admin password") + self.peer_rel.data[self.peer_rel.app][self.PASSWORD_KEY] = password + + def set_allowed_ips(self, ips, append=True): + trusted_ips = [] + if append and self.allowed_ips: + trusted_ips = self.allowed_ips + trusted_ips.extend(ips) + trusted_ips = sorted(list(set(trusted_ips))) + ip_str = json.dumps(trusted_ips) + logging.info("Setting allowed ips to: %s", ip_str) + 
self.peer_rel.data[self.peer_rel.app][self.ALLOWED_IPS_KEY] = ip_str + + def announce_ready(self): + logging.info("announcing ready") + self.peer_rel.data[self.this_unit][self.READY_KEY] = 'True' + self.peer_rel.data[self.this_unit][self.FQDN_KEY] = self.fqdn + + @property + def ready_peer_details(self): + peers = { + self.framework.model.unit.name: { + 'fqdn': self.fqdn, + 'ip': self.cluster_bind_address}} + for u in self.peer_rel.units: + if self.peer_rel.data[u].get(self.READY_KEY) == 'True': + peers[u.name] = { + 'fqdn': self.peer_rel.data[u][self.FQDN_KEY], + 'ip': self.peer_rel.data[u]['ingress-address']} + return peers + + @property + def fqdn(self): + return socket.getfqdn() + + @property + def is_joined(self): + return self.peer_rel is not None + + @property + def peer_rel(self): + return self.framework.model.get_relation(self.relation_name) + + @property + def peer_binding(self): + return self.framework.model.get_binding(self.peer_rel) + + @property + def cluster_bind_address(self): + return str(self.peer_binding.network.bind_address) + + @property + def admin_password(self): + if not self.peer_rel: + return None + return self.peer_rel.data[self.peer_rel.app].get(self.PASSWORD_KEY) + + @property + def allowed_ips(self): + if not self.peer_rel: + return None + ip_str = self.peer_rel.data[self.peer_rel.app].get( + self.ALLOWED_IPS_KEY, '[]') + return json.loads(ip_str) + + @property + def peer_addresses(self): + addresses = [self.cluster_bind_address] + for u in self.peer_rel.units: + addresses.append(self.peer_rel.data[u]['ingress-address']) + return sorted(addresses) + + @property + def peer_count(self): + if self.peer_rel: + return len(self.peer_rel.units) + else: + return 0 + + @property + def unit_count(self): + return self.peer_count + 1 diff --git a/ceph-iscsi/templates/ceph.client.ceph-iscsi.keyring b/ceph-iscsi/templates/ceph.client.ceph-iscsi.keyring new file mode 100644 index 00000000..fed64cf1 --- /dev/null +++ b/ceph-iscsi/templates/ceph.client.ceph-iscsi.keyring @@ -0,0 +1,3 @@ +[client.ceph-iscsi] + key = {{ ceph_client.key }} + diff --git a/ceph-iscsi/templates/ceph.conf b/ceph-iscsi/templates/ceph.conf new file mode 100644 index 00000000..5d0227bc --- /dev/null +++ b/ceph-iscsi/templates/ceph.conf @@ -0,0 +1,15 @@ +############################################################################### +# [ WARNING ] +# configuration file maintained by Juju +# local changes will be overwritten. 
+############################################################################### +[global] +auth supported = {{ ceph_client.auth_supported }} +mon host = {{ ceph_client.mon_hosts }} +keyring = /etc/ceph/iscsi/$cluster.$name.keyring + +[client.ceph-iscsi] +client mount uid = 0 +client mount gid = 0 +log file = /var/log/ceph/ceph-client.iscsi.log + diff --git a/ceph-iscsi/templates/iscsi-gateway.cfg b/ceph-iscsi/templates/iscsi-gateway.cfg new file mode 100644 index 00000000..eb7d0e02 --- /dev/null +++ b/ceph-iscsi/templates/iscsi-gateway.cfg @@ -0,0 +1,18 @@ +[config] +logger_level = DEBUG +cluster_name = ceph +cluster_client_name = client.ceph-iscsi +pool = {{ options.gateway_metadata_pool }} + +gateway_keyring = ceph.client.ceph-iscsi.keyring +ceph_config_dir = /etc/ceph/iscsi + +api_secure = {{ certificates.enable_tls }} +api_user = admin +api_password = {{ cluster.admin_password }} +api_port = 5000 +{% if admin_access.trusted_ips -%} +trusted_ip_list = {{ cluster.trusted_ips }},{{ admin_access.trusted_ips }} +{% else -%} +trusted_ip_list = {{ cluster.trusted_ips }} +{% endif -%} diff --git a/ceph-iscsi/test-requirements.txt b/ceph-iscsi/test-requirements.txt new file mode 100644 index 00000000..e47e81d8 --- /dev/null +++ b/ceph-iscsi/test-requirements.txt @@ -0,0 +1,10 @@ +coverage>=3.6 +mock>=1.2 +flake8 +stestr>=2.2.0 +requests>=2.18.4 +psutil +git+https://github.com/openstack-charmers/zaza.git#egg=zaza +git+https://github.com/openstack-charmers/zaza-openstack-tests.git#egg=zaza.openstack +pyudev # for ceph-* charm unit tests (not mocked?) +protobuf<3.21.0 # https://github.com/juju/python-libjuju/issues/914 diff --git a/ceph-iscsi/tests/README.md b/ceph-iscsi/tests/README.md new file mode 100644 index 00000000..31363eee --- /dev/null +++ b/ceph-iscsi/tests/README.md @@ -0,0 +1,19 @@ +# Overview + +This directory provides Zaza test definitions and bundles to verify basic +deployment functionality from the perspective of this charm, its requirements +and its features, as exercised in a subset of the full OpenStack deployment +test bundle topology. + +Run the smoke tests with: + +```bash +cd ../ +tox -e build +cd tests/ +tox -e func-smoke +``` + +For full details on functional testing of OpenStack charms please refer to +the [functional testing](https://docs.openstack.org/charm-guide/latest/reference/testing.html#functional-testing) +section of the OpenStack Charm Guide. 
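To run a single functional bundle rather than the smoke set, the `func-target` environment in tox.ini passes its positional argument through to `functest-run-suite --bundle`; for example, using one of the bundles defined under tests/bundles/:

```bash
tox -e func-target -- jammy-caracal
```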
diff --git a/ceph-iscsi/tests/bundles/jammy-caracal.yaml b/ceph-iscsi/tests/bundles/jammy-caracal.yaml new file mode 100644 index 00000000..2fba5d22 --- /dev/null +++ b/ceph-iscsi/tests/bundles/jammy-caracal.yaml @@ -0,0 +1,61 @@ +local_overlay_enabled: False +series: jammy + +variables: + openstack-origin: &openstack-origin cloud:jammy-caracal + +machines: + '0': + '1': + '2': + '3': + constraints: cores=2 mem=4G root-disk=25G virt-type=virtual-machine + '4': + constraints: cores=2 mem=4G root-disk=25G virt-type=virtual-machine + '5': + constraints: cores=2 mem=4G root-disk=25G virt-type=virtual-machine + '6': + constraints: cores=2 mem=4G root-disk=25G virt-type=virtual-machine + '7': + constraints: cores=2 mem=4G root-disk=25G virt-type=virtual-machine + +applications: + ceph-iscsi: + charm: ch:ceph-iscsi + num_units: 2 + channel: squid/edge + options: + gateway-metadata-pool: iscsi-foo-metadata + to: + - '6' + - '7' + + ceph-osd: + charm: ch:ceph-osd + num_units: 3 + storage: + osd-devices: 'loop,10G' + options: + osd-devices: '/dev/test-non-existent' + to: + - '3' + - '4' + - '5' + channel: squid/edge + + ceph-mon: + charm: ch:ceph-mon + num_units: 3 + options: + monitor-count: '3' + to: + - '0' + - '1' + - '2' + channel: squid/edge + +relations: + - - 'ceph-mon:client' + - 'ceph-iscsi:ceph-client' + - - 'ceph-osd:mon' + - 'ceph-mon:osd' diff --git a/ceph-iscsi/tests/target.py b/ceph-iscsi/tests/target.py new file mode 100644 index 00000000..7dde7029 --- /dev/null +++ b/ceph-iscsi/tests/target.py @@ -0,0 +1,326 @@ +# Copyright 2020 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Encapsulating `ceph-iscsi` testing.""" + +import logging +import tempfile + +import zaza +import zaza.model +import zaza.openstack.charm_tests.test_utils as test_utils +import zaza.openstack.utilities.generic as generic_utils + + +def basic_guest_setup(): + """Run basic setup for iscsi guest.""" + for unit in zaza.model.get_units('ceph-osd'): + setup_cmds = [ + "apt install --yes open-iscsi multipath-tools", + "systemctl start iscsi", + "systemctl start iscsid"] + for cmd in setup_cmds: + zaza.model.run_on_unit( + unit.entity_id, + cmd) + + +class CephISCSIGatewayTest(test_utils.BaseCharmTest): + """Class for `ceph-iscsi` tests.""" + + GW_IQN = "iqn.2003-03.com.canonical.iscsi-gw:iscsi-igw" + DATA_POOL_NAME = 'zaza_rep_pool' + EC_PROFILE_NAME = 'zaza_iscsi' + EC_DATA_POOL = 'zaza_ec_data_pool' + EC_METADATA_POOL = 'zaza_ec_metadata_pool' + + def get_client_initiatorname(self, unit): + """Return the initiatorname for the given unit. 
+ + :param unit_name: Name of unit to match + :type unit: str + :returns: Initiator name + :rtype: str + """ + generic_utils.assertRemoteRunOK(zaza.model.run_on_unit( + unit, + ('cp /etc/iscsi/initiatorname.iscsi /tmp; ' + 'chmod 644 /tmp/initiatorname.iscsi'))) + with tempfile.TemporaryDirectory() as tmpdirname: + tmp_file = '{}/{}'.format(tmpdirname, 'initiatorname.iscsi') + zaza.model.scp_from_unit( + unit, + '/tmp/initiatorname.iscsi', + tmp_file) + with open(tmp_file, 'r') as stream: + contents = stream.readlines() + initiatorname = None + for line in contents: + if line.startswith('InitiatorName'): + initiatorname = line.split('=')[1].rstrip() + return initiatorname + + def get_base_ctxt(self): + """Generate a context for running gwcli commands to create a target. + + :returns: Base gateway context + :rtype: Dict + """ + gw_units = zaza.model.get_units('ceph-iscsi') + host_names = generic_utils.get_unit_hostnames(gw_units, fqdn=True) + client_entity_ids = [ + u.entity_id for u in zaza.model.get_units('ceph-osd')] + ctxt = { + 'client_entity_ids': sorted(client_entity_ids), + 'gw_iqn': self.GW_IQN, + 'chap_creds': 'username={chap_username} password={chap_password}', + 'gwcli_gw_dir': '/iscsi-targets/{gw_iqn}/gateways', + 'gwcli_hosts_dir': '/iscsi-targets/{gw_iqn}/hosts', + 'gwcli_disk_dir': '/disks', + 'gwcli_client_dir': '{gwcli_hosts_dir}/{client_initiatorname}', + } + ctxt['gateway_units'] = [ + { + 'entity_id': u.entity_id, + 'ip': zaza.model.get_unit_public_address(u), + 'hostname': host_names[u.entity_id]} + for u in zaza.model.get_units('ceph-iscsi')] + ctxt['gw_ip'] = sorted([g['ip'] for g in ctxt['gateway_units']])[0] + return ctxt + + def run_commands(self, unit_name, commands, ctxt): + """Run commands on unit. + + Iterate over each command and apply the context to the command, then + run the command on the supplied unit. + + :param unit_name: Name of unit to match + :type unit: str + :param commands: List of commands to run. + :type commands: List[str] + :param ctxt: Context to apply to each command. + :type ctxt: Dict + :raises: AssertionError + """ + for _cmd in commands: + cmd = _cmd.format(**ctxt) + generic_utils.assertRemoteRunOK(zaza.model.run_on_unit( + unit_name, + cmd)) + + def create_iscsi_target(self, ctxt): + """Create target on gateway. + + :param ctxt: Base gateway context + :type ctxt: Dict + """ + generic_utils.assertActionRanOK(zaza.model.run_action_on_leader( + 'ceph-iscsi', + 'create-target', + action_params={ + 'gateway-units': ' '.join([g['entity_id'] + for g in ctxt['gateway_units']]), + 'iqn': self.GW_IQN, + 'rbd-pool-name': ctxt.get('pool_name', ''), + 'ec-rbd-metadata-pool': ctxt.get('ec_meta_pool_name', ''), + 'image-size': ctxt['img_size'], + 'image-name': ctxt['img_name'], + 'client-initiatorname': ctxt['client_initiatorname'], + 'client-username': ctxt['chap_username'], + 'client-password': ctxt['chap_password'] + })) + + def login_iscsi_target(self, ctxt): + """Login to the iscsi target on client. 
+
+        :param ctxt: Base gateway context
+        :type ctxt: Dict
+        """
+        logging.info("Logging in to iscsi target")
+        base_op_cmd = ('iscsiadm --mode node --targetname {gw_iqn} '
+                       '--op=update ').format(**ctxt)
+        setup_cmds = [
+            'iscsiadm -m discovery -t st -p {gw_ip}',
+            base_op_cmd + '-n node.session.auth.authmethod -v CHAP',
+            base_op_cmd + '-n node.session.auth.username -v {chap_username}',
+            base_op_cmd + '-n node.session.auth.password -v {chap_password}',
+            'iscsiadm --mode node --targetname {gw_iqn} --login']
+        self.run_commands(ctxt['client_entity_id'], setup_cmds, ctxt)
+
+    def logout_iscsi_targets(self, ctxt):
+        """Log out of the iscsi target on the client.
+
+        :param ctxt: Base gateway context
+        :type ctxt: Dict
+        """
+        logging.info("Logging out of iscsi target")
+        logout_cmds = [
+            'iscsiadm --mode node --logoutall=all']
+        self.run_commands(ctxt['client_entity_id'], logout_cmds, ctxt)
+
+    def check_client_device(self, ctxt, init_client=True):
+        """Wait for multipath device to appear on client and test access.
+
+        :param ctxt: Base gateway context
+        :type ctxt: Dict
+        :param init_client: Initialise client if this is the first time it has
+                            been used.
+        :type init_client: bool
+        """
+        logging.info("Checking multipath device is present.")
+        device_ctxt = {
+            'bdevice': '/dev/dm-0',
+            'mount_point': '/mnt/iscsi',
+            'test_file': '/mnt/iscsi/test.data'}
+        ls_bdevice_cmd = 'ls -l {bdevice}'
+        mkfs_cmd = 'mke2fs {bdevice}'
+        mkdir_cmd = 'mkdir {mount_point}'
+        mount_cmd = 'mount {bdevice} {mount_point}'
+        umount_cmd = 'umount {mount_point}'
+        check_mounted_cmd = 'mountpoint {mount_point}'
+        write_cmd = 'truncate -s 1M {test_file}'
+        check_file = 'ls -l {test_file}'
+        if init_client:
+            commands = [
+                mkfs_cmd,
+                mkdir_cmd,
+                mount_cmd,
+                check_mounted_cmd,
+                write_cmd,
+                check_file,
+                umount_cmd]
+        else:
+            commands = [
+                mount_cmd,
+                check_mounted_cmd,
+                check_file,
+                umount_cmd]
+
+        async def check_device_present():
+            run = await zaza.model.async_run_on_unit(
+                ctxt['client_entity_id'],
+                ls_bdevice_cmd.format(bdevice=device_ctxt['bdevice']))
+            return device_ctxt['bdevice'] in run['stdout']
+
+        logging.info("Checking {} is present on {}".format(
+            device_ctxt['bdevice'],
+            ctxt['client_entity_id']))
+        zaza.model.block_until(check_device_present)
+        logging.info("Checking mounting device and access")
+        self.run_commands(ctxt['client_entity_id'], commands, device_ctxt)
+
+    def create_data_pool(self):
+        """Create data pool to back iscsi targets."""
+        generic_utils.assertActionRanOK(zaza.model.run_action_on_leader(
+            'ceph-mon',
+            'create-pool',
+            action_params={
+                'name': self.DATA_POOL_NAME}))
+
+    def create_ec_data_pool(self):
+        """Create erasure-coded data pool to back iscsi targets."""
+        generic_utils.assertActionRanOK(zaza.model.run_action_on_leader(
+            'ceph-mon',
+            'create-erasure-profile',
+            action_params={
+                'name': self.EC_PROFILE_NAME,
+                'coding-chunks': 2,
+                'data-chunks': 4,
+                'plugin': 'jerasure'}))
+        generic_utils.assertActionRanOK(zaza.model.run_action_on_leader(
+            'ceph-mon',
+            'create-pool',
+            action_params={
+                'name': self.EC_DATA_POOL,
+                'pool-type': 'erasure-coded',
+                'allow-ec-overwrites': True,
+                'erasure-profile-name': self.EC_PROFILE_NAME}))
+        generic_utils.assertActionRanOK(zaza.model.run_action_on_leader(
+            'ceph-mon',
+            'create-pool',
+            action_params={
+                'name': self.EC_METADATA_POOL}))
+
+    def run_client_checks(self, test_ctxt):
+        """Check access to multipath device.
+
+        Write a filesystem to the device, mount it and write data.
Then unmount + and logout the iscsi target, finally reconnect and remount checking + data is still present. + + :param test_ctxt: Test context. + :type test_ctxt: Dict + """ + self.create_iscsi_target(test_ctxt) + self.login_iscsi_target(test_ctxt) + self.check_client_device(test_ctxt, init_client=True) + self.logout_iscsi_targets(test_ctxt) + self.login_iscsi_target(test_ctxt) + self.check_client_device(test_ctxt, init_client=False) + + def test_create_and_mount_volume(self): + """Test creating a target and mounting it on a client.""" + self.create_data_pool() + ctxt = self.get_base_ctxt() + client_entity_id = ctxt['client_entity_ids'][0] + ctxt.update({ + 'client_entity_id': client_entity_id, + 'client_initiatorname': self.get_client_initiatorname( + client_entity_id), + 'pool_name': self.DATA_POOL_NAME, + 'chap_username': 'myiscsiusername1', + 'chap_password': 'myiscsipassword1', + 'img_size': '1G', + 'img_name': 'disk_rep_1'}) + self.run_client_checks(ctxt) + + def test_create_and_mount_ec_backed_volume(self): + """Test creating an EC backed target and mounting it on a client.""" + self.create_ec_data_pool() + ctxt = self.get_base_ctxt() + client_entity_id = ctxt['client_entity_ids'][1] + ctxt.update({ + 'client_entity_id': client_entity_id, + 'client_initiatorname': self.get_client_initiatorname( + client_entity_id), + 'pool_name': self.EC_DATA_POOL, + 'ec_meta_pool_name': self.EC_METADATA_POOL, + 'chap_username': 'myiscsiusername2', + 'chap_password': 'myiscsipassword2', + 'img_size': '2G', + 'img_name': 'disk_ec_1'}) + self.run_client_checks(ctxt) + + def test_create_and_mount_volume_default_pool(self): + """Test creating a target and mounting it on a client.""" + self.create_data_pool() + ctxt = self.get_base_ctxt() + client_entity_id = ctxt['client_entity_ids'][2] + ctxt.update({ + 'client_entity_id': client_entity_id, + 'client_initiatorname': self.get_client_initiatorname( + client_entity_id), + 'chap_username': 'myiscsiusername3', + 'chap_password': 'myiscsipassword3', + 'img_size': '3G', + 'img_name': 'disk_default_1'}) + self.run_client_checks(ctxt) + + def test_pause_resume(self): + """Test pausing and resuming a unit.""" + with self.pause_resume( + ['rbd-target-api', 'rbd-target-gw'], + pgrep_full=True): + logging.info("Testing pause resume") diff --git a/ceph-iscsi/tests/tests.yaml b/ceph-iscsi/tests/tests.yaml new file mode 100644 index 00000000..d5408139 --- /dev/null +++ b/ceph-iscsi/tests/tests.yaml @@ -0,0 +1,17 @@ +charm_name: ceph-iscsi + +gate_bundles: + - jammy-caracal + +smoke_bundles: + - jammy-caracal + +dev_bundles: + - jammy-caracal + +configure: + - tests.target.basic_guest_setup +tests: + - zaza.charm_tests.lifecycle.refresh.CharmRefreshAll + - tests.target.CephISCSIGatewayTest + diff --git a/ceph-iscsi/tox.ini b/ceph-iscsi/tox.ini new file mode 100644 index 00000000..89e845ee --- /dev/null +++ b/ceph-iscsi/tox.ini @@ -0,0 +1,156 @@ +# Classic charm (with zaza): ./tox.ini +# This file is managed centrally by release-tools and should not be modified +# within individual charm repos. See the 'global' dir contents for available +# choices of tox.ini for OpenStack Charms: +# https://github.com/openstack-charmers/release-tools +# +# TODO: Distill the func test requirements from the lint/unit test +# requirements. They are intertwined. Also, Zaza itself should specify +# all of its own requirements and if it doesn't, fix it there. +[tox] +envlist = pep8,py3 +skipsdist = True +# NOTE: Avoid build/test env pollution by not enabling sitepackages. 
+sitepackages = False +# NOTE: Avoid false positives by not skipping missing interpreters. +skip_missing_interpreters = False + +# NOTE: https://wiki.canonical.com/engineering/OpenStack/InstallLatestToxOnOsci +minversion = 3.18.0 + +[testenv] +setenv = VIRTUAL_ENV={envdir} + PYTHONHASHSEED=0 + TEST_JUJU3=1 + CHARM_DIR={envdir} + CHARMS_ARTIFACT_DIR={toxinidir}/.. + TEST_MODEL_SETTINGS = automatically-retry-hooks=true + TEST_MAX_RESOLVE_COUNT = 5 +install_command = + pip install {opts} {packages} +commands = stestr run --slowest {posargs} +allowlist_externals = + charmcraft + {toxinidir}/rename.sh +passenv = + HOME + TERM + CS_* + OS_* + TEST_* +deps = -r{toxinidir}/test-requirements.txt + +[testenv:build] +basepython = python3 +deps = -r{toxinidir}/build-requirements.txt +commands = + charmcraft clean + charmcraft -v pack + {toxinidir}/rename.sh + +[testenv:py36] +basepython = python3.6 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py37] +basepython = python3.7 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py38] +basepython = python3.8 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py39] +basepython = python3.9 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py310] +basepython = python3.10 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py311] +basepython = python3.11 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py312] +basepython = python3.12 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py3] +basepython = python3 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:pep8] +basepython = python3 +deps = flake8 + charm-tools +commands = flake8 {posargs} unit_tests tests src + +[testenv:cover] +# Technique based heavily upon +# https://github.com/openstack/nova/blob/master/tox.ini +basepython = python3 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt +setenv = + {[testenv]setenv} + PYTHON=coverage run +commands = + coverage erase + stestr run --slowest {posargs} + coverage combine + coverage html -d cover + coverage xml -o cover/coverage.xml + coverage report + +[coverage:run] +branch = True +concurrency = multiprocessing +parallel = True +source = + . +omit = + .tox/* + */charmhelpers/* + unit_tests/* + +[testenv:venv] +basepython = python3 +commands = {posargs} + +[testenv:func-noop] +basepython = python3 +commands = + functest-run-suite --help + +[testenv:func] +basepython = python3 +commands = + functest-run-suite --keep-model + +[testenv:func-smoke] +basepython = python3 +commands = + functest-run-suite --keep-model --smoke + +[testenv:func-dev] +basepython = python3 +commands = + functest-run-suite --keep-model --dev + +[testenv:func-target] +basepython = python3 +commands = + functest-run-suite --keep-model --bundle {posargs} + +[flake8] +ignore = E402,E226,W503,W504 +exclude = */charmhelpers diff --git a/ceph-iscsi/unit_tests/__init__.py b/ceph-iscsi/unit_tests/__init__.py new file mode 100644 index 00000000..7c565f59 --- /dev/null +++ b/ceph-iscsi/unit_tests/__init__.py @@ -0,0 +1,31 @@ +# Copyright 2020 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import mock + +# Mock out secrets to make py35 happy. +sys.modules['secrets'] = mock.MagicMock() + +# Patch out lsb_release() and get_platform() as unit tests should be fully +# insulated from the underlying platform. Unit tests assume that the system is +# ubuntu jammy. +mock.patch( + 'charmhelpers.osplatform.get_platform', return_value='ubuntu' +).start() +mock.patch( + 'charmhelpers.core.host.lsb_release', + return_value={ + 'DISTRIB_CODENAME': 'jammy' + }).start() diff --git a/ceph-iscsi/unit_tests/test_ceph_iscsi_charm.py b/ceph-iscsi/unit_tests/test_ceph_iscsi_charm.py new file mode 100644 index 00000000..e9ee7e8e --- /dev/null +++ b/ceph-iscsi/unit_tests/test_ceph_iscsi_charm.py @@ -0,0 +1,508 @@ +#!/usr/bin/env python3 + +# Copyright 2020 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import json +import unittest +import sys +from pathlib import Path + +sys.path.append('lib') # noqa +sys.path.append('src') # noqa + +from unittest.mock import call, patch, MagicMock, ANY, Mock + +from ops.testing import Harness, _TestingModelBackend +from ops.model import ( + BlockedStatus, +) +from ops import framework, model + +with patch('charmhelpers.core.host_factory.ubuntu.cmp_pkgrevno', + Mock(return_value=1)): + import charm + +TEST_CA = '''-----BEGIN CERTIFICATE----- +MIIC8TCCAdmgAwIBAgIUIchLT42Gy3QexrQbppgWb+xF2SgwDQYJKoZIhvcNAQEL +BQAwGjEYMBYGA1UEAwwPRGl2aW5lQXV0aG9yaXR5MB4XDTIwMDUwNTA5NDIzMVoX +DTIwMDYwNDA5NDIzMlowGjEYMBYGA1UEAwwPRGl2aW5lQXV0aG9yaXR5MIIBIjAN +BgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA54oZkgz+xpaM8AKfHTT19lwqvVSr +W3uZiyyiNAWBX+Ru5/5RqQONKmjPqU3Bh966IBxo8hGYsk7MJ3LobvuG6j497SUc +nn4JECm/mOKGeQvSSGnor93ropyWAQDQ3U1JVxV/K4sw2EpwwxfaJAM4L5rVi9EK +TsN23cPI81DKLuDxeXGGDPXMgQuTqfGD74jk6oTpfEHNmQB1Lcj+t+HxQqyoHyo5 +RPNRpntgPAvrF8i1ktJ/EH4GJxSBwm7098JcMgQSif9PHzL0UKehC2mlNX7ljGQ+ +eOLo6XNHYnq6DfxO6c3TbOIYt7VSc8K3IG500/4IzIT3+mtZ3rrM3mQWDwIDAQAB +oy8wLTAaBgNVHREEEzARgg9EaXZpbmVBdXRob3JpdHkwDwYDVR0TAQH/BAUwAwEB +/zANBgkqhkiG9w0BAQsFAAOCAQEAfzQSUzfaUv5Q4Eqz2YiWFx2zRYi0mUjYrGf9 +1qcprgpAq7F72+ed3uLGEmMr53+wgL4XdzLnSZwpYRFNBI7/t6hU3kxw9fJC5wMg +LHLdNlNqXAfoGVVTjcWPiQDF6tguccqyE3UWksl+2fncgkkcUpH4IP0AZVYlCsrz +mzs5P3ATpdTE1BZiw4WEiE4+N8ZC7Rcz0icfCEbKJduMkkxpJlvp5LwSsmtrpS3v +IZvomDHx8ypr+byzUTsfbAExdXVpctkG/zLMAi6/ZApO8GlD8ga8BUn2NGfBO5Q8 +28kEjS5DV835Re4hHE6pTC4HEjq0D2r1/4OG7ijt8emO5XPoMg== +-----END CERTIFICATE-----''' + +TEST_APP_CERT = '''-----BEGIN CERTIFICATE----- +MIID9jCCAt6gAwIBAgIUX5lsqmlS3aFLw7+IqSqadI7W1yswDQYJKoZIhvcNAQEL +BQAwRTFDMEEGA1UEAxM6VmF1bHQgSW50ZXJtZWRpYXRlIENlcnRpZmljYXRlIEF1 
+dGhvcml0eSAoY2hhcm0tcGtpLWxvY2FsKTAeFw0yMDA1MDUwOTQyMTdaFw0yMTA1 +MDUwODQyNDdaMA4xDDAKBgNVBAMTA2FwcDCCASIwDQYJKoZIhvcNAQEBBQADggEP +ADCCAQoCggEBALfmMzGbbShmQGduZImaGsJWd6vGriVwgYlIV60Kb1MLxuLvMyzV +tBseRH1izKgPDEmMRafU9N4DC0jRb+04APBM8QBWEDrrYgRQQSNxlCDVMn4Q4iHO +72FwCqI1HuW0R5J3yik4FkW3Kb8Uq5KDsKWqTLtaBW5X40toi1bkyFTnRZ6/3vmt +9arAfqmZyXlZK3rN+uiznLx8/rYU5umkicNGfDcWI37wjdYvK/tIE79vPom5VhGb +R+rz+hri7JmiaYkzrTWWibyjPNK0aGHa5OUIiFJfAtfyjoT1d/pxwS301BWLicw1 +vSzCJcTwpkzh2EWvuquK2sUjgHNR1qAkGIECAwEAAaOCARMwggEPMA4GA1UdDwEB +/wQEAwIDqDAdBgNVHSUEFjAUBggrBgEFBQcDAQYIKwYBBQUHAwIwHQYDVR0OBBYE +FL0B0hMaFwG0I0WR4CiOZnrqRHoLMEkGCCsGAQUFBwEBBD0wOzA5BggrBgEFBQcw +AoYtaHR0cDovLzE3Mi4yMC4wLjE5OjgyMDAvdjEvY2hhcm0tcGtpLWxvY2FsL2Nh +MDMGA1UdEQQsMCqCA2FwcIIDYXBwgghhcHB1bml0MYIIYXBwdW5pdDKHBKwAAAGH +BKwAAAIwPwYDVR0fBDgwNjA0oDKgMIYuaHR0cDovLzE3Mi4yMC4wLjE5OjgyMDAv +djEvY2hhcm0tcGtpLWxvY2FsL2NybDANBgkqhkiG9w0BAQsFAAOCAQEAbf6kIurd +pBs/84YD59bgeytlo8RatUzquwCRgRSv6N81+dYFBHtEVOoLwy/4wJAH2uMSKK+/ +C13vTBj/cx+SxWSIccPS0rglwEKhRF/u3n9hrFAL3QMLQPEXAJ5rJtapZ7a8uIWy +bChTMhoL4bApCXG+SH4mbhkD6SWQ1zPgfXD4ZiVtjEVIdyn63/fbNFUfhFKba8BE +wQUYw0yWq0/8ILq/WPyjKBvhSinIauy+ybdzaDMEg0Grq1n0K5l/WyK+t9tQd+UG +cLjamd6EKZ2OvOxZN6/cJlHDY2NKfjGF6KhQ5D2cseYK7dhOQ9AFjUCB/NgIAH9D +8vVp8VJOx6plOw== +-----END CERTIFICATE-----''' + +TEST_APP_KEY = '''-----BEGIN RSA PRIVATE KEY----- +MIIEpAIBAAKCAQEAt+YzMZttKGZAZ25kiZoawlZ3q8auJXCBiUhXrQpvUwvG4u8z +LNW0Gx5EfWLMqA8MSYxFp9T03gMLSNFv7TgA8EzxAFYQOutiBFBBI3GUINUyfhDi +Ic7vYXAKojUe5bRHknfKKTgWRbcpvxSrkoOwpapMu1oFblfjS2iLVuTIVOdFnr/e ++a31qsB+qZnJeVkres366LOcvHz+thTm6aSJw0Z8NxYjfvCN1i8r+0gTv28+iblW +EZtH6vP6GuLsmaJpiTOtNZaJvKM80rRoYdrk5QiIUl8C1/KOhPV3+nHBLfTUFYuJ +zDW9LMIlxPCmTOHYRa+6q4raxSOAc1HWoCQYgQIDAQABAoIBAD92GUSNNmYyoxcO +aXNy0rktza5hqccRxCHz7Q2yBCjMb53wneBi/vw8vbXnWmjEiKD43zDDtJzIwCQo +4k8ifHBwnNpY2ND8WZ7TcycgEtYhvIL0oJS6LLGbUJAZdMggJnLNE96VlFoKk0V1 +hJ/TAiqpUkF1F1q0yaNEOJGL8fYaI5Mz1pU+rspxS2uURFYGcD78Ouda5Pruwcp3 +A0Sbo+5P0FZRy79zpZbIzlvcS9R7wKuDJExCXXCsoZ+G0BWwTJPsDhkmcuXdS7f3 +3k3VO4Y8rcsOIHtI0Gj38yhO6giDjPeZWmXF6h7+zSWPaZydswTqtyS2BbvUmE3N +t/HYCOECgYEA2AYQZqAeFk5i7Qnb80pG9q1THZOM4V/FQsyfb9Bzw+nANP6LMd3D +tnY7BUNj0vTJVy/wnwFSmryQn3OqsxHYbOaor9xjuCauAGzp/4cj0anTySz0pZiQ +TzVepB35bj8ghRsQ1TO+7FQtMMZQGrNf1i6e3p9+hpKUA6ZwP0OEbpMCgYEA2e5E +Uqqj1u0pnUAeXp/2VbQS4rmxUrRsbdbiyoypNJOp+Olfi2DjQNgji0XDBdTLhDNv +nFtHY7TW4HJrwVAAqBlYKkunf6zGlP3iEGhk7RF1LSyGZXjfLACe7kzqlAx34Ue9 +9ynkesNKeT8kOOCC08llHuInMjfgfN0c7jWYNRsCgYEAgzBrlWd33iQMf9eU89MP +9Y6dA0EwNU5sBX0u9kCpjTjPuV88OTRsPsreXPvoC50NCR3cCzRKbh5F1g/wgn87 +6CbMGsDE7njPAwMhuEThw9pW+72JdWeJfBD1QMXTTNiZbzxYpKGgOPWF3DETRKPa +d8AoSxqhRCiQKwdQ85qVOnECgYAu6dfTY+B5N/ypWVAwVocU0/rsy8ScZTKiQov3 +xmf2ZYNFjhd/TZAeOWkNZishajmVb+0q34tyr09Cad9AchRyG2KbWEXqeisVj8HG +fnKbhhKPcvJLjcWdF1UfP3eP/08fM+508pO4yamSiEEn7Uy8grI9/7koWlb9Cixc +KzVk2QKBgQCdA3eoJHu4nTHRNgcvU3pxbRU4HQV8e+Hiw1tcxjprkACrNVvd7wZS +wULKjMb8z0RZyTBXLdNw3YKYOk/B7e/e9D+Zve4PTEL23Fcdt532x/7hBQ+7o6/4 +7RxsGx5/PXZI0/YKMKk9hsrdMl4/UAd0izvwPCQbB3eisuZYU/i8Jw== +-----END RSA PRIVATE KEY-----''' + + +class CharmTestCase(unittest.TestCase): + + def setUp(self, obj, patches): + super().setUp() + self.patches = patches + self.obj = obj + self.patch_all() + + def patch(self, method): + _m = patch.object(self.obj, method) + mock = _m.start() + self.addCleanup(_m.stop) + return mock + + def patch_all(self): + for method in self.patches: + setattr(self, method, self.patch(method)) + + +class _CephISCSIGatewayCharmBase(charm.CephISCSIGatewayCharmBase): + + @staticmethod + def get_bluestore_compression(): + return {} 
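+
+
+# An aside on the scaffolding above: CharmTestCase.patch_all() replaces every
+# name listed in `patches` on the target module, keeps each started mock as an
+# attribute of the test case, and registers a cleanup so the patch is undone
+# after each test. The two classes below are a minimal self-contained sketch
+# of that same pattern; _Demo is illustrative only and not part of the charm.
+class _Demo:
+
+    @staticmethod
+    def fetch():
+        return 'real value'
+
+
+class _CharmTestCasePatternDemo(unittest.TestCase):
+
+    def test_fetch_is_mocked(self):
+        patcher = patch.object(_Demo, 'fetch')
+        fetch = patcher.start()        # CharmTestCase stores this as an attr
+        self.addCleanup(patcher.stop)  # unpatched automatically per-test
+        fetch.return_value = 'mocked'
+        self.assertEqual(_Demo.fetch(), 'mocked')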
+ + +class TestCephISCSIGatewayCharmBase(CharmTestCase): + + PATCHES = [ + 'ch_templating', + 'gwcli_client', + 'os', + 'secrets', + 'socket', + 'subprocess', + ] + + def setUp(self): + super().setUp(charm, self.PATCHES) + self.harness = Harness( + _CephISCSIGatewayCharmBase, + ) + self.test_hostname = 'server1' + self.socket.gethostname.return_value = self.test_hostname + self.test_fqdn = self.test_hostname + '.foo' + self.socket.getfqdn.return_value = self.test_fqdn + self.secrets.choice.return_value = 'r' + self.test_admin_password = 'rrrrrrrr' + self.gwc = MagicMock() + self.gwcli_client.GatewayClient.return_value = self.gwc + patch_srv_enable = patch.object(charm.ch_host, 'service_enable') + patch_srv_enable.start() + self.addCleanup(patch_srv_enable.stop) + + # BEGIN: Workaround until network_get is implemented + class _TestingOPSModelBackend(_TestingModelBackend): + + def network_get(self, endpoint_name, relation_id=None): + network_data = { + 'bind-addresses': [{ + 'interface-name': 'eth0', + 'addresses': [{ + 'cidr': '10.0.0.0/24', + 'value': '10.0.0.10'}]}], + 'ingress-addresses': ['10.0.0.10'], + 'egress-subnets': ['10.0.0.0/24']} + return network_data + + config_ = self.harness._get_config(charm_config=None) + self.harness._backend = _TestingOPSModelBackend( + self.harness._unit_name, self.harness._meta, config_) + self.harness._model = model.Model( + self.harness._meta, + self.harness._backend) + self.harness._framework = framework.Framework( + ":memory:", + self.harness._charm_dir, + self.harness._meta, + self.harness._model) + # END Workaround + + def test_init(self): + self.harness.begin() + self.assertFalse(self.harness.charm._stored.target_created) + self.assertFalse(self.harness.charm._stored.enable_tls) + + def add_base_cluster_relation(self): + rel_id = self.harness.add_relation('cluster', 'ceph-iscsi') + self.harness.add_relation_unit( + rel_id, + 'ceph-iscsi/1') + return rel_id + + def complete_cluster_relation(self, rel_id): + self.harness.update_relation_data( + rel_id, + 'ceph-iscsi/1', + { + 'ingress-address': '10.0.0.2', + 'gateway_ready': 'True', + 'gateway_fqdn': 'ceph-iscsi-1.example' + }) + + def add_admin_access_relation(self): + rel_id = self.harness.add_relation('admin-access', 'ceph-dashboard') + self.harness.add_relation_unit( + rel_id, + 'ceph-dashboard/0') + self.harness.update_relation_data( + rel_id, + 'ceph-dashboard/0', + { + 'ingress-address': '10.0.0.2', + }) + return rel_id + + @patch('socket.getfqdn') + def test_on_create_target_action(self, _getfqdn): + _getfqdn.return_value = 'ceph-iscsi-0.example' + cluster_rel_id = self.add_base_cluster_relation() + self.complete_cluster_relation(cluster_rel_id) + self.harness.begin() + action_event = MagicMock() + action_event.params = { + 'iqn': 'iqn.mock.iscsi-gw:iscsi-igw', + 'gateway-units': 'ceph-iscsi/0 ceph-iscsi/1', + 'rbd-pool-name': 'iscsi-pool', + 'ec-rbd-metadata-pool': '', + 'image-name': 'disk1', + 'image-size': '5G', + 'client-initiatorname': 'client-initiator', + 'client-username': 'myusername', + 'client-password': 'mypassword123'} + self.harness.charm.on_create_target_action(action_event) + self.gwc.add_gateway_to_target.assert_has_calls([ + call( + 'iqn.mock.iscsi-gw:iscsi-igw', + '10.0.0.10', + 'ceph-iscsi-0.example'), + call( + 'iqn.mock.iscsi-gw:iscsi-igw', + '10.0.0.2', + 'ceph-iscsi-1.example')]) + + self.gwc.create_pool.assert_called_once_with( + 'iscsi-pool', + 'disk1', + '5G') + self.gwc.add_client_to_target.assert_called_once_with( + 'iqn.mock.iscsi-gw:iscsi-igw', + 
'client-initiator') + self.gwc.add_client_auth.assert_called_once_with( + 'iqn.mock.iscsi-gw:iscsi-igw', + 'client-initiator', + 'myusername', + 'mypassword123') + self.gwc.add_disk_to_client.assert_called_once_with( + 'iqn.mock.iscsi-gw:iscsi-igw', + 'client-initiator', + 'iscsi-pool', + 'disk1') + + @patch('socket.getfqdn') + def test_on_create_target_action_ec(self, _getfqdn): + _getfqdn.return_value = 'ceph-iscsi-0.example' + cluster_rel_id = self.add_base_cluster_relation() + self.complete_cluster_relation(cluster_rel_id) + self.harness.begin() + action_event = MagicMock() + action_event.params = { + 'iqn': 'iqn.mock.iscsi-gw:iscsi-igw', + 'gateway-units': 'ceph-iscsi/0 ceph-iscsi/1', + 'rbd-pool-name': 'iscsi-pool', + 'ec-rbd-metadata-pool': 'iscsi-metapool', + 'image-name': 'disk1', + 'image-size': '5G', + 'client-initiatorname': 'client-initiator', + 'client-username': 'myusername', + 'client-password': 'mypassword123'} + self.harness.charm.on_create_target_action(action_event) + self.subprocess.check_call.assert_called_once_with( + [ + 'rbd', + '--id', 'ceph-iscsi', + '--conf', '/etc/ceph/iscsi/ceph.conf', + 'create', + '--size', '5G', + 'iscsi-metapool/disk1', + '--data-pool', 'iscsi-pool']) + self.gwc.add_gateway_to_target.assert_has_calls([ + call( + 'iqn.mock.iscsi-gw:iscsi-igw', + '10.0.0.10', + 'ceph-iscsi-0.example'), + call( + 'iqn.mock.iscsi-gw:iscsi-igw', + '10.0.0.2', + 'ceph-iscsi-1.example')]) + + self.gwc.create_pool.assert_called_once_with( + 'iscsi-metapool', + 'disk1', + '5G') + self.gwc.add_client_to_target.assert_called_once_with( + 'iqn.mock.iscsi-gw:iscsi-igw', + 'client-initiator') + self.gwc.add_client_auth.assert_called_once_with( + 'iqn.mock.iscsi-gw:iscsi-igw', + 'client-initiator', + 'myusername', + 'mypassword123') + self.gwc.add_disk_to_client.assert_called_once_with( + 'iqn.mock.iscsi-gw:iscsi-igw', + 'client-initiator', + 'iscsi-metapool', + 'disk1') + + def test_on_has_peers(self): + rel_id = self.harness.add_relation('cluster', 'ceph-iscsi') + self.harness.begin() + self.harness.add_relation_unit( + rel_id, + 'ceph-iscsi/1') + self.assertIsNone( + self.harness.charm.peers.admin_password) + self.harness.set_leader() + self.harness.update_relation_data( + rel_id, + 'ceph-iscsi/1', + { + 'ingress-address': '10.0.0.2', + 'gateway_ready': 'True', + 'gateway_fqdn': 'ceph-iscsi-1.example' + }) + self.assertEqual( + self.harness.charm.peers.admin_password, self.test_admin_password) + + def test_on_has_peers_not_leader(self): + self.add_base_cluster_relation() + self.harness.begin() + self.assertIsNone( + self.harness.charm.peers.admin_password) + self.harness.set_leader(False) + self.harness.charm.peers.on.has_peers.emit() + self.assertIsNone( + self.harness.charm.peers.admin_password) + + def test_on_has_peers_existing_password(self): + rel_id = self.add_base_cluster_relation() + self.harness.update_relation_data( + rel_id, + 'ceph-iscsi', + {'admin_password': 'existing password'}) + self.harness.begin() + self.harness.set_leader() + self.harness.charm.peers.on.has_peers.emit() + self.assertEqual( + self.harness.charm.peers.admin_password, + 'existing password') + + def test_on_ceph_client_relation_joined(self): + self.maxDiff = None + rel_id = self.harness.add_relation('ceph-client', 'ceph-mon') + self.harness.update_config( + key_values={'gateway-metadata-pool': 'iscsi-pool'}) + self.harness.begin() + self.harness.add_relation_unit( + rel_id, + 'ceph-mon/0') + self.harness.update_relation_data( + rel_id, + 'ceph-mon/0', + {'ingress-address': 
'10.0.0.3'}) + rel_data = self.harness.get_relation_data(rel_id, 'ceph-iscsi/0') + actual_req_osd_settings = json.loads(rel_data['osd-settings']) + self.assertEqual( + actual_req_osd_settings, + {'osd heartbeat grace': 20, 'osd heartbeat interval': 5}) + actual_req_pool_ops = json.loads(rel_data['broker_req'])['ops'] + self.assertEqual(actual_req_pool_ops[0]['op'], 'create-pool') + self.assertEqual(actual_req_pool_ops[0]['name'], 'iscsi-pool') + self.assertEqual(actual_req_pool_ops[1]['op'], 'create-pool') + self.assertEqual(actual_req_pool_ops[1]['name'], 'ceph-iscsi') + self.assertEqual(actual_req_pool_ops[2]['op'], 'set-key-permissions') + self.assertEqual(actual_req_pool_ops[2]['client'], 'ceph-iscsi') + + def test_on_pools_available(self): + self.os.path.exists.return_value = False + self.os.path.basename = os.path.basename + rel_id = self.add_base_cluster_relation() + self.harness.update_relation_data( + rel_id, + 'ceph-iscsi', + {'admin_password': 'existing password', + 'gateway_ready': 'False'}) + self.harness.begin() + self.harness.charm.ceph_client._stored.pools_available = True + with patch.object(Path, 'mkdir') as mock_mkdir: + self.harness.charm.ceph_client.on.pools_available.emit() + mock_mkdir.assert_called_once_with(exist_ok=True, mode=488) + self.ch_templating.render.assert_has_calls([ + call('ceph.conf', '/etc/ceph/iscsi/ceph.conf', ANY), + call('iscsi-gateway.cfg', '/etc/ceph/iscsi-gateway.cfg', ANY), + call( + 'ceph.client.ceph-iscsi.keyring', + '/etc/ceph/iscsi/ceph.client.ceph-iscsi.keyring', ANY)], + any_order=True) + self.assertTrue(self.harness.charm._stored.is_started) + rel_data = self.harness.get_relation_data(rel_id, 'ceph-iscsi/0') + self.assertEqual(rel_data['gateway_ready'], 'True') + + def test_on_certificates_relation_joined(self): + rel_id = self.harness.add_relation('certificates', 'vault') + self.harness.begin() + self.harness.add_relation_unit( + rel_id, + 'vault/0') + self.harness.update_relation_data( + rel_id, + 'vault/0', + {'ingress-address': '10.0.0.3'}) + rel_data = self.harness.get_relation_data(rel_id, 'ceph-iscsi/0') + self.assertEqual( + rel_data['application_cert_requests'], + '{"server1.foo": {"sans": ["10.0.0.10", "server1"]}}') + + def test_on_certificates_relation_changed(self): + mock_TLS_CERT_PATH = MagicMock() + mock_TLS_CA_CERT_PATH = MagicMock() + mock_TLS_KEY_PATH = MagicMock() + mock_KEY_AND_CERT_PATH = MagicMock() + mock_TLS_PUB_KEY_PATH = MagicMock() + self.subprocess.check_output.return_value = b'pubkey' + rel_id = self.harness.add_relation('certificates', 'vault') + self.add_base_cluster_relation() + self.harness.begin() + self.harness.charm.TLS_CERT_PATH = mock_TLS_CERT_PATH + self.harness.charm.TLS_CA_CERT_PATH = mock_TLS_CA_CERT_PATH + self.harness.charm.TLS_KEY_PATH = mock_TLS_KEY_PATH + self.harness.charm.TLS_KEY_AND_CERT_PATH = mock_KEY_AND_CERT_PATH + self.harness.charm.TLS_PUB_KEY_PATH = mock_TLS_PUB_KEY_PATH + self.harness.add_relation_unit( + rel_id, + 'vault/0') + rel_data = { + 'app_data': { + 'cert': TEST_APP_CERT, + 'key': TEST_APP_KEY}} + self.harness.update_relation_data( + rel_id, + 'vault/0', + { + 'ceph-iscsi_0.processed_application_requests': json.dumps( + rel_data), + 'ca': TEST_CA}) + mock_TLS_CERT_PATH.write_bytes.assert_called_once() + mock_TLS_CA_CERT_PATH.write_bytes.assert_called_once() + mock_TLS_KEY_PATH.write_bytes.assert_called_once() + mock_KEY_AND_CERT_PATH.write_bytes.assert_called_once() + mock_TLS_PUB_KEY_PATH.write_bytes.assert_called_once() + 
self.subprocess.check_call.assert_called_once_with( + ['update-ca-certificates']) + self.assertTrue(self.harness.charm._stored.enable_tls) + + def test_custom_status_check(self): + self.harness.add_relation('ceph-client', 'ceph-mon') + self.harness.add_relation('cluster', 'ceph-iscsi') + self.harness.begin() + self.harness.charm.on.update_status.emit() + self.assertEqual( + self.harness.charm.unit.status.message, + '1 is an invalid unit count') + self.assertIsInstance( + self.harness.charm.unit.status, + BlockedStatus) + + def test_publish_admin_access_info(self): + cluster_rel_id = self.add_base_cluster_relation() + admin_access_rel_id = self.add_admin_access_relation() + self.harness.begin() + self.harness.set_leader() + self.complete_cluster_relation(cluster_rel_id) + self.assertEqual( + self.harness.get_relation_data( + admin_access_rel_id, + 'ceph-iscsi/0'), + { + 'host': '10.0.0.10', + 'name': self.test_fqdn, + 'port': '5000', + 'scheme': 'http'}) + self.assertEqual( + self.harness.get_relation_data( + admin_access_rel_id, + 'ceph-iscsi'), + { + 'password': self.test_admin_password, + 'username': 'admin'}) diff --git a/ceph-iscsi/unit_tests/test_interface_ceph_iscsi_peer.py b/ceph-iscsi/unit_tests/test_interface_ceph_iscsi_peer.py new file mode 100644 index 00000000..a6acb942 --- /dev/null +++ b/ceph-iscsi/unit_tests/test_interface_ceph_iscsi_peer.py @@ -0,0 +1,180 @@ +#!/usr/bin/env python3 + +import unittest +import sys + +sys.path.append('lib') # noqa +sys.path.append('src') # noqa + +import interface_ceph_iscsi_peer + +import unittest.mock as mock +from unittest.mock import PropertyMock + +from ops import framework +from ops.testing import Harness +from ops.charm import CharmBase + +from interface_ceph_iscsi_peer import CephISCSIGatewayPeers, ReadyPeersEvent + + +class TestCephISCSIGatewayPeers(unittest.TestCase): + + def setUp(self): + self.harness = Harness(CharmBase, meta=''' + name: ceph-iscsi + peers: + cluster: + interface: ceph-iscsi-peer + ''') + + @mock.patch.object(CephISCSIGatewayPeers, 'cluster_bind_address', + new_callable=PropertyMock) + @mock.patch('socket.getfqdn') + def test_on_changed(self, _getfqdn, _cluster_bind_address): + our_fqdn = 'ceph-iscsi-0.example' + _getfqdn.return_value = our_fqdn + # TODO: Replace this with calls to the test harness once + # https://github.com/canonical/operator/issues/222 is fixed. 
+ _cluster_bind_address.return_value = '192.0.2.1' + + class TestReceiver(framework.Object): + + def __init__(self, parent, key): + super().__init__(parent, key) + self.observed_events = [] + + def on_ready_peers(self, event): + self.observed_events.append(event) + + self.harness.begin() + self.peers = CephISCSIGatewayPeers(self.harness.charm, 'cluster') + + receiver = TestReceiver(self.harness.framework, 'receiver') + self.harness.framework.observe(self.peers.on.ready_peers, + receiver.on_ready_peers) + relation_id = self.harness.add_relation('cluster', 'ceph-iscsi') + self.harness.add_relation_unit( + relation_id, + 'ceph-iscsi/1') + self.harness.update_relation_data( + relation_id, + 'ceph-iscsi/1', + { + 'ingress-address': '192.0.2.2', + 'gateway_ready': 'True', + 'gateway_fqdn': 'ceph-iscsi-1.example' + }) + self.assertEqual(len(receiver.observed_events), 1) + self.assertIsInstance(receiver.observed_events[0], + ReadyPeersEvent) + + def test_set_admin_password(self): + self.harness.set_leader() + self.harness.begin() + self.peers = CephISCSIGatewayPeers(self.harness.charm, 'cluster') + self.harness.add_relation('cluster', 'ceph-iscsi') + + self.peers.set_admin_password('s3cr3t') + rel_data = self.harness.charm.model.get_relation('cluster').data + our_app = self.harness.charm.app + self.assertEqual(rel_data[our_app]['admin_password'], 's3cr3t') + + @mock.patch('socket.getfqdn') + def test_announce_ready(self, _getfqdn): + our_fqdn = 'ceph-iscsi-0.example' + _getfqdn.return_value = our_fqdn + self.harness.begin() + self.peers = CephISCSIGatewayPeers(self.harness.charm, 'cluster') + self.harness.add_relation('cluster', 'ceph-iscsi') + + self.peers.announce_ready() + rel_data = self.harness.charm.model.get_relation('cluster').data + our_unit = self.harness.charm.unit + self.assertEqual(rel_data[our_unit]['gateway_fqdn'], our_fqdn) + self.assertEqual(rel_data[our_unit]['gateway_ready'], 'True') + + @mock.patch.object(CephISCSIGatewayPeers, 'cluster_bind_address', + new_callable=PropertyMock) + @mock.patch('socket.getfqdn') + def test_ready_peer_details(self, _getfqdn, _cluster_bind_address): + _getfqdn.return_value = 'ceph-iscsi-0.example' + # TODO: Replace this with calls to the test harness once + # https://github.com/canonical/operator/issues/222 is fixed. + _cluster_bind_address.return_value = '192.0.2.1' + + self.harness.begin() + self.peers = CephISCSIGatewayPeers(self.harness.charm, 'cluster') + relation_id = self.harness.add_relation('cluster', 'ceph-iscsi') + + self.harness.add_relation_unit( + relation_id, + 'ceph-iscsi/1') + self.harness.update_relation_data( + relation_id, + 'ceph-iscsi/1', + { + 'ingress-address': '192.0.2.2', + 'gateway_ready': 'True', + 'gateway_fqdn': 'ceph-iscsi-1.example' + }) + self.harness.add_relation_unit( + relation_id, + 'ceph-iscsi/2') + self.harness.update_relation_data( + relation_id, + 'ceph-iscsi/2', + { + 'ingress-address': '192.0.2.3', + 'gateway_ready': 'True', + 'gateway_fqdn': 'ceph-iscsi-2.example', + }) + self.harness.add_relation_unit( + relation_id, + 'ceph-iscsi/3') + self.harness.update_relation_data( + relation_id, + 'ceph-iscsi/3', + {'ingress-address': '192.0.2.4'}) + + self.peers.ready_peer_details + + @mock.patch.object(interface_ceph_iscsi_peer.CephISCSIGatewayPeers, + 'cluster_bind_address', new_callable=PropertyMock) + def test_ready_peer_addresses(self, _cluster_bind_address): + # TODO: Replace this with calls to the test harness once + # https://github.com/canonical/operator/issues/222 is fixed. 
+        _cluster_bind_address.return_value = '192.0.2.1'
+
+        self.harness.begin()
+        self.peers = CephISCSIGatewayPeers(self.harness.charm, 'cluster')
+        relation_id = self.harness.add_relation('cluster', 'ceph-iscsi')
+
+        self.harness.add_relation_unit(
+            relation_id,
+            'ceph-iscsi/1')
+        self.harness.update_relation_data(
+            relation_id,
+            'ceph-iscsi/1',
+            {
+                'ingress-address': '192.0.2.2',
+                'gateway_ready': 'True',
+                'gateway_fqdn': 'ceph-iscsi-1.example'
+            })
+        self.harness.add_relation_unit(
+            relation_id,
+            'ceph-iscsi/2')
+        self.harness.update_relation_data(
+            relation_id,
+            'ceph-iscsi/2',
+            {
+                'ingress-address': '192.0.2.3',
+                'gateway_ready': 'True',
+                'gateway_fqdn': 'ceph-iscsi-2.example',
+            })
+        self.assertEqual(['192.0.2.1', '192.0.2.2', '192.0.2.3'],
+                         self.peers.peer_addresses)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/ceph-mon/.gitignore b/ceph-mon/.gitignore
new file mode 100644
index 00000000..901e8bd5
--- /dev/null
+++ b/ceph-mon/.gitignore
@@ -0,0 +1,12 @@
+bin
+.idea
+.coverage
+.testrepository
+.tox
+*.sw[nop]
+*.charm
+.idea
+*.pyc
+func-results.json
+.stestr
+__pycache__
diff --git a/ceph-mon/.gitreview b/ceph-mon/.gitreview
new file mode 100644
index 00000000..6ebc9abf
--- /dev/null
+++ b/ceph-mon/.gitreview
@@ -0,0 +1,5 @@
+[gerrit]
+host=review.opendev.org
+port=29418
+project=openstack/charm-ceph-mon.git
+defaultbranch=stable/squid-jammy
diff --git a/ceph-mon/.project b/ceph-mon/.project
new file mode 100644
index 00000000..17434fc2
--- /dev/null
+++ b/ceph-mon/.project
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>ceph-mon</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.python.pydev.PyDevBuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.python.pydev.pythonNature</nature>
+	</natures>
+</projectDescription>
diff --git a/ceph-mon/.pydevproject b/ceph-mon/.pydevproject
new file mode 100644
index 00000000..683d89d8
--- /dev/null
+++ b/ceph-mon/.pydevproject
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?eclipse-pydev version="1.0"?><pydev_project>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
+<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
+<path>/ceph-mon/hooks</path>
+<path>/ceph-mon/unit_tests</path>
+<path>/ceph-mon/tests</path>
+<path>/ceph-mon/actions</path>
+</pydev_pathproperty>
+</pydev_project>
diff --git a/ceph-mon/.stestr.conf b/ceph-mon/.stestr.conf
new file mode 100644
index 00000000..5fcccaca
--- /dev/null
+++ b/ceph-mon/.stestr.conf
@@ -0,0 +1,3 @@
+[DEFAULT]
+test_path=./unit_tests
+top_dir=./
diff --git a/ceph-mon/.zuul.yaml b/ceph-mon/.zuul.yaml
new file mode 100644
index 00000000..fd20909e
--- /dev/null
+++ b/ceph-mon/.zuul.yaml
@@ -0,0 +1,4 @@
+- project:
+    templates:
+      - openstack-python3-charm-jobs
+      - openstack-cover-jobs
diff --git a/ceph-mon/LICENSE b/ceph-mon/LICENSE
new file mode 100644
index 00000000..d6456956
--- /dev/null
+++ b/ceph-mon/LICENSE
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/ceph-mon/README.md b/ceph-mon/README.md new file mode 100644 index 00000000..ccb57d38 --- /dev/null +++ b/ceph-mon/README.md @@ -0,0 +1,241 @@ +# Overview + +[Ceph][ceph-upstream] is a unified, distributed storage system designed for +excellent performance, reliability, and scalability. + +The ceph-mon charm deploys Ceph monitor nodes, allowing one to create a monitor +cluster. 
It is used in conjunction with the [ceph-osd][ceph-osd-charm] charm.
+Together, these charms can scale out the amount of storage available in a Ceph
+cluster.
+
+# Usage
+
+## Configuration
+
+This section covers common and/or important configuration options. See file
+`config.yaml` for the full list of options, along with their descriptions and
+default values. See the [Juju documentation][juju-docs-config-apps] for details
+on configuring applications.
+
+#### `customize-failure-domain`
+
+The `customize-failure-domain` option determines how a Ceph CRUSH map is
+configured.
+
+A value of 'false' (the default) will lead to a map that will replicate data
+across hosts (implemented as [Ceph bucket type][upstream-ceph-buckets] 'host').
+With a value of 'true', all MAAS-defined zones will be used to generate a map
+that will replicate data across Ceph availability zones (implemented as bucket
+type 'rack').
+
+This option is also supported by the ceph-osd charm. Its value must be the same
+for both charms.
+
+#### `monitor-count`
+
+The `monitor-count` option gives the number of ceph-mon units in the monitor
+sub-cluster (where one ceph-mon unit represents one MON). The default value is
+'3' and is generally a good choice, but it is good practice to set this
+explicitly to avoid a possible race condition during the formation of the
+sub-cluster. To establish quorum and enable partition tolerance, an odd number
+of ceph-mon units is required.
+
+> **Important**: A monitor count of less than three is not recommended for
+  production environments. Test environments can use a single ceph-mon unit by
+  setting this option to '1'.
+
+#### `expected-osd-count`
+
+The `expected-osd-count` option states the number of OSDs expected to be
+deployed in the cluster. This value can influence the number of placement
+groups (PGs) to use per pool. The PG calculation is based either on the actual
+number of OSDs or this option's value, whichever is greater. The default value
+is '0', which tells the charm to only consider the actual number of OSDs. If
+the actual number of OSDs is less than three then this option must explicitly
+state that number. Only once a sufficient (or prescribed) number of OSDs has
+been attained will the charm be able to create Ceph pools.
+
+> **Note**: The inability to create a pool due to an insufficient number of
+  OSDs will cause any consuming application (characterised by a relation
+  involving the `ceph-mon:client` endpoint) to remain in the 'waiting' state.
+
+#### `source`
+
+The `source` option states the software sources. A common value is an OpenStack
+UCA release (e.g. 'cloud:xenial-queens' or 'cloud:bionic-ussuri'). See [Ceph
+and the UCA][cloud-archive-ceph]. The underlying host's existing apt sources
+will be used if this option is not specified (this behaviour can be explicitly
+chosen by using the value of 'distro').
+
+## Deployment
+
+A cloud with three MON nodes is a typical design, whereas three OSDs are
+considered the minimum. For example, to deploy a Ceph cluster consisting of
+three OSDs (one per ceph-osd unit) and three MONs:
+
+    juju deploy -n 3 --config ceph-osd.yaml ceph-osd
+    juju deploy -n 3 --to lxd:0,lxd:1,lxd:2 ceph-mon
+    juju add-relation ceph-osd:mon ceph-mon:osd
+
+Here, a containerised MON is running alongside each storage node. We've assumed
+that the machines spawned in the first command are assigned IDs of 0, 1, and 2.
+
+By default, the monitor cluster will not be complete until three ceph-mon units
+have been deployed.
This is to ensure that a quorum is achieved prior to the
+addition of storage devices.
+
+See the [Ceph documentation][ceph-docs-monitors] for notes on monitor cluster
+deployment strategies.
+
+> **Note**: Refer to the [Install OpenStack][cdg-install-openstack] page in the
+  OpenStack Charms Deployment Guide for instructions on installing a monitor
+  cluster for use with OpenStack.
+
+## Network spaces
+
+This charm supports the use of Juju [network spaces][juju-docs-spaces] (Juju
+`v.2.0`). This feature optionally allows specific types of the application's
+network traffic to be bound to subnets that the underlying hardware is
+connected to.
+
+> **Note**: Spaces must be configured in the backing cloud prior to deployment.
+
+The ceph-mon charm exposes the following Ceph traffic types (bindings):
+
+* 'public' (front-side)
+* 'cluster' (back-side)
+
+For example, provided that spaces 'data-space' and 'cluster-space' exist, the
+deploy command above could look like this:
+
+    juju deploy -n 3 --config ceph-mon.yaml ceph-mon \
+       --bind "public=data-space cluster=cluster-space"
+
+Alternatively, configuration can be provided as part of a bundle:
+
+```yaml
+    ceph-mon:
+      charm: cs:ceph-mon
+      num_units: 1
+      bindings:
+        public: data-space
+        cluster: cluster-space
+```
+
+Refer to the [Ceph Network Reference][ceph-docs-network-ref] to learn about the
+implications of segregating Ceph network traffic.
+
+> **Note**: Existing ceph-mon units configured with the `ceph-public-network`
+  or `ceph-cluster-network` options will continue to honour them. Furthermore,
+  these options override any space bindings, if set.
+
+## Monitoring
+
+The charm supports Ceph metric monitoring with Prometheus. Add relations to the
+[prometheus][prometheus-charm] application in this way:
+
+    juju deploy prometheus2
+    juju add-relation ceph-mon prometheus2
+
+> **Note**: Prometheus support is available starting with Ceph Luminous
+  (xenial-queens UCA pocket).
+
+Alternatively, integration with the [COS Lite][cos-lite] observability
+stack is available via the metrics-endpoint relation.
+
+Relating to prometheus-k8s via the metrics-endpoint interface (as is
+found in the [COS Lite][cos-lite] bundle) will send metrics to
+Prometheus, and alerting rules will be configured as well. The rules are
+supplied as a resource named `alert-rules`; the default rules are taken
+from [upstream ceph rules][ceph-rules]. The defaults can be replaced
+with customized rules by attaching a resource:
+
+    juju attach ceph-mon alert-rules=./my-prom-alerts.yaml.rules
+
+## Actions
+
+This section lists Juju [actions][juju-docs-actions] supported by the charm.
+Actions allow specific operations to be performed on a per-unit basis. To
+display action descriptions run `juju actions ceph-mon`. If the charm is not
+deployed then see file `actions.yaml`.
+
+* `change-osd-weight`
+* `copy-pool`
+* `create-cache-tier`
+* `create-crush-rule`
+* `create-erasure-profile`
+* `create-pool`
+* `crushmap-update`
+* `delete-erasure-profile`
+* `delete-pool`
+* `get-erasure-profile`
+* `get-health`
+* `list-erasure-profiles`
+* `list-inconsistent-objs`
+* `list-pools`
+* `pause-health`
+* `pool-get`
+* `pool-set`
+* `pool-statistics`
+* `purge-osd`
+* `remove-cache-tier`
+* `remove-pool-snapshot`
+* `rename-pool`
+* `resume-health`
+* `security-checklist`
+* `set-noout`
+* `set-pool-max-bytes`
+* `show-disk-free`
+* `snapshot-pool`
+* `unset-noout`
+
+## Presenting the list of Ceph pools with details
+
+The following example returns the list of pools with details: `id`, `name`,
+`size` and `min_size`.
+The [jq][jq] utility is used to parse the action output in json format.
+
+    juju run-action --wait ceph-mon/leader list-pools format=json \
+       --format json | jq '.[].results.pools | fromjson | .[]
+       | {pool:.pool, name:.pool_name, size:.size, min_size:.min_size}'
+
+Sample output:
+
+    {
+      "pool": 1,
+      "name": "test",
+      "size": 3,
+      "min_size": 2
+    }
+    {
+      "pool": 2,
+      "name": "test2",
+      "size": 3,
+      "min_size": 2
+    }
+
+# Bugs
+
+Please report bugs on [Launchpad][lp-bugs-charm-ceph-mon].
+
+For general charm questions refer to the OpenStack [Charm Guide][cg].
+
+<!-- LINKS -->
+[ceph-upstream]: https://ceph.io
+[cg]: https://docs.openstack.org/charm-guide
+[ceph-osd-charm]: https://jaas.ai/ceph-osd
+[juju-docs-actions]: https://jaas.ai/docs/actions
+[juju-docs-spaces]: https://jaas.ai/docs/spaces
+[juju-docs-config-apps]: https://juju.is/docs/configuring-applications
+[ceph-docs-network-ref]: http://docs.ceph.com/docs/master/rados/configuration/network-config-ref
+[ceph-docs-monitors]: https://docs.ceph.com/docs/master/dev/mon-bootstrap
+[lp-bugs-charm-ceph-mon]: https://bugs.launchpad.net/charm-ceph-mon/+filebug
+[cdg-install-openstack]: https://docs.openstack.org/project-deploy-guide/charm-deployment-guide/latest/install-openstack.html
+[prometheus-charm]: https://jaas.ai/prometheus2
+[cloud-archive-ceph]: https://wiki.ubuntu.com/OpenStack/CloudArchive#Ceph_and_the_UCA
+[upstream-ceph-buckets]: https://docs.ceph.com/docs/master/rados/operations/crush-map/#types-and-buckets
+[jq]: https://stedolan.github.io/jq/
+[cos-lite]: https://charmhub.io/cos-lite
+[ceph-rules]: https://github.com/ceph/ceph/blob/351e1ac63950164ea5f08a6bfc7c14af586bb208/monitoring/ceph-mixin/prometheus_alerts.yml
diff --git a/ceph-mon/actions.yaml b/ceph-mon/actions.yaml
new file mode 100644
index 00000000..215c7051
--- /dev/null
+++ b/ceph-mon/actions.yaml
@@ -0,0 +1,463 @@
+pause-health:
+  description: "Pause ceph health operations across the entire ceph cluster"
+resume-health:
+  description: "Resume ceph health operations across the entire ceph cluster"
+get-health:
+  description: "Output the current cluster health reported by `ceph health`"
+create-cache-tier:
+  description: "Create a new cache tier"
+  params:
+    backer-pool:
+      type: string
+      description: "The name of the pool that will back the cache tier. Also known as the cold pool"
+    cache-pool:
+      type: string
+      description: "The name of the pool that will be the cache pool. Also known as the hot pool"
+    cache-mode:
+      type: string
+      default: writeback
+      enum:
+        - writeback
+        - readonly
+      description: "The mode of the caching tier. Please refer to the Ceph docs for more information"
+  required:
+    - backer-pool
+    - cache-pool
+  additionalProperties: false
+remove-cache-tier:
+  description: "Remove an existing cache tier"
+  params:
+    backer-pool:
+      type: string
+      description: "The name of the pool that backs the cache tier. Also known as the cold pool"
+    cache-pool:
+      type: string
+      description: "The name of the pool that is the cache pool. Also known as the hot pool"
+  required:
+    - backer-pool
+    - cache-pool
+  additionalProperties: false
+create-pool:
+  description: "Creates a pool"
+  params:
+    name:
+      type: string
+      description: "The name of the pool"
+    app-name:
+      type: string
+      description: "App name to set on the newly created pool."
+    profile-name:
+      type: string
+      description: "The crush profile to use for this pool. The ruleset must exist first."
+    pool-type:
+      type: string
+      default: replicated
+      enum:
+        - replicated
+        - erasure
+        - erasure-coded
+      description: "The pool type which may either be replicated to recover from lost OSDs by keeping multiple copies of the objects or erasure to get a kind of generalized RAID5 capability."
+    replicas:
+      type: integer
+      default: 3
+      description: "For the replicated pool this is the number of replicas to store of each object."
+    erasure-profile-name:
+      type: string
+      default: default
+      description: "The name of the erasure coding profile to use for this pool. Note this profile must exist before calling create-pool"
+    percent-data:
+      type: integer
+      default: 10
+      description: "The percentage of data that is expected to be contained in the pool for the specific OSD set. Default value is to assume 10% of the data is for this pool, which is a relatively low % of the data but allows for the pg_num to be increased."
+    allow-ec-overwrites:
+      type: boolean
+      description: "Permit overwrites for erasure coded pool types."
+  required:
+    - name
+  additionalProperties: false
+create-crush-rule:
+  description: "Create a new replicated CRUSH rule to use on a pool."
+  params:
+    name:
+      type: string
+      description: "The name of the rule"
+    failure-domain:
+      type: string
+      default: host
+      enum:
+        - chassis
+        - datacenter
+        - host
+        - osd
+        - pdu
+        - pod
+        - rack
+        - region
+        - room
+        - root
+        - row
+      description: "The failure-domain=host will create a CRUSH ruleset that ensures no two chunks are stored in the same host."
+    device-class:
+      type: string
+      enum:
+        - hdd
+        - ssd
+        - nvme
+      description: "CRUSH device class to use for new rule."
+  required:
+    - name
+  additionalProperties: false
+create-erasure-profile:
+  description: "Create a new erasure code profile to use on a pool."
+  params:
+    name:
+      type: string
+      description: "The name of the profile"
+    failure-domain:
+      type: string
+      enum:
+        - chassis
+        - datacenter
+        - host
+        - osd
+        - pdu
+        - pod
+        - rack
+        - region
+        - room
+        - root
+        - row
+      description: "The failure-domain=host will create a CRUSH ruleset that ensures no two chunks are stored in the same host."
+    plugin:
+      type: string
+      default: jerasure
+      enum:
+        - jerasure
+        - isa
+        - lrc
+        - shec
+        - clay
+      description: "The erasure plugin to use for this profile. See http://docs.ceph.com/docs/master/rados/operations/erasure-code-profile/ for more details"
+    data-chunks:
+      type: integer
+      default: 3
+      description: "The number of data chunks, i.e. the number of chunks in which the original object is divided. For instance if K = 2 a 10KB object will be divided into K objects of 5KB each."
+    coding-chunks:
+      type: integer
+      default: 2
+      description: "The number of coding chunks, i.e. the number of additional chunks computed by the encoding functions. If there are 2 coding chunks, it means 2 OSDs can be out without losing data."
+    locality-chunks:
+      type: integer
+      description: "LRC plugin - Group the coding and data chunks into sets of size locality. For instance, for k=4 and m=2, when locality=3 two groups of three are created. Each set can be recovered without reading chunks from another set."
+    crush-locality:
+      type: string
+      enum:
+        - chassis
+        - datacenter
+        - host
+        - osd
+        - pdu
+        - pod
+        - rack
+        - region
+        - room
+        - root
+        - row
+      description: "LRC plugin - The type of CRUSH bucket in which each set of chunks defined by locality-chunks will be stored."
+    durability-estimator:
+      type: integer
+      description: "SHEC plugin - the number of parity chunks each of which includes each data chunk in its calculation range. The number is used as a durability estimator. For instance, if c=2, 2 OSDs can be down without losing data."
+    helper-chunks:
+      type: integer
+      description: "CLAY plugin - number of OSDs requested to send data during recovery of a single chunk."
+    scalar-mds:
+      type: string
+      enum:
+        - jerasure
+        - isa
+        - shec
+      description: "CLAY plugin - specifies the plugin that is used as a building block in the layered construction."
+    device-class:
+      type: string
+      enum:
+        - hdd
+        - ssd
+        - nvme
+      description: "CRUSH device class to use for erasure profile."
+  required:
+    - name
+  additionalProperties: false
+get-erasure-profile:
+  description: "Display an erasure code profile."
+  params:
+    name:
+      type: string
+      description: "The name of the profile"
+  required:
+    - name
+  additionalProperties: false
+delete-erasure-profile:
+  description: "Deletes an erasure code profile."
+  params:
+    name:
+      type: string
+      description: "The name of the profile"
+  required:
+    - name
+  additionalProperties: false
+list-erasure-profiles:
+  description: "List the names of all erasure code profiles"
+  additionalProperties: false
+list-inconsistent-objs:
+  description: "List the names of the inconsistent objects per PG"
+  params:
+    format:
+      type: string
+      enum:
+        - json
+        - yaml
+        - text
+      default: text
+      description: "The output format, either json, yaml or text (default)"
+  additionalProperties: false
+list-pools:
+  description: "List your cluster's pools"
+  params:
+    format:
+      type: string
+      default: text
+      enum:
+        - text
+        - text-full
+        - json
+      description: "Specify output format (text|text-full|json). The formats `text-full` and `json` provide the same level of detail."
+  additionalProperties: false
+set-pool-max-bytes:
+  description: "Set pool quotas for the maximum number of bytes."
+  params:
+    max:
+      type: integer
+      description: "The maximum number of bytes to allow in the pool (the quota)"
+    name:
+      type: string
+      description: "The name of the pool"
+  required:
+    - name
+    - max
+  additionalProperties: false
+delete-pool:
+  description: "Deletes the named pool"
+  params:
+    name:
+      type: string
+      description: "The name of the pool"
+  required:
+    - name
+  additionalProperties: false
+rename-pool:
+  description: "Rename a pool"
+  params:
+    name:
+      type: string
+      description: "The name of the pool"
+    new-name:
+      type: string
+      description: "The new name of the pool"
+  required:
+    - name
+    - new-name
+  additionalProperties: false
+pool-statistics:
+  description: "Show a pool's utilization statistics"
+  additionalProperties: false
+snapshot-pool:
+  description: "Snapshot a pool"
+  params:
+    name:
+      type: string
+      description: "The name of the pool"
+    snapshot-name:
+      type: string
+      description: "The name of the snapshot"
+  required:
+    - snapshot-name
+    - name
+  additionalProperties: false
+remove-pool-snapshot:
+  description: "Remove a pool snapshot"
+  params:
+    name:
+      type: string
+      description: "The name of the pool"
+    snapshot-name:
+      type: string
+      description: "The name of the snapshot"
+  required:
+    - snapshot-name
+    - name
+  additionalProperties: false
+pool-set:
+  description: "Set a value for the pool"
+  params:
+    name:
+      type: string
+      description: "The pool to set this variable on."
+    key:
+      type: string
+      description: "Any valid Ceph key from http://docs.ceph.com/docs/master/rados/operations/pools/#set-pool-values"
+    value:
+      # LP: #1838650 - unfortunately, Juju appears to consider '3' on the
+      # command line as not being a string, and has to be quoted as "'3'". So,
+      # we actually let the charm do the verification, and let any value
+      # through here.
+      description: "The value to set"
+  required:
+    - key
+    - value
+    - name
+  additionalProperties: false
+pool-get:
+  description: "Get a value for the pool"
+  params:
+    name:
+      type: string
+      description: "The pool to get this variable from."
+    key:
+      type: string
+      description: "Any valid Ceph key from http://docs.ceph.com/docs/master/rados/operations/pools/#get-pool-values"
+  required:
+    - key
+    - name
+  additionalProperties: false
+crushmap-update:
+  description: "Apply a json crushmap definition. This will throw away the existing ceph crushmap and apply the new definition. Use with extreme caution. WARNING - This function is extremely dangerous if misused. It can very easily break your cluster in unexpected ways."
+  params:
+    map:
+      type: string
+      description: "The json crushmap blob"
+  required:
+    - map
+  additionalProperties: false
+show-disk-free:
+  description: "Show disk utilization by host and OSD."
+  params:
+    format:
+      type: string
+      enum:
+        - json
+        - json-pretty
+        - xml
+        - xml-pretty
+        - plain
+      default: plain
+      description: "Output format, either json, json-pretty, xml, xml-pretty, plain; defaults to plain"
+  additionalProperties: false
+copy-pool:
+  description: "Copy contents of a pool to a new pool."
+  params:
+    source:
+      type: string
+      description: "Pool to copy data from."
+    target:
+      type: string
+      description: "Pool to copy data to."
+  required:
+    - source
+    - target
+  additionalProperties: false
+set-noout:
+  description: "Set ceph noout across the cluster."
+unset-noout:
+  description: "Unset ceph noout across the cluster."
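+# Note: set-noout/unset-noout toggle the cluster-wide 'noout' flag (roughly
+# equivalent to 'ceph osd set noout' / 'ceph osd unset noout'), which keeps
+# OSDs from being marked out of the CRUSH map during planned maintenance.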
+security-checklist:
+  description: Validate the running configuration against the OpenStack security guides checklist
+purge-osd:
+  description: "Removes an OSD from a cluster map, removes its authentication key, removes the OSD from the OSD map. The OSD must have zero weight before running this action, to avoid excessive I/O on the cluster."
+  params:
+    osd:
+      type: integer
+      description: "ID of the OSD to remove, e.g. for osd.53, supply 53."
+    i-really-mean-it:
+      type: boolean
+      description: "This must be toggled to enable actually performing this action."
+  required:
+    - osd
+    - i-really-mean-it
+change-osd-weight:
+  description: "Set the crush weight of an OSD to the new value supplied."
+  params:
+    osd:
+      type: integer
+      description: "ID of the OSD to operate on, e.g. for osd.53, supply 53."
+    weight:
+      type: number
+      description: "The new weight of the OSD, must be a decimal number, e.g. 1.04"
+  required:
+    - osd
+    - weight
+get-quorum-status:
+  description: "Return lists of the known mons, and online mons, to determine if there is quorum."
+  params:
+    format:
+      type: string
+      default: text
+      enum:
+        - text
+        - json
+      description: Specify output format (text|json).
+list-crush-rules:
+  description: "List Ceph crush rules"
+  params:
+    format:
+      type: string
+      enum:
+        - json
+        - yaml
+        - text
+      default: text
+      description: "The output format, either json, yaml or text (default)"
+  additionalProperties: false
+get-or-create-user:
+  description: "Get or create a user and its capabilities."
+  params:
+    username:
+      type: string
+      description: "User ID to get or create."
+    mon-caps:
+      type: string
+      default: allow rw
+      description: "Monitor capabilities include r, w, x access settings or profile {name}."
+    osd-caps:
+      type: string
+      default: allow rw
+      description: "OSD capabilities include r, w, x, class-read, class-write access settings or profile {name}."
+  required: [username]
+delete-user:
+  description: "Delete a user."
+  params:
+    username:
+      type: string
+      description: "User ID to delete."
+  required: [username]
+pg-repair:
+  description: "Repair inconsistent placement groups, if safe to do so."
+reset-osd-count-report:
+  description: "Update report of osds present in osd tree. Used for monitoring."
+list-entities:
+  description: "Returns a list of entities recognized by the Ceph cluster."
+  params:
+    format:
+      type: string
+      enum:
+        - json
+        - yaml
+        - text
+      default: text
+      description: "The output format, either json, yaml or text (default)"
+rotate-key:
+  description: "Rotate the key of an entity in the Ceph cluster"
+  params:
+    entity:
+      type: string
+      description: The entity for which to rotate the key
+  required: [entity]
diff --git a/ceph-mon/actions/__init__.py b/ceph-mon/actions/__init__.py
new file mode 100644
index 00000000..26092e0f
--- /dev/null
+++ b/ceph-mon/actions/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2022 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
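The action scripts that follow all share one shape: read the parameters
declared in `actions.yaml` with charmhelpers' `action_get`, shell out to the
Ceph CLI, and report problems via `action_fail` rather than raising. A minimal
sketch of that shape, assuming a hypothetical pool-stats-style action (the
function name and `message` result key are illustrative, not charm code):

```python
#!/usr/bin/env python3
# Minimal sketch of the action-script pattern used in the files below.
from subprocess import CalledProcessError, check_output

from charmhelpers.core.hookenv import action_fail, action_get, action_set


def show_pool_stats():
    """Run 'ceph osd pool stats <name>' for the pool named by the
    'name' action parameter and publish the output as an action result."""
    pool_name = action_get('name')
    try:
        stats = check_output(
            ['ceph', 'osd', 'pool', 'stats', pool_name]).decode('UTF-8')
        action_set({'message': stats})
    except CalledProcessError as e:
        # Mark the action as failed instead of letting the hook error out.
        action_fail(str(e))


if __name__ == '__main__':
    show_pool_stats()
```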
diff --git a/ceph-mon/actions/ceph_ops.py b/ceph-mon/actions/ceph_ops.py
new file mode 100755
index 00000000..ac8c1464
--- /dev/null
+++ b/ceph-mon/actions/ceph_ops.py
@@ -0,0 +1,155 @@
+# Copyright 2016 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+from subprocess import CalledProcessError, check_output
+
+from charmhelpers.core.hookenv import (
+    action_get,
+    action_fail,
+)
+from charmhelpers.contrib.storage.linux.ceph import pool_set, \
+    set_pool_quota, snapshot_pool, remove_pool_snapshot
+
+
+def list_pools():
+    """Return a list of all Ceph pools."""
+    try:
+        pool_list = check_output(['ceph', 'osd', 'pool', 'ls']).decode('UTF-8')
+        return pool_list
+    except CalledProcessError as e:
+        action_fail(str(e))
+
+
+def pool_get():
+    """
+    Returns a key from a pool using 'ceph osd pool get'.
+
+    The key is provided via the 'key' action parameter and the
+    pool provided by the 'pool_name' parameter. These are used when
+    running 'ceph osd pool get <pool-name> <key>', the result of
+    which is returned.
+
+    On failure, 'unknown' will be returned.
+    """
+    key = action_get("key")
+    pool_name = action_get("pool_name")
+    try:
+        value = (check_output(['ceph', 'osd', 'pool', 'get', pool_name, key])
+                 .decode('UTF-8'))
+        return value
+    except CalledProcessError as e:
+        action_fail(str(e))
+        return 'unknown'
+
+
+def set_pool():
+    """
+    Sets an arbitrary key in a Ceph pool.
+
+    Sets the key specified by the action parameter 'key' to the value
+    specified in the action parameter 'value' for the pool specified
+    by the action parameter 'pool_name' using the charmhelpers
+    'pool_set' function.
+    """
+    key = action_get("key")
+    value = action_get("value")
+    pool_name = action_get("pool_name")
+    pool_set(service='ceph', pool_name=pool_name, key=key, value=value)
+
+
+def pool_stats():
+    """
+    Returns statistics for a pool.
+
+    The pool name is provided by the action parameter 'name'.
+    """
+    try:
+        pool_name = action_get("name")
+        stats = (
+            check_output(['ceph', 'osd', 'pool', 'stats', pool_name])
+            .decode('UTF-8')
+        )
+        return stats
+    except CalledProcessError as e:
+        action_fail(str(e))
+
+
+def delete_pool_snapshot():
+    """
+    Delete a pool snapshot.
+
+    Deletes a snapshot from the pool provided by the action
+    parameter 'name', with the snapshot name provided by
+    action parameter 'snapshot-name'
+    """
+    pool_name = action_get("name")
+    snapshot_name = action_get("snapshot-name")
+    remove_pool_snapshot(service='ceph',
+                         pool_name=pool_name,
+                         snapshot_name=snapshot_name)
+
+
+# Note only one or the other (max bytes or max objects) can be set
+def set_pool_max_bytes():
+    """
+    Sets the max bytes quota for a pool.
+
+    Sets the pool quota maximum bytes for the pool specified by
+    the action parameter 'name' to the value specified by
+    the action parameter 'max'
+    """
+    pool_name = action_get("name")
+    max_bytes = action_get("max")
+    set_pool_quota(service='ceph',
+                   pool_name=pool_name,
+                   max_bytes=max_bytes)
+
+
+def snapshot_ceph_pool():
+    """
+    Snapshots a Ceph pool.
+ + Snapshots the pool provided in action parameter 'name' and + uses the parameter provided in the action parameter 'snapshot-name' + as the name for the snapshot. + """ + pool_name = action_get("name") + snapshot_name = action_get("snapshot-name") + snapshot_pool(service='ceph', + pool_name=pool_name, + snapshot_name=snapshot_name) + + +def get_quorum_status(format_type="text"): + """ + Return the output of 'ceph quorum_status'. + + Errors propagate to the calling action script, which reports them via + function_fail(). + """ + ceph_output = check_output(['ceph', 'quorum_status'], + timeout=60).decode("utf-8") + ceph_output_json = json.loads(ceph_output) + + if format_type == "json": + return {"message": json.dumps(ceph_output_json)} + else: + return { + "election-epoch": ceph_output_json.get("election_epoch"), + "quorum-age": ceph_output_json.get("quorum_age"), + "quorum-leader-name": ceph_output_json.get("quorum_leader_name", + "unknown"), + "quorum-names": ", ".join(ceph_output_json.get("quorum_names", + [])), + } diff --git a/ceph-mon/actions/create-cache-tier b/ceph-mon/actions/create-cache-tier new file mode 120000 index 00000000..90631ac7 --- /dev/null +++ b/ceph-mon/actions/create-cache-tier @@ -0,0 +1 @@ +create_cache_tier.py \ No newline at end of file diff --git a/ceph-mon/actions/create-pool b/ceph-mon/actions/create-pool new file mode 120000 index 00000000..226b1774 --- /dev/null +++ b/ceph-mon/actions/create-pool @@ -0,0 +1 @@ +create_pool.py \ No newline at end of file diff --git a/ceph-mon/actions/create_cache_tier.py b/ceph-mon/actions/create_cache_tier.py new file mode 100755 index 00000000..cc68257e --- /dev/null +++ b/ceph-mon/actions/create_cache_tier.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 +# +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from subprocess import CalledProcessError + +from charmhelpers.contrib.storage.linux.ceph import Pool, pool_exists +from charmhelpers.core.hookenv import action_get, log, action_fail + + +def make_cache_tier(): + backer_pool = action_get("backer-pool") + cache_pool = action_get("cache-pool") + cache_mode = action_get("cache-mode") + + # Pre-flight checks + if not pool_exists('admin', backer_pool): + log("Please create {} pool before calling create-cache-tier".format( + backer_pool)) + action_fail("create-cache-tier failed. Backer pool {} must exist " + "before calling this".format(backer_pool)) + return + + if not pool_exists('admin', cache_pool): + log("Please create {} pool before calling create-cache-tier".format( + cache_pool)) + action_fail("create-cache-tier failed. Cache pool {} must exist " + "before calling this".format(cache_pool)) + return + + pool = Pool(service='admin', name=backer_pool) + try: + pool.add_cache_tier(cache_pool=cache_pool, mode=cache_mode) + except CalledProcessError as err: + log("Add cache tier failed with message: {}" + .format(str(err))) + action_fail("create-cache-tier failed. 
Add cache tier failed with " + "message: {}".format(str(err))) + + +if __name__ == '__main__': + make_cache_tier() diff --git a/ceph-mon/actions/create_pool.py b/ceph-mon/actions/create_pool.py new file mode 100755 index 00000000..7b9582e2 --- /dev/null +++ b/ceph-mon/actions/create_pool.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +# +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from subprocess import CalledProcessError +from charmhelpers.core.hookenv import action_get, log, action_fail +from charmhelpers.contrib.storage.linux.ceph import ErasurePool, ReplicatedPool + + +def create_pool(): + pool_name = action_get("name") + pool_type = action_get("pool-type") + percent_data = action_get("percent-data") or 10 + app_name = action_get("app-name") or 'unknown' + try: + if pool_type == "replicated": + replicas = action_get("replicas") + crush_profile_name = action_get("profile-name") + replicated_pool = ReplicatedPool(name=pool_name, + service='admin', + replicas=replicas, + app_name=app_name, + profile_name=crush_profile_name, + percent_data=float(percent_data), + ) + replicated_pool.create() + + elif pool_type in ("erasure", "erasure-coded"): + crush_profile_name = action_get("erasure-profile-name") + allow_ec_overwrites = action_get("allow-ec-overwrites") + erasure_pool = ErasurePool(name=pool_name, + erasure_code_profile=crush_profile_name, + service='admin', + app_name=app_name, + percent_data=float(percent_data), + allow_ec_overwrites=allow_ec_overwrites, + ) + erasure_pool.create() + else: + log("Unknown pool type of {}. Only erasure or replicated is " + "allowed".format(pool_type)) + action_fail("Unknown pool type of {}. Only erasure or replicated " + "is allowed".format(pool_type)) + except CalledProcessError as e: + action_fail("Pool creation failed because of a failed process. " + "Ret Code: {} Message: {}".format(e.returncode, str(e))) + + +if __name__ == '__main__': + create_pool() diff --git a/ceph-mon/actions/crushmap-update b/ceph-mon/actions/crushmap-update new file mode 120000 index 00000000..1c7ffb94 --- /dev/null +++ b/ceph-mon/actions/crushmap-update @@ -0,0 +1 @@ +crushmap_update.py \ No newline at end of file diff --git a/ceph-mon/actions/crushmap_update.py b/ceph-mon/actions/crushmap_update.py new file mode 100755 index 00000000..fbe188fc --- /dev/null +++ b/ceph-mon/actions/crushmap_update.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +# +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
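+# Usage sketch (unit name and file are illustrative, not part of this
+# change): the 'map' parameter carries base64-encoded crushmap text, which
+# is decoded, compiled with crushtool and loaded via 'ceph osd setcrushmap':
+#   juju run-action ceph-mon/0 crushmap-update map="$(base64 crushmap.txt)"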
+ +import base64 +from charmhelpers.core.hookenv import action_get, action_fail +from subprocess import check_output, CalledProcessError, PIPE, Popen + + +def update_crushmap(): + try: + encoded_text = action_get("map") + crushmap_text = base64.b64decode(encoded_text) + try: + # The decoded crushmap text is fed to crushtool on stdin; + # capture stderr so compile failures can be reported. + crushtool = Popen( + ["crushtool", "-o", "compiled_crushmap", "-m", "compile"], + stdin=PIPE, stderr=PIPE) + _, crushtool_stderr = crushtool.communicate( + input=crushmap_text) + if crushtool.returncode != 0: + action_fail( + "Failed to compile crushmap: {}".format(crushtool_stderr)) + return + check_output( + ["ceph", "osd", "setcrushmap", "-i", "compiled_crushmap"]) + except (CalledProcessError, OSError) as err2: + action_fail("Crush compile or load failed with error: {}".format( + err2)) + except TypeError as err: + action_fail( + "Unable to base64 decode: {}. Error: {}".format(encoded_text, err)) + + +if __name__ == '__main__': + update_crushmap() diff --git a/ceph-mon/actions/delete-erasure-profile b/ceph-mon/actions/delete-erasure-profile new file mode 120000 index 00000000..65b7c04f --- /dev/null +++ b/ceph-mon/actions/delete-erasure-profile @@ -0,0 +1 @@ +delete_erasure_profile.py \ No newline at end of file diff --git a/ceph-mon/actions/delete-pool b/ceph-mon/actions/delete-pool new file mode 120000 index 00000000..586a3ae6 --- /dev/null +++ b/ceph-mon/actions/delete-pool @@ -0,0 +1 @@ +delete_pool.py \ No newline at end of file diff --git a/ceph-mon/actions/delete-user b/ceph-mon/actions/delete-user new file mode 120000 index 00000000..f55bc90f --- /dev/null +++ b/ceph-mon/actions/delete-user @@ -0,0 +1 @@ +delete_user.py \ No newline at end of file diff --git a/ceph-mon/actions/delete_erasure_profile.py b/ceph-mon/actions/delete_erasure_profile.py new file mode 100755 index 00000000..748ce5a6 --- /dev/null +++ b/ceph-mon/actions/delete_erasure_profile.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 +# +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from subprocess import CalledProcessError + +__author__ = 'chris' + +from charmhelpers.contrib.storage.linux.ceph import remove_erasure_profile +from charmhelpers.core.hookenv import action_get, log, action_fail + + +def delete_erasure_profile(): + name = action_get("name") + + try: + remove_erasure_profile(service='admin', profile_name=name) + except CalledProcessError as e: + log("Remove erasure profile failed with error {}".format(str(e)), + level="ERROR") + action_fail("Remove erasure profile failed with error: {}" + .format(str(e))) + + +if __name__ == '__main__': + delete_erasure_profile() diff --git a/ceph-mon/actions/delete_pool.py b/ceph-mon/actions/delete_pool.py new file mode 100755 index 00000000..3d7460e3 --- /dev/null +++ b/ceph-mon/actions/delete_pool.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +# +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import subprocess + +from charmhelpers.core.hookenv import action_get, log, action_fail + + +def set_mon_allow_pool_delete(delete=False): + subprocess.check_call([ + 'ceph', 'tell', 'mon.*', + 'injectargs', + '--mon-allow-pool-delete={}'.format('true' if delete else 'false') + ]) + + +def remove_pool(): + try: + pool_name = action_get("name") + set_mon_allow_pool_delete(delete=True) + subprocess.check_call([ + 'ceph', 'osd', 'pool', 'delete', + pool_name, pool_name, + '--yes-i-really-really-mean-it', + ]) + except subprocess.CalledProcessError as e: + log(e) + action_fail("Error deleting pool: {}".format(str(e))) + finally: + set_mon_allow_pool_delete(delete=False) + + +if __name__ == '__main__': + remove_pool() diff --git a/ceph-mon/actions/delete_user.py b/ceph-mon/actions/delete_user.py new file mode 100755 index 00000000..4dc8283b --- /dev/null +++ b/ceph-mon/actions/delete_user.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 +# +# Copyright 2022 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from charmhelpers.core.hookenv import action_get, action_fail, action_set, log +from subprocess import CalledProcessError, check_output, STDOUT + + +def delete_user(): + username = action_get("username") + client = "client.{}".format(username) + try: + log(f'Attempting to delete credentials for entity {client}.') + output = check_output(['ceph', 'auth', 'del', client], + stderr=STDOUT).decode("utf-8") + return output + except CalledProcessError as e: + log(f'Failed to delete credentials for entity {client}.') + action_fail("User deletion failed because of a failed process. 
" + "Ret Code: {} Message: {}".format(e.returncode, str(e))) + + +def main(): + action_set({"message": delete_user()}) + + +if __name__ == "__main__": + main() diff --git a/ceph-mon/actions/get-or-create-user b/ceph-mon/actions/get-or-create-user new file mode 120000 index 00000000..0060cdb0 --- /dev/null +++ b/ceph-mon/actions/get-or-create-user @@ -0,0 +1 @@ +get_or_create_user.py \ No newline at end of file diff --git a/ceph-mon/actions/get-quorum-status b/ceph-mon/actions/get-quorum-status new file mode 120000 index 00000000..2ec9f01b --- /dev/null +++ b/ceph-mon/actions/get-quorum-status @@ -0,0 +1 @@ +get_quorum_status.py \ No newline at end of file diff --git a/ceph-mon/actions/get_or_create_user.py b/ceph-mon/actions/get_or_create_user.py new file mode 100755 index 00000000..a841dd66 --- /dev/null +++ b/ceph-mon/actions/get_or_create_user.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +# +# Copyright 2022 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json + +from charmhelpers.core.hookenv import action_get, action_fail, action_set, log +from subprocess import CalledProcessError, check_output + + +def get_or_create_user(): + username = action_get("username") + client = "client.{}".format(username) + try: + log(f'Attempting to retrieve existing credentials for entity {client}') + keyring = json.loads( + check_output(["ceph", "auth", "get", client, + "--format=json"]).decode("utf-8") + ) + log(f'Found existing credentials for entity {client}') + return json.dumps(keyring, indent=2) + except CalledProcessError: + log(f'Credentials for entity {client} not found') + pass + try: + log(f'Attempting to create new credentials for entity {client}') + mon_caps = action_get("mon-caps") + osd_caps = action_get("osd-caps") + log(f'with the following mon capabilities: {mon_caps},') + log(f'and osd capabilities: {osd_caps}.') + keyring = json.loads( + check_output(["ceph", "auth", "get-or-create", + client, "mon", mon_caps, "osd", osd_caps, + "--format=json"]).decode("utf-8") + ) + log(f'New credentials for entity {client} created') + return json.dumps(keyring, indent=2) + except CalledProcessError as e: + log(f'Failed to get or create credentials for entity {client}.') + action_fail("User creation failed because of a failed process. " + "Ret Code: {} Message: {}".format(e.returncode, str(e))) + + +def main(): + action_set({"message": get_or_create_user()}) + + +if __name__ == "__main__": + main() diff --git a/ceph-mon/actions/get_quorum_status.py b/ceph-mon/actions/get_quorum_status.py new file mode 100755 index 00000000..a537bce3 --- /dev/null +++ b/ceph-mon/actions/get_quorum_status.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 +# +# Copyright 2021 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Run action to collect Ceph quorum_status output.""" +import json + +from subprocess import CalledProcessError + +from ceph_ops import get_quorum_status +from charmhelpers.core.hookenv import function_fail, function_get, function_set + +if __name__ == "__main__": + """Run action to collect Ceph quorum_status output.""" + try: + function_set(get_quorum_status(function_get("format"))) + except CalledProcessError as error: + function_fail("Failed to run ceph quorum_status, {}".format(error)) + except (json.decoder.JSONDecodeError, KeyError) as error: + function_fail( + "Failed to parse ceph quorum_status output. {}".format(error) + ) diff --git a/ceph-mon/actions/list-crush-rules b/ceph-mon/actions/list-crush-rules new file mode 120000 index 00000000..30736b0d --- /dev/null +++ b/ceph-mon/actions/list-crush-rules @@ -0,0 +1 @@ +list_crush_rules.py \ No newline at end of file diff --git a/ceph-mon/actions/list-erasure-profiles b/ceph-mon/actions/list-erasure-profiles new file mode 120000 index 00000000..6cdaf358 --- /dev/null +++ b/ceph-mon/actions/list-erasure-profiles @@ -0,0 +1 @@ +list_erasure_profiles.py \ No newline at end of file diff --git a/ceph-mon/actions/list-inconsistent-objs b/ceph-mon/actions/list-inconsistent-objs new file mode 120000 index 00000000..e6aa6390 --- /dev/null +++ b/ceph-mon/actions/list-inconsistent-objs @@ -0,0 +1 @@ +list_inconsistent_objs.py \ No newline at end of file diff --git a/ceph-mon/actions/list-pools b/ceph-mon/actions/list-pools new file mode 120000 index 00000000..65e0c222 --- /dev/null +++ b/ceph-mon/actions/list-pools @@ -0,0 +1 @@ +list_pools.py \ No newline at end of file diff --git a/ceph-mon/actions/list_crush_rules.py b/ceph-mon/actions/list_crush_rules.py new file mode 100755 index 00000000..6f57cc45 --- /dev/null +++ b/ceph-mon/actions/list_crush_rules.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +# +# Copyright 2022 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import yaml +from subprocess import check_output, CalledProcessError + +from charmhelpers.core.hookenv import ( + ERROR, + log, + function_fail, + function_get, + function_set +) + + +def get_list_crush_rules(output_format="text"): + """Get list of Ceph crush rules. 
+ + :param output_format: specify output format + :type output_format: str + :returns: text: list of tuples (<rule_id>, <rule_name>) or + yaml: list of crush rules in yaml format + json: list of crush rules in json format + :rtype: str + """ + crush_rules = check_output(["ceph", "--id", "admin", "osd", "crush", + "rule", "dump", "-f", "json"]).decode("UTF-8") + crush_rules = json.loads(crush_rules) + + if output_format == "text": + return ",".join(["({}, {})".format(rule["rule_id"], rule["rule_name"]) + for rule in crush_rules]) + elif output_format == "yaml": + return yaml.dump(crush_rules) + else: + return json.dumps(crush_rules) + + +def main(): + try: + list_crush_rules = get_list_crush_rules(function_get("format")) + function_set({"message": list_crush_rules}) + except CalledProcessError as error: + log(error, ERROR) + function_fail("List crush rules failed with error: {}".format(error)) + + +if __name__ == "__main__": + main() diff --git a/ceph-mon/actions/list_erasure_profiles.py b/ceph-mon/actions/list_erasure_profiles.py new file mode 100755 index 00000000..2c067583 --- /dev/null +++ b/ceph-mon/actions/list_erasure_profiles.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 +# +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from subprocess import check_output, CalledProcessError + +from charmhelpers.core.hookenv import action_get, log, action_set, action_fail + +if __name__ == '__main__': + name = action_get("name") + try: + out = check_output(['ceph', + '--id', 'admin', + 'osd', + 'erasure-code-profile', + 'ls']).decode('UTF-8') + action_set({'message': out}) + except CalledProcessError as e: + log(e) + action_fail("Listing erasure profiles failed with error: {}" + .format(str(e))) diff --git a/ceph-mon/actions/list_inconsistent_objs.py b/ceph-mon/actions/list_inconsistent_objs.py new file mode 100755 index 00000000..5112166b --- /dev/null +++ b/ceph-mon/actions/list_inconsistent_objs.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 +# +# Copyright 2021 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
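+# Output sketch (hypothetical values): with format=text the action maps each
+# inconsistent PG to its object names, one PG per line, e.g. "2.5: obj_a,obj_b";
+# 'yaml' and 'json' render the same PG-to-objects mapping in those formats.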
+ +import json +import re +from subprocess import check_output, CalledProcessError +import yaml + +from charmhelpers.core.hookenv import function_fail, function_get, \ function_set, log + + +VALID_FORMATS = ('text', 'json', 'yaml') + + +def get_health_detail(): + return check_output(['ceph', 'health', 'detail']).decode('UTF-8') + + +def get_rados_inconsistent(pg): + return check_output(['rados', 'list-inconsistent-obj', pg]).decode('UTF-8') + + +def get_inconsistent_objs(): + # For the call to 'ceph health detail' we are interested in + # lines with the form: + # pg $PG is ...inconsistent... + rx = re.compile('pg (\\S+) .+inconsistent') + out = get_health_detail() + msg = {} # Maps PG -> object name list. + + for line in out.split('\n'): + res = rx.search(line) + if res is None: + continue + + pg = res.groups()[0] + out = get_rados_inconsistent(pg) + js = json.loads(out) + inconsistents = js.get('inconsistents') + + if not inconsistents: + continue + + msg.setdefault(pg, []).extend(x['object']['name'] + for x in inconsistents) + + return msg + + +def text_format(obj): + ret = '' + for pg, objs in obj.items(): + ret += '{}: {}\n'.format(pg, ','.join(objs)) + return ret.rstrip('\n') + + +if __name__ == '__main__': + try: + fmt = function_get('format') + if fmt and fmt not in VALID_FORMATS: + function_fail('Unknown format specified: {}'.format(fmt)) + else: + msg = get_inconsistent_objs() + if fmt == 'yaml': + msg = yaml.dump(msg) + elif fmt == 'json': + msg = json.dumps(msg, indent=4, sort_keys=True) + else: + msg = text_format(msg) + function_set({'message': msg}) + except CalledProcessError as e: + log(e) + function_fail("Listing inconsistent objects failed with error {}" + .format(str(e))) diff --git a/ceph-mon/actions/list_pools.py b/ceph-mon/actions/list_pools.py new file mode 100755 index 00000000..4c1384a9 --- /dev/null +++ b/ceph-mon/actions/list_pools.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +# +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +from subprocess import check_output, CalledProcessError + +from charmhelpers.core.hookenv import ( + log, + function_fail, + function_get, + function_set +) + + +def get_list_pools(output_format="text"): + """Get list of Ceph pools. 
+ + :param output_format: specify output format + :type output_format: str + :returns: newline-separated pool names for 'text', or a JSON dump + of pool details (pretty-printed for 'text-full') + :rtype: str + """ + if output_format == "text": + return check_output(["ceph", "--id", "admin", "osd", + "lspools"]).decode("UTF-8") + + ceph_osd_dump = check_output(["ceph", "--id", "admin", "osd", "dump", + "--format=json"]).decode("UTF-8") + pools = json.loads(ceph_osd_dump).get("pools", []) + return json.dumps(pools, + indent=2 if output_format == "text-full" else None) + + +def main(): + try: + list_pools = get_list_pools(function_get("format")) + function_set({"message": list_pools}) + except CalledProcessError as e: + log(e) + function_fail("List pools failed with error: {}".format(str(e))) + + +if __name__ == "__main__": + main() diff --git a/ceph-mon/actions/pause-health b/ceph-mon/actions/pause-health new file mode 100755 index 00000000..e00afd15 --- /dev/null +++ b/ceph-mon/actions/pause-health @@ -0,0 +1,20 @@ +#!/bin/bash +# +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -eux + +ceph osd set nodown +ceph osd set noout diff --git a/ceph-mon/actions/pg-repair b/ceph-mon/actions/pg-repair new file mode 120000 index 00000000..e60c9660 --- /dev/null +++ b/ceph-mon/actions/pg-repair @@ -0,0 +1 @@ +pg_repair.py \ No newline at end of file diff --git a/ceph-mon/actions/pg_repair.py b/ceph-mon/actions/pg_repair.py new file mode 100755 index 00000000..be440f5e --- /dev/null +++ b/ceph-mon/actions/pg_repair.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 +# +# Copyright 2022 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +from subprocess import check_output, CalledProcessError + + +from charmhelpers.core.hookenv import ( + log, + function_fail, + function_set, +) +from charms_ceph.utils import list_pools + + +def get_rados_inconsistent_objs(pg): + """Get all inconsistent objects for a given placement group. + + :param pg: Name of a placement group + :type pg: str + :return: list of inconsistent objects + :rtype: list[str] + """ + return json.loads( + check_output( + ["rados", "list-inconsistent-obj", pg, "--format=json-pretty"] + ).decode("UTF-8") + ) + + +def get_rados_inconsistent_pgs(pool): + """Get all inconsistent placement groups for a given pool. 
+ + :param pool: Name of a Ceph pool + :type pool: str + :returns: list of inconsistent placement group IDs + :rtype: list[str] + """ + return json.loads( + check_output(["rados", "list-inconsistent-pg", pool]).decode("UTF-8") + ) + + +def get_inconsistent_pgs(ceph_pools): + """Get all inconsistent placement groups for a list of pools. + + :param ceph_pools: List of names of Ceph pools + :type ceph_pools: list[str] + :returns: list of inconsistent placement group IDs as a set + :rtype: set[str] + """ + inconsistent_pgs = set() + for pool in ceph_pools: + inconsistent_pgs.update(get_rados_inconsistent_pgs(pool)) + return inconsistent_pgs + + +def get_safe_pg_repairs(inconsistent_pgs): + """Filters inconsistent placement groups for ones that are safe to repair. + + :param inconsistent_pgs: List of inconsistent placement groups + :type inconsistent_pgs: list[str] + :returns: list of safely repairable placement groups as a set + :rtype: set[str] + """ + return {pg for pg in inconsistent_pgs if is_pg_safe_to_repair(pg)} + + +def is_pg_safe_to_repair(pg): + """Determines if a placement group is safe to repair. + + :param pg: Name of an inconsistent placement group + :type pg: str + :returns: placement group is safe to repair + :rtype: bool + """ + # Additional tests for known safe cases can be added here. + return has_read_error_only(pg) + + +def has_read_error_only(pg): + """Determines if an inconsistent placement group is caused by a read error. + + Returns True only when exactly one read error, and no other error + types, are found across the placement group's shards. + + :param pg: ID of an inconsistent placement group + :type pg: str + :returns: placement group is safe to repair + :rtype: bool + """ + rados_inconsistent_objs = get_rados_inconsistent_objs(pg) + read_error_found = False + for inconsistent in rados_inconsistent_objs.get("inconsistents", []): + for shard in inconsistent.get("shards", []): + errors = shard.get("errors", []) + if errors == ["read_error"]: + if read_error_found: + return False + read_error_found = True + continue + elif errors: + # Error other than "read_error" detected + return False + return read_error_found + + +def perform_pg_repairs(pgs): + """Runs `ceph pg repair` on a group of placement groups. + All placement groups provided should be confirmed as safe prior to using + this method. + + :param pgs: List of safe-to-repair placement groups + :type pgs: list[str] + """ + for pg in pgs: + log("Repairing ceph placement group {}".format(pg)) + check_output(["ceph", "pg", "repair", pg]) + + +def pg_repair(): + """Repair all inconsistent placement groups caused by read errors.""" + ceph_pools = list_pools() + if not ceph_pools: + msg = "No Ceph pools found." + log(msg) + function_set({"message": msg}) + return + + # Get inconsistent placement groups + inconsistent_pgs = get_inconsistent_pgs(ceph_pools) + if not inconsistent_pgs: + msg = "No inconsistent placement groups found." 
+ log(msg) + function_set({"message": msg}) + return + + # Filter for known safe cases + safe_pg_repairs = get_safe_pg_repairs(inconsistent_pgs) + unsafe_pg_repairs = inconsistent_pgs.difference(safe_pg_repairs) + + # Perform safe placement group repairs + if unsafe_pg_repairs: + log( + "Ignoring unsafe placement group repairs: {}".format( + unsafe_pg_repairs + ) + ) + if safe_pg_repairs: + log("Safe placement group repairs found: {}".format(safe_pg_repairs)) + perform_pg_repairs(safe_pg_repairs) + function_set( + { + "message": "placement groups repaired: {}".format( + sorted(safe_pg_repairs) + ) + } + ) + else: + msg = "No safe placement group repairs found." + log(msg) + function_set({"message": msg}) + + +def main(): + try: + pg_repair() + except CalledProcessError as e: + log(e) + function_fail( + "Safe placement group repair failed with error: {}".format(str(e)) + ) + + +if __name__ == "__main__": + main() diff --git a/ceph-mon/actions/pool-get b/ceph-mon/actions/pool-get new file mode 120000 index 00000000..ad4b9fa0 --- /dev/null +++ b/ceph-mon/actions/pool-get @@ -0,0 +1 @@ +pool_get.py \ No newline at end of file diff --git a/ceph-mon/actions/pool-set b/ceph-mon/actions/pool-set new file mode 120000 index 00000000..9339f5e7 --- /dev/null +++ b/ceph-mon/actions/pool-set @@ -0,0 +1 @@ +pool_set.py \ No newline at end of file diff --git a/ceph-mon/actions/pool-statistics b/ceph-mon/actions/pool-statistics new file mode 120000 index 00000000..dbf59233 --- /dev/null +++ b/ceph-mon/actions/pool-statistics @@ -0,0 +1 @@ +pool_statistics.py \ No newline at end of file diff --git a/ceph-mon/actions/pool_get.py b/ceph-mon/actions/pool_get.py new file mode 100755 index 00000000..b139d0dc --- /dev/null +++ b/ceph-mon/actions/pool_get.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +# +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from subprocess import check_output, CalledProcessError + +from charmhelpers.core.hookenv import log, action_set, action_get, action_fail + +if __name__ == '__main__': + name = action_get('name') + key = action_get('key') + try: + out = check_output(['ceph', '--id', 'admin', + 'osd', 'pool', 'get', name, key]).decode('UTF-8') + action_set({'message': out}) + except CalledProcessError as e: + log(e) + action_fail("Pool get failed with message: {}".format(str(e))) diff --git a/ceph-mon/actions/pool_set.py b/ceph-mon/actions/pool_set.py new file mode 100755 index 00000000..fafa6898 --- /dev/null +++ b/ceph-mon/actions/pool_set.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 +# +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from subprocess import CalledProcessError + +from charmhelpers.core.hookenv import action_get, log, action_fail +from charms_ceph.broker import handle_set_pool_value + +if __name__ == '__main__': + name = action_get("name") + key = action_get("key") + value = action_get("value") + request = {'name': name, + 'key': key, + 'value': value} + + try: + # Bug: #1838650 -- force coercion to an int for the value if required. + handle_set_pool_value(service='admin', request=request, coerce=True) + except CalledProcessError as e: + log(str(e)) + action_fail("Setting pool key: {} and value: {} failed with " + "message: {}".format(key, value, str(e))) diff --git a/ceph-mon/actions/pool_statistics.py b/ceph-mon/actions/pool_statistics.py new file mode 100755 index 00000000..e6e8e796 --- /dev/null +++ b/ceph-mon/actions/pool_statistics.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python3 +# +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from subprocess import check_output, CalledProcessError +from charmhelpers.core.hookenv import log, action_set, action_fail + +if __name__ == '__main__': + try: + out = check_output(['ceph', '--id', 'admin', + 'df']).decode('UTF-8') + action_set({'message': out}) + except CalledProcessError as e: + log(e) + action_fail("ceph df failed with message: {}".format(str(e))) diff --git a/ceph-mon/actions/purge-osd b/ceph-mon/actions/purge-osd new file mode 120000 index 00000000..7ff58b21 --- /dev/null +++ b/ceph-mon/actions/purge-osd @@ -0,0 +1 @@ +purge_osd.py \ No newline at end of file diff --git a/ceph-mon/actions/purge_osd.py b/ceph-mon/actions/purge_osd.py new file mode 100755 index 00000000..e884186f --- /dev/null +++ b/ceph-mon/actions/purge_osd.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +# +# Copyright 2020 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Remove an OSD from the cluster. + +Runs the 'ceph osd purge' command (or its pre-Luminous equivalents) to +remove the OSD from the cluster map, delete its authentication key and +remove it from the OSD map. +""" + +from subprocess import ( + check_call, + CalledProcessError, +) + +from charmhelpers.core.hookenv import ( + function_get, + log, + function_fail +) + +from charmhelpers.core.host import cmp_pkgrevno +from charmhelpers.contrib.storage.linux import ceph +from charms_ceph.utils import get_osd_weight + + +def purge_osd(osd): + """Run the OSD purge action. 
+ + :param osd: the OSD ID to operate on + """ + svc = 'admin' + osd_str = str(osd) + osd_name = "osd.{}".format(osd_str) + current_osds = ceph.get_osds(svc) + if osd not in current_osds: + function_fail("OSD {} is not in the current list of OSDs".format(osd)) + return + + osd_weight = get_osd_weight(osd_name) + if osd_weight > 0: + function_fail("OSD has weight {}, must have zero weight before " + "this operation".format(osd_weight)) + return + + luminous_or_later = cmp_pkgrevno('ceph-common', '12.0.0') >= 0 + if not function_get('i-really-mean-it'): + function_fail('i-really-mean-it is a required parameter') + return + if luminous_or_later: + cmds = [ + ["ceph", "osd", "out", osd_name], + ['ceph', 'osd', 'purge', osd_str, '--yes-i-really-mean-it'] + ] + else: + cmds = [ + ["ceph", "osd", "out", osd_name], + ["ceph", "osd", "crush", "remove", "osd.{}".format(osd)], + ["ceph", "auth", "del", osd_name], + ['ceph', 'osd', 'rm', osd_str], + ] + for cmd in cmds: + try: + check_call(cmd) + except CalledProcessError as e: + log(e) + function_fail("OSD Purge for OSD {} failed".format(osd)) + return + + +if __name__ == '__main__': + osd = function_get("osd") + purge_osd(osd) diff --git a/ceph-mon/actions/remove-cache-tier b/ceph-mon/actions/remove-cache-tier new file mode 120000 index 00000000..11090fd5 --- /dev/null +++ b/ceph-mon/actions/remove-cache-tier @@ -0,0 +1 @@ +remove_cache_tier.py \ No newline at end of file diff --git a/ceph-mon/actions/remove-pool-snapshot b/ceph-mon/actions/remove-pool-snapshot new file mode 120000 index 00000000..21fd5ae0 --- /dev/null +++ b/ceph-mon/actions/remove-pool-snapshot @@ -0,0 +1 @@ +remove_pool_snapshot.py \ No newline at end of file diff --git a/ceph-mon/actions/remove_cache_tier.py b/ceph-mon/actions/remove_cache_tier.py new file mode 100755 index 00000000..18c816c5 --- /dev/null +++ b/ceph-mon/actions/remove_cache_tier.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 +# +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from subprocess import CalledProcessError + +from charmhelpers.contrib.storage.linux.ceph import Pool, pool_exists +from charmhelpers.core.hookenv import action_get, log, action_fail + +__author__ = 'chris' + + +def delete_cache_tier(): + backer_pool = action_get("backer-pool") + cache_pool = action_get("cache-pool") + + # Pre-flight checks + if not pool_exists('admin', backer_pool): + log("Backer pool {} must exist before calling this".format( + backer_pool)) + action_fail("remove-cache-tier failed. Backer pool {} must exist " + "before calling this".format(backer_pool)) + return + + if not pool_exists('admin', cache_pool): + log("Cache pool {} must exist before calling this".format( + cache_pool)) + action_fail("remove-cache-tier failed. 
Cache pool {} must exist " + "before calling this".format(cache_pool)) + return + + pool = Pool(service='admin', name=backer_pool) + try: + pool.remove_cache_tier(cache_pool=cache_pool) + except CalledProcessError as err: + log("Removing the cache tier failed with message: {}".format(str(err))) + action_fail("remove-cache-tier failed. Removing the cache tier failed " + "with message: {}".format(str(err))) + + +if __name__ == '__main__': + delete_cache_tier() diff --git a/ceph-mon/actions/remove_pool_snapshot.py b/ceph-mon/actions/remove_pool_snapshot.py new file mode 100755 index 00000000..065f6f67 --- /dev/null +++ b/ceph-mon/actions/remove_pool_snapshot.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 +# +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from subprocess import CalledProcessError +from charmhelpers.core.hookenv import action_get, log, action_fail +from charmhelpers.contrib.storage.linux.ceph import remove_pool_snapshot + +if __name__ == '__main__': + name = action_get("name") + snapname = action_get("snapshot-name") + try: + remove_pool_snapshot(service='admin', + pool_name=name, + snapshot_name=snapname) + except CalledProcessError as e: + log(e) + action_fail("Remove pool snapshot failed with message: {}" + .format(str(e))) diff --git a/ceph-mon/actions/rename-pool b/ceph-mon/actions/rename-pool new file mode 120000 index 00000000..37007c6f --- /dev/null +++ b/ceph-mon/actions/rename-pool @@ -0,0 +1 @@ +rename_pool.py \ No newline at end of file diff --git a/ceph-mon/actions/rename_pool.py b/ceph-mon/actions/rename_pool.py new file mode 100755 index 00000000..7a759d15 --- /dev/null +++ b/ceph-mon/actions/rename_pool.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +# +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
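+# Usage sketch (unit and pool names are illustrative, not part of this
+# change):
+#   juju run-action ceph-mon/0 rename-pool name=old-pool new-name=new-pool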
+ +from subprocess import CalledProcessError +from charmhelpers.core.hookenv import action_get, log, action_fail +from charmhelpers.contrib.storage.linux.ceph import rename_pool + +if __name__ == '__main__': + name = action_get("name") + new_name = action_get("new-name") + try: + rename_pool(service='admin', old_name=name, new_name=new_name) + except CalledProcessError as e: + log(e) + action_fail("Renaming pool failed with message: {}".format(str(e))) diff --git a/ceph-mon/actions/reset-osd-count-report b/ceph-mon/actions/reset-osd-count-report new file mode 120000 index 00000000..ce265d1e --- /dev/null +++ b/ceph-mon/actions/reset-osd-count-report @@ -0,0 +1 @@ +reset_osd_count_report.py \ No newline at end of file diff --git a/ceph-mon/actions/reset_osd_count_report.py b/ceph-mon/actions/reset_osd_count_report.py new file mode 100755 index 00000000..0334c441 --- /dev/null +++ b/ceph-mon/actions/reset_osd_count_report.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +# +# Copyright 2021 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys + +sys.path.append("hooks") +from ceph_hooks import update_host_osd_count_report + + +def reset_osd_count_report(): + update_host_osd_count_report(reset=True) + + +if __name__ == '__main__': + reset_osd_count_report() diff --git a/ceph-mon/actions/resume-health b/ceph-mon/actions/resume-health new file mode 100755 index 00000000..f42397ed --- /dev/null +++ b/ceph-mon/actions/resume-health @@ -0,0 +1,20 @@ +#!/bin/bash +# +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -eux + +ceph osd unset nodown +ceph osd unset noout diff --git a/ceph-mon/actions/security-checklist b/ceph-mon/actions/security-checklist new file mode 120000 index 00000000..47464970 --- /dev/null +++ b/ceph-mon/actions/security-checklist @@ -0,0 +1 @@ +security_checklist.py \ No newline at end of file diff --git a/ceph-mon/actions/security_checklist.py b/ceph-mon/actions/security_checklist.py new file mode 100755 index 00000000..8bc1b27b --- /dev/null +++ b/ceph-mon/actions/security_checklist.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +# +# Copyright 2019 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys + +import charmhelpers.contrib.openstack.audits as audits +from charmhelpers.contrib.openstack.audits import ( + openstack_security_guide, +) + +# Via the openstack_security_guide above, we are running the following +# security assertions automatically: +# +# - validate-file-ownership +# - validate-file-permissions + + +def main(): + config = { + 'audit_type': audits.AuditType.OpenStackSecurityGuide, + 'files': openstack_security_guide.FILE_ASSERTIONS['ceph-mon'], + 'excludes': [ + 'validate-uses-keystone', + 'validate-uses-tls-for-glance', + 'validate-uses-tls-for-keystone', + ], + } + return audits.action_parse_results(audits.run(config)) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/ceph-mon/actions/set-noout b/ceph-mon/actions/set-noout new file mode 120000 index 00000000..d2ac02d8 --- /dev/null +++ b/ceph-mon/actions/set-noout @@ -0,0 +1 @@ +set_noout.py \ No newline at end of file diff --git a/ceph-mon/actions/set-pool-max-bytes b/ceph-mon/actions/set-pool-max-bytes new file mode 120000 index 00000000..f65ca1e0 --- /dev/null +++ b/ceph-mon/actions/set-pool-max-bytes @@ -0,0 +1 @@ +set_pool_max_bytes.py \ No newline at end of file diff --git a/ceph-mon/actions/set_noout.py b/ceph-mon/actions/set_noout.py new file mode 100755 index 00000000..47ebad80 --- /dev/null +++ b/ceph-mon/actions/set_noout.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python3 +# +# Copyright 2017 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from charmhelpers.core.hookenv import action_set, action_fail +from charms_ceph.utils import osd_noout + +if __name__ == '__main__': + result = osd_noout(True) + if result: + action_set({'message': 'Ceph osd noout has been set'}) + else: + action_fail('Ceph osd noout failed to set') diff --git a/ceph-mon/actions/set_pool_max_bytes.py b/ceph-mon/actions/set_pool_max_bytes.py new file mode 100755 index 00000000..7ffc662a --- /dev/null +++ b/ceph-mon/actions/set_pool_max_bytes.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +# +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
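+# Usage sketch (values are illustrative, not part of this change): cap a
+# pool at 1 GiB:
+#   juju run-action ceph-mon/0 set-pool-max-bytes name=mypool max=1073741824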
+ +from subprocess import CalledProcessError +from charmhelpers.core.hookenv import action_get, log, action_fail +from charmhelpers.contrib.storage.linux.ceph import set_pool_quota + +if __name__ == '__main__': + max_bytes = action_get("max") + name = action_get("name") + try: + set_pool_quota(service='admin', pool_name=name, max_bytes=max_bytes) + except CalledProcessError as e: + log(e) + action_fail("Set pool quota failed with message: {}".format(str(e))) diff --git a/ceph-mon/actions/show-disk-free b/ceph-mon/actions/show-disk-free new file mode 120000 index 00000000..a50016bc --- /dev/null +++ b/ceph-mon/actions/show-disk-free @@ -0,0 +1 @@ +show_disk_free.py \ No newline at end of file diff --git a/ceph-mon/actions/show_disk_free.py b/ceph-mon/actions/show_disk_free.py new file mode 100755 index 00000000..1b372782 --- /dev/null +++ b/ceph-mon/actions/show_disk_free.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +# +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from subprocess import check_output, CalledProcessError +from charmhelpers.core.hookenv import log, action_get, action_set, action_fail + +if __name__ == '__main__': + # constrained to enum: json,json-pretty,xml,xml-pretty,plain + fmt = action_get("format") + try: + out = check_output(['ceph', '--id', 'admin', + 'osd', 'df', 'tree', '-f', fmt]).decode('UTF-8') + action_set({'message': out}) + except CalledProcessError as e: + log(e) + action_fail( + "ceph osd df tree failed with message: {}".format(str(e))) diff --git a/ceph-mon/actions/snapshot-pool b/ceph-mon/actions/snapshot-pool new file mode 120000 index 00000000..dd9c8578 --- /dev/null +++ b/ceph-mon/actions/snapshot-pool @@ -0,0 +1 @@ +snapshot_pool.py \ No newline at end of file diff --git a/ceph-mon/actions/snapshot_pool.py b/ceph-mon/actions/snapshot_pool.py new file mode 100755 index 00000000..251d3fe1 --- /dev/null +++ b/ceph-mon/actions/snapshot_pool.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +# +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
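+# Usage sketch (names are illustrative, not part of this change):
+#   juju run-action ceph-mon/0 snapshot-pool name=mypool snapshot-name=mypool-snap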
+ +from subprocess import CalledProcessError +from charmhelpers.core.hookenv import action_get, log, action_fail +from charmhelpers.contrib.storage.linux.ceph import snapshot_pool + +if __name__ == '__main__': + name = action_get("name") + snapname = action_get("snapshot-name") + try: + snapshot_pool(service='admin', + pool_name=name, + snapshot_name=snapname) + except CalledProcessError as e: + log(e) + action_fail("Snapshot pool failed with message: {}".format(str(e))) diff --git a/ceph-mon/actions/unset-noout b/ceph-mon/actions/unset-noout new file mode 120000 index 00000000..807c18a7 --- /dev/null +++ b/ceph-mon/actions/unset-noout @@ -0,0 +1 @@ +unset_noout.py \ No newline at end of file diff --git a/ceph-mon/actions/unset_noout.py b/ceph-mon/actions/unset_noout.py new file mode 100755 index 00000000..30035cc9 --- /dev/null +++ b/ceph-mon/actions/unset_noout.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python3 +# +# Copyright 2017 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from charmhelpers.core.hookenv import action_set, action_fail +from charms_ceph.utils import osd_noout + +if __name__ == '__main__': + result = osd_noout(False) + if result: + action_set({'message': 'Ceph osd noout has been unset'}) + else: + action_fail('Ceph osd noout failed to unset') diff --git a/ceph-mon/bindep.txt b/ceph-mon/bindep.txt new file mode 100644 index 00000000..9cce56b5 --- /dev/null +++ b/ceph-mon/bindep.txt @@ -0,0 +1,5 @@ +libxml2-dev [platform:dpkg test] +libxslt1-dev [platform:dpkg test] +build-essential [platform:dpkg test] +zlib1g-dev [platform:dpkg test] +libffi-dev [platform:dpkg test] diff --git a/ceph-mon/build-requirements.txt b/ceph-mon/build-requirements.txt new file mode 100644 index 00000000..b6d2452f --- /dev/null +++ b/ceph-mon/build-requirements.txt @@ -0,0 +1,7 @@ +# NOTES(lourot): +# * We don't install charmcraft via pip anymore because it anyway spins up a +# container and scp the system's charmcraft snap inside it. So the charmcraft +# snap is necessary on the system anyway. +# * `tox -e build` successfully validated with charmcraft 1.2.1 + +cffi==1.14.6; python_version < '3.6' # cffi 1.15.0 drops support for py35. diff --git a/ceph-mon/charmcraft.yaml b/ceph-mon/charmcraft.yaml new file mode 100644 index 00000000..e36a47bf --- /dev/null +++ b/ceph-mon/charmcraft.yaml @@ -0,0 +1,34 @@ +type: charm + +parts: + charm: + after: + - update-certificates + charm-python-packages: + # Use the updated version of setuptools (needed by jinja2). + - setuptools + build-packages: + - git + + update-certificates: + # Ensure that certificates in the base image are up-to-date. 
+    plugin: nil
+    override-build: |
+      apt update
+      apt install -y ca-certificates
+      update-ca-certificates
+
+base: ubuntu@22.04
+platforms:
+  amd64:
+    build-on: amd64
+    build-for: amd64
+  arm64:
+    build-on: arm64
+    build-for: arm64
+  ppc64el:
+    build-on: ppc64el
+    build-for: ppc64el
+  s390x:
+    build-on: s390x
+    build-for: s390x
diff --git a/ceph-mon/config.yaml b/ceph-mon/config.yaml
new file mode 100644
index 00000000..9be01566
--- /dev/null
+++ b/ceph-mon/config.yaml
@@ -0,0 +1,361 @@
+options:
+  loglevel:
+    type: int
+    default: 1
+    description: Mon and OSD debug level. Max is 20.
+  use-syslog:
+    type: boolean
+    default: False
+    description: |
+      If set to True, supporting services will log to syslog.
+  source:
+    type: string
+    default: caracal
+    description: |
+      Optional configuration to support use of additional sources such as:
+      .
+      - ppa:myteam/ppa
+      - cloud:bionic-ussuri
+      - cloud:xenial-proposed/queens
+      - http://my.archive.com/ubuntu main
+      .
+      The last option should be used in conjunction with the key configuration
+      option.
+  key:
+    type: string
+    default:
+    description: |
+      Key ID to import to the apt keyring to support use with arbitrary source
+      configuration from outside of Launchpad archives or PPAs.
+  harden:
+    type: string
+    default:
+    description: |
+      Apply system hardening. Supports a space-delimited list of modules
+      to run. Supported modules currently include os, ssh, apache and mysql.
+  fsid:
+    type: string
+    default:
+    description: |
+      The unique identifier (fsid) of the Ceph cluster.
+      .
+      WARNING: this option should only be used when performing an in-place
+      migration of an existing non-charm deployed Ceph cluster to a charm
+      managed deployment.
+  config-flags:
+    type: string
+    default:
+    description: |
+      User provided Ceph configuration. Supports a string representation of
+      a Python dictionary where each top-level key represents a section in
+      the ceph.conf template. You may only use sections supported in the
+      template.
+      .
+      WARNING: this is not the recommended way to configure the underlying
+      services that this charm installs and is used at the user's own risk.
+      This option is mainly provided as a stop-gap for users that either
+      want to test the effect of modifying some config or who have found
+      a critical bug in the way the charm has configured their services
+      and need it fixed immediately. We ask that whenever this is used,
+      that the user consider opening a bug on this charm at
+      http://bugs.launchpad.net/charms providing an explanation of why the
+      config was needed so that we may consider it for inclusion as a
+      natively supported config in the charm.
+  auth-supported:
+    type: string
+    default: cephx
+    description: |
+      [DEPRECATED] Which authentication flavour to use.
+      .
+      This option no longer has any effect. It's insecure and breaks expected
+      Ceph functionality when assigned to None. The charm now ignores the
+      option and always sets auth to cephx.
+      .
+      Original description:
+      .
+      [DEPRECATED] Valid options are "cephx" and "none". If "none" is
+      specified, keys will still be created and deployed so that it can be
+      enabled later.
+  monitor-secret:
+    type: string
+    default:
+    description: |
+      The Ceph secret key used by Ceph monitors. This value will become the
+      mon.key. To generate a suitable value use:
+      .
+      ceph-authtool /dev/stdout --name=mon. --gen-key
+      .
+      If left empty, a secret key will be generated.
+      .
+      NOTE: Changing this configuration after deployment is not supported and
+      new service units will not be able to join the cluster.
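+  # Illustrative example only ("ceph-mon" below is a placeholder application
+  # name): a monitor secret can be supplied at deploy time with, e.g.:
+  #   juju deploy ceph-mon --config \
+  #     monitor-secret="$(ceph-authtool /dev/stdout --name=mon. --gen-key)"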
+  monitor-count:
+    type: int
+    default: 3
+    description: |
+      Number of ceph-mon units to wait for before attempting to bootstrap the
+      monitor cluster. For production clusters the default value of 3 ceph-mon
+      units is normally a good choice.
+      .
+      For test and development environments you can enable single-unit
+      deployment by setting this to 1.
+      .
+      NOTE: To establish quorum and enable partition tolerance an odd number of
+      ceph-mon units is required.
+  monitor-hosts:
+    type: string
+    default:
+    description: |
+      A space-separated list of Ceph mon hosts to use. This field is only used
+      to migrate an existing cluster to a juju-managed solution and should
+      otherwise be left unset.
+  monitor-data-available-warning:
+    type: int
+    default: 30
+    description: |
+      Raise HEALTH_WARN status when the filesystem that houses a monitor's data
+      store reports that its available capacity is less than or equal to this
+      percentage.
+  monitor-data-available-critical:
+    type: int
+    default: 5
+    description: |
+      Raise HEALTH_ERR status when the filesystem that houses a monitor's data
+      store reports that its available capacity is less than or equal to this
+      percentage.
+  expected-osd-count:
+    type: int
+    default: 0
+    description: |
+      The number of OSDs expected to be deployed in the cluster. This value can
+      influence the number of placement groups (PGs) to use for pools. The PG
+      calculation is based either on the actual number of OSDs or this option's
+      value, whichever is greater. The default value is '0', which tells the
+      charm to only consider the actual number of OSDs. If the actual number of
+      OSDs is less than three, then this option must explicitly state that
+      number.
+  pgs-per-osd:
+    type: int
+    default: 100
+    description: |
+      The number of placement groups per OSD to target. It is important to
+      properly size the number of placement groups per OSD as too many
+      or too few placement groups per OSD may cause resource constraints and
+      performance degradation. This value comes from the recommendation of
+      the Ceph placement group calculator (http://ceph.com/pgcalc/) and
+      recommended values are:
+      .
+      100 - If the cluster OSD count is not expected to increase in the
+      foreseeable future.
+      200 - If the cluster OSD count is expected to increase (up to 2x) in the
+      foreseeable future.
+      300 - If the cluster OSD count is expected to increase between 2x and 3x
+      in the foreseeable future.
+  ceph-public-network:
+    type: string
+    default:
+    description: |
+      The IP address and netmask of the public (front-side) network (e.g.,
+      192.168.0.0/24).
+      .
+      If multiple networks are to be used, a space-delimited list of a.b.c.d/x
+      can be provided.
+  ceph-cluster-network:
+    type: string
+    default:
+    description: |
+      The IP address and netmask of the cluster (back-side) network (e.g.,
+      192.168.0.0/24).
+      .
+      If multiple networks are to be used, a space-delimited list of a.b.c.d/x
+      can be provided.
+  prefer-ipv6:
+    type: boolean
+    default: False
+    description: |
+      If True, enables IPv6 support. The charm will expect network interfaces
+      to be configured with an IPv6 address. If set to False (default) IPv4
+      is expected.
+      .
+      NOTE: these charms do not currently support the IPv6 privacy extension.
+      In order for this charm to function correctly, the privacy extension
+      must be disabled and a non-temporary address must be
+      configured/available on your network interface.
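+  # Illustrative example only ("ceph-mon" below is a placeholder application
+  # name): multiple public networks are supplied as a space-delimited list,
+  # e.g.:
+  #   juju config ceph-mon ceph-public-network="192.168.0.0/24 192.168.1.0/24"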
+  sysctl:
+    type: string
+    default: '{ kernel.pid_max : 2097152, vm.max_map_count : 524288,
+                kernel.threads-max: 2097152 }'
+    description: |
+      YAML-formatted associative array of sysctl key/value pairs to be set
+      persistently. By default we set pid_max, max_map_count and
+      threads-max to a high value to avoid problems with large numbers (>20)
+      of OSDs recovering. Very large clusters should set those values even
+      higher (e.g. the max for kernel.pid_max is 4194303).
+  customize-failure-domain:
+    type: boolean
+    default: false
+    description: |
+      Setting this to true will tell Ceph to replicate across Juju's
+      Availability Zones instead of specifically by host.
+  nagios_context:
+    type: string
+    default: "juju"
+    description: |
+      Used by the nrpe-external-master subordinate charm.
+      A string that will be prepended to the instance name to set the hostname
+      in nagios. So for instance the hostname would be something like:
+      .
+      juju-myservice-0
+      .
+      If you're running multiple environments with the same services in them,
+      this allows you to differentiate between them.
+  nagios_servicegroups:
+    type: string
+    default: ""
+    description: |
+      A comma-separated list of nagios servicegroups. If left empty, the
+      nagios_context will be used as the servicegroup.
+  nagios_degraded_thresh:
+    default: 0.1
+    type: float
+    description: "Threshold for degraded ratio (0.1 = 10%)"
+  nagios_misplaced_thresh:
+    default: 0.1
+    type: float
+    description: "Threshold for misplaced ratio (0.1 = 10%)"
+  nagios_recovery_rate:
+    default: '100'
+    type: string
+    description: |
+      Recovery rate (in objects/s) below which we consider recovery
+      to be stalled.
+  nagios_raise_nodeepscrub:
+    default: True
+    type: boolean
+    description: |
+      Whether to report Critical instead of Warning when the nodeep-scrub
+      flag is set.
+  nagios_check_num_osds:
+    default: False
+    type: boolean
+    description: |
+      Whether to report an error when the number of known OSDs does not equal
+      the number of OSDs that are in or up.
+  nagios_additional_checks:
+    default: ""
+    type: string
+    description: |
+      Dictionary describing additional checks. Each key is the name of a
+      check, which will be visible in Nagios. Each value is a string (regular
+      expression) which is checked against status messages.
+      .
+      Example:
+      .
+      {'noout_set': 'noout', 'too_few_PGs': 'too few PGs', 'clock': 'clock skew',
+       'degraded_redundancy': 'Degraded data redundancy'}
+      .
+  nagios_additional_checks_critical:
+    default: False
+    type: boolean
+    description: |
+      Whether additional checks report warning or error when their checks
+      are positive.
+  nagios_rgw_zones:
+    default: ""
+    type: string
+    description: |
+      Comma-separated list of zones that are expected to be connected to this
+      radosgw. These will be checked by the line "data sync source...
+      (zone-name)" in the output of `radosgw-admin sync status`.
+      .
+      Example:
+      .
+      zone1,zone2
+  nagios_rgw_additional_checks:
+    default: ""
+    type: string
+    description: |
+      List describing additional checks. Each item is a regular expression to
+      search in the output of radosgw-admin sync status. Note that this is a
+      list, unlike `nagios_additional_checks`, which uses a dictionary.
+      .
+      Example:
+      .
+      ['data is behind on']
+      .
+  use-direct-io:
+    type: boolean
+    default: True
+    description: Configure use of direct IO for OSD journals.
+  default-rbd-features:
+    type: int
+    default:
+    description: |
+      Default RBD Features to use when creating new images.
+      The value of this configuration option will be shared with consumers of
+      the ``ceph-client`` interface, and client charms may choose to add this
+      to the Ceph configuration file on the units they manage.
+
+      Example:
+
+      rbd default features = 1
+
+      NOTE: If you have clients using the kernel RBD driver you must set this
+      configuration option to a value corresponding to the features the driver
+      in your kernel supports. The kernel RBD driver tends to be multiple
+      cycles behind the userspace driver available for libvirt/qemu. Nova LXD
+      is among the clients depending on the kernel RBD driver.
+
+      NOTE: If you want to use the RBD Mirroring feature you must either let
+      this configuration option be the default or make sure the value you set
+      includes the ``exclusive-lock`` and ``journaling`` features.
+  no-bootstrap:
+    type: boolean
+    default: False
+    description: |
+      Causes the charm to not do any of the initial bootstrapping of the
+      Ceph monitor cluster. This is only intended to be used when migrating
+      from the ceph all-in-one charm to a ceph-mon / ceph-osd deployment.
+      Refer to the Charm Deployment guide at
+      https://docs.openstack.org/charm-deployment-guide/latest/
+      for more information.
+  disable-pg-max-object-skew:
+    type: boolean
+    default: False
+    description: |
+      OpenStack clouds that use Ceph will typically start their life with at
+      least one pool (glance) loaded with a disproportionately high amount of
+      data/objects where other pools may remain empty. This can trigger
+      HEALTH_WARN if mon_pg_warn_max_object_skew is exceeded, but that is
+      actually a false positive.
+  pg-autotune:
+    type: string
+    default: auto
+    description: |
+      The default configuration for pg-autotune will be to automatically enable
+      the module for new cluster installs on Ceph Nautilus, but to leave it
+      disabled for all cluster upgrades to Nautilus. To enable the pg-autotune
+      feature for upgraded clusters, the pg-autotune option should be set to
+      'true'. To disable the autotuner for new clusters, the pg-autotune option
+      should be set to 'false'.
+  permit-insecure-cmr:
+    type: boolean
+    default: False
+    description: |
+      The charm does not properly segregate access to pools from different
+      models; this means that certain charm settings can result in client
+      model B having access to the data from model A.
+  balancer-mode:
+    type: string
+    default:
+    description: |
+      The balancer mode used by the Ceph manager. Can only be set for Luminous
+      or later versions, and only when the balancer module is enabled.
+  rbd-stats-pools:
+    type: string
+    default: ""
+    description: |
+      Set pools to collect RBD per-image IO statistics by enabling dynamic OSD
+      performance counters. It can be set to:
+      - a comma-separated list of RBD pools to enable (e.g. "pool1,pool2,poolN")
+      - "*" to enable for all RBD pools
+      - "" to disable statistics
+      For more information: https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics
diff --git a/ceph-mon/copyright b/ceph-mon/copyright
new file mode 100644
index 00000000..c801b143
--- /dev/null
+++ b/ceph-mon/copyright
@@ -0,0 +1,16 @@
+Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0
+
+Files: *
+Copyright: 2012, Canonical Ltd.
+License: Apache-2.0
+ Licensed under the Apache License, Version 2.0 (the "License"); you may
+ not use this file except in compliance with the License.
You may obtain + a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + License for the specific language governing permissions and limitations + under the License. diff --git a/ceph-mon/files/grafana_dashboards/ceph-cluster-advanced.json b/ceph-mon/files/grafana_dashboards/ceph-cluster-advanced.json new file mode 100644 index 00000000..db61f332 --- /dev/null +++ b/ceph-mon/files/grafana_dashboards/ceph-cluster-advanced.json @@ -0,0 +1,3792 @@ +{ + "__inputs": [ ], + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.3.2" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" + }, + { + "id": "heatmap", + "name": "Heatmap", + "type": "panel", + "version": "5.0.0" + }, + { + "id": "singlestat", + "name": "Singlestat", + "type": "panel", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "Ceph cluster overview", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CLUSTER STATE", + "titleSize": "h6", + "type": "row" + }, + { + "colors": null, + "datasource": "${prometheusds}", + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ + { + "id": 0, + "options": { + "0": { + "text": "HEALTHY" + }, + "1": { + "text": "WARNING" + }, + "2": { + "text": "ERROR" + } + }, + "type": "value" + }, + { + "id": 1, + "options": { + "match": null, + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#9ac48a" + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 2 + } + ] + }, + "unit": "none" + } + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 0, + "y": 1 + }, + "id": 3, + "interval": "1m", + "links": [ ], + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": "${prometheusds}", + "expr": "ceph_health_status{}", + "format": "time_series", + "instant": true, + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 300 + } + ], + "title": "Ceph health status", + "transparent": true, + "type": "stat" + }, + { + "datasource": "${prometheusds}", + "description": "", + "fieldConfig": { + "defaults": { + "links": [ ], + "mappings": [ + { + "id": 0, + "options": { + "match": null, + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(245, 54, 54, 0.9)" + }, + { + 
"color": "rgba(237, 129, 40, 0.89)", + "value": 0.10000000000000001 + }, + { + "color": "rgba(50, 172, 45, 0.97)", + "value": 0.29999999999999999 + } + ] + }, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 3, + "y": 1 + }, + "id": 4, + "interval": "1m", + "links": [ ], + "maxDataPoints": 100, + "options": { + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": "${prometheusds}", + "expr": "(ceph_cluster_total_bytes{}-ceph_cluster_total_used_bytes{})/ceph_cluster_total_bytes{}", + "format": "time_series", + "instant": true, + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 300 + } + ], + "title": "Available Capacity", + "transparent": false, + "type": "gauge" + }, + { + "colors": null, + "datasource": "${prometheusds}", + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 2, + "links": [ ], + "mappings": [ + { + "id": 0, + "options": { + "match": null, + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)" + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0.025000000000000001 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 1 + } + ] + }, + "unit": "decbytes" + } + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 6, + "y": 1 + }, + "id": 5, + "interval": "1m", + "links": [ ], + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": "${prometheusds}", + "expr": "ceph_cluster_total_bytes{}", + "format": "time_series", + "instant": true, + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 300 + } + ], + "title": "Cluster Capacity", + "transparent": false, + "type": "stat" + }, + { + "colors": null, + "datasource": "${prometheusds}", + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 1, + "links": [ ], + "mappings": [ + { + "id": 0, + "options": { + "match": null, + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 9, + "y": 1 + }, + "id": 6, + "links": [ ], + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": "${prometheusds}", + "expr": "sum(irate(ceph_osd_op_w_in_bytes{}[5m]))", + "format": "time_series", + "instant": true, + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Write Throughput", + "transparent": false, + "type": "stat" + }, + { + "colors": null, + "datasource": "${prometheusds}", + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 1, + "links": [ ], + "mappings": [ + { + "id": 0, + "options": { + "match": null, + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": 
"absolute", + "steps": [ + { + "color": "#d44a3a" + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0 + }, + { + "color": "#9ac48a", + "value": 0 + } + ] + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 12, + "y": 1 + }, + "id": 7, + "links": [ ], + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": "${prometheusds}", + "expr": "sum(irate(ceph_osd_op_r_out_bytes{}[5m]))", + "format": "time_series", + "instant": true, + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Read Throughput", + "transparent": false, + "type": "stat" + }, + { + "colorMode": "Panel", + "colors": { + "crit": "rgb(255, 0, 0)", + "disable": "rgba(128, 128, 128, 0.9)", + "ok": "rgba(50, 128, 45, 0.9)", + "warn": "rgba(237, 129, 40, 0.9)" + }, + "cornerRadius": 0, + "datasource": "${prometheusds}", + "description": "", + "displayName": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + } + }, + "flipCard": false, + "flipTime": 5, + "fontFormat": "Regular", + "gridPos": { + "h": 3, + "w": 6, + "x": 15, + "y": 1 + }, + "id": 8, + "isAutoScrollOnOverflow": false, + "isGrayOnNoData": false, + "isHideAlertsOnDisable": false, + "isIgnoreOKColors": false, + "links": [ ], + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "aggregation": "Last", + "alias": "All", + "datasource": "${prometheusds}", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ceph_osd_metadata)", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "All", + "refId": "A", + "units": "none", + "valueHandler": "Number Threshold" + }, + { + "aggregation": "Last", + "alias": "In", + "datasource": "${prometheusds}", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ceph_osd_in)", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "In", + "refId": "B", + "units": "none", + "valueHandler": "Number Threshold" + }, + { + "aggregation": "Last", + "alias": "Out", + "datasource": "${prometheusds}", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "sum(ceph_osd_in == bool 0)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Out", + "refId": "C", + "units": "none", + "valueHandler": "Number Threshold", + "warn": 1 + }, + { + "aggregation": "Last", + "alias": "Up", + "datasource": "${prometheusds}", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "sum(ceph_osd_up)", + "format": "time_series", + "interval": "", + 
"intervalFactor": 1, + "legendFormat": "Up", + "refId": "D", + "units": "none", + "valueHandler": "Number Threshold" + }, + { + "aggregation": "Last", + "alias": "Down", + "datasource": "${prometheusds}", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "sum(ceph_osd_up == bool 0)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Down", + "refId": "E", + "units": "none", + "valueHandler": "Number Threshold", + "warn": 1 + } + ], + "title": "OSDs", + "transparent": false, + "type": "stat" + }, + { + "colorMode": "Panel", + "colors": { + "crit": "rgba(245, 54, 54, 0.9)", + "disable": "rgba(128, 128, 128, 0.9)", + "ok": "rgba(50, 128, 45, 0.9)", + "warn": "rgba(237, 129, 40, 0.9)" + }, + "cornerRadius": 1, + "datasource": "${prometheusds}", + "description": "", + "displayName": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + } + }, + "flipCard": false, + "flipTime": 5, + "fontFormat": "Regular", + "gridPos": { + "h": 6, + "w": 3, + "x": 21, + "y": 1 + }, + "id": 9, + "isAutoScrollOnOverflow": false, + "isGrayOnNoData": false, + "isHideAlertsOnDisable": false, + "isIgnoreOKColors": false, + "links": [ ], + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "aggregation": "Last", + "alias": "Active", + "datasource": "${prometheusds}", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ceph_mgr_status == 1) or vector(0)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "Active", + "refId": "A", + "units": "none", + "valueHandler": "Number Threshold" + }, + { + "aggregation": "Last", + "alias": "Standby", + "datasource": "${prometheusds}", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ceph_mgr_status == 0) or vector(0)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "Standby", + "refId": "B", + "units": "none", + "valueHandler": "Number Threshold" + } + ], + "title": "MGRs", + "transparent": false, + "type": "stat" + }, + { + "colorMode": "Panel", + "colors": { + "crit": "rgba(245, 54, 54, 0.9)", + "disable": "rgba(128, 128, 128, 0.9)", + "ok": "rgba(50, 128, 45, 0.9)", + "warn": "rgba(237, 129, 40, 0.9)" + }, + "cornerRadius": 1, + "datasource": "${prometheusds}", + "description": "", + "displayName": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Critical" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Warning" + }, + "properties": [ + { + "id": "color", + "value": 
{ + "fixedColor": "#987d24", + "mode": "fixed" + } + } + ] + } + ] + }, + "flipCard": false, + "flipTime": 5, + "fontFormat": "Regular", + "gridPos": { + "h": 3, + "w": 3, + "x": 0, + "y": 4 + }, + "id": 10, + "isAutoScrollOnOverflow": false, + "isGrayOnNoData": false, + "isHideAlertsOnDisable": false, + "isIgnoreOKColors": false, + "links": [ ], + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "aggregation": "Last", + "alias": "Active", + "datasource": "${prometheusds}", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ALERTS{alertstate=\"firing\",alertname=~\"^Ceph.+\", severity=\"critical\"}) OR vector(0)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "Critical", + "refId": "A", + "units": "none", + "valueHandler": "Number Threshold" + }, + { + "aggregation": "Last", + "alias": "Standby", + "datasource": "${prometheusds}", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ALERTS{alertstate=\"firing\",alertname=~\"^Ceph.+\", severity=\"warning\"}) OR vector(0)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "Warning", + "refId": "B", + "units": "none", + "valueHandler": "Number Threshold" + } + ], + "title": "Firing Alerts", + "transparent": false, + "type": "stat" + }, + { + "colors": null, + "datasource": "${prometheusds}", + "description": "", + "displayName": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ + { + "id": 0, + "options": { + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0.025000000000000001 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 0.10000000000000001 + } + ] + }, + "unit": "decbytes" + } + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 6, + "y": 4 + }, + "id": 11, + "links": [ ], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": "${prometheusds}", + "expr": "ceph_cluster_total_used_bytes{}", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Used Capacity", + "transparent": false, + "type": "stat" + }, + { + "colors": null, + "datasource": "${prometheusds}", + "description": "", + "displayName": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ + { + "id": 0, + "options": { + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 9, + "y": 4 + }, + "id": 12, + "links": [ ], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "area", + 
"justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": "${prometheusds}", + "expr": "sum(irate(ceph_osd_op_w{}[$__rate_interval]))", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Write IOPS", + "transparent": false, + "type": "stat" + }, + { + "colors": null, + "datasource": "${prometheusds}", + "description": "", + "displayName": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ + { + "id": 0, + "options": { + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#d44a3a", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0 + }, + { + "color": "#9ac48a", + "value": 0 + } + ] + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 12, + "y": 4 + }, + "id": 13, + "links": [ ], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": "${prometheusds}", + "expr": "sum(irate(ceph_osd_op_r{}[$__rate_interval]))", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Read IOPS", + "transparent": false, + "type": "stat" + }, + { + "colorMode": "Panel", + "colors": { + "crit": "rgba(245, 54, 54, 0.9)", + "disable": "rgba(128, 128, 128, 0.9)", + "ok": "rgba(50, 128, 45, 0.9)", + "warn": "rgba(237, 129, 40, 0.9)" + }, + "cornerRadius": 1, + "datasource": "${prometheusds}", + "description": "", + "displayName": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + } + }, + "flipCard": false, + "flipTime": 5, + "fontFormat": "Regular", + "gridPos": { + "h": 3, + "w": 6, + "x": 15, + "y": 4 + }, + "id": 14, + "isAutoScrollOnOverflow": false, + "isGrayOnNoData": false, + "isHideAlertsOnDisable": false, + "isIgnoreOKColors": false, + "links": [ ], + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "aggregation": "Last", + "alias": "In Quorum", + "datasource": "${prometheusds}", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "sum(ceph_mon_quorum_status)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "In Quorum", + "refId": "A", + "units": "none", + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "Total", + "crit": 1, + "datasource": "${prometheusds}", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ceph_mon_quorum_status)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": 
"Total", + "refId": "B", + "units": "none", + "valueHandler": "Text Only", + "warn": 2 + }, + { + "aggregation": "Last", + "alias": "MONs out of Quorum", + "crit": 1.6000000000000001, + "datasource": "${prometheusds}", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Annotation", + "displayValueWithAlias": "Never", + "expr": "count(ceph_mon_quorum_status) - sum(ceph_mon_quorum_status)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "MONs out of Quorum", + "range": true, + "refId": "C", + "units": "none", + "valueHandler": "Number Threshold", + "warn": 1.1000000000000001 + } + ], + "title": "Monitors", + "transparent": false, + "type": "stat" + }, + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 15, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CLUSTER STATS", + "titleSize": "h6", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 8 + }, + "id": 16, + "limit": 10, + "onlyAlertsOnDashboard": true, + "options": { + "alertInstanceLabelFilter": "{alertname=~\"^Ceph.+\"}", + "alertName": "", + "dashboardAlerts": false, + "groupBy": [ ], + "groupMode": "default", + "maxItems": 20, + "sortOrder": 1, + "stateFilter": { + "error": true, + "firing": true, + "noData": false, + "normal": false, + "pending": true + }, + "viewMode": "list" + }, + "show": "current", + "sortOrder": 1, + "stateFilter": [ ], + "title": "Alerts", + "type": "alertlist" + }, + { + "datasource": "${prometheusds}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 40, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 0, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#c0921f", + "value": 75 + }, + { + "color": "#E02F44", + "value": 85 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total Capacity" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Used" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + }, + { + "id": "custom.thresholdsStyle", + "value": { + "mode": "dashed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 8 + }, + "id": 17, + "interval": "$interval", + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${prometheusds}", + "expr": "ceph_cluster_total_bytes{}", + "format": "time_series", + "instant": false, + "interval": "$interval", + 
"intervalFactor": 1, + "legendFormat": "Total Capacity", + "range": true, + "refId": "A", + "step": 300 + }, + { + "datasource": "${prometheusds}", + "expr": "ceph_cluster_total_used_bytes{}", + "format": "time_series", + "instant": false, + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Used", + "range": true, + "refId": "B", + "step": 300 + } + ], + "title": "Capacity", + "type": "timeseries" + }, + { + "datasource": "${prometheusds}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 85 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 8 + }, + "id": 18, + "interval": "$interval", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${prometheusds}", + "expr": "sum(irate(ceph_osd_op_w_in_bytes{}[5m]))", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Write", + "range": true, + "refId": "A", + "step": 300 + }, + { + "datasource": "${prometheusds}", + "expr": "sum(irate(ceph_osd_op_r_out_bytes{}[5m]))", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Read", + "range": true, + "refId": "B", + "step": 300 + } + ], + "title": "Cluster Throughput", + "type": "timeseries" + }, + { + "datasource": "${prometheusds}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 16 + }, + "id": 19, + "interval": "$interval", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${prometheusds}", + "expr": 
"sum(irate(ceph_osd_op_w{}[$__rate_interval]))", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Write", + "range": true, + "refId": "A", + "step": 300 + }, + { + "datasource": "${prometheusds}", + "expr": "sum(irate(ceph_osd_op_r{}[$__rate_interval]))", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Read", + "range": true, + "refId": "B", + "step": 300 + } + ], + "title": "IOPS", + "type": "timeseries" + }, + { + "datasource": "${prometheusds}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 16 + }, + "id": 20, + "interval": "$interval", + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${prometheusds}", + "expr": "(ceph_pool_bytes_used{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "{{name}}", + "refId": "A", + "step": 300 + } + ], + "title": "Pool Used Bytes", + "type": "timeseries" + }, + { + "datasource": "${prometheusds}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "rbd Stored" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "transparent", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 16 + }, + "id": 21, + "interval": "$interval", + "options": { + "legend": { + "calcs": [ ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${prometheusds}", + "expr": "(ceph_pool_stored_raw{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})", + "format": "time_series", + "hide": false, + "interval": "", 
+ "intervalFactor": 1, + "legendFormat": "{{name}}", + "range": true, + "refId": "A", + "step": 300 + } + ], + "title": "Pool Used RAW Bytes", + "type": "timeseries" + }, + { + "datasource": "${prometheusds}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 24 + }, + "id": 22, + "interval": "$interval", + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${prometheusds}", + "expr": "(ceph_pool_quota_objects{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{name}}", + "refId": "A", + "step": 300 + } + ], + "title": "Pool Objects Quota", + "type": "timeseries" + }, + { + "datasource": "${prometheusds}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 24 + }, + "id": 23, + "interval": "$interval", + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${prometheusds}", + "expr": "(ceph_pool_quota_bytes{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{name}}", + "refId": "A", + "step": 300 + } + ], + "title": "Pool Quota Bytes", + "type": "timeseries" + }, + { + "datasource": "${prometheusds}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": 
false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 24 + }, + "id": 24, + "interval": "$interval", + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${prometheusds}", + "expr": "(ceph_pool_objects{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "title": "Objects Per Pool", + "type": "timeseries" + }, + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 25, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OBJECTS", + "titleSize": "h6", + "type": "row" + }, + { + "datasource": "${prometheusds}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^Total.*$/" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "normal" + } + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 6, + "x": 0, + "y": 32 + }, + "id": 26, + "interval": "$interval", + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "asc" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${prometheusds}", + "expr": "sum(ceph_pool_objects)", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Total", + "range": true, + "refId": "A", + "step": 200 + } + ], + "title": "OSD Type Count", + "type": "timeseries" + }, + { + "datasource": "${prometheusds}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + 
"scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^Total.*$/" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "normal" + } + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 8, + "x": 6, + "y": 32 + }, + "id": 27, + "interval": "$interval", + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "asc" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${prometheusds}", + "expr": "sum(ceph_pg_active{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Active", + "range": true, + "refId": "A" + }, + { + "datasource": "${prometheusds}", + "expr": "sum(ceph_pg_clean{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Clean", + "range": true, + "refId": "B" + }, + { + "datasource": "${prometheusds}", + "expr": "sum(ceph_pg_peering{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Peering", + "range": true, + "refId": "C" + }, + { + "datasource": "${prometheusds}", + "expr": "sum(ceph_pg_degraded{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Degraded", + "range": true, + "refId": "D", + "step": 300 + }, + { + "datasource": "${prometheusds}", + "expr": "sum(ceph_pg_stale{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Stale", + "range": true, + "refId": "E", + "step": 300 + }, + { + "datasource": "${prometheusds}", + "expr": "sum(ceph_unclean_pgs{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Unclean", + "range": true, + "refId": "F", + "step": 300 + }, + { + "datasource": "${prometheusds}", + "expr": "sum(ceph_pg_undersized{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Undersized", + "range": true, + "refId": "G", + "step": 300 + }, + { + "datasource": "${prometheusds}", + "expr": "sum(ceph_pg_incomplete{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Incomplete", + "range": true, + "refId": "H" + }, + { + "datasource": "${prometheusds}", + "expr": "sum(ceph_pg_forced_backfill{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Forced Backfill", + "range": true, + "refId": "I" + }, + { + "datasource": "${prometheusds}", + "expr": "sum(ceph_pg_forced_recovery{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Forced Recovery", + "range": true, + "refId": "J" + }, + { + "datasource": "${prometheusds}", + "expr": "sum(ceph_pg_creating{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Creating", + "range": true, + "refId": "K" + }, + { + "datasource": "${prometheusds}", + "expr": "sum(ceph_pg_wait_backfill{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 
1, + "legendFormat": "Wait Backfill", + "range": true, + "refId": "L" + }, + { + "datasource": "${prometheusds}", + "expr": "sum(ceph_pg_deep{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Deep", + "range": true, + "refId": "M" + }, + { + "datasource": "${prometheusds}", + "expr": "sum(ceph_pg_scrubbing{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Scrubbing", + "range": true, + "refId": "N" + }, + { + "datasource": "${prometheusds}", + "expr": "sum(ceph_pg_recovering{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Recovering", + "range": true, + "refId": "O" + }, + { + "datasource": "${prometheusds}", + "expr": "sum(ceph_pg_repair{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Repair", + "range": true, + "refId": "P" + }, + { + "datasource": "${prometheusds}", + "expr": "sum(ceph_pg_down{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Down", + "range": true, + "refId": "Q" + }, + { + "datasource": "${prometheusds}", + "expr": "sum(ceph_pg_peered{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Peered", + "range": true, + "refId": "R" + }, + { + "datasource": "${prometheusds}", + "expr": "sum(ceph_pg_backfill{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Backfill", + "range": true, + "refId": "S" + }, + { + "datasource": "${prometheusds}", + "expr": "sum(ceph_pg_remapped{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Remapped", + "range": true, + "refId": "T" + }, + { + "datasource": "${prometheusds}", + "expr": "sum(ceph_pg_backfill_toofull{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Backfill Toofull", + "range": true, + "refId": "U" + } + ], + "title": "PGs State", + "type": "timeseries" + }, + { + "datasource": "${prometheusds}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^Total.*$/" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "normal" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 10, + "x": 14, + "y": 32 + }, + "id": 28, + "interval": "$interval", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "asc" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${prometheusds}", + "expr": 
"sum(ceph_pg_degraded{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Degraded", + "range": true, + "refId": "A", + "step": 300 + }, + { + "datasource": "${prometheusds}", + "expr": "sum(ceph_pg_stale{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Stale", + "range": true, + "refId": "B", + "step": 300 + }, + { + "datasource": "${prometheusds}", + "expr": "sum(ceph_pg_undersized{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Undersized", + "range": true, + "refId": "C", + "step": 300 + } + ], + "title": "Stuck PGs", + "type": "timeseries" + }, + { + "datasource": "${prometheusds}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 6, + "w": 10, + "x": 14, + "y": 38 + }, + "id": 29, + "interval": "$interval", + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${prometheusds}", + "expr": "sum(irate(ceph_osd_recovery_ops{}[$interval]))", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "OPS", + "refId": "A", + "step": 300 + } + ], + "title": "Recovery Operations", + "type": "timeseries" + }, + { + "collapse": false, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 44 + }, + "id": 30, + "panels": [ + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "opacity" + }, + "dataFormat": "timeseries", + "datasource": "${prometheusds}", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 42 + }, + "heatmap": { }, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 31, + "legend": { + "show": true + }, + "options": { + "calculate": true, + "calculation": { + "yBuckets": { + "mode": "count", + "scale": { + "log": 2, + "type": "log" + }, + "value": "1" + } + }, + "cellGap": 2, + "cellValues": { }, + "color": { + "exponent": 0.5, + "fill": "#b4ff00", + "mode": "opacity", + "reverse": false, + "scale": "exponential", + "scheme": "Oranges", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1.0000000000000001e-09 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "showValue": "never", + "tooltip": { + "show": 
true, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "min": "0", + "reverse": false, + "unit": "ms" + } + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": "${prometheusds}", + "expr": "ceph_osd_apply_latency_ms{}", + "format": "time_series", + "instant": false, + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "OSD Apply Latency Distribution", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": "", + "yAxis": { + "decimals": null, + "format": "ms", + "logBase": 2, + "max": null, + "min": "0", + "show": true, + "splitFactor": 1 + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": 10 + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#65c5db", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "opacity" + }, + "dataFormat": "timeseries", + "datasource": "${prometheusds}", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 42 + }, + "heatmap": { }, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 32, + "legend": { + "show": true + }, + "options": { + "calculate": true, + "calculation": { + "yBuckets": { + "mode": "count", + "scale": { + "log": 2, + "type": "log" + } + } + }, + "cellGap": 2, + "cellValues": { }, + "color": { + "exponent": 0.5, + "fill": "#65c5db", + "mode": "opacity", + "reverse": false, + "scale": "exponential", + "scheme": "Oranges", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1.0000000000000001e-09 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "showValue": "never", + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "min": "0", + "reverse": false, + "unit": "ms" + } + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": "${prometheusds}", + "expr": "ceph_osd_commit_latency_ms{}", + "format": "time_series", + "instant": false, + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "OSD Commit Latency Distribution", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": "", + "yAxis": { + "decimals": null, + "format": "ms", + "logBase": 2, + "max": null, + "min": "0", + "show": true, + "splitFactor": 1 + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": 10 + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#806eb7", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "opacity" + }, + "dataFormat": "timeseries", + "datasource": "${prometheusds}", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 50 + }, + "heatmap": { }, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 33, + "legend": { + "show": true + }, + "options": { + "calculate": true, + "calculation": { + 
"yBuckets": { + "mode": "count", + "scale": { + "log": 2, + "type": "log" + } + } + }, + "cellGap": 2, + "cellValues": { }, + "color": { + "exponent": 0.5, + "fill": "#806eb7", + "mode": "opacity", + "reverse": false, + "scale": "exponential", + "scheme": "Oranges", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1.0000000000000001e-09 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "showValue": "never", + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "decimals": 2, + "min": "0", + "reverse": false, + "unit": "ms" + } + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": "${prometheusds}", + "expr": "rate(ceph_osd_op_r_latency_sum{}[5m]) / rate(ceph_osd_op_r_latency_count{}[5m]) >= 0", + "format": "time_series", + "instant": false, + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "OSD Read Op Latency Distribution", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": "", + "yAxis": { + "decimals": null, + "format": "ms", + "logBase": 2, + "max": null, + "min": "0", + "show": true, + "splitFactor": 1 + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#f9934e", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "opacity" + }, + "dataFormat": "timeseries", + "datasource": "${prometheusds}", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 50 + }, + "heatmap": { }, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 34, + "legend": { + "show": true + }, + "options": { + "calculate": true, + "calculation": { + "yBuckets": { + "mode": "count", + "scale": { + "log": 2, + "type": "log" + } + } + }, + "cellGap": 2, + "cellValues": { }, + "color": { + "exponent": 0.5, + "fill": "#f9934e", + "mode": "opacity", + "reverse": false, + "scale": "exponential", + "scheme": "Oranges", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1.0000000000000001e-09 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "showValue": "never", + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "decimals": 2, + "min": "0", + "reverse": false, + "unit": "ms" + } + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": "${prometheusds}", + "expr": "rate(ceph_osd_op_w_latency_sum{}[5m]) / rate(ceph_osd_op_w_latency_count{}[5m]) >= 0", + "format": "time_series", + "instant": false, + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "OSD Write Op Latency Distribution", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": "", + "yAxis": { + "decimals": null, + "format": "ms", + "logBase": 2, + "max": null, + "min": "0", + "show": true, + "splitFactor": 1 + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "datasource": 
"${prometheusds}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 58 + }, + "id": 35, + "interval": "$interval", + "options": { + "legend": { + "calcs": [ ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${prometheusds}", + "expr": "avg(rate(ceph_osd_op_r_latency_sum{}[5m]) / rate(ceph_osd_op_r_latency_count{}[5m]) >= 0)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Read", + "refId": "A" + }, + { + "datasource": "${prometheusds}", + "expr": "avg(rate(ceph_osd_op_w_latency_sum{}[5m]) / rate(ceph_osd_op_w_latency_count{}[5m]) >= 0)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Write", + "refId": "B" + } + ], + "title": "Recovery Operations", + "type": "timeseries" + }, + { + "datasource": "${prometheusds}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 58 + }, + "id": 36, + "interval": "$interval", + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${prometheusds}", + "expr": "avg(ceph_osd_apply_latency_ms{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "apply", + "metric": "ceph_osd_perf_apply_latency_seconds", + "refId": "A", + "step": 4 + }, + { + "datasource": "${prometheusds}", + "expr": "avg(ceph_osd_commit_latency_ms{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "commit", + "metric": "ceph_osd_perf_commit_latency_seconds", + "refId": "B", + "step": 4 + } + ], + "title": "AVG OSD Apply + Commit Latency", + "type": "timeseries" + } + ], + "repeat": null, + 
"repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "LATENCY", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": true, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 45 + }, + "id": 37, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6", + "type": "row" + }, + { + "columns": [ ], + "datasource": "${prometheusds}", + "fieldConfig": { + "defaults": { + "custom": { + "align": "left", + "cellOptions": { + "type": "auto" + }, + "filterable": false, + "inspect": false + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 46 + }, + "id": 38, + "links": [ ], + "options": { + "footer": { + "countRows": false, + "enablePagination": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "frameIndex": 1, + "showHeader": true + }, + "pluginVersion": "9.4.7", + "styles": "", + "targets": [ + { + "datasource": "${prometheusds}", + "exemplar": false, + "expr": "count by (ceph_version)(ceph_osd_metadata{})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "OSD Services", + "range": false, + "refId": "A" + }, + { + "datasource": "${prometheusds}", + "exemplar": false, + "expr": "count by (ceph_version)(ceph_mon_metadata{})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Mon Services", + "range": false, + "refId": "B" + }, + { + "datasource": "${prometheusds}", + "exemplar": false, + "expr": "count by (ceph_version)(ceph_mds_metadata{})", + "format": "table", + "hide": false, + "instant": true, + "intervalFactor": 1, + "legendFormat": "MDS Services", + "range": false, + "refId": "C" + }, + { + "datasource": "${prometheusds}", + "exemplar": false, + "expr": "count by (ceph_version)(ceph_rgw_metadata{})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "RGW Services", + "range": false, + "refId": "D" + }, + { + "datasource": "${prometheusds}", + "exemplar": false, + "expr": "count by (ceph_version)(ceph_mgr_metadata{})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "MGR Services", + "range": false, + "refId": "E" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Ceph Versions", + "transformations": [ + { + "id": "merge", + "options": { } + }, + { + "id": "organize", + "options": { + "excludeByName": { }, + "indexByName": { }, + "renameByName": { + "Time": "", + "Value #A": "OSD Services", + "Value #B": "Mon Services", + "Value #C": "MDS Services", + "Value #D": "RGW Services", + "Value #E": "MGR Services", + "ceph_version": "Ceph Version" + } + } + } + ], + "type": "table" + } + ], + "refresh": "1m", + "rows": [ ], + "schemaVersion": 38, + "style": "dark", + "tags": [ + "ceph-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "prometheusds", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "auto": true, + "auto_count": 10, + 
"auto_min": "1m", + "current": { + "text": "$__auto_interval_interval", + "value": "$__auto_interval_interval" + }, + "hide": 0, + "label": "Interval", + "name": "interval", + "options": [ + { + "selected": true, + "text": "auto", + "value": "$__auto_interval_interval" + }, + { + "selected": false, + "text": "5s", + "value": "5s" + }, + { + "selected": false, + "text": "10s", + "value": "10s" + }, + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "5s,10s,30s,1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "type": "interval", + "valuelabels": { } + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Ceph Cluster - Advanced", + "version": 0 +} diff --git a/ceph-mon/files/grafana_dashboards/ceph-cluster.json b/ceph-mon/files/grafana_dashboards/ceph-cluster.json new file mode 100644 index 00000000..9d1fbd73 --- /dev/null +++ b/ceph-mon/files/grafana_dashboards/ceph-cluster.json @@ -0,0 +1,1269 @@ +{ + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "heatmap", + "name": "Heatmap", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "5.0.0" + } + ], + "annotations": { + "list": [] + }, + "description": "Ceph cluster overview", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1525415495309, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 128, 45, 0.9)", + "rgba(237, 129, 40, 0.9)", + "rgb(255, 0, 0)" + ], + "datasource": "${prometheusds}", + "editable": false, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 0, + "y": 0 + }, + "hideTimeOverride": true, + "id": 21, + "interval": "1m", + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": 
"ceph_health_status", + "format": "time_series", + "instant": true, + "interval": "$interval", + "intervalFactor": 1, + "refId": "A", + "step": 60 + } + ], + "thresholds": "1,2", + "timeFrom": null, + "title": "Health Status", + "transparent": false, + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "OK", + "value": "0" + }, + { + "op": "=", + "text": "WARN", + "value": "1" + }, + { + "op": "=", + "text": "ERR", + "value": "2" + } + ], + "valueName": "current" + }, + { + "colorMode": "Panel", + "colors": { + "crit": "rgb(255, 0, 0)", + "disable": "rgba(128, 128, 128, 0.9)", + "ok": "rgba(50, 128, 45, 0.9)", + "warn": "rgba(237, 129, 40, 0.9)" + }, + "cornerRadius": 0, + "datasource": "${prometheusds}", + "displayName": "", + "flipCard": false, + "flipTime": 5, + "fontFormat": "Regular", + "gridPos": { + "h": 3, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 43, + "isAutoScrollOnOverflow": false, + "isGrayOnNoData": false, + "isHideAlertsOnDisable": false, + "isIgnoreOKColors": false, + "links": [], + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "targets": [ + { + "aggregation": "Last", + "alias": "All", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ceph_osd_metadata)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "All", + "refId": "A", + "units": "none", + "valueHandler": "Number Threshold" + }, + { + "aggregation": "Last", + "alias": "In", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "sum(ceph_osd_in)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "In", + "refId": "B", + "units": "none", + "valueHandler": "Number Threshold" + }, + { + "aggregation": "Last", + "alias": "Out", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "sum(ceph_osd_in == bool 0)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Out", + "refId": "C", + "units": "none", + "valueHandler": "Number Threshold", + "warn": 1 + }, + { + "aggregation": "Last", + "alias": "Up", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "sum(ceph_osd_up)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Up", + "refId": "D", + "units": "none", + "valueHandler": "Number Threshold" + }, + { + "aggregation": "Last", + "alias": "Down", + "crit": 2, + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "sum(ceph_osd_up == bool 0)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Down", + "refId": "E", + "units": "none", + "valueHandler": "Number Threshold", + "warn": 1 + } + ], + "title": "OSDs", + "type": "stat" + }, + { + "clusterName": "", + "colorMode": "Panel", + "colors": { + "crit": "rgba(245, 54, 54, 0.9)", + "disable": "rgba(128, 128, 128, 0.9)", + "ok": "rgba(50, 128, 45, 0.9)", + "warn": "rgba(237, 129, 40, 0.9)" + }, + "cornerRadius": 1, + "datasource": "${prometheusds}", + "displayName": "", + "flipCard": false, + "flipTime": 5, + 
"fontFormat": "Regular", + "gridPos": { + "h": 3, + "w": 6, + "x": 12, + "y": 0 + }, + "id": 41, + "isAutoScrollOnOverflow": false, + "isGrayOnNoData": false, + "isHideAlertsOnDisable": false, + "isIgnoreOKColors": false, + "links": [], + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "targets": [ + { + "aggregation": "Last", + "alias": "In Quorum", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "sum(ceph_mon_quorum_status)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "In Quorum", + "refId": "A", + "units": "none", + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "Total", + "crit": 1, + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ceph_mon_quorum_status)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Total", + "refId": "B", + "units": "none", + "valueHandler": "Text Only", + "warn": 2 + }, + { + "aggregation": "Last", + "alias": "MONs out of Quorum", + "crit": 1.6, + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Annotation", + "displayValueWithAlias": "Never", + "expr": "count(ceph_mon_quorum_status) - sum(ceph_mon_quorum_status)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "MONs out of Quorum", + "refId": "C", + "units": "none", + "valueHandler": "Number Threshold", + "warn": 1.1 + } + ], + "title": "Monitors", + "type": "stat" + }, + { + "colorMode": "Panel", + "colors": { + "crit": "rgba(245, 54, 54, 0.9)", + "disable": "rgba(128, 128, 128, 0.9)", + "ok": "rgba(50, 128, 45, 0.9)", + "warn": "rgba(237, 129, 40, 0.9)" + }, + "cornerRadius": 1, + "datasource": "${prometheusds}", + "displayName": "", + "flipCard": false, + "flipTime": 5, + "fontFormat": "Regular", + "gridPos": { + "h": 3, + "w": 6, + "x": 18, + "y": 0 + }, + "id": 68, + "isAutoScrollOnOverflow": false, + "isGrayOnNoData": false, + "isHideAlertsOnDisable": false, + "isIgnoreOKColors": false, + "links": [], + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "targets": [ + { + "aggregation": "Last", + "alias": "Active", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ceph_mgr_status == 1) or vector(0)", + "format": "time_series", + "intervalFactor": 1, + "instant": true, + "legendFormat": "Active", + "refId": "A", + "units": "none", + "valueHandler": "Number Threshold" + }, + { + "aggregation": "Last", + "alias": "Standby", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ceph_mgr_status == 0) or vector(0)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "Standby", + "refId": "B", + "units": "none", + "valueHandler": "Number Threshold" + } + ], + "title": "MGRs", + "type": "stat" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + 
"datasource": "${prometheusds}", + "decimals": 2, + "format": "percentunit", + "gauge": { + "maxValue": 1, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 6 + }, + "id": 47, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(ceph_osd_stat_bytes_used)/sum(ceph_osd_stat_bytes)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Used", + "refId": "A" + } + ], + "thresholds": "0.7,0.8", + "title": "Capacity used", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "fill": 0, + "gridPos": { + "h": 6, + "w": 9, + "x": 6, + "y": 6 + }, + "id": 53, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Active", + "color": "#508642", + "fill": 1, + "stack": "A" + }, + { + "alias": "Total", + "color": "#f9e2d2" + }, + { + "alias": "Degraded", + "color": "#eab839" + }, + { + "alias": "Undersized", + "color": "#f9934e" + }, + { + "alias": "Inconsistent", + "color": "#e24d42" + }, + { + "alias": "Down", + "color": "#bf1b00" + }, + { + "alias": "Inactive", + "color": "#bf1b00", + "fill": 4, + "linewidth": 0, + "stack": "A" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(ceph_pg_total)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Total", + "refId": "A" + }, + { + "expr": "sum(ceph_pg_active)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Active", + "refId": "B" + }, + { + "expr": "sum(ceph_pg_total - ceph_pg_active)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Inactive", + "refId": "G" + }, + { + "expr": "sum(ceph_pg_undersized)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Undersized", + "refId": "F" + }, + { + "expr": "sum(ceph_pg_degraded)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Degraded", + "refId": "C" + }, + { + "expr": "sum(ceph_pg_inconsistent)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Inconsistent", + "refId": "D" + }, + { + "expr": "sum(ceph_pg_down)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Down", + "refId": "E" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "PG States", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + 
"values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "fill": 0, + "gridPos": { + "h": 6, + "w": 9, + "x": 15, + "y": 6 + }, + "id": 66, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Avg Apply Latency", + "color": "#7eb26d" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile(0.95, ceph_osd_apply_latency_ms)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Apply Latency P_95", + "refId": "A" + }, + { + "expr": "quantile(0.95, ceph_osd_commit_latency_ms)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Commit Latency P_95", + "refId": "B" + }, + { + "expr": "avg(ceph_osd_apply_latency_ms)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Avg Apply Latency", + "refId": "C" + }, + { + "expr": "avg(ceph_osd_commit_latency_ms)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Avg Commit Latency", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "OSD Latencies", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "fill": 1, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 45, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 0.5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Reads", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(ceph_osd_op_w_in_bytes[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Writes", + "refId": "A" + }, + { + "expr": "sum(irate(ceph_osd_op_r_out_bytes[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Reads", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster I/O", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": 
"short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "fill": 1, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 62, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(deriv(ceph_pool_stored[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "In-/Egress", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": " Egress (-) / Ingress (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cards": { + "cardPadding": null, + "cardRound": 1 + }, + "color": { + "cardColor": "rgb(0, 254, 255)", + "colorScale": "sqrt", + "colorScheme": "interpolateBlues", + "exponent": 0.5, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "timeseries", + "datasource": "${prometheusds}", + "gridPos": { + "h": 9, + "w": 6, + "x": 0, + "y": 15 + }, + "heatmap": {}, + "highlightCards": true, + "id": 55, + "legend": { + "show": true + }, + "links": [], + "span": 12, + "targets": [ + { + "expr": "ceph_osd_stat_bytes_used / ceph_osd_stat_bytes", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Util (%)", + "refId": "A", + "step": 60 + } + ], + "timeFrom": null, + "title": "OSD Capacity Utilization", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": "", + "yAxis": { + "decimals": 2, + "format": "percentunit", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": 1 + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateBlues", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "timeseries", + "datasource": "${prometheusds}", + "gridPos": { + "h": 9, + "w": 6, + "x": 6, + "y": 15 + }, + "heatmap": {}, + "highlightCards": true, + "id": 59, + "legend": { + "show": true + }, + "links": [], + "targets": [ + { + "expr": "ceph_osd_numpg", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "#PGs", + "refId": "A" + } + ], + "title": "PGs per OSD", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": "", + "yAxis": { + "decimals": null, + "format": "none", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketNumber": null, + "yBucketSize": null + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + 
"datasource": "${prometheusds}", + "fill": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 64, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(ceph_osd_recovery_ops[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Op/s", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Recovery Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": "Recovery Ops/s", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "refresh": "30s", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "ceph", + "cluster" + ], + "templating": { + "list": [ + { + "auto": true, + "auto_count": 10, + "auto_min": "1m", + "current": { + "text": "auto", + "value": "$__auto_interval_interval" + }, + "datasource": null, + "hide": 0, + "includeAll": false, + "label": "Interval", + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "auto", + "value": "$__auto_interval_interval" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "type": "interval" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Ceph - Cluster", + "version": 13 + } diff --git a/ceph-mon/files/grafana_dashboards/cephfs-overview.json b/ceph-mon/files/grafana_dashboards/cephfs-overview.json new file mode 100644 index 00000000..dd3a025f --- /dev/null +++ b/ceph-mon/files/grafana_dashboards/cephfs-overview.json @@ -0,0 +1,348 @@ +{ + "__inputs": [ ], + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.3.2" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + 
"description": "", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "MDS Performance", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*Reads/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(ceph_objecter_op_r{job=~\"$job\", ceph_daemon=~\"($mds_servers).*\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Read Ops", + "refId": "A" + }, + { + "expr": "sum(rate(ceph_objecter_op_w{job=~\"$job\", ceph_daemon=~\"($mds_servers).*\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Write Ops", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "MDS Workload - $mds_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "none", + "label": "Reads(-) / Writes (+)", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "ceph_mds_server_handle_client_request{job=~\"$job\", ceph_daemon=~\"($mds_servers).*\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ceph_daemon}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Client Request Load - $mds_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "none", + "label": "Client 
Requests", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "refresh": "30s", + "rows": [ ], + "schemaVersion": 16, + "style": "dark", + "tags": [ + "ceph-mixin" + ], + "templating": { + "list": [ + { + "allValue": ".+", + "current": { }, + "datasource": "${prometheusds}", + "hide": 2, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(ceph_osd_metadata, cluster)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { }, + "datasource": "${prometheusds}", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(ceph_osd_metadata{}, job)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "${prometheusds}", + "hide": 0, + "includeAll": true, + "label": "MDS Server", + "multi": false, + "name": "mds_servers", + "options": [ ], + "query": "label_values(ceph_mds_inodes{job=~\"$job\"}, ceph_daemon)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "MDS Performance", + "uid": "tbO9LAiZz", + "version": 0 +} diff --git a/ceph-mon/files/grafana_dashboards/host-details.json b/ceph-mon/files/grafana_dashboards/host-details.json new file mode 100644 index 00000000..5d6a3060 --- /dev/null +++ b/ceph-mon/files/grafana_dashboards/host-details.json @@ -0,0 +1,1314 @@ +{ + "__inputs": [ ], + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.3.2" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" + }, + { + "id": "singlestat", + "name": "Singlestat", + "type": "panel", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "$ceph_hosts System Overview", + "titleSize": "h6", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${prometheusds}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + 
"w": 3, + "x": 0, + "y": 1 + }, + "id": 3, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(sum by (ceph_daemon) (ceph_osd_metadata{job=~\"$job\", instance='$ceph_hosts'}))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "OSDs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": { + "interrupt": "#447EBC", + "steal": "#6D1F62", + "system": "#890F02", + "user": "#3F6833", + "wait": "#C15C17" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "Shows the CPU breakdown. When multiple servers are selected, only the first host's cpu data is shown", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 6, + "x": 3, + "y": 1 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (mode) (\n rate(node_cpu{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[$__rate_interval]) or\n rate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[$__rate_interval])\n) / (\n scalar(\n sum(rate(node_cpu{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]))\n ) * 100\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{mode}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percent", + "label": "% Utilization", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "Available": "#508642", + "Free": "#508642", + "Total": "#bf1b00", + "Used": "#bf1b00", + "total": "#bf1b00", + "used": "#0a50a1" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 6, + "x": 9, + "y": 1 + }, + "id": 5, + "legend": 
{ + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "total", + "color": "#bf1b00", + "fill": 0, + "linewidth": 2, + "stack": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_MemFree{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_MemFree_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Free", + "refId": "A" + }, + { + "expr": "node_memory_MemTotal{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_MemTotal_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "total", + "refId": "B" + }, + { + "expr": "(\n node_memory_Cached{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_Cached_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n) + (\n node_memory_Buffers{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_Buffers_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n) + (\n node_memory_Slab{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_Slab_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "buffers/cache", + "refId": "C" + }, + { + "expr": "(\n node_memory_MemTotal{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_MemTotal_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n) - (\n (\n node_memory_MemFree{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_MemFree_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n ) + (\n node_memory_Cached{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_Cached_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n ) + (\n node_memory_Buffers{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_Buffers_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n ) +\n (\n node_memory_Slab{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_Slab_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n )\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "used", + "refId": "D" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "RAM Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": "RAM used", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "Show the network load (rx,tx) across all interfaces (excluding loopback 'lo')", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 6, + "x": 15, + "y": 1 + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + 
"values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*tx/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (device) (\n rate(\n node_network_receive_bytes{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\",device!=\"lo\"}[$__rate_interval]\n )\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}.rx", + "refId": "A" + }, + { + "expr": "sum by (device) (\n rate(node_network_transmit_bytes{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\",device!=\"lo\"}[$__rate_interval])\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}.tx", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Network Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "Send (-) / Receive (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 3, + "x": 21, + "y": 1 + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*tx/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_receive_drop{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_network_receive_drop_total{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval])\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}.rx", + "refId": "A" + }, + { + "expr": "rate(node_network_transmit_drop{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_network_transmit_drop_total{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval])\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}.tx", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Network drop rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "pps", + "label": "Send (-) / Receive (+)", + "logBase": 1, + "max": null, + "min": null, + 
"show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${prometheusds}", + "description": "Each OSD consists of a Journal/WAL partition and a data partition. The RAW Capacity shown is the sum of the data partitions across all OSDs on the selected OSD hosts.", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 0, + "y": 6 + }, + "id": 8, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(\n ceph_osd_stat_bytes{job=~\"$job\"} and\n on (ceph_daemon) ceph_disk_occupation{job=~\"$job\", instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Raw Capacity", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 3, + "x": 21, + "y": 6 + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*tx/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_receive_errs{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_network_receive_errs_total{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval])\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}.rx", + "refId": "A" + }, + { + "expr": "rate(node_network_transmit_errs{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_network_transmit_errs_total{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval])\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}.tx", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Network error rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + 
"format": "pps", + "label": "Send (-) / Receive (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 10, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OSD Disk Performance Statistics", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "For any OSD devices on the host, this chart shows the iops per physical device. Each device is shown by it's name and corresponding OSD id value", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 11, + "x": 0, + "y": 12 + }, + "id": 11, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*reads/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(\n (\n rate(node_disk_writes_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}}) writes", + "refId": "A" + }, + { + "expr": "label_replace(\n (\n rate(node_disk_reads_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\"},\"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}}) reads", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$ceph_hosts Disk IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ops", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "For OSD hosts, this chart shows the disk bandwidth (read 
bytes/sec + write bytes/sec) of the physical OSD device. Each device is shown by device name, and corresponding OSD id", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 11, + "x": 12, + "y": 12 + }, + "id": 12, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*read/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(\n (\n rate(node_disk_bytes_written{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_written_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}}) write", + "refId": "A" + }, + { + "expr": "label_replace(\n (\n rate(node_disk_bytes_read{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_read_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}}) read", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$ceph_hosts Throughput by Disk", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "For OSD hosts, this chart shows the latency at the physical drive. 
Each drive is shown by device name, with its corresponding OSD id", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 11, + "x": 0, + "y": 21 + }, + "id": 13, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by(instance, device) (label_replace(\n (rate(node_disk_write_time_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])) /\n clamp_min(rate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]), 0.001) or\n (rate(node_disk_read_time_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])) /\n clamp_min(rate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]), 0.001),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}})", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$ceph_hosts Disk Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "Show disk utilization % (util) of any OSD devices on the host by the physical device name and associated OSD id.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 11, + "x": 12, + "y": 21 + }, + "id": 14, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(\n (\n (rate(node_disk_io_time_ms{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) / 10) or\n rate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) * 100\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{job=~\"$job\", instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"), 
\"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}})", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$ceph_hosts Disk utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percent", + "label": "%Util", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "columns": [ ], + "datasource": "${prometheusds}", + "description": "This table shows the 10 hosts with the highest number of slow ops", + "gridPos": { + "h": 8, + "w": 4, + "x": 0, + "y": 40 + }, + "id": 15, + "links": [ ], + "sort": { + "col": 2, + "desc": true + }, + "styles": [ + { + "alias": "Instance", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "instance", + "thresholds": [ ], + "type": "string", + "unit": "short", + "valueMaps": [ ] + }, + { + "alias": "Slow Ops", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value", + "thresholds": [ ], + "type": "number", + "unit": "none", + "valueMaps": [ ] + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "/.*/", + "thresholds": [ ], + "type": "hidden", + "unit": "short", + "valueMaps": [ ] + } + ], + "targets": [ + { + "expr": "topk(10,\n (sum by (instance)(ceph_daemon_health_metrics{type=\"SLOW_OPS\", ceph_daemon=~\"osd.*\"}))\n)\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top Slow Ops per Host", + "transform": "table", + "type": "table" + } + ], + "refresh": "30s", + "rows": [ ], + "schemaVersion": 16, + "style": "dark", + "tags": [ + "ceph-mixin", + "overview" + ], + "templating": { + "list": [ + { + "allValue": ".+", + "current": { }, + "datasource": "${prometheusds}", + "hide": 2, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(ceph_osd_metadata, cluster)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { }, + "datasource": "${prometheusds}", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(ceph_osd_metadata{}, job)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "${prometheusds}", + "hide": 0, + "includeAll": false, + "label": "Hostname", + "multi": false, + "name": "ceph_hosts", + "options": [ ], + "query": 
"label_values(ceph_disk_occupation, instance)", + "refresh": 1, + "regex": "(.*)", + "sort": 3, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Ceph OSD Host Details", + "uid": "rtOg0AiWz", + "version": 0 +} diff --git a/ceph-mon/files/grafana_dashboards/hosts-overview.json b/ceph-mon/files/grafana_dashboards/hosts-overview.json new file mode 100644 index 00000000..d9250e5b --- /dev/null +++ b/ceph-mon/files/grafana_dashboards/hosts-overview.json @@ -0,0 +1,880 @@ +{ + "__inputs": [ ], + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.3.2" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" + }, + { + "id": "singlestat", + "name": "Singlestat", + "type": "panel", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${prometheusds}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + "id": 2, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(sum by (instance) (ceph_osd_metadata{job=~\"$job\"}))", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "OSD Hosts", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${prometheusds}", + "description": "Average CPU busy across all hosts (OSD, RGW, MON etc) within the cluster", + "format": "percentunit", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "id": 3, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": 
"value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "avg(1 - (\n avg by(instance) (\n rate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval]) or\n rate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval])\n )\n))\n", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "AVG CPU Busy", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${prometheusds}", + "description": "Average Memory Usage across all hosts in the cluster (excludes buffer/cache usage)", + "format": "percentunit", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + "id": 4, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "avg ((\n (\n node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n ) - ((\n node_memory_MemFree{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_MemFree_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) +\n (\n node_memory_Cached{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_Cached_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n ) + (\n node_memory_Buffers{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_Buffers_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n ) + (\n node_memory_Slab{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_Slab_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n )\n )\n) / (\n node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$rgw_hosts|$mon_hosts|$mds_hosts).*\"}\n))\n", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "AVG RAM Utilization", + "type": 
"singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${prometheusds}", + "description": "IOPS Load at the device as reported by the OS on all OSD hosts", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 0 + }, + "id": 5, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum ((\n rate(node_disk_reads_completed{instance=~\"($osd_hosts).*\"}[$__rate_interval]) or\n rate(node_disk_reads_completed_total{instance=~\"($osd_hosts).*\"}[$__rate_interval])\n) + (\n rate(node_disk_writes_completed{instance=~\"($osd_hosts).*\"}[$__rate_interval]) or\n rate(node_disk_writes_completed_total{instance=~\"($osd_hosts).*\"}[$__rate_interval])\n))\n", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Physical IOPS", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${prometheusds}", + "description": "Average Disk utilization for all OSD data devices (i.e. 
excludes journal/WAL)", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 0 + }, + "id": 6, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "avg (\n label_replace(\n (rate(node_disk_io_time_ms[$__rate_interval]) / 10 ) or\n (rate(node_disk_io_time_seconds_total[$__rate_interval]) * 100),\n \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", instance=~\"($osd_hosts).*\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n )\n)\n", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "AVG Disk Utilization", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${prometheusds}", + "description": "Total send/receive network load across all hosts in the ceph cluster", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 0 + }, + "id": 7, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (\n (\n rate(node_network_receive_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n ) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n) +\nsum (\n (\n rate(node_network_transmit_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n ) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n)\n", + 
"format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Network Load", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "Show the top 10 busiest hosts by cpu", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 5 + }, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10,\n 100 * (\n 1 - (\n avg by(instance) (\n rate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval]) or\n rate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval])\n )\n )\n )\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Busy - Top 10 Hosts", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "Top 10 hosts by network load", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 5 + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, (sum by(instance) (\n(\n rate(node_network_receive_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n) +\n(\n rate(node_network_transmit_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\"))\n))\n", + "format": 
"time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Network Load - Top 10 Hosts", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "refresh": "30s", + "rows": [ ], + "schemaVersion": 16, + "style": "dark", + "tags": [ + "ceph-mixin" + ], + "templating": { + "list": [ + { + "allValue": ".+", + "current": { }, + "datasource": "${prometheusds}", + "hide": 2, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(ceph_osd_metadata, cluster)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { }, + "datasource": "${prometheusds}", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(ceph_osd_metadata{}, job)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "${prometheusds}", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "osd_hosts", + "options": [ ], + "query": "label_values(ceph_disk_occupation{job=~\"$job\"}, exported_instance)", + "refresh": 1, + "regex": "([^.]*).*", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "${prometheusds}", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "mon_hosts", + "options": [ ], + "query": "label_values(ceph_mon_metadata{job=~\"$job\"}, ceph_daemon)", + "refresh": 1, + "regex": "mon.(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "${prometheusds}", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "mds_hosts", + "options": [ ], + "query": "label_values(ceph_mds_inodes{job=~\"$job\"}, ceph_daemon)", + "refresh": 1, + "regex": "mds.(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "${prometheusds}", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "rgw_hosts", + "options": [ ], + "query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)", + "refresh": 1, + "regex": "rgw.(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Ceph OSD 
Host Overview", + "uid": "y0KGL0iZz", + "version": 0 +} diff --git a/ceph-mon/files/grafana_dashboards/osd-device-details.json b/ceph-mon/files/grafana_dashboards/osd-device-details.json new file mode 100644 index 00000000..2e817476 --- /dev/null +++ b/ceph-mon/files/grafana_dashboards/osd-device-details.json @@ -0,0 +1,857 @@ +{ + "__inputs": [ ], + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.3.2" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OSD Performance", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 6, + "x": 0, + "y": 1 + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "read", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_osd_op_r_latency_sum{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval])\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "read", + "refId": "A" + }, + { + "expr": "rate(ceph_osd_op_w_latency_sum{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval])\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "write", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$osd Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 6, + "x": 6, + "y": 1 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + 
"show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "Reads", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_osd_op_r{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Reads", + "refId": "A" + }, + { + "expr": "rate(ceph_osd_op_w{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Writes", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$osd R/W IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 6, + "x": 12, + "y": 1 + }, + "id": 5, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "Read Bytes", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_osd_op_r_out_bytes{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Read Bytes", + "refId": "A" + }, + { + "expr": "rate(ceph_osd_op_w_in_bytes{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Write Bytes", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$osd R/W Bytes", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 6, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Physical Device Performance", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + 
"description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 6, + "x": 0, + "y": 11 + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*Reads/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(\n label_replace(\n rate(node_disk_read_time_seconds_total{}[$__rate_interval]) /\n rate(node_disk_reads_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n ) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}}/{{device}} Reads", + "refId": "A" + }, + { + "expr": "(\n label_replace(\n rate(node_disk_write_time_seconds_total{}[$__rate_interval]) /\n rate(node_disk_writes_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device)\n label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n )\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}}/{{device}} Writes", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Physical Device Latency for $osd", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 6, + "x": 6, + "y": 11 + }, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*Reads/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(\n rate(node_disk_writes_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", 
\"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} on {{instance}} Writes", + "refId": "A" + }, + { + "expr": "label_replace(\n rate(node_disk_reads_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} on {{instance}} Reads", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Physical Device R/W IOPS for $osd", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 6, + "x": 12, + "y": 11 + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*Reads/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(\n rate(node_disk_read_bytes_total{}[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}} {{device}} Reads", + "refId": "A" + }, + { + "expr": "label_replace(\n rate(node_disk_written_bytes_total{}[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}} {{device}} Writes", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Physical Device R/W Bytes for $osd", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + 
"logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 6, + "x": 18, + "y": 11 + }, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(\n rate(node_disk_io_time_seconds_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} on {{instance}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Physical Device Util% for $osd", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "refresh": "30s", + "rows": [ ], + "schemaVersion": 16, + "style": "dark", + "tags": [ + "ceph-mixin" + ], + "templating": { + "list": [ + { + "allValue": ".+", + "current": { }, + "datasource": "${prometheusds}", + "hide": 2, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(ceph_osd_metadata, cluster)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { }, + "datasource": "${prometheusds}", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(ceph_osd_metadata{}, job)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "${prometheusds}", + "hide": 0, + "includeAll": false, + "label": "OSD", + "multi": false, + "name": "osd", + "options": [ ], + "query": "label_values(ceph_osd_metadata{job=~\"$job\"}, ceph_daemon)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "OSD device details", + "uid": 
"CrAHE0iZz", + "version": 0 +} diff --git a/ceph-mon/files/grafana_dashboards/osds-overview.json b/ceph-mon/files/grafana_dashboards/osds-overview.json new file mode 100644 index 00000000..50d0254f --- /dev/null +++ b/ceph-mon/files/grafana_dashboards/osds-overview.json @@ -0,0 +1,1028 @@ +{ + "__inputs": [ ], + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.0.0" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" + }, + { + "id": "table", + "name": "Table", + "type": "panel", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "aliasColors": { + "@95%ile": "#e0752d" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 0 + }, + "id": 2, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval]) * 1000\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "AVG read", + "refId": "A" + }, + { + "expr": "max(\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval]) * 1000\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "MAX read", + "refId": "B" + }, + { + "expr": "quantile(0.95,\n (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval])\n * 1000\n )\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "@95%ile", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "OSD Read Latencies", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + } + ] + }, + { + "columns": [ ], + "datasource": "${prometheusds}", + "description": "This table shows the osd's that are delivering the 10 highest read latencies within the cluster", + "gridPos": { + "h": 8, + "w": 4, + "x": 8, + "y": 0 + }, + "id": 3, + "links": [ ], + "sort": { + "col": 2, + "desc": true + }, + "styles": [ + { + "alias": "OSD ID", + "colorMode": null, + "colors": 
[ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "ceph_daemon", + "thresholds": [ ], + "type": "string", + "unit": "short", + "valueMaps": [ ] + }, + { + "alias": "Latency (ms)", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value", + "thresholds": [ ], + "type": "number", + "unit": "none", + "valueMaps": [ ] + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "/.*/", + "thresholds": [ ], + "type": "hidden", + "unit": "short", + "valueMaps": [ ] + } + ], + "targets": [ + { + "expr": "topk(10,\n (sort(\n (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000\n )\n ))\n)\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Highest READ Latencies", + "transform": "table", + "type": "table" + }, + { + "aliasColors": { + "@95%ile write": "#e0752d" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 12, + "y": 0 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval])\n * 1000\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "AVG write", + "refId": "A" + }, + { + "expr": "max(\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "MAX write", + "refId": "B" + }, + { + "expr": "quantile(0.95, (\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000\n))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "@95%ile write", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "OSD Write Latencies", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + 
"max": null, + "min": "0", + "show": true + } + ] + }, + { + "columns": [ ], + "datasource": "${prometheusds}", + "description": "This table shows the osd's that are delivering the 10 highest write latencies within the cluster", + "gridPos": { + "h": 8, + "w": 4, + "x": 20, + "y": 0 + }, + "id": 5, + "links": [ ], + "sort": { + "col": 2, + "desc": true + }, + "styles": [ + { + "alias": "OSD ID", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "ceph_daemon", + "thresholds": [ ], + "type": "string", + "unit": "short", + "valueMaps": [ ] + }, + { + "alias": "Latency (ms)", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value", + "thresholds": [ ], + "type": "number", + "unit": "none", + "valueMaps": [ ] + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "/.*/", + "thresholds": [ ], + "type": "hidden", + "unit": "short", + "valueMaps": [ ] + } + ], + "targets": [ + { + "expr": "topk(10,\n (sort(\n (rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000)\n ))\n)\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Highest WRITE Latencies", + "transform": "table", + "type": "table" + }, + { + "aliasColors": { }, + "datasource": "${prometheusds}", + "description": "", + "gridPos": { + "h": 8, + "w": 4, + "x": 0, + "y": 8 + }, + "id": 6, + "legend": { + "percentage": true, + "show": true, + "values": true + }, + "legendType": "Under graph", + "pieType": "pie", + "targets": [ + { + "expr": "count by (device_class) (ceph_osd_metadata{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device_class}}", + "refId": "A" + } + ], + "title": "OSD Types Summary", + "type": "piechart", + "valueName": "current" + }, + { + "aliasColors": { + "Non-Encrypted": "#E5AC0E" + }, + "datasource": "${prometheusds}", + "description": "", + "gridPos": { + "h": 8, + "w": 4, + "x": 4, + "y": 8 + }, + "id": 7, + "legend": { + "percentage": true, + "show": true, + "values": true + }, + "legendType": "Under graph", + "pieType": "pie", + "targets": [ + { + "expr": "count(ceph_bluefs_wal_total_bytes{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "bluestore", + "refId": "A" + }, + { + "expr": "absent(ceph_bluefs_wal_total_bytes{job=~\"$job\"}) * count(ceph_osd_metadata{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "filestore", + "refId": "B" + } + ], + "title": "OSD Objectstore Types", + "type": "piechart", + "valueName": "current" + }, + { + "aliasColors": { }, + "datasource": "${prometheusds}", + "description": "The pie chart shows the various OSD sizes used within the cluster", + "gridPos": { + "h": 8, + "w": 4, + "x": 8, + "y": 8 + }, + "id": 8, + "legend": { + "percentage": true, + "show": true, + "values": true + }, + "legendType": "Under graph", + "pieType": "pie", + "targets": 
[ + { + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} < 1099511627776)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<1TB", + "refId": "A" + }, + { + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 1099511627776 < 2199023255552)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<2TB", + "refId": "B" + }, + { + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 2199023255552 < 3298534883328)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<3TB", + "refId": "C" + }, + { + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 3298534883328 < 4398046511104)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<4TB", + "refId": "D" + }, + { + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 4398046511104 < 6597069766656)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<6TB", + "refId": "E" + }, + { + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 6597069766656 < 8796093022208)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<8TB", + "refId": "F" + }, + { + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 8796093022208 < 10995116277760)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<10TB", + "refId": "G" + }, + { + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 10995116277760 < 13194139533312)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<12TB", + "refId": "H" + }, + { + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 13194139533312)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "12TB+", + "refId": "I" + } + ], + "title": "OSD Size Summary", + "type": "piechart", + "valueName": "current" + }, + { + "aliasColors": { }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 12, + "y": 8 + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "ceph_osd_numpg{job=~\"$job\"}", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "PGs per OSD", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Distribution of PGs per OSD", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": 20, + "mode": "histogram", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": "# of OSDs", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${prometheusds}", + "description": "This gauge panel shows the onode hit ratio to help determine if increasing RAM per OSD could help improve the performance of the cluster", + "format":
"percentunit", + "gauge": { + "maxValue": 1, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 8, + "w": 4, + "x": 20, + "y": 8 + }, + "id": 10, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(ceph_bluestore_onode_hits{job=~\"$job\"}) / (\n sum(ceph_bluestore_onode_hits{job=~\"$job\"}) +\n sum(ceph_bluestore_onode_misses{job=~\"$job\"})\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": ".75", + "title": "OSD onode Hits Ratio", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 11, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "R/W Profile", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "Show the read/write workload profile overtime", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 12, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "round(sum(rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval])))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Reads", + "refId": "A" + }, + { + "expr": "round(sum(rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval])))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Writes", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Read/Write Profile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "columns": [ ], + "datasource": "${prometheusds}", + "description": "This table shows the 10 OSDs with the highest number of slow ops", + "gridPos": { + "h": 8, + "w": 4, + "x": 0, + "y": 20 + }, + "id": 13, + "links": [ ], + "sort": { + "col": 2, + "desc": true + }, + "styles": [ + { + 
"alias": "OSD ID", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "ceph_daemon", + "thresholds": [ ], + "type": "string", + "unit": "short", + "valueMaps": [ ] + }, + { + "alias": "Slow Ops", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value", + "thresholds": [ ], + "type": "number", + "unit": "none", + "valueMaps": [ ] + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "/.*/", + "thresholds": [ ], + "type": "hidden", + "unit": "short", + "valueMaps": [ ] + } + ], + "targets": [ + { + "expr": "topk(10,\n (ceph_daemon_health_metrics{type=\"SLOW_OPS\", ceph_daemon=~\"osd.*\"})\n)\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top Slow Ops", + "transform": "table", + "type": "table" + } + ], + "refresh": "30s", + "rows": [ ], + "schemaVersion": 16, + "style": "dark", + "tags": [ + "ceph-mixin" + ], + "templating": { + "list": [ + { + "allValue": ".+", + "current": { }, + "datasource": "${prometheusds}", + "hide": 2, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(ceph_osd_metadata, cluster)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { }, + "datasource": "${prometheusds}", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(ceph_osd_metadata{}, job)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "OSD Overview", + "uid": "lo02I1Aiz", + "version": 0 +} diff --git a/ceph-mon/files/grafana_dashboards/pool-detail.json b/ceph-mon/files/grafana_dashboards/pool-detail.json new file mode 100644 index 00000000..9e4cf744 --- /dev/null +++ b/ceph-mon/files/grafana_dashboards/pool-detail.json @@ -0,0 +1,694 @@ +{ + "__inputs": [ ], + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.3.2" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" + }, + { + "id": "singlestat", + "name": "Singlestat", + "type": "panel", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + 
"hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${prometheusds}", + "format": "percentunit", + "gauge": { + "maxValue": 1, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 0, + "y": 0 + }, + "id": 2, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "(ceph_pool_stored{job=~\"$job\"} / (ceph_pool_stored{job=~\"$job\"} + ceph_pool_max_avail{job=~\"$job\"})) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": ".7,.8", + "title": "Capacity used", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": 100, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${prometheusds}", + "description": "Time till pool is full assuming the average fill rate of the last 6 hours", + "format": "s", + "gauge": { + "maxValue": false, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 7, + "y": 0 + }, + "id": 3, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": "" + }, + "tableColumn": "", + "targets": [ + { + "expr": "(ceph_pool_max_avail{job=~\"$job\"} / deriv(ceph_pool_stored{job=~\"$job\"}[6h])) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"} > 0\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "current", + "title": "Time till full", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": false + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": 
false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "deriv(ceph_pool_objects{job=~\"$job\"}[$__rate_interval]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Objects per second", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$pool_name Object Ingress/Egress", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ops", + "label": "Objects out(-) / in(+) ", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 7 + }, + "id": 5, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "reads", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval]) *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "reads", + "refId": "A" + }, + { + "expr": "rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "writes", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$pool_name Client IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "iops", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 7 + }, + 
"id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "reads", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_pool_rd_bytes{job=~\"$job\"}[$__rate_interval]) +\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "reads", + "refId": "A" + }, + { + "expr": "rate(ceph_pool_wr_bytes{job=~\"$job\"}[$__rate_interval]) +\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "writes", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$pool_name Client Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 14 + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "ceph_pool_objects{job=~\"$job\"} *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Number of Objects", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$pool_name Objects", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": "Objects", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "refresh": "30s", + "rows": [ ], + "schemaVersion": 22, + "style": "dark", + "tags": [ + "ceph-mixin" + ], + "templating": { + "list": [ + { + "allValue": ".+", + "current": { }, + "datasource": "${prometheusds}", + "hide": 2, + "includeAll": true, + "label": "cluster", + 
"multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(ceph_osd_metadata, cluster)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { }, + "datasource": "${prometheusds}", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(ceph_osd_metadata{}, job)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "${prometheusds}", + "hide": 0, + "includeAll": false, + "label": "Pool Name", + "multi": false, + "name": "pool_name", + "options": [ ], + "query": "label_values(ceph_pool_metadata{job=~\"$job\"}, name)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Ceph Pool Details", + "uid": "-xyV8KCiz", + "version": 0 +} diff --git a/ceph-mon/files/grafana_dashboards/pool-overview.json b/ceph-mon/files/grafana_dashboards/pool-overview.json new file mode 100644 index 00000000..6316d4c5 --- /dev/null +++ b/ceph-mon/files/grafana_dashboards/pool-overview.json @@ -0,0 +1,1711 @@ +{ + "__inputs": [ ], + "__requires": [ ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${prometheusds}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 0, + "y": 0 + }, + "id": 2, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(ceph_pool_metadata{job=~\"$job\"})", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Pools", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": 
false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${prometheusds}", + "description": "Count of the pools that have compression enabled", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 3, + "y": 0 + }, + "id": 3, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(ceph_pool_metadata{job=~\"$job\", compression_mode!=\"none\"})", + "format": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Pools with Compression", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${prometheusds}", + "description": "Total raw capacity available to the cluster", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 6, + "y": 0 + }, + "id": 4, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(ceph_osd_stat_bytes{job=~\"$job\"})", + "format": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Total Raw Capacity", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${prometheusds}", + "description": "Total raw capacity consumed by user data and associated overheads (metadata + redundancy)", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 9, + "y": 0 + }, + "id": 5, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": 
"connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(ceph_pool_bytes_used{job=~\"$job\"})", + "format": "", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Raw Capacity Consumed", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${prometheusds}", + "description": "Total of client data stored in the cluster", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 12, + "y": 0 + }, + "id": 6, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(ceph_pool_stored{job=~\"$job\"})", + "format": "", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Logical Stored ", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${prometheusds}", + "description": "A compression saving is determined as the data eligible to be compressed minus the capacity used to store the data after compression", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 15, + "y": 0 + }, + "id": 7, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(\n ceph_pool_compress_under_bytes{job=~\"$job\"} -\n ceph_pool_compress_bytes_used{job=~\"$job\"}\n)\n", + "format": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", 
+ "title": "Compression Savings", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${prometheusds}", + "description": "Indicates how suitable the data is within the pools that are/have been enabled for compression - averaged across all pools holding compressed data", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 18, + "y": 0 + }, + "id": 8, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "(\n sum(ceph_pool_compress_under_bytes{job=~\"$job\"} > 0) /\n sum(ceph_pool_stored_raw{job=~\"$job\"} and ceph_pool_compress_under_bytes{job=~\"$job\"} > 0)\n) * 100\n", + "format": "table", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Compression Eligibility", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${prometheusds}", + "description": "This factor describes the average ratio of data eligible to be compressed divided by the data actually stored. 
It does not account for data written that was ineligible for compression (too small, or compression yield too low)", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 21, + "y": 0 + }, + "id": 9, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(\n ceph_pool_compress_under_bytes{job=~\"$job\"} > 0)\n / sum(ceph_pool_compress_bytes_used{job=~\"$job\"} > 0\n)\n", + "format": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Compression Factor", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "columns": [ ], + "datasource": "${prometheusds}", + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "instance" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "job" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "name" + }, + "properties": [ + { + "id": "displayName", + "value": "Pool Name" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "pool_id" + }, + "properties": [ + { + "id": "displayName", + "value": "Pool ID" + }, + { + "id": "unit", + "value": "none" + }, + { + "id": "decimals", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #A" + }, + "properties": [ + { + "id": "displayName", + "value": "Compression Factor" + }, + { + "id": "unit", + "value": "none" + }, + { + "id": "decimals", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #D" + }, + "properties": [ + { + "id": "displayName", + "value": "% Used" + }, + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-text" + } + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(245, 54, 54, 0.9)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 70 + }, + { + "color": "rgba(50, 172, 45, 0.97)", + "value": 85 + } + ] + } + } + ] + }, + { + 
"matcher": { + "id": "byName", + "options": "Value #B" + }, + "properties": [ + { + "id": "displayName", + "value": "Usable Free" + }, + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #C" + }, + "properties": [ + { + "id": "displayName", + "value": "Compression Eligibility" + }, + { + "id": "unit", + "value": "percent" + }, + { + "id": "decimals", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #E" + }, + "properties": [ + { + "id": "displayName", + "value": "Compression Savings" + }, + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #F" + }, + "properties": [ + { + "id": "displayName", + "value": "Growth (5d)" + }, + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-text" + } + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(245, 54, 54, 0.9)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 70 + }, + { + "color": "rgba(50, 172, 45, 0.97)", + "value": 85 + } + ] + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #G" + }, + "properties": [ + { + "id": "displayName", + "value": "IOPS" + }, + { + "id": "unit", + "value": "none" + }, + { + "id": "decimals", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #H" + }, + "properties": [ + { + "id": "displayName", + "value": "Bandwidth" + }, + { + "id": "unit", + "value": "Bps" + }, + { + "id": "decimals", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "__name__" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "type" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "compression_mode" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "description" + }, + "properties": [ + { + "id": "displayName", + "value": "Type" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #J" + }, + "properties": [ + { + "id": "displayName", + "value": "Stored" + }, + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #I" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #K" + }, + "properties": [ + { + "id": "displayName", + "value": "Compression" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 10, + "links": [ ], + "options": { + "footer": { + "countRows": false, + "enablePagination": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "frameIndex": 1, + "showHeader": true + }, + "pluginVersion": "10.4.0", + "styles": "", + "targets": [ + { + "expr": "(\n 
ceph_pool_compress_under_bytes{job=~\"$job\"} /\n ceph_pool_compress_bytes_used{job=~\"$job\"} > 0\n) and on(pool_id) (\n (\n (ceph_pool_compress_under_bytes{job=~\"$job\"} > 0) /\n ceph_pool_stored_raw{job=~\"$job\"}\n ) * 100 > 0.5\n)\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "A", + "refId": "A" + }, + { + "expr": "ceph_pool_max_avail{job=~\"$job\"} *\n on(pool_id) group_left(name) ceph_pool_metadata{job=~\"$job\"}\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "B", + "refId": "B" + }, + { + "expr": "(\n (ceph_pool_compress_under_bytes{job=~\"$job\"} > 0) /\n ceph_pool_stored_raw{job=~\"$job\"}\n) * 100\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "C", + "refId": "C" + }, + { + "expr": "ceph_pool_percent_used{job=~\"$job\"} *\n on(pool_id) group_left(name) ceph_pool_metadata{job=~\"$job\"}\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "D", + "refId": "D" + }, + { + "expr": "ceph_pool_compress_under_bytes{job=~\"$job\"} -\n ceph_pool_compress_bytes_used{job=~\"$job\"} > 0\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "E", + "refId": "E" + }, + { + "expr": "delta(ceph_pool_stored{job=~\"$job\"}[5d])", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "F", + "refId": "F" + }, + { + "expr": "rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval])\n + rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval])\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "G", + "refId": "G" + }, + { + "expr": "rate(ceph_pool_rd_bytes{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_pool_wr_bytes{job=~\"$job\"}[$__rate_interval])\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "H", + "refId": "H" + }, + { + "expr": "ceph_pool_metadata{job=~\"$job\"}", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "I", + "refId": "I" + }, + { + "expr": "ceph_pool_stored{job=~\"$job\"} * on(pool_id) group_left ceph_pool_metadata{job=~\"$job\"}", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "J", + "refId": "J" + }, + { + "expr": "ceph_pool_metadata{job=~\"$job\", compression_mode!=\"none\"}", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "K", + "refId": "K" + }, + { + "expr": "", + "format": "", + "intervalFactor": "", + "legendFormat": "L", + "refId": "L" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Pool Overview", + "transformations": [ + { + "id": "merge", + "options": { } + }, + { + "id": "seriesToRows", + "options": { } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value #A": true, + "Value #B": false, + "Value #C": true, + "Value #D": false, + "Value #E": true, + "Value #I": true, + "Value #K": true, + "__name__": true, + "cluster": true, + "compression_mode": true, + "instance": true, + "job": true, + "pool_id": true, + "type": true + }, + "includeByName": { }, + "indexByName": { }, + "renameByName": { } + } + } + ], + "type": "table" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "This chart shows the sum of read and write IOPS from all clients by pool", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 8, + "showPoints": "never" + }, + "unit": "short" + } + }, + 
"fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 11, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk($topk,\n round(\n (\n rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval])\n ), 1\n ) * on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\"})\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{name}} ", + "refId": "A" + }, + { + "expr": "topk($topk,\n rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval]) +\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\"}\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{name}} - write", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $topk Client IOPS by Pool", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": "IOPS", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "The chart shows the sum of read and write bytes from all clients, by pool", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 8, + "showPoints": "never" + }, + "unit": "Bps" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 12, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk($topk,\n (\n rate(ceph_pool_rd_bytes{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_pool_wr_bytes{job=~\"$job\"}[$__rate_interval])\n ) * on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\"}\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $topk Client Bandwidth by Pool", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": "Throughput", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + 
"min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "Historical view of capacity usage, to help identify growth and trends in pool consumption", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 8, + "showPoints": "never" + }, + "unit": "bytes" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 13, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "ceph_pool_bytes_used{job=~\"$job\"} * on(pool_id) group_right ceph_pool_metadata{job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Pool Capacity Usage (RAW)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Capacity Used", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "refresh": "30s", + "rows": [ ], + "schemaVersion": 22, + "style": "dark", + "tags": [ + "ceph-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { }, + "datasource": "${prometheusds}", + "hide": 2, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(ceph_osd_metadata, cluster)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { }, + "datasource": "${prometheusds}", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(ceph_osd_metadata{}, job)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "15", + "value": "15" + }, + "hide": 0, + "includeAll": false, + "label": "TopK", + "multi": false, + "name": "topk", + "options": [ + { + "text": "15", + "value": "15" + } + ], + "query": "15", + "refresh": 0, + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Ceph Pools Overview", + "uid": 
"z99hzWtmk", + "version": 0 +} diff --git a/ceph-mon/files/grafana_dashboards/radosgw-detail.json b/ceph-mon/files/grafana_dashboards/radosgw-detail.json new file mode 100644 index 00000000..58d17389 --- /dev/null +++ b/ceph-mon/files/grafana_dashboards/radosgw-detail.json @@ -0,0 +1,522 @@ +{ + "__inputs": [ ], + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.0.0" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RGW Host Detail : $rgw_servers", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 1 + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (instance_id) (\n rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GET {{ceph_daemon}}", + "refId": "A" + }, + { + "expr": "sum by (instance_id) (\n rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUT {{ceph_daemon}}", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$rgw_servers GET/PUT Latencies", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 7, + "x": 6, + "y": 1 + 
}, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_rgw_get_b{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GETs {{ceph_daemon}}", + "refId": "A" + }, + { + "expr": "rate(ceph_rgw_put_b{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUTs {{ceph_daemon}}", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Bandwidth by HTTP Operation", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + "GETs": "#7eb26d", + "Other": "#447ebc", + "PUTs": "#eab839", + "Requests": "#3f2b5b", + "Requests Failed": "#bf1b00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 7, + "x": 13, + "y": 1 + }, + "id": 5, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_rgw_failed_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\",ceph_daemon=~\"$rgw_servers\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Requests Failed {{ceph_daemon}}", + "refId": "A" + }, + { + "expr": "rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GETs {{ceph_daemon}}", + "refId": "B" + }, + { + "expr": "rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUTs {{ceph_daemon}}", + "refId": "C" + }, + { + "expr": "(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) -\n (\n rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) +\n 
rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Other {{ceph_daemon}}", + "refId": "D" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "HTTP Request Breakdown", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + "Failures": "#bf1b00", + "GETs": "#7eb26d", + "Other (HEAD,POST,DELETE)": "#447ebc", + "PUTs": "#eab839", + "Requests": "#3f2b5b" + }, + "datasource": "${prometheusds}", + "description": "", + "gridPos": { + "h": 8, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 6, + "legend": { + "percentage": true, + "show": true, + "values": true + }, + "legendType": "Under graph", + "pieType": "pie", + "targets": [ + { + "expr": "rate(ceph_rgw_failed_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Failures {{ceph_daemon}}", + "refId": "A" + }, + { + "expr": "rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GETs {{ceph_daemon}}", + "refId": "B" + }, + { + "expr": "rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUTs {{ceph_daemon}}", + "refId": "C" + }, + { + "expr": "(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) -\n (\n rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Other (DELETE,LIST) {{ceph_daemon}}", + "refId": "D" + } + ], + "title": "Workload Breakdown", + "type": "piechart", + "valueName": "current" + } + ], + "refresh": "30s", + "rows": [ ], + "schemaVersion": 16, + "style": "dark", + "tags": [ + "ceph-mixin", + "overview" + ], + "templating": { + "list": [ + { + "allValue": ".+", + "current": { }, + "datasource": "${prometheusds}", + "hide": 2, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(ceph_osd_metadata, cluster)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { }, + "datasource": "${prometheusds}", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(ceph_osd_metadata{}, job)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + 
"type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "${prometheusds}", + "hide": 0, + "includeAll": true, + "label": "", + "multi": false, + "name": "rgw_servers", + "options": [ ], + "query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "RGW Instance Detail", + "uid": "x5ARzZtmk", + "version": 0 +} diff --git a/ceph-mon/files/grafana_dashboards/radosgw-overview.json b/ceph-mon/files/grafana_dashboards/radosgw-overview.json new file mode 100644 index 00000000..12c56b6e --- /dev/null +++ b/ceph-mon/files/grafana_dashboards/radosgw-overview.json @@ -0,0 +1,695 @@ +{ + "__inputs": [ ], + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.0.0" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RGW Overview - All Gateways", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 1 + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(\n rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GET {{rgw_host}}", + "refId": "A" + }, + { + "expr": "label_replace(\n rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", + "format": "time_series", + 
"intervalFactor": 1, + "legendFormat": "PUT {{rgw_host}}", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Average GET/PUT Latencies by RGW Instance", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 7, + "x": 8, + "y": 1 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (rgw_host) (\n label_replace(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n )\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{rgw_host}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Total Requests/sec by RGW Instance", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "Latencies are shown stacked, without a yaxis to provide a visual indication of GET latency imbalance across RGW hosts", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 15, + "y": 1 + }, + "id": 5, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(\n rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{rgw_host}}", + "refId": "A" + } + ], 
+ "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "GET Latencies by RGW Instance", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "Total bytes transferred in/out of all radosgw instances within the cluster", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 8 + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(ceph_rgw_get_b{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GETs", + "refId": "A" + }, + { + "expr": "sum(rate(ceph_rgw_put_b{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUTs", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Bandwidth Consumed by Type", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "Total bytes transferred in/out through get/put operations, by radosgw instance", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 7, + "x": 8, + "y": 8 + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(sum by (instance_id) (\n rate(ceph_rgw_get_b{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_put_b{job=~\"$job\"}[$__rate_interval])) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{rgw_host}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Bandwidth 
by RGW Instance", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "Latencies are shown stacked, without a yaxis to provide a visual indication of PUT latency imbalance across RGW hosts", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 15, + "y": 8 + }, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(\n rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{rgw_host}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "PUT Latencies by RGW Instance", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "refresh": "30s", + "rows": [ ], + "schemaVersion": 16, + "style": "dark", + "tags": [ + "ceph-mixin", + "overview" + ], + "templating": { + "list": [ + { + "allValue": ".+", + "current": { }, + "datasource": "${prometheusds}", + "hide": 2, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(ceph_osd_metadata, cluster)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { }, + "datasource": "${prometheusds}", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(ceph_osd_metadata{}, job)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "${prometheusds}", + "hide": 0, + "includeAll": true, + "label": "", + "multi": false, + "name": "rgw_servers", + "options": [ ], + "query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)", + "refresh": 1, + "regex": "RGW Server", + "sort": 1, + "tagValuesQuery": "", + "tags": [ 
], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "RGW Overview", + "uid": "WAkugZpiz", + "version": 0 +} diff --git a/ceph-mon/files/grafana_dashboards/radosgw-sync-overview.json b/ceph-mon/files/grafana_dashboards/radosgw-sync-overview.json new file mode 100644 index 00000000..49db9e00 --- /dev/null +++ b/ceph-mon/files/grafana_dashboards/radosgw-sync-overview.json @@ -0,0 +1,490 @@ +{ + "__inputs": [ ], + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.0.0" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 0 + }, + "id": 2, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_sum{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{source_zone}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Replication (throughput) from Source Zone", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 0 + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 
10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_count{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{source_zone}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Replication (objects) from Source Zone", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": "Objects/s", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_poll_latency_sum{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{source_zone}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Polling Request Latency from Source Zone", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 7 + }, + "id": 5, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_errors{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{source_zone}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Unsuccessful Object Replications from Source Zone", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": 
"graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": "Count/s", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "refresh": "30s", + "rows": [ ], + "schemaVersion": 16, + "style": "dark", + "tags": [ + "ceph-mixin", + "overview" + ], + "templating": { + "list": [ + { + "allValue": ".+", + "current": { }, + "datasource": "${prometheusds}", + "hide": 2, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(ceph_osd_metadata, cluster)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { }, + "datasource": "${prometheusds}", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(ceph_osd_metadata{}, job)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "${prometheusds}", + "hide": 0, + "includeAll": true, + "label": "", + "multi": false, + "name": "rgw_servers", + "options": [ ], + "query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)", + "refresh": 1, + "regex": "RGW Server", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "RGW Sync Overview", + "uid": "rgw-sync-overview", + "version": 0 +} diff --git a/ceph-mon/files/grafana_dashboards/rbd-details.json b/ceph-mon/files/grafana_dashboards/rbd-details.json new file mode 100644 index 00000000..8efc8f47 --- /dev/null +++ b/ceph-mon/files/grafana_dashboards/rbd-details.json @@ -0,0 +1,444 @@ +{ + "__inputs": [ ], + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.3.3" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "Detailed Performance of RBD Images (IOPS/Throughput/Latency)", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "RBD per-image IO statistics are disabled by default.\n\nPlease refer to https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics for information about how to enable those optionally.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 0 + }, + "id": 2, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, 
+ "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_rbd_write_ops{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pool}} Write", + "refId": "A" + }, + { + "expr": "rate(ceph_rbd_read_ops{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pool}} Read", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "iops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "iops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "RBD per-image IO statistics are disabled by default.\n\nPlease refer to https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics for information about how to enable those optionally.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 8, + "x": 8, + "y": 0 + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_rbd_write_bytes{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pool}} Write", + "refId": "A" + }, + { + "expr": "rate(ceph_rbd_read_bytes{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pool}} Read", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "RBD per-image IO statistics are disabled by default.\n\nPlease refer to https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics for information about how to enable those 
optionally.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_rbd_write_latency_sum{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval]) /\n rate(ceph_rbd_write_latency_count{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pool}} Write", + "refId": "A" + }, + { + "expr": "rate(ceph_rbd_read_latency_sum{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval]) /\n rate(ceph_rbd_read_latency_count{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pool}} Read", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Average Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ns", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ns", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "refresh": "30s", + "rows": [ ], + "schemaVersion": 16, + "style": "dark", + "tags": [ + "ceph-mixin" + ], + "templating": { + "list": [ + { + "allValue": ".+", + "current": { }, + "datasource": "${prometheusds}", + "hide": 2, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(ceph_osd_metadata, cluster)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { }, + "datasource": "${prometheusds}", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(ceph_osd_metadata{}, job)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "${prometheusds}", + "hide": 0, + "includeAll": false, + "label": "", + "multi": false, + "name": "pool", + "options": [ ], + "query": "label_values(pool)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "${prometheusds}", + "hide": 0, + "includeAll": false, + "label": "", + "multi": false, + "name": "image", + "options": [ ], + "query": "label_values(image)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + 
"10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "RBD Details", + "uid": "YhCYGcuZz", + "version": 0 +} diff --git a/ceph-mon/files/grafana_dashboards/rbd-overview.json b/ceph-mon/files/grafana_dashboards/rbd-overview.json new file mode 100644 index 00000000..d46297fa --- /dev/null +++ b/ceph-mon/files/grafana_dashboards/rbd-overview.json @@ -0,0 +1,723 @@ +{ + "__inputs": [ ], + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.4.2" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" + }, + { + "id": "prometheus", + "name": "Prometheus", + "type": "datasource", + "version": "5.0.0" + }, + { + "id": "table", + "name": "Table", + "type": "panel", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "RBD per-image IO statistics are disabled by default.\n\nPlease refer to https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics for information about how to enable those optionally.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 0 + }, + "id": 2, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "round(sum(rate(ceph_rbd_write_ops{job=~\"$job\"}[$__rate_interval])))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Writes", + "refId": "A" + }, + { + "expr": "round(sum(rate(ceph_rbd_read_ops{job=~\"$job\"}[$__rate_interval])))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Reads", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "RBD per-image IO statistics are disabled by default.\n\nPlease refer to https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics for information about how to enable those optionally.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 
8, + "y": 0 + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "round(sum(rate(ceph_rbd_write_bytes{job=~\"$job\"}[$__rate_interval])))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Write", + "refId": "A" + }, + { + "expr": "round(sum(rate(ceph_rbd_read_bytes{job=~\"$job\"}[$__rate_interval])))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Read", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${prometheusds}", + "description": "RBD per-image IO statistics are disabled by default.\n\nPlease refer to https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics for information about how to enable those optionally.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "round(\n sum(rate(ceph_rbd_write_latency_sum{job=~\"$job\"}[$__rate_interval])) /\n sum(rate(ceph_rbd_write_latency_count{job=~\"$job\"}[$__rate_interval]))\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Write", + "refId": "A" + }, + { + "expr": "round(\n sum(rate(ceph_rbd_read_latency_sum{job=~\"$job\"}[$__rate_interval])) /\n sum(rate(ceph_rbd_read_latency_count{job=~\"$job\"}[$__rate_interval]))\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Read", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Average Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ns", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "columns": [ ], + "datasource": "${prometheusds}", + "description": "RBD per-image IO statistics are disabled by 
default.\n\nPlease refer to https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics for information about how to enable those optionally.", + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 7 + }, + "id": 5, + "links": [ ], + "sort": { + "col": 3, + "desc": true + }, + "styles": [ + { + "alias": "Pool", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "pool", + "thresholds": [ ], + "type": "string", + "unit": "short", + "valueMaps": [ ] + }, + { + "alias": "Image", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "image", + "thresholds": [ ], + "type": "string", + "unit": "short", + "valueMaps": [ ] + }, + { + "alias": "IOPS", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value", + "thresholds": [ ], + "type": "number", + "unit": "iops", + "valueMaps": [ ] + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "/.*/", + "thresholds": [ ], + "type": "hidden", + "unit": "short", + "valueMaps": [ ] + } + ], + "targets": [ + { + "expr": "topk(10,\n (\n sort((\n rate(ceph_rbd_write_ops{job=~\"$job\"}[$__rate_interval]) +\n on (image, pool, namespace) rate(ceph_rbd_read_ops{job=~\"$job\"}[$__rate_interval])\n ))\n )\n)\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Highest IOPS", + "transform": "table", + "type": "table" + }, + { + "columns": [ ], + "datasource": "${prometheusds}", + "description": "RBD per-image IO statistics are disabled by default.\n\nPlease refer to https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics for information about how to enable those optionally.", + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 7 + }, + "id": 6, + "links": [ ], + "sort": { + "col": 3, + "desc": true + }, + "styles": [ + { + "alias": "Pool", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "pool", + "thresholds": [ ], + "type": "string", + "unit": "short", + "valueMaps": [ ] + }, + { + "alias": "Image", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "image", + "thresholds": [ ], + "type": "string", + "unit": "short", + "valueMaps": [ ] + }, + { + "alias": "Throughput", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value", + "thresholds": [ ], + "type": "number", + "unit": "Bps", + "valueMaps": [ ] + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + 
"rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "/.*/", + "thresholds": [ ], + "type": "hidden", + "unit": "short", + "valueMaps": [ ] + } + ], + "targets": [ + { + "expr": "topk(10,\n sort(\n sum(\n rate(ceph_rbd_read_bytes{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rbd_write_bytes{job=~\"$job\"}[$__rate_interval])\n ) by (pool, image, namespace)\n )\n)\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Highest Throughput", + "transform": "table", + "type": "table" + }, + { + "columns": [ ], + "datasource": "${prometheusds}", + "description": "RBD per-image IO statistics are disabled by default.\n\nPlease refer to https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics for information about how to enable those optionally.", + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 7 + }, + "id": 7, + "links": [ ], + "sort": { + "col": 3, + "desc": true + }, + "styles": [ + { + "alias": "Pool", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "pool", + "thresholds": [ ], + "type": "string", + "unit": "short", + "valueMaps": [ ] + }, + { + "alias": "Image", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "image", + "thresholds": [ ], + "type": "string", + "unit": "short", + "valueMaps": [ ] + }, + { + "alias": "Latency", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value", + "thresholds": [ ], + "type": "number", + "unit": "ns", + "valueMaps": [ ] + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "/.*/", + "thresholds": [ ], + "type": "hidden", + "unit": "short", + "valueMaps": [ ] + } + ], + "targets": [ + { + "expr": "topk(10,\n sum(\n rate(ceph_rbd_write_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n clamp_min(rate(ceph_rbd_write_latency_count{job=~\"$job\"}[$__rate_interval]), 1) +\n rate(ceph_rbd_read_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n clamp_min(rate(ceph_rbd_read_latency_count{job=~\"$job\"}[$__rate_interval]), 1)\n ) by (pool, image, namespace)\n)\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Highest Latency", + "transform": "table", + "type": "table" + } + ], + "refresh": "30s", + "rows": [ ], + "schemaVersion": 16, + "style": "dark", + "tags": [ + "ceph-mixin", + "overview" + ], + "templating": { + "list": [ + { + "allValue": ".+", + "current": { }, + "datasource": "${prometheusds}", + "hide": 2, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(ceph_osd_metadata, cluster)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + 
"tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { }, + "datasource": "${prometheusds}", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(ceph_osd_metadata{}, job)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "RBD Overview", + "uid": "41FrpeUiz", + "version": 0 +} diff --git a/ceph-mon/files/nagios/check_ceph_osd_count.py b/ceph-mon/files/nagios/check_ceph_osd_count.py new file mode 100755 index 00000000..0703bfd7 --- /dev/null +++ b/ceph-mon/files/nagios/check_ceph_osd_count.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2021 Canonical +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import os +import sys +import time + + +EXIT_OK = 0 +EXIT_WARN = 1 +EXIT_CRIT = 2 +EXIT_UNKNOWN = 3 +EXIT_CODE_TEXT = ["OK", "WARN", "CRITICAL", "UNKNOWN"] + +CURRENT_OSD_COUNT_FILE = "/var/lib/nagios/current-ceph-osd-count.json" + + +class CriticalError(Exception): + """This indicates a critical error.""" + + +def check_file_freshness(filename, newer_than=3600): + """Check a file exists, is readable and is newer than seconds. + + :param filename: The filename to check + :type filename: str + :param newer_than: The file should be newer than n seconds, default 3600 + :type: newer_than: int + :raises CriticalError: If file is not readable or older then seconds + """ + # First check the file exists and is readable + if not os.path.exists(filename): + raise CriticalError("%s: does not exist." % (filename)) + if os.access(filename, os.R_OK) == 0: + raise CriticalError("%s: is not readable." % (filename)) + + # Then ensure the file is up-to-date enough + mtime = os.stat(filename).st_mtime + last_modified = time.time() - mtime + if last_modified > newer_than: + raise CriticalError("%s: was last modified on %s and is too old " + "(> %s seconds)." + % (filename, time.ctime(mtime), newer_than)) + if last_modified < 0: + raise CriticalError("%s: was last modified on %s which is in the " + "future." 
+
+
+def check_ceph_osd_count(host_osd_count_report):
+    """Compare the expected host/OSD map against the current OSD tree.
+
+    :param host_osd_count_report: Path to a JSON file mapping each hostname
+                                  to the list of OSD ids expected on it
+    :type host_osd_count_report: str
+    :return: A Nagios exit code and a list of error messages
+    :rtype: Tuple[int, List[str]]
+    """
+    with open(host_osd_count_report, "r") as f:
+        expected_osd_map = json.load(f)
+
+    current_osd_map = get_osd_tree()
+
+    exit_code = EXIT_OK
+    err_msgs = []
+    for host, osd_list in expected_osd_map.items():
+        if host not in current_osd_map:
+            err_msgs.append("Missing host {}".format(host))
+            # A host absent from the OSD tree is itself critical
+            exit_code = EXIT_CRIT
+            current_osd_map[host] = {}
+
+        if len(osd_list) <= len(current_osd_map[host]):
+            continue
+
+        missing_osds = list(set(osd_list) - set(current_osd_map[host]))
+        if missing_osds:
+            osd_ids = [str(osd) for osd in missing_osds]
+            err_msgs.append("Missing osds on "
+                            "{}: {}".format(host,
+                                            ", ".join(osd_ids)))
+            exit_code = EXIT_CRIT
+
+    return (exit_code, err_msgs)
+
+
+def get_osd_tree():
+    """Read CURRENT_OSD_COUNT_FILE to get the host osd map.
+
+    :return: The map of node and osd ids.
+    :rtype: Dict[str, List[int]]
+    """
+    check_file_freshness(CURRENT_OSD_COUNT_FILE)
+    with open(CURRENT_OSD_COUNT_FILE, "r") as f:
+        current_osd_counts = json.load(f)
+
+    host_osd_map = {}
+    for node in current_osd_counts["nodes"]:
+        if node["type"] != "host":
+            continue
+
+        host_osd_map[node["name"]] = node["children"]
+
+    return host_osd_map
+
+
+if __name__ == "__main__":
+    host_osd_report = sys.argv[1]
+    if not os.path.isfile(host_osd_report):
+        print("UNKNOWN: report file missing: {}".format(host_osd_report))
+        sys.exit(EXIT_UNKNOWN)
+
+    (exit_code, err_msgs) = check_ceph_osd_count(host_osd_report)
+    print("{} {}".format(EXIT_CODE_TEXT[exit_code],
+                         ", ".join(err_msgs)))
+    sys.exit(exit_code)
diff --git a/ceph-mon/files/nagios/check_ceph_status.py b/ceph-mon/files/nagios/check_ceph_status.py
new file mode 100755
index 00000000..11e32595
--- /dev/null
+++ b/ceph-mon/files/nagios/check_ceph_status.py
@@ -0,0 +1,321 @@
+#!/usr/bin/env python3
+
+# Copyright (C) 2005, 2006, 2007, 2012 James Troup
+# Copyright (C) 2014, 2017 Canonical
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors: Jacek Nykis
+#          Xav Paice
+#          James Troup
+
+import re
+import argparse
+import json
+import os
+import subprocess
+import sys
+import time
+import traceback
+
+
+class CriticalError(Exception):
+    """This indicates a critical error."""
+    pass
+
+
+class WarnError(Exception):
+    """This indicates a warning condition."""
+    pass
+
+
+class UnknownError(Exception):
+    """This indicates an unknown error was encountered."""
+    pass
+
+
+def check_file_freshness(filename, newer_than=3600):
+    """
+    Check that a file exists, is readable and is newer than ``newer_than``
+    seconds (where ``newer_than`` defaults to 3600).
+    """
+    # First check the file exists and is readable
+    if not os.path.exists(filename):
+        raise CriticalError("%s: does not exist." % (filename))
+    if os.access(filename, os.R_OK) == 0:
+        raise CriticalError("%s: is not readable." % (filename))
+
+    # Then ensure the file is up-to-date enough
+    mtime = os.stat(filename).st_mtime
+    last_modified = time.time() - mtime
+    if last_modified > newer_than:
+        raise CriticalError("%s: was last modified on %s and is too old "
+                            "(> %s seconds)."
+                            % (filename, time.ctime(mtime), newer_than))
+    if last_modified < 0:
+        raise CriticalError("%s: was last modified on %s which is in the "
+                            "future."
+                            % (filename, time.ctime(mtime)))
+
+
+def get_ceph_version():
+    """
+    Uses the CLI to get the Ceph version, because the status output changed
+    from Luminous onwards (12.2.0 or higher).
+
+    :returns: list of integers, just the actual version number
+    :raises: UnknownError
+    """
+    try:
+        out_string = subprocess.check_output(['ceph',
+                                              '--version']).decode('UTF-8')
+    except subprocess.CalledProcessError as e:
+        raise UnknownError(
+            "UNKNOWN: could not determine Ceph version, error: {}".format(e))
+    out_version = [int(x) for x in out_string.split(" ")[2].split(".")]
+    return out_version
+
+
+def get_status_and_messages(status_data):
+    """
+    Used to get the general status of a Ceph cluster as well as a list of
+    error/warning messages.
+
+    :param status_data: JSON formatted output from ceph health
+    :type status_data: str
+    :returns:
+        - string representing overall status of the cluster
+        - list of error or warning messages
+    :rtype: tuple(str, list)
+    :raises: UnknownError
+    """
+
+    try:
+        ceph_version = get_ceph_version()
+    except UnknownError as e:
+        raise UnknownError(e)
+    # Compare (major, minor) as a tuple so that e.g. 13.0 or 14.1 is also
+    # recognised as Luminous (12.2) or newer.
+    if (ceph_version[0], ceph_version[1]) >= (12, 2):
+        # This is Luminous or above
+        overall_status = status_data['health'].get('status')
+        status_messages = [x['summary']['message'] for x in
+                           status_data['health'].get('checks', {}).values()]
+    else:
+        overall_status = status_data['health'].get('overall_status')
+        status_messages = [x['summary'] for x in
+                           status_data['health']['summary']]
+    return overall_status, status_messages
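+
+
+# For orientation (hand-written, abridged samples rather than captured
+# output): on Luminous and later, 'ceph status --format json' reports health
+# roughly as
+#
+#     {"health": {"status": "HEALTH_WARN",
+#                 "checks": {"OSDMAP_FLAGS": {"summary": {"message": "..."}}}}}
+#
+# while earlier releases used
+#
+#     {"health": {"overall_status": "HEALTH_WARN",
+#                 "summary": [{"summary": "..."}]}}
+#
+# which is why get_status_and_messages() branches on the installed version.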
+
+
+def check_ceph_status(args):
+    """
+    Check the status of a Ceph cluster. Uses the output of 'ceph status' to
+    determine if health is OK and, if not, whether we should alert on that
+    situation.
+
+    If status is HEALTH_OK then this function returns OK with no further
+    check. Otherwise, look for known situations which could cause ceph status
+    to be not OK, but which represent general operations and don't warrant a
+    pager event. These include OSD reweight actions and the nodeep-scrub flag
+    being set, with limits for the amount of misplaced data.
+
+    :param args: argparse object formatted in the convention of generic
+                 Nagios checks
+    :returns: string, describing the status of the ceph cluster.
+    :raises: UnknownError
+    """
+
+    status_critical = False
+    if args.status_file:
+        check_file_freshness(args.status_file)
+        with open(args.status_file) as f:
+            tree = f.read()
+        status_data = json.loads(tree)
+    else:
+        try:
+            tree = (subprocess.check_output(['ceph',
+                                             'status',
+                                             '--format', 'json'])
+                    .decode('UTF-8'))
+        except subprocess.CalledProcessError as e:
+            raise UnknownError(
+                "UNKNOWN: ceph status command failed with error: {}".format(e))
+        status_data = json.loads(tree)
+
+    required_keys = ['health', 'monmap', 'pgmap']
+    if not all(key in status_data.keys() for key in required_keys):
+        raise UnknownError('UNKNOWN: status data is incomplete')
+
+    try:
+        overall_status, status_messages = get_status_and_messages(status_data)
+    except UnknownError as e:
+        raise UnknownError(e)
+
+    message_all_ok = "All OK"
+
+    # if it is just an additional check, deal with it and ignore overall
+    # health
+    if args.additional_check is not None:
+        for status_message in status_messages:
+            if re.search(args.additional_check, status_message) is not None:
+                if args.additional_check_critical:
+                    msg = "CRITICAL: {}".format(status_message)
+                    raise CriticalError(msg)
+                else:
+                    msg = "WARNING: {}".format(status_message)
+                    raise WarnError(msg)
+        print(message_all_ok)
+        return message_all_ok
+
+    # if it is just --check_num_osds, deal with it and ignore overall health
+    if args.check_num_osds:
+        osdmap = status_data['osdmap']['osdmap']
+        num_osds = osdmap['num_osds']
+        num_up_osds = osdmap['num_up_osds']
+        num_in_osds = osdmap['num_in_osds']
+        if num_osds != num_up_osds or num_up_osds != num_in_osds:
+            msg = "CRITICAL: OSDs: {}, OSDs up: {}, OSDs in: {}".format(
+                num_osds, num_up_osds, num_in_osds)
+            raise CriticalError(msg)
+        message_ok = "OK: {} OSDs, all up and in".format(num_osds)
+        print(message_ok)
+        return message_ok
+
+    if overall_status != 'HEALTH_OK':
+        # Health is not OK, collect status message(s) and
+        # decide whether to return warning or critical
+        status_critical = False
+        status_msg = []
+        for status in status_messages:
+            status_msg.append(status)
+            # Check if nodeep-scrub is set and whether it should raise an
+            # error
+            if args.raise_nodeepscrub:
+                if re.match("nodeep-scrub flag", status):
+                    status_critical = True
+            # Check if noout is set
+            if re.match("noout flag", status):
+                status_critical = True
+                status_msg.append("noout flag is set")
+        if overall_status == 'HEALTH_CRITICAL' or \
+                overall_status == 'HEALTH_ERR':
+            # HEALTH_ERR, report critical
+            status_critical = True
+        else:
+            # HEALTH_WARN
+            # Check the threshold for a list of operational tasks,
+            # and return CRITICAL if exceeded
+            degraded_ratio = float(status_data['pgmap'].get('degraded_ratio',
+                                                            0.0))
+            if degraded_ratio > args.degraded_thresh:
+                status_critical = True
+            if degraded_ratio > 0:
+                status_msg.append("Degraded ratio: {}".format(degraded_ratio))
+            misplaced_ratio = float(status_data['pgmap'].get('misplaced_ratio',
+                                                             0.0))
+            if misplaced_ratio > args.misplaced_thresh:
+                status_critical = True
+            if misplaced_ratio > 0:
+                status_msg.append("Misplaced ratio: {}".
+                                  format(misplaced_ratio))
+            recovering = float(status_data['pgmap'].
+                               get('recovering_objects_per_sec', 0.0))
+            if (degraded_ratio > 0 or misplaced_ratio > 0) \
+                    and recovering > 0 \
+                    and recovering < args.recovery_rate:
+                status_critical = True
+            if recovering > 0:
+                status_msg.append("Recovering objects/s {}".format(recovering))
+        if status_critical:
+            msg = 'CRITICAL: ceph health: "{} {}"'.format(
+                overall_status,
+                ", ".join(status_msg))
+            raise CriticalError(msg)
+        else:
+            # overall_status == 'HEALTH_WARN':
+            msg = "WARNING: {}".format(", ".join(status_msg))
+            raise WarnError(msg)
+    print(message_all_ok)
+    return message_all_ok
+
+
+def parse_args(args):
+    parser = argparse.ArgumentParser(description='Check ceph status')
+    parser.add_argument('-f', '--file', dest='status_file',
+                        default=False,
+                        help='Optional file with "ceph status" output. '
+                             'Generally useful for testing, and if the Nagios '
+                             'user account does not have rights for the Ceph '
+                             'config files.')
+    parser.add_argument('--degraded_thresh', dest='degraded_thresh',
+                        default=1.0, type=float,
+                        help="Threshold for degraded ratio (0.1 = 10%%)")
+    parser.add_argument('--misplaced_thresh', dest='misplaced_thresh',
+                        default=1.0, type=float,
+                        help="Threshold for misplaced ratio (0.1 = 10%%)")
+    parser.add_argument('--recovery_rate', dest='recovery_rate',
+                        default=1, type=int,
+                        help="Recovery rate (in objects/s) below which we "
+                             "consider recovery to be stalled")
+    parser.add_argument('--raise_nodeepscrub', dest='raise_nodeepscrub',
+                        default=False, action='store_true',
+                        help="Whether to raise an error for the nodeep-scrub "
+                             "flag. If the nodeep-scrub flag is set, "
+                             "the check returns critical if this param is "
+                             "passed, otherwise it returns warning.")
+    parser.add_argument('--additional_check', dest='additional_check',
+                        default=None,
+                        help="Check if a given pattern exists in any status "
+                             "message. If it does, report warning or critical "
+                             "for this check according to the content of the "
+                             "additional_check_critical parameter")
+    parser.add_argument('--additional_check_critical',
+                        dest='additional_check_critical', default=False,
+                        action='store_true',
+                        help="Specifies what is returned if a check is "
+                             "positive. If the argument is not provided, "
+                             "the check returns a warning. Otherwise it "
+                             "returns an error condition.")
+    parser.add_argument('--check_num_osds',
+                        dest='check_num_osds', default=False,
+                        action='store_true',
+                        help="Check whether all OSDs are up and in")
+
+    return parser.parse_args(args)
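+
+
+# Example invocations (sketch only; the paths assume the files written by
+# collect_ceph_status.sh and may differ per deployment):
+#
+#     ./check_ceph_status.py -f /var/lib/nagios/cat-ceph-status.txt
+#     ./check_ceph_status.py -f /var/lib/nagios/cat-ceph-status.txt \
+#         --degraded_thresh 0.1 --raise_nodeepscrub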
Otherwise it " + "returns an error condition.") + parser.add_argument('--check_num_osds', + dest='check_num_osds', default=False, + action='store_true', + help="Check whether all OSDs are up and in") + + return parser.parse_args(args) + + +def main(args): + EXIT_CODES = {'ok': 0, 'warning': 1, 'critical': 2, 'unknown': 3} + exitcode = 'ok' + try: + check_ceph_status(args) + except UnknownError as msg: + print(msg) + exitcode = 'unknown' + except CriticalError as msg: + print(msg) + exitcode = 'critical' + except WarnError as msg: + print(msg) + exitcode = 'warning' + except Exception: + print("%s raised unknown exception '%s'" % ('check_ceph_status', + sys.exc_info()[0])) + print('=' * 60) + traceback.print_exc(file=sys.stdout) + print('=' * 60) + exitcode = 'unknown' + return EXIT_CODES[exitcode] + + +if __name__ == '__main__': + args = parse_args(sys.argv[1:]) + status = main(args) + sys.exit(status) diff --git a/ceph-mon/files/nagios/check_radosgw_sync_status.py b/ceph-mon/files/nagios/check_radosgw_sync_status.py new file mode 100755 index 00000000..01edfa24 --- /dev/null +++ b/ceph-mon/files/nagios/check_radosgw_sync_status.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2005, 2006, 2007, 2012 James Troup +# Copyright (C) 2014, 2017 Canonical +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Authors: Danny Cocks +# Based on check_ceph_status.py and authors therein + +import re +import argparse +import os +import subprocess +import sys +import time +import traceback + + +class CriticalError(Exception): + """This indicates a critical error.""" + pass + + +class UnknownError(Exception): + """This indicates a unknown error was encountered.""" + pass + + +def check_file_freshness(filename, newer_than=3600): + """ + Check a file exists, is readable and is newer than seconds (where + defaults to 3600). + """ + # First check the file exists and is readable + if not os.path.exists(filename): + raise CriticalError("%s: does not exist." % (filename)) + if os.access(filename, os.R_OK) == 0: + raise CriticalError("%s: is not readable." % (filename)) + + # Then ensure the file is up-to-date enough + mtime = os.stat(filename).st_mtime + last_modified = time.time() - mtime + if last_modified > newer_than: + raise CriticalError("%s: was last modified on %s and is too old " + "(> %s seconds)." + % (filename, time.ctime(mtime), newer_than)) + if last_modified < 0: + raise CriticalError("%s: was last modified on %s which is in the " + "future." + % (filename, time.ctime(mtime))) + + +def check_radosgw_status(args): + """ + Used to check the status of multizone RadosGW Ceph. Uses the output of + 'radosgw-admin sync status', generated during the separate cronjob, to + determine if health is OK, and if not, should we alert on that situation. 
+
+
+def check_radosgw_status(args):
+    """
+    Check the status of a multizone RadosGW Ceph deployment. Uses the output
+    of 'radosgw-admin sync status', generated by a separate cronjob, to
+    determine if health is OK and, if not, whether we should alert on that
+    situation.
+
+    As this is the first iteration of this function, we only do a very basic
+    check and rely on the charm config option `nagios_rgw_additional_checks`,
+    which is passed to this script via `args.additional_checks`.
+
+    :param args: argparse object formatted in the convention of generic
+                 Nagios checks
+    :returns: string, describing the status of the ceph cluster.
+    :raises: UnknownError, CriticalError
+    """
+
+    if args.status_file:
+        check_file_freshness(args.status_file)
+        with open(args.status_file) as f:
+            status_data = f.read()
+    else:
+        try:
+            status_data = (subprocess.check_output(['radosgw-admin',
+                                                    'sync',
+                                                    'status'])
+                           .decode('UTF-8'))
+        except subprocess.CalledProcessError as e:
+            raise UnknownError(
+                "UNKNOWN: radosgw-admin sync status command "
+                "failed with error: {}".format(e))
+
+    # If the realm name is empty, i.e. the first line is
+    #   realm ()
+    # then we assume this means this is not multizone, so exit early.
+    lines = status_data.split('\n')
+    if len(lines) >= 1 and re.match(r"realm .* \(\)", lines[0].strip()):
+        return "No multizone detected"
+
+    # This is a hangover from check_ceph_status.py and not directly applicable
+    # here. It is kept as an additional sanity check.
+    required_strings = ['realm', 'zonegroup', 'zone']
+    if not all(s in status_data for s in required_strings):
+        raise UnknownError('UNKNOWN: status data is incomplete')
+
+    # The default message if we end up with no alerts
+    message_all_ok = "All OK"
+    # The list to collect messages
+    msgs = []
+
+    # The always-done checks go here.
+    # Currently none
+
+    # Handle checks to do with given expected zones that should be connected.
+    if args.zones:
+        for zone in args.zones.split(','):
+            search_regex = r"data sync source:.*\(" + zone + r"\)"
+            if re.search(search_regex, status_data) is None:
+                msg = ("CRITICAL: Missing expected sync source '{}'"
+                       .format(zone))
+                msgs.append(msg)
+
+    # For additional checks, also test these things
+    if args.additional_checks:
+        for check in args.additional_checks:
+            m = re.search(check, status_data)
+            if m is not None:
+                msgs.append("CRITICAL: {}".format(m.group(0)))
+
+    complete_output = '\n'.join(msgs)
+    if any(msg.startswith("CRITICAL") for msg in msgs):
+        raise CriticalError(complete_output)
+    elif len(msgs) >= 1:
+        raise UnknownError(complete_output)
+    else:
+        return message_all_ok
+
+
+def parse_args(args):
+    parser = argparse.ArgumentParser(
+        description='Check radosgw multizone sync status')
+    parser.add_argument('-f', '--file', dest='status_file',
+                        default=False,
+                        help='Optional file with "radosgw-admin sync status" '
+                             'output. Generally useful for testing, and if '
+                             'the Nagios user account does not have rights '
+                             'for the Ceph config files.')
+    parser.add_argument('--zones', dest='zones',
+                        default=None,
+                        help="Check if the given zones, as a comma-separated "
+                             "list, are present in the output. If they are "
+                             "missing report critical.")
+    parser.add_argument('--additional_check', dest='additional_checks',
+                        action='append',
+                        help="Check if a given pattern exists in any status "
+                             "message. If it does, report critical")
+
+    return parser.parse_args(args)
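+
+
+# Example invocation (sketch only; the status file path matches the one
+# written by collect_ceph_status.sh, the zone names are illustrative):
+#
+#     ./check_radosgw_sync_status.py \
+#         -f /var/lib/nagios/current-radosgw-admin-sync-status.raw \
+#         --zones us-east,us-west --additional_check "ERROR"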
+
+
+def main(args):
+    # Note: leaving "warning" in here, as a reminder of the expected NRPE
+    # return codes, even though this script doesn't output any warnings.
+    EXIT_CODES = {'ok': 0, 'warning': 1, 'critical': 2, 'unknown': 3}
+    exitcode = 'unknown'
+    try:
+        output_msg = check_radosgw_status(args)
+        print(output_msg)
+        exitcode = 'ok'
+    except UnknownError as msg:
+        print(msg)
+        exitcode = 'unknown'
+    except CriticalError as msg:
+        print(msg)
+        exitcode = 'critical'
+    except Exception:
+        print("%s raised unknown exception '%s'"
+              % ('check_radosgw_sync_status', sys.exc_info()[0]))
+        print('=' * 60)
+        traceback.print_exc(file=sys.stdout)
+        print('=' * 60)
+        exitcode = 'unknown'
+    return EXIT_CODES[exitcode]
+
+
+if __name__ == '__main__':
+    args = parse_args(sys.argv[1:])
+    status = main(args)
+    sys.exit(status)
diff --git a/ceph-mon/files/nagios/collect_ceph_status.sh b/ceph-mon/files/nagios/collect_ceph_status.sh
new file mode 100755
index 00000000..962687a3
--- /dev/null
+++ b/ceph-mon/files/nagios/collect_ceph_status.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# Copyright (C) 2014 Canonical
+# All Rights Reserved
+# Author: Jacek Nykis
+
+LOCK=/var/lock/ceph-status.lock
+lockfile-create -r2 --lock-name $LOCK > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+    exit 1
+fi
+trap "rm -f $LOCK > /dev/null 2>&1" exit
+
+DATA_DIR="/var/lib/nagios"
+if [ ! -d $DATA_DIR ]; then
+    mkdir -p $DATA_DIR
+fi
+DATA_FILE="${DATA_DIR}/cat-ceph-status.txt"
+TMP_FILE=$(mktemp -p ${DATA_DIR})
+
+ceph status --format json >${TMP_FILE}
+
+chown root:nagios ${TMP_FILE}
+chmod 0640 ${TMP_FILE}
+mv ${TMP_FILE} ${DATA_FILE}
+
+DATA_FILE="${DATA_DIR}/current-ceph-osd-count.json"
+TMP_FILE=$(mktemp -p ${DATA_DIR})
+
+ceph osd tree --format json > ${TMP_FILE}
+
+chown root:nagios ${TMP_FILE}
+chmod 0640 ${TMP_FILE}
+mv ${TMP_FILE} ${DATA_FILE}
+
+
+# Note: radosgw-admin sync status doesn't support JSON output at the time of
+# writing
+DATA_FILE="${DATA_DIR}/current-radosgw-admin-sync-status.raw"
+TMP_FILE=$(mktemp -p ${DATA_DIR})
+
+radosgw-admin sync status > ${TMP_FILE}
+
+chown root:nagios ${TMP_FILE}
+chmod 0640 ${TMP_FILE}
+mv ${TMP_FILE} ${DATA_FILE}
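+
+# This collector is expected to run periodically (e.g. from /etc/cron.d) so
+# the NRPE checks always read fresh data; an illustrative schedule (the
+# install path is an assumption and varies per deployment):
+#
+#   */5 * * * * root /usr/local/lib/nagios/plugins/collect_ceph_status.sh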
diff --git a/ceph-mon/files/prometheus_alert_rules/prometheus_alerts.yaml b/ceph-mon/files/prometheus_alert_rules/prometheus_alerts.yaml
new file mode 100644
index 00000000..6e662928
--- /dev/null
+++ b/ceph-mon/files/prometheus_alert_rules/prometheus_alerts.yaml
@@ -0,0 +1,682 @@
+groups:
+  - name: "cluster health"
+    rules:
+      - alert: "CephHealthError"
+        annotations:
+          description: "The cluster state has been HEALTH_ERROR for more than 5 minutes. Please check 'ceph health detail' for more information."
+          summary: "Ceph is in the ERROR state"
+        expr: "ceph_health_status == 2"
+        for: "5m"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.2.1"
+          severity: "critical"
+          type: "ceph_default"
+      - alert: "CephHealthWarning"
+        annotations:
+          description: "The cluster state has been HEALTH_WARN for more than 15 minutes. Please check 'ceph health detail' for more information."
+          summary: "Ceph is in the WARNING state"
+        expr: "ceph_health_status == 1"
+        for: "15m"
+        labels:
+          severity: "warning"
+          type: "ceph_default"
+  - name: "mon"
+    rules:
+      - alert: "CephMonDownQuorumAtRisk"
+        annotations:
+          description: "{{ $min := query \"floor(count(ceph_mon_metadata) / 2) + 1\" | first | value }}Quorum requires a majority of monitors (x {{ $min }}) to be active. Without quorum the cluster will become inoperable, affecting all services and connected clients. The following monitors are down: {{- range query \"(ceph_mon_quorum_status == 0) + on(ceph_daemon) group_left(hostname) (ceph_mon_metadata * 0)\" }} - {{ .Labels.ceph_daemon }} on {{ .Labels.hostname }} {{- end }}"
+          documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-down"
+          summary: "Monitor quorum is at risk"
+        expr: |
+          (
+            (ceph_health_detail{name="MON_DOWN"} == 1) * on() (
+              count(ceph_mon_quorum_status == 1) == bool (floor(count(ceph_mon_metadata) / 2) + 1)
+            )
+          ) == 1
+        for: "30s"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.3.1"
+          severity: "critical"
+          type: "ceph_default"
+      - alert: "CephMonDown"
+        annotations:
+          description: |
+            {{ $down := query "count(ceph_mon_quorum_status == 0)" | first | value }}{{ $s := "" }}{{ if gt $down 1.0 }}{{ $s = "s" }}{{ end }}You have {{ $down }} monitor{{ $s }} down. Quorum is still intact, but the loss of an additional monitor will make your cluster inoperable. The following monitors are down: {{- range query "(ceph_mon_quorum_status == 0) + on(ceph_daemon) group_left(hostname) (ceph_mon_metadata * 0)" }} - {{ .Labels.ceph_daemon }} on {{ .Labels.hostname }} {{- end }}
+          documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-down"
+          summary: "One or more monitors down"
+        expr: |
+          count(ceph_mon_quorum_status == 0) <= (count(ceph_mon_metadata) - floor(count(ceph_mon_metadata) / 2) + 1)
+        for: "30s"
+        labels:
+          severity: "warning"
+          type: "ceph_default"
+      - alert: "CephMonDiskspaceCritical"
+        annotations:
+          description: "The free space available to a monitor's store is critically low. You should increase the space available to the monitor(s). The default directory is /var/lib/ceph/mon-*/data/store.db on traditional deployments, and /var/lib/rook/mon-*/data/store.db on the mon pod's worker node for Rook. Look for old, rotated versions of *.log and MANIFEST*. Do NOT touch any *.sst files. Also check any other directories under /var/lib/rook and other directories on the same filesystem; often /var/log and /var/tmp are culprits. Your monitor hosts are: {{- range query \"ceph_mon_metadata\"}} - {{ .Labels.hostname }} {{- end }}"
+          documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-disk-crit"
+          summary: "Filesystem space on at least one monitor is critically low"
+        expr: "ceph_health_detail{name=\"MON_DISK_CRIT\"} == 1"
+        for: "1m"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.3.2"
+          severity: "critical"
+          type: "ceph_default"
+      - alert: "CephMonDiskspaceLow"
+        annotations:
+          description: "The space available to a monitor's store is approaching full (>70% is the default). You should increase the space available to the monitor(s). The default directory is /var/lib/ceph/mon-*/data/store.db on traditional deployments, and /var/lib/rook/mon-*/data/store.db on the mon pod's worker node for Rook. Look for old, rotated versions of *.log and MANIFEST*. Do NOT touch any *.sst files. Also check any other directories under /var/lib/rook and other directories on the same filesystem; often /var/log and /var/tmp are culprits. Your monitor hosts are: {{- range query \"ceph_mon_metadata\"}} - {{ .Labels.hostname }} {{- end }}"
+          documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-disk-low"
+          summary: "Drive space on at least one monitor is approaching full"
+        expr: "ceph_health_detail{name=\"MON_DISK_LOW\"} == 1"
+        for: "5m"
+        labels:
+          severity: "warning"
+          type: "ceph_default"
+      - alert: "CephMonClockSkew"
+        annotations:
+          description: "Ceph monitors rely on closely synchronized time to maintain quorum and cluster consistency. This event indicates that the time on at least one mon has drifted too far from the lead mon. Review cluster status with ceph -s. This will show which monitors are affected. Check the time sync status on each monitor host with 'ceph time-sync-status' and the state and peers of your ntpd or chrony daemon."
+          documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-clock-skew"
+          summary: "Clock skew detected among monitors"
+        expr: "ceph_health_detail{name=\"MON_CLOCK_SKEW\"} == 1"
+        for: "1m"
+        labels:
+          severity: "warning"
+          type: "ceph_default"
+  - name: "osd"
+    rules:
+      - alert: "CephOSDDownHigh"
+        annotations:
+          description: "{{ $value | humanize }}% or {{ with query \"count(ceph_osd_up == 0)\" }}{{ . | first | value }}{{ end }} of {{ with query \"count(ceph_osd_up)\" }}{{ . | first | value }}{{ end }} OSDs are down (>= 10%). The following OSDs are down: {{- range query \"(ceph_osd_up * on(ceph_daemon) group_left(hostname) ceph_osd_metadata) == 0\" }} - {{ .Labels.ceph_daemon }} on {{ .Labels.hostname }} {{- end }}"
+          summary: "More than 10% of OSDs are down"
+        expr: "count(ceph_osd_up == 0) / count(ceph_osd_up) * 100 >= 10"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.4.1"
+          severity: "critical"
+          type: "ceph_default"
+      - alert: "CephOSDHostDown"
+        annotations:
+          description: "The following OSDs are down: {{- range query \"(ceph_osd_up * on(ceph_daemon) group_left(hostname) ceph_osd_metadata) == 0\" }} - {{ .Labels.hostname }} : {{ .Labels.ceph_daemon }} {{- end }}"
+          summary: "An OSD host is offline"
+        expr: "ceph_health_detail{name=\"OSD_HOST_DOWN\"} == 1"
+        for: "5m"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.4.8"
+          severity: "warning"
+          type: "ceph_default"
+      - alert: "CephOSDDown"
+        annotations:
+          description: |
+            {{ $num := query "count(ceph_osd_up == 0)" | first | value }}{{ $s := "" }}{{ if gt $num 1.0 }}{{ $s = "s" }}{{ end }}{{ $num }} OSD{{ $s }} down for over 5 minutes. The following OSD{{ $s }} {{ if eq $s "" }}is{{ else }}are{{ end }} down: {{- range query "(ceph_osd_up * on(ceph_daemon) group_left(hostname) ceph_osd_metadata) == 0"}} - {{ .Labels.ceph_daemon }} on {{ .Labels.hostname }} {{- end }}
+          documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-down"
+          summary: "An OSD has been marked down"
+        expr: "ceph_health_detail{name=\"OSD_DOWN\"} == 1"
+        for: "5m"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.4.2"
+          severity: "warning"
+          type: "ceph_default"
+      - alert: "CephOSDNearFull"
+        annotations:
+          description: "One or more OSDs have reached the NEARFULL threshold. Use 'ceph health detail' and 'ceph osd df' to identify the problem. To resolve, add capacity to the affected OSD's failure domain, restore down/out OSDs, or delete unwanted data."
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-nearfull" + summary: "OSD(s) running low on free space (NEARFULL)" + expr: "ceph_health_detail{name=\"OSD_NEARFULL\"} == 1" + for: "5m" + labels: + oid: "1.3.6.1.4.1.50495.1.2.1.4.3" + severity: "warning" + type: "ceph_default" + - alert: "CephOSDFull" + annotations: + description: "An OSD has reached the FULL threshold. Writes to pools that share the affected OSD will be blocked. Use 'ceph health detail' and 'ceph osd df' to identify the problem. To resolve, add capacity to the affected OSD's failure domain, restore down/out OSDs, or delete unwanted data." + documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-full" + summary: "OSD full, writes blocked" + expr: "ceph_health_detail{name=\"OSD_FULL\"} > 0" + for: "1m" + labels: + oid: "1.3.6.1.4.1.50495.1.2.1.4.6" + severity: "critical" + type: "ceph_default" + - alert: "CephOSDBackfillFull" + annotations: + description: "An OSD has reached the BACKFILL FULL threshold. This will prevent rebalance operations from completing. Use 'ceph health detail' and 'ceph osd df' to identify the problem. To resolve, add capacity to the affected OSD's failure domain, restore down/out OSDs, or delete unwanted data." + documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-backfillfull" + summary: "OSD(s) too full for backfill operations" + expr: "ceph_health_detail{name=\"OSD_BACKFILLFULL\"} > 0" + for: "1m" + labels: + severity: "warning" + type: "ceph_default" + - alert: "CephOSDTooManyRepairs" + annotations: + description: "Reads from an OSD have used a secondary PG to return data to the client, indicating a potential failing drive." + documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-too-many-repairs" + summary: "OSD reports a high number of read errors" + expr: "ceph_health_detail{name=\"OSD_TOO_MANY_REPAIRS\"} == 1" + for: "30s" + labels: + severity: "warning" + type: "ceph_default" + - alert: "CephOSDTimeoutsPublicNetwork" + annotations: + description: "OSD heartbeats on the cluster's 'public' network (frontend) are running slow. Investigate the network for latency or loss issues. Use 'ceph health detail' to show the affected OSDs." + summary: "Network issues delaying OSD heartbeats (public network)" + expr: "ceph_health_detail{name=\"OSD_SLOW_PING_TIME_FRONT\"} == 1" + for: "1m" + labels: + severity: "warning" + type: "ceph_default" + - alert: "CephOSDTimeoutsClusterNetwork" + annotations: + description: "OSD heartbeats on the cluster's 'cluster' network (backend) are slow. Investigate the network for latency issues on this subnet. Use 'ceph health detail' to show the affected OSDs." + summary: "Network issues delaying OSD heartbeats (cluster network)" + expr: "ceph_health_detail{name=\"OSD_SLOW_PING_TIME_BACK\"} == 1" + for: "1m" + labels: + severity: "warning" + type: "ceph_default" + - alert: "CephOSDInternalDiskSizeMismatch" + annotations: + description: "One or more OSDs have an internal inconsistency between metadata and the size of the device. This could lead to the OSD(s) crashing in future. You should redeploy the affected OSDs." 
+          documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#bluestore-disk-size-mismatch"
+          summary: "OSD size inconsistency error"
+        expr: "ceph_health_detail{name=\"BLUESTORE_DISK_SIZE_MISMATCH\"} == 1"
+        for: "1m"
+        labels:
+          severity: "warning"
+          type: "ceph_default"
+      - alert: "CephDeviceFailurePredicted"
+        annotations:
+          description: "The device health module has determined that one or more devices will fail soon. To review device status use 'ceph device ls'. To show a specific device use 'ceph device info <dev id>'. Mark the OSD out so that data may migrate to other OSDs. Once the OSD has drained, destroy the OSD, replace the device, and redeploy the OSD."
+          documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#id2"
+          summary: "Device(s) predicted to fail soon"
+        expr: "ceph_health_detail{name=\"DEVICE_HEALTH\"} == 1"
+        for: "1m"
+        labels:
+          severity: "warning"
+          type: "ceph_default"
+      - alert: "CephDeviceFailurePredictionTooHigh"
+        annotations:
+          description: "The device health module has determined that devices predicted to fail cannot be remediated automatically, since too many OSDs would be removed from the cluster to ensure performance and availability. Prevent data integrity issues by adding new OSDs so that data may be relocated."
+          documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#device-health-toomany"
+          summary: "Too many devices are predicted to fail, unable to resolve"
+        expr: "ceph_health_detail{name=\"DEVICE_HEALTH_TOOMANY\"} == 1"
+        for: "1m"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.4.7"
+          severity: "critical"
+          type: "ceph_default"
+      - alert: "CephDeviceFailureRelocationIncomplete"
+        annotations:
+          description: "The device health module has determined that one or more devices will fail soon, but the normal process of relocating the data on the device to other OSDs in the cluster is blocked. \nEnsure that the cluster has available free space. It may be necessary to add capacity to the cluster to allow data from the failing device to successfully migrate, or to enable the balancer."
+          documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#device-health-in-use"
+          summary: "Device failure is predicted, but unable to relocate data"
+        expr: "ceph_health_detail{name=\"DEVICE_HEALTH_IN_USE\"} == 1"
+        for: "1m"
+        labels:
+          severity: "warning"
+          type: "ceph_default"
+      - alert: "CephOSDFlapping"
+        annotations:
+          description: "OSD {{ $labels.ceph_daemon }} on {{ $labels.hostname }} was marked down and back up {{ $value | humanize }} times once a minute for 5 minutes. This may indicate a network issue (latency, packet loss, MTU mismatch) on the cluster network, or the public network if no cluster network is deployed. Check the network stats on the listed host(s)."
+          documentation: "https://docs.ceph.com/en/latest/rados/troubleshooting/troubleshooting-osd#flapping-osds"
+          summary: "Network issues are causing OSDs to flap (mark each other down)"
+        expr: "(rate(ceph_osd_up[5m]) * on(ceph_daemon) group_left(hostname) ceph_osd_metadata) * 60 > 1"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.4.4"
+          severity: "warning"
+          type: "ceph_default"
+      - alert: "CephOSDReadErrors"
+        annotations:
+          description: "An OSD has encountered read errors, but the OSD has recovered by retrying the reads. This may indicate an issue with hardware or the kernel."
+          documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#bluestore-spurious-read-errors"
+          summary: "Device read errors detected"
+        expr: "ceph_health_detail{name=\"BLUESTORE_SPURIOUS_READ_ERRORS\"} == 1"
+        for: "30s"
+        labels:
+          severity: "warning"
+          type: "ceph_default"
+      - alert: "CephPGImbalance"
+        annotations:
+          description: "OSD {{ $labels.ceph_daemon }} on {{ $labels.hostname }} deviates by more than 30% from the average PG count."
+          summary: "PGs are not balanced across OSDs"
+        expr: |
+          abs(
+            ((ceph_osd_numpg > 0) - on (job) group_left avg(ceph_osd_numpg > 0) by (job)) /
+            on (job) group_left avg(ceph_osd_numpg > 0) by (job)
+          ) * on (ceph_daemon) group_left(hostname) ceph_osd_metadata > 0.30
+        for: "5m"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.4.5"
+          severity: "warning"
+          type: "ceph_default"
+  - name: "mds"
+    rules:
+      - alert: "CephFilesystemDamaged"
+        annotations:
+          description: "Filesystem metadata has been corrupted. Data may be inaccessible. Analyze metrics from the MDS daemon admin socket, or escalate to support."
+          documentation: "https://docs.ceph.com/en/latest/cephfs/health-messages#cephfs-health-messages"
+          summary: "CephFS filesystem is damaged."
+        expr: "ceph_health_detail{name=\"MDS_DAMAGE\"} > 0"
+        for: "1m"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.5.1"
+          severity: "critical"
+          type: "ceph_default"
+      - alert: "CephFilesystemOffline"
+        annotations:
+          description: "All MDS ranks are unavailable. The MDS daemons managing metadata are down, rendering the filesystem offline."
+          documentation: "https://docs.ceph.com/en/latest/cephfs/health-messages/#mds-all-down"
+          summary: "CephFS filesystem is offline"
+        expr: "ceph_health_detail{name=\"MDS_ALL_DOWN\"} > 0"
+        for: "1m"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.5.3"
+          severity: "critical"
+          type: "ceph_default"
+      - alert: "CephFilesystemDegraded"
+        annotations:
+          description: "One or more metadata daemons (MDS ranks) are failed or in a damaged state. At best the filesystem is partially available, at worst the filesystem is completely unusable."
+          documentation: "https://docs.ceph.com/en/latest/cephfs/health-messages/#fs-degraded"
+          summary: "CephFS filesystem is degraded"
+        expr: "ceph_health_detail{name=\"FS_DEGRADED\"} > 0"
+        for: "1m"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.5.4"
+          severity: "critical"
+          type: "ceph_default"
+      - alert: "CephFilesystemMDSRanksLow"
+        annotations:
+          description: "The filesystem's 'max_mds' setting defines the number of MDS ranks in the filesystem. The current number of active MDS daemons is less than this value."
+          documentation: "https://docs.ceph.com/en/latest/cephfs/health-messages/#mds-up-less-than-max"
+          summary: "Ceph MDS daemon count is lower than configured"
+        expr: "ceph_health_detail{name=\"MDS_UP_LESS_THAN_MAX\"} > 0"
+        for: "1m"
+        labels:
+          severity: "warning"
+          type: "ceph_default"
+      - alert: "CephFilesystemInsufficientStandby"
+        annotations:
+          description: "The number of standby MDS daemons available is lower than the minimum required by the standby_count_wanted setting. Adjust the standby count or increase the number of MDS daemons."
+          documentation: "https://docs.ceph.com/en/latest/cephfs/health-messages/#mds-insufficient-standby"
+          summary: "Ceph filesystem standby daemons too few"
+        expr: "ceph_health_detail{name=\"MDS_INSUFFICIENT_STANDBY\"} > 0"
+        for: "1m"
+        labels:
+          severity: "warning"
+          type: "ceph_default"
+      - alert: "CephFilesystemFailureNoStandby"
+        annotations:
+          description: "An MDS daemon has failed, leaving only one active rank and no available standby. Investigate the cause of the failure or add a standby MDS."
+          documentation: "https://docs.ceph.com/en/latest/cephfs/health-messages/#fs-with-failed-mds"
+          summary: "MDS daemon failed, no further standby available"
+        expr: "ceph_health_detail{name=\"FS_WITH_FAILED_MDS\"} > 0"
+        for: "1m"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.5.5"
+          severity: "critical"
+          type: "ceph_default"
+      - alert: "CephFilesystemReadOnly"
+        annotations:
+          description: "The filesystem has switched to READ ONLY due to an unexpected error when writing to the metadata pool. Either analyze the output from the MDS daemon admin socket, or escalate to support."
+          documentation: "https://docs.ceph.com/en/latest/cephfs/health-messages#cephfs-health-messages"
+          summary: "CephFS filesystem in read only mode due to write error(s)"
+        expr: "ceph_health_detail{name=\"MDS_HEALTH_READ_ONLY\"} > 0"
+        for: "1m"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.5.2"
+          severity: "critical"
+          type: "ceph_default"
+  - name: "mgr"
+    rules:
+      - alert: "CephMgrModuleCrash"
+        annotations:
+          description: "One or more mgr modules have crashed and have yet to be acknowledged by an administrator. A crashed module may impact functionality within the cluster. Use the 'ceph crash' command to determine which module has failed, and archive it to acknowledge the failure."
+          documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#recent-mgr-module-crash"
+          summary: "A manager module has recently crashed"
+        expr: "ceph_health_detail{name=\"RECENT_MGR_MODULE_CRASH\"} == 1"
+        for: "5m"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.6.1"
+          severity: "critical"
+          type: "ceph_default"
+      - alert: "CephMgrPrometheusModuleInactive"
+        annotations:
+          description: "The mgr/prometheus module at {{ $labels.instance }} is unreachable. This could mean that the module has been disabled or the mgr daemon itself is down. Without the mgr/prometheus module, metrics and alerts will no longer function. Open a shell to an admin node or toolbox pod and use 'ceph -s' to determine whether the mgr is active. If the mgr is not active, restart it, otherwise you can determine module status with 'ceph mgr module ls'. If it is not listed as enabled, enable it with 'ceph mgr module enable prometheus'."
+          summary: "The mgr/prometheus module is not available"
+        expr: "up{} == 0"
+        for: "1m"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.6.2"
+          severity: "critical"
+          type: "ceph_default"
+  - name: "pgs"
+    rules:
+      - alert: "CephPGsInactive"
+        annotations:
+          description: "{{ $value }} PGs have been inactive for more than 5 minutes in pool {{ $labels.name }}. Inactive placement groups are not able to serve read/write requests."
+          summary: "One or more placement groups are inactive"
+        expr: "ceph_pool_metadata * on(pool_id,instance) group_left() (ceph_pg_total - ceph_pg_active) > 0"
+        for: "5m"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.7.1"
+          severity: "critical"
+          type: "ceph_default"
+      - alert: "CephPGsUnclean"
+        annotations:
+          description: "{{ $value }} PGs have been unclean for more than 15 minutes in pool {{ $labels.name }}. Unclean PGs have not recovered from a previous failure."
+          summary: "One or more placement groups are marked unclean"
+        expr: "ceph_pool_metadata * on(pool_id,instance) group_left() (ceph_pg_total - ceph_pg_clean) > 0"
+        for: "15m"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.7.2"
+          severity: "warning"
+          type: "ceph_default"
+      - alert: "CephPGsDamaged"
+        annotations:
+          description: "During data consistency checks (scrub), at least one PG has been flagged as being damaged or inconsistent. Check to see which PG is affected, and attempt a manual repair if necessary. To list problematic placement groups, use 'rados list-inconsistent-pg <pool>'. To repair PGs use the 'ceph pg repair <pg_num>' command."
+          documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-damaged"
+          summary: "Placement group damaged, manual intervention needed"
+        expr: "ceph_health_detail{name=~\"PG_DAMAGED|OSD_SCRUB_ERRORS\"} == 1"
+        for: "5m"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.7.4"
+          severity: "critical"
+          type: "ceph_default"
+      - alert: "CephPGRecoveryAtRisk"
+        annotations:
+          description: "Data redundancy is at risk since one or more OSDs are at or above the 'full' threshold. Add more capacity to the cluster, restore down/out OSDs, or delete unwanted data."
+          documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-recovery-full"
+          summary: "OSDs are too full for recovery"
+        expr: "ceph_health_detail{name=\"PG_RECOVERY_FULL\"} == 1"
+        for: "1m"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.7.5"
+          severity: "critical"
+          type: "ceph_default"
+      - alert: "CephPGUnavilableBlockingIO"
+        annotations:
+          description: "Data availability is reduced, impacting the cluster's ability to service I/O. One or more placement groups (PGs) are in a state that blocks I/O."
+          documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-availability"
+          summary: "PG is unavailable, blocking I/O"
+        expr: "((ceph_health_detail{name=\"PG_AVAILABILITY\"} == 1) - scalar(ceph_health_detail{name=\"OSD_DOWN\"})) == 1"
+        for: "1m"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.7.3"
+          severity: "critical"
+          type: "ceph_default"
+      - alert: "CephPGBackfillAtRisk"
+        annotations:
+          description: "Data redundancy may be at risk due to lack of free space within the cluster. One or more OSDs have reached the 'backfillfull' threshold. Add more capacity, or delete unwanted data."
+          documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-backfill-full"
+          summary: "Backfill operations are blocked due to lack of free space"
+        expr: "ceph_health_detail{name=\"PG_BACKFILL_FULL\"} == 1"
+        for: "1m"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.7.6"
+          severity: "critical"
+          type: "ceph_default"
+      - alert: "CephPGNotScrubbed"
+        annotations:
+          description: "One or more PGs have not been scrubbed recently. Scrubs check metadata integrity, protecting against bit-rot. They check that metadata is consistent across data replicas. When PGs miss their scrub interval, it may indicate that the scrub window is too small, or PGs were not in a 'clean' state during the scrub window. You can manually initiate a scrub with: ceph pg scrub <pg_id>"
+          documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-not-scrubbed"
+          summary: "Placement group(s) have not been scrubbed"
+        expr: "ceph_health_detail{name=\"PG_NOT_SCRUBBED\"} == 1"
+        for: "5m"
+        labels:
+          severity: "warning"
+          type: "ceph_default"
+      - alert: "CephPGsHighPerOSD"
+        annotations:
+          description: "The number of placement groups per OSD is too high (exceeds the mon_max_pg_per_osd setting).\n Check that the pg_autoscaler has not been disabled for any pools with 'ceph osd pool autoscale-status', and that the profile selected is appropriate. You may also adjust the target_size_ratio of a pool to guide the autoscaler based on the expected relative size of the pool ('ceph osd pool set cephfs.cephfs.meta target_size_ratio .1') or set the pg_autoscaler mode to 'warn' and adjust pg_num appropriately for one or more pools."
+          documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks/#too-many-pgs"
+          summary: "Placement groups per OSD is too high"
+        expr: "ceph_health_detail{name=\"TOO_MANY_PGS\"} == 1"
+        for: "1m"
+        labels:
+          severity: "warning"
+          type: "ceph_default"
+      - alert: "CephPGNotDeepScrubbed"
+        annotations:
+          description: "One or more PGs have not been deep scrubbed recently. Deep scrubs protect against bit-rot. They compare data replicas to ensure consistency. When PGs miss their deep scrub interval, it may indicate that the window is too small or PGs were not in a 'clean' state during the deep-scrub window."
+          documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-not-deep-scrubbed"
+          summary: "Placement group(s) have not been deep scrubbed"
+        expr: "ceph_health_detail{name=\"PG_NOT_DEEP_SCRUBBED\"} == 1"
+        for: "5m"
+        labels:
+          severity: "warning"
+          type: "ceph_default"
+  - name: "nodes"
+    rules:
+      - alert: "CephNodeRootFilesystemFull"
+        annotations:
+          description: "Root volume is dangerously full: {{ $value | humanize }}% free."
+          summary: "Root filesystem is dangerously full"
+        expr: "node_filesystem_avail_bytes{mountpoint=\"/\"} / node_filesystem_size_bytes{mountpoint=\"/\"} * 100 < 5"
+        for: "5m"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.8.1"
+          severity: "critical"
+          type: "ceph_default"
+      - alert: "CephNodeNetworkPacketDrops"
+        annotations:
+          description: "Node {{ $labels.instance }} experiences packet drop > 0.5% or > 10 packets/s on interface {{ $labels.device }}."
+          summary: "One or more NICs reports packet drops"
+        expr: |
+          (
+            rate(node_network_receive_drop_total{device!="lo"}[1m]) +
+            rate(node_network_transmit_drop_total{device!="lo"}[1m])
+          ) / (
+            rate(node_network_receive_packets_total{device!="lo"}[1m]) +
+            rate(node_network_transmit_packets_total{device!="lo"}[1m])
+          ) >= 0.0050000000000000001 and (
+            rate(node_network_receive_drop_total{device!="lo"}[1m]) +
+            rate(node_network_transmit_drop_total{device!="lo"}[1m])
+          ) >= 10
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.8.2"
+          severity: "warning"
+          type: "ceph_default"
+      - alert: "CephNodeNetworkPacketErrors"
+        annotations:
+          description: "Node {{ $labels.instance }} experiences packet errors > 0.01% or > 10 packets/s on interface {{ $labels.device }}."
+ summary: "One or more NICs reports packet errors" + expr: | + ( + rate(node_network_receive_errs_total{device!="lo"}[1m]) + + rate(node_network_transmit_errs_total{device!="lo"}[1m]) + ) / ( + rate(node_network_receive_packets_total{device!="lo"}[1m]) + + rate(node_network_transmit_packets_total{device!="lo"}[1m]) + ) >= 0.0001 or ( + rate(node_network_receive_errs_total{device!="lo"}[1m]) + + rate(node_network_transmit_errs_total{device!="lo"}[1m]) + ) >= 10 + labels: + oid: "1.3.6.1.4.1.50495.1.2.1.8.3" + severity: "warning" + type: "ceph_default" + - alert: "CephNodeDiskspaceWarning" + annotations: + description: "Mountpoint {{ $labels.mountpoint }} on {{ $labels.nodename }} will be full in less than 5 days based on the 48 hour trailing fill rate." + summary: "Host filesystem free space is getting low" + expr: "predict_linear(node_filesystem_free_bytes{device=~\"/.*\"}[2d], 3600 * 24 * 5) *on(instance) group_left(nodename) node_uname_info < 0" + labels: + oid: "1.3.6.1.4.1.50495.1.2.1.8.4" + severity: "warning" + type: "ceph_default" + - alert: "CephNodeInconsistentMTU" + annotations: + description: "Node {{ $labels.instance }} has a different MTU size ({{ $value }}) than the median of devices named {{ $labels.device }}." + summary: "MTU settings across Ceph hosts are inconsistent" + expr: "node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0) == scalar( max by (device) (node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) != quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) )or node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0) == scalar( min by (device) (node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) != quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) )" + labels: + severity: "warning" + type: "ceph_default" + - name: "pools" + rules: + - alert: "CephPoolGrowthWarning" + annotations: + description: "Pool '{{ $labels.name }}' will be full in less than 5 days assuming the average fill-up rate of the past 48 hours." + summary: "Pool growth rate may soon exceed capacity" + expr: "(predict_linear(ceph_pool_percent_used[2d], 3600 * 24 * 5) * on(pool_id) group_right ceph_pool_metadata) >= 95" + labels: + oid: "1.3.6.1.4.1.50495.1.2.1.9.2" + severity: "warning" + type: "ceph_default" + - alert: "CephPoolBackfillFull" + annotations: + description: "A pool is approaching the near full threshold, which will prevent recovery/backfill operations from completing. Consider adding more capacity." + summary: "Free space in a pool is too low for recovery/backfill" + expr: "ceph_health_detail{name=\"POOL_BACKFILLFULL\"} > 0" + labels: + severity: "warning" + type: "ceph_default" + - alert: "CephPoolFull" + annotations: + description: "A pool has reached its MAX quota, or OSDs supporting the pool have reached the FULL threshold. Until this is resolved, writes to the pool will be blocked. Pool Breakdown (top 5) {{- range query \"topk(5, sort_desc(ceph_pool_percent_used * on(pool_id) group_right ceph_pool_metadata))\" }} - {{ .Labels.name }} at {{ .Value }}% {{- end }} Increase the pool's quota, or add capacity to the cluster first then increase the pool's quota (e.g. 
ceph osd pool set quota <pool_name> max_bytes <bytes>)"
+          documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#pool-full"
+          summary: "Pool is full - writes are blocked"
+        expr: "ceph_health_detail{name=\"POOL_FULL\"} > 0"
+        for: "1m"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.9.1"
+          severity: "critical"
+          type: "ceph_default"
+      - alert: "CephPoolNearFull"
+        annotations:
+          description: "A pool has exceeded the warning (percent full) threshold, or OSDs supporting the pool have reached the NEARFULL threshold. Writes may continue, but you are at risk of the pool going read-only if more capacity isn't made available. Determine the affected pool with 'ceph df detail', looking at QUOTA BYTES and STORED. Increase the pool's quota, or add capacity to the cluster first then increase the pool's quota (e.g. ceph osd pool set quota <pool_name> max_bytes <bytes>). Also ensure that the balancer is active."
+          summary: "One or more Ceph pools are nearly full"
+        expr: "ceph_health_detail{name=\"POOL_NEAR_FULL\"} > 0"
+        for: "5m"
+        labels:
+          severity: "warning"
+          type: "ceph_default"
+  - name: "healthchecks"
+    rules:
+      - alert: "CephSlowOps"
+        annotations:
+          description: "{{ $value }} OSD requests are taking too long to process (osd_op_complaint_time exceeded)"
+          documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#slow-ops"
+          summary: "OSD operations are slow to complete"
+        expr: "ceph_healthcheck_slow_ops > 0"
+        for: "30s"
+        labels:
+          severity: "warning"
+          type: "ceph_default"
+  - name: "cephadm"
+    rules:
+      - alert: "CephadmUpgradeFailed"
+        annotations:
+          description: "The cephadm cluster upgrade process has failed. The cluster remains in an undetermined state. Please review the cephadm logs to understand the nature of the issue."
+          summary: "Ceph version upgrade has failed"
+        expr: "ceph_health_detail{name=\"UPGRADE_EXCEPTION\"} > 0"
+        for: "30s"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.11.2"
+          severity: "critical"
+          type: "ceph_default"
+      - alert: "CephadmDaemonFailed"
+        annotations:
+          description: "A daemon managed by cephadm is no longer active. Determine which daemon is down with 'ceph health detail'. You may start daemons with 'ceph orch daemon start <daemon_id>'."
+          summary: "A Ceph daemon managed by cephadm is down"
+        expr: "ceph_health_detail{name=\"CEPHADM_FAILED_DAEMON\"} > 0"
+        for: "30s"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.11.1"
+          severity: "critical"
+          type: "ceph_default"
+      - alert: "CephadmPaused"
+        annotations:
+          description: "Cluster management has been paused manually. This prevents the orchestrator from performing service management and reconciliation. If this is not intentional, resume cephadm operations with 'ceph orch resume'."
+          documentation: "https://docs.ceph.com/en/latest/cephadm/operations#cephadm-paused"
+          summary: "Orchestration tasks via cephadm are PAUSED"
+        expr: "ceph_health_detail{name=\"CEPHADM_PAUSED\"} > 0"
+        for: "1m"
+        labels:
+          severity: "warning"
+          type: "ceph_default"
+  - name: "PrometheusServer"
+    rules:
+      - alert: "PrometheusJobMissing"
+        annotations:
+          description: "The Prometheus job that scrapes from Ceph is no longer defined; this effectively means you will have no metrics or alerts for the cluster. Please review the job definitions in the prometheus.yml file of the Prometheus instance."
+          summary: "The scrape job for Ceph is missing from Prometheus"
+        expr: "absent(up{})"
+        for: "30s"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.12.1"
+          severity: "critical"
+          type: "ceph_default"
+  - name: "rados"
+    rules:
+      - alert: "CephObjectMissing"
+        annotations:
+          description: "The latest version of a RADOS object cannot be found, even though all OSDs are up. I/O requests for this object from clients will block (hang). Resolving this issue may require the object to be rolled back to a prior version manually, and manually verified."
+          documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#object-unfound"
+          summary: "Object(s) marked UNFOUND"
+        expr: "(ceph_health_detail{name=\"OBJECT_UNFOUND\"} == 1) * on() (count(ceph_osd_up == 1) == bool count(ceph_osd_metadata)) == 1"
+        for: "30s"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.10.1"
+          severity: "critical"
+          type: "ceph_default"
+  - name: "generic"
+    rules:
+      - alert: "CephDaemonCrash"
+        annotations:
+          description: "One or more daemons have crashed recently, and need to be acknowledged. This notification ensures that software crashes do not go unseen. To acknowledge a crash, use the 'ceph crash archive <crash_id>' command."
+          documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks/#recent-crash"
+          summary: "One or more Ceph daemons have crashed, and are pending acknowledgement"
+        expr: "ceph_health_detail{name=\"RECENT_CRASH\"} == 1"
+        for: "1m"
+        labels:
+          oid: "1.3.6.1.4.1.50495.1.2.1.1.2"
+          severity: "critical"
+          type: "ceph_default"
+  - name: "rgwmultisite"
+    rules:
+      - alert: "CephRGWMultisiteFetchError"
+        annotations:
+          description: "The number of unsuccessful object replications from the source zone has exceeded the threshold, defined as 2 errors per 15 minutes."
+          summary: "Unsuccessful Object Replications from Source Zone Threshold Exceeded"
+        expr: "increase(ceph_data_sync_from_zone_fetch_errors[15m]) > 2"
+        for: "5m"
+        labels:
+          severity: "warning"
+          type: "ceph_default"
+      - alert: "CephRGWMultisitePollError"
+        annotations:
+          description: "The number of unsuccessful replication log requests has exceeded the threshold, defined as 2 errors per 15 minutes."
+          summary: "Unsuccessful Replication Log Request Errors Threshold Exceeded"
+        expr: "increase(ceph_data_sync_from_zone_poll_errors[15m]) > 2"
+        for: "5m"
+        labels:
+          severity: "warning"
+          type: "ceph_default"
+      - alert: "CephRGWMultisiteFetchErrorCritical"
+        annotations:
+          description: "Critical: the number of unsuccessful object replications from the source zone has exceeded the threshold, defined as 50 errors per 15 minutes."
+          summary: "Critical: Unsuccessful Object Replications from Source Zone Threshold Exceeded"
+        expr: "increase(ceph_data_sync_from_zone_fetch_errors[15m]) > 50"
+        for: "5m"
+        labels:
+          severity: "critical"
+          type: "ceph_default"
+      - alert: "CephRGWMultisitePollErrorCritical"
+        annotations:
+          description: "Critical: the number of unsuccessful replication log requests has exceeded the threshold, defined as 50 errors per 15 minutes."
+          summary: "Critical: Unsuccessful Replication Log Request Errors Threshold Exceeded"
+        expr: "increase(ceph_data_sync_from_zone_poll_errors[15m]) > 50"
+        for: "5m"
+        labels:
+          severity: "critical"
+          type: "ceph_default"
+      - alert: "CephRGWMultisitePollLatency"
+        annotations:
+          description: "The latency for poll requests has exceeded the threshold, defined as 600s of accumulated latency per 15 minutes."
+          summary: "Poll Request Latency Threshold Exceeded"
+        expr: "increase(ceph_data_sync_from_zone_poll_latency_sum[15m]) > 600"
+        for: "5m"
+        labels:
+          severity: "warning"
+          type: "ceph_default"
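+
+# The rule file above can be sanity-checked offline before deployment with
+# Prometheus' promtool, e.g. (sketch; run from this directory):
+#
+#   promtool check rules prometheus_alerts.yaml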
The threshold is defined as 600s latency per 15min" + summary: "Poll Request Latency Threshold Exceeded" + expr: "increase(ceph_data_sync_from_zone_poll_latency_sum[15m]) > 600" + for: "5m" + labels: + severity: "warning" + type: "ceph_default" diff --git a/ceph-mon/hardening.yaml b/ceph-mon/hardening.yaml new file mode 100644 index 00000000..314bb385 --- /dev/null +++ b/ceph-mon/hardening.yaml @@ -0,0 +1,5 @@ +# Overrides file for contrib.hardening. See README.hardening in +# contrib.hardening for info on how to use this file. +ssh: + server: + use_pam: 'yes' # juju requires this diff --git a/ceph-mon/icon.svg b/ceph-mon/icon.svg new file mode 100644 index 00000000..e9383990 --- /dev/null +++ b/ceph-mon/icon.svg @@ -0,0 +1,311 @@ + [icon.svg: 311 lines of SVG markup; the XML tags were stripped during extraction and only the "image/svg+xml" media type survived] diff --git a/ceph-mon/lib/charms/grafana_agent/v0/cos_agent.py b/ceph-mon/lib/charms/grafana_agent/v0/cos_agent.py new file mode 100644 index 00000000..d3130b2b --- /dev/null +++ b/ceph-mon/lib/charms/grafana_agent/v0/cos_agent.py @@ -0,0 +1,842 @@ +# Copyright 2023 Canonical Ltd. +# See LICENSE file for licensing details. + +r"""## Overview. + +This library can be used to manage the cos_agent relation interface: + +- `COSAgentProvider`: Use in machine charms that need to have a workload's metrics + or logs scraped, or forward rule files or dashboards to Prometheus, Loki or Grafana through + the Grafana Agent machine charm. + +- `COSAgentRequirer`: Used in the Grafana Agent machine charm to manage the requirer side of + the `cos_agent` interface. + + +## COSAgentProvider Library Usage + +Grafana Agent machine Charmed Operator interacts with its clients using the cos_agent library. +Charms seeking to send telemetry must do so using the `COSAgentProvider` object from +this charm library. + +Using the `COSAgentProvider` object only requires instantiating it, +typically in the `__init__` method of your charm (the one which sends telemetry). + +The constructor of `COSAgentProvider` has only one required and nine optional parameters: + +```python + def __init__( + self, + charm: CharmType, + relation_name: str = DEFAULT_RELATION_NAME, + metrics_endpoints: Optional[List[_MetricsEndpointDict]] = None, + metrics_rules_dir: str = "./src/prometheus_alert_rules", + logs_rules_dir: str = "./src/loki_alert_rules", + recurse_rules_dirs: bool = False, + log_slots: Optional[List[str]] = None, + dashboard_dirs: Optional[List[str]] = None, + refresh_events: Optional[List] = None, + scrape_configs: Optional[Union[List[Dict], Callable]] = None, + ): +``` + +### Parameters + +- `charm`: The instance of the charm that instantiates `COSAgentProvider`, typically `self`. + +- `relation_name`: If your charmed operator uses a relation name other than `cos-agent` to use + the `cos_agent` interface, this is where you have to specify that. + +- `metrics_endpoints`: In this parameter you can specify the metrics endpoints that Grafana Agent + machine Charmed Operator will scrape. The configs of this list will be merged with the configs + from `scrape_configs`. + +- `metrics_rules_dir`: The directory in which the Charmed Operator stores its metrics alert rules + files. + +- `logs_rules_dir`: The directory in which the Charmed Operator stores its logs alert rules files. 
+ +- `recurse_rules_dirs`: This parameter sets whether the Grafana Agent machine Charmed Operator + should search for alert rules files recursively in the previous two directories. + +- `log_slots`: Snap slots to connect to for scraping logs in the form ["snap-name:slot", ...]. + +- `dashboard_dirs`: List of directories where the dashboards are stored in the Charmed Operator. + +- `refresh_events`: List of events on which to refresh relation data. + +- `scrape_configs`: List of standard scrape_configs dicts or a callable that returns the list in + case the configs need to be generated dynamically. The contents of this list will be merged + with the configs from `metrics_endpoints`. + + +### Example 1 - Minimal instrumentation: + +In order to use this object the following should be in the `charm.py` file. + +```python +from charms.grafana_agent.v0.cos_agent import COSAgentProvider +... +class TelemetryProviderCharm(CharmBase): + def __init__(self, *args): + ... + self._grafana_agent = COSAgentProvider(self) +``` + +### Example 2 - Full instrumentation: + +In order to use this object the following should be in the `charm.py` file. + +```python +from charms.grafana_agent.v0.cos_agent import COSAgentProvider +... +class TelemetryProviderCharm(CharmBase): + def __init__(self, *args): + ... + self._grafana_agent = COSAgentProvider( + self, + relation_name="custom-cos-agent", + metrics_endpoints=[ + # specify "path" and "port" to scrape from localhost + {"path": "/metrics", "port": 9000}, + {"path": "/metrics", "port": 9001}, + {"path": "/metrics", "port": 9002}, + ], + metrics_rules_dir="./src/alert_rules/prometheus", + logs_rules_dir="./src/alert_rules/loki", + recurse_rules_dirs=True, + log_slots=["my-app:slot"], + dashboard_dirs=["./src/dashboards_1", "./src/dashboards_2"], + refresh_events=["update-status", "upgrade-charm"], + scrape_configs=[ + { + "job_name": "custom_job", + "metrics_path": "/metrics", + "authorization": {"credentials": "bearer-token"}, + "static_configs": [ + { + "targets": ["localhost:9003"], + "labels": {"key": "value"}, + }, + ], + }, + ] + ) +``` + +### Example 3 - Dynamic scrape configs generation: + +Pass a function to the `scrape_configs` argument to decouple the generation of the configs +from the instantiation of the COSAgentProvider object. + +```python +from charms.grafana_agent.v0.cos_agent import COSAgentProvider +... + +class TelemetryProviderCharm(CharmBase): + def generate_scrape_configs(self): + return [ + { + "job_name": "custom", + "metrics_path": "/metrics", + "static_configs": [{"targets": ["localhost:9000"]}], + }, + ] + + def __init__(self, *args): + ... + self._grafana_agent = COSAgentProvider( + self, + scrape_configs=self.generate_scrape_configs, + ) +``` + +## COSAgentRequirer Library Usage + +This object may be used by any Charmed Operator which gathers telemetry data by +implementing the consumer side of the `cos_agent` interface. +For instance, the Grafana Agent machine Charmed Operator. + +For this purpose the charm needs to instantiate the `COSAgentRequirer` object with one mandatory +and two optional arguments. + +### Parameters + +- `charm`: A reference to the parent (Grafana Agent machine) charm. + +- `relation_name`: The name of the relation that the charm uses to interact + with its clients that provide telemetry data using the `COSAgentProvider` object. + + If provided, this relation name must match a provided relation in metadata.yaml with the + `cos_agent` interface. + The default value of this argument is "cos-agent". 
+ +- `refresh_events`: List of events on which to refresh relation data. + + +### Example 1 - Minimal instrumentation: + +In order to use this object the following should be in the `charm.py` file. + +```python +from charms.grafana_agent.v0.cos_agent import COSAgentRequirer +... +class GrafanaAgentMachineCharm(GrafanaAgentCharm): + def __init__(self, *args): + ... + self._cos = COSAgentRequirer(self) +``` + + +### Example 2 - Full instrumentation: + +In order to use this object the following should be in the `charm.py` file. + +```python +from charms.grafana_agent.v0.cos_agent import COSAgentRequirer +... +class GrafanaAgentMachineCharm(GrafanaAgentCharm): + def __init__(self, *args): + ... + self._cos = COSAgentRequirer( + self, + relation_name="cos-agent-consumer", + refresh_events=["update-status", "upgrade-charm"], + ) +``` +""" + +import base64 +import json +import logging +import lzma +from collections import namedtuple +from itertools import chain +from pathlib import Path +from typing import TYPE_CHECKING, Any, Callable, ClassVar, Dict, List, Optional, Set, Union + +import pydantic +from cosl import JujuTopology +from cosl.rules import AlertRules +from ops.charm import RelationChangedEvent +from ops.framework import EventBase, EventSource, Object, ObjectEvents +from ops.model import Relation, Unit +from ops.testing import CharmType + +if TYPE_CHECKING: + try: + from typing import TypedDict + + class _MetricsEndpointDict(TypedDict): + path: str + port: int + + except ModuleNotFoundError: + _MetricsEndpointDict = Dict # pyright: ignore + +LIBID = "dc15fa84cef84ce58155fb84f6c6213a" +LIBAPI = 0 +LIBPATCH = 6 + +PYDEPS = ["cosl", "pydantic < 2"] + +DEFAULT_RELATION_NAME = "cos-agent" +DEFAULT_PEER_RELATION_NAME = "peers" +DEFAULT_SCRAPE_CONFIG = { + "static_configs": [{"targets": ["localhost:80"]}], + "metrics_path": "/metrics", +} + +logger = logging.getLogger(__name__) +SnapEndpoint = namedtuple("SnapEndpoint", "owner, name") + + +class GrafanaDashboard(str): + """Grafana Dashboard encoded json; lzma-compressed.""" + + # TODO Replace this with a custom type when pydantic v2 released (end of 2023 Q1?) + # https://github.com/pydantic/pydantic/issues/4887 + @staticmethod + def _serialize(raw_json: Union[str, bytes]) -> "GrafanaDashboard": + if not isinstance(raw_json, bytes): + raw_json = raw_json.encode("utf-8") + encoded = base64.b64encode(lzma.compress(raw_json)).decode("utf-8") + return GrafanaDashboard(encoded) + + def _deserialize(self) -> Dict: + try: + raw = lzma.decompress(base64.b64decode(self.encode("utf-8"))).decode() + return json.loads(raw) + except json.decoder.JSONDecodeError as e: + logger.error("Invalid Dashboard format: %s", e) + return {} + + def __repr__(self): + """Return string representation of self.""" + return "<GrafanaDashboard>" + + +class CosAgentProviderUnitData(pydantic.BaseModel): + """Unit databag model for `cos-agent` relation.""" + + # The following entries are the same for all units of the same principal. + # Note that the same grafana agent subordinate may be related to several apps. + # this needs to make its way to the gagent leader + metrics_alert_rules: dict + log_alert_rules: dict + dashboards: List[GrafanaDashboard] + subordinate: Optional[bool] + + # The following entries may vary across units of the same principal app. + # this data does not need to be forwarded to the gagent leader + metrics_scrape_jobs: List[Dict] + log_slots: List[str] + + # when this whole data structure is dumped into a databag, it will be nested under this key. 
+ # while not strictly necessary (we could have it 'flattened out' into the databag), + # this simplifies working with the model. + KEY: ClassVar[str] = "config" + + +class CosAgentPeersUnitData(pydantic.BaseModel): + """Unit databag model for `peers` cos-agent machine charm peer relation.""" + + # We need the principal unit name and relation metadata to be able to render identifiers + # (e.g. topology) on the leader side, after all the data moves into peer data (the grafana + # agent leader can only see its own principal, because it is a subordinate charm). + principal_unit_name: str + principal_relation_id: str + principal_relation_name: str + + # The only data that is forwarded to the leader is data that needs to go into the app databags + # of the outgoing o11y relations. + metrics_alert_rules: Optional[dict] + log_alert_rules: Optional[dict] + dashboards: Optional[List[GrafanaDashboard]] + + # when this whole data structure is dumped into a databag, it will be nested under this key. + # while not strictly necessary (we could have it 'flattened out' into the databag), + # this simplifies working with the model. + KEY: ClassVar[str] = "config" + + @property + def app_name(self) -> str: + """Parse out the app name from the unit name. + + TODO: Switch to using `model_post_init` when pydantic v2 is released? + https://github.com/pydantic/pydantic/issues/1729#issuecomment-1300576214 + """ + return self.principal_unit_name.split("/")[0] + + +class COSAgentProvider(Object): + """Integration endpoint wrapper for the provider side of the cos_agent interface.""" + + def __init__( + self, + charm: CharmType, + relation_name: str = DEFAULT_RELATION_NAME, + metrics_endpoints: Optional[List["_MetricsEndpointDict"]] = None, + metrics_rules_dir: str = "./src/prometheus_alert_rules", + logs_rules_dir: str = "./src/loki_alert_rules", + recurse_rules_dirs: bool = False, + log_slots: Optional[List[str]] = None, + dashboard_dirs: Optional[List[str]] = None, + refresh_events: Optional[List] = None, + *, + scrape_configs: Optional[Union[List[dict], Callable]] = None, + ): + """Create a COSAgentProvider instance. + + Args: + charm: The `CharmBase` instance that is instantiating this object. + relation_name: The name of the relation to communicate over. + metrics_endpoints: List of endpoints in the form [{"path": path, "port": port}, ...]. + This argument is a simplified form of the `scrape_configs`. + The contents of this list will be merged with the contents of `scrape_configs`. + metrics_rules_dir: Directory where the metrics rules are stored. + logs_rules_dir: Directory where the logs rules are stored. + recurse_rules_dirs: Whether to recurse into rule paths. + log_slots: Snap slots to connect to for scraping logs + in the form ["snap-name:slot", ...]. + dashboard_dirs: Directory where the dashboards are stored. + refresh_events: List of events on which to refresh relation data. + scrape_configs: List of standard scrape_configs dicts or a callable + that returns the list in case the configs need to be generated dynamically. + The contents of this list will be merged with the contents of `metrics_endpoints`. 
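+
+        A minimal usage sketch follows (the endpoint path, port, and snap slot
+        are illustrative assumptions, not values defined by this library)::
+
+            COSAgentProvider(
+                self,
+                metrics_endpoints=[{"path": "/metrics", "port": 9090}],
+                log_slots=["my-snap:log-slot"],
+            )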
+ """ + super().__init__(charm, relation_name) + dashboard_dirs = dashboard_dirs or ["./src/grafana_dashboards"] + + self._charm = charm + self._relation_name = relation_name + self._metrics_endpoints = metrics_endpoints or [] + self._scrape_configs = scrape_configs or [] + self._metrics_rules = metrics_rules_dir + self._logs_rules = logs_rules_dir + self._recursive = recurse_rules_dirs + self._log_slots = log_slots or [] + self._dashboard_dirs = dashboard_dirs + self._refresh_events = refresh_events or [self._charm.on.config_changed] + + events = self._charm.on[relation_name] + self.framework.observe(events.relation_joined, self._on_refresh) + self.framework.observe(events.relation_changed, self._on_refresh) + for event in self._refresh_events: + self.framework.observe(event, self._on_refresh) + + def _on_refresh(self, event): + """Trigger the class to update relation data.""" + relations = self._charm.model.relations[self._relation_name] + + for relation in relations: + # Before a principal is related to the grafana-agent subordinate, we'd get + # ModelError: ERROR cannot read relation settings: unit "zk/2": settings not found + # Add a guard to make sure it doesn't happen. + if relation.data and self._charm.unit in relation.data: + # Subordinate relations can communicate only over unit data. + try: + data = CosAgentProviderUnitData( + metrics_alert_rules=self._metrics_alert_rules, + log_alert_rules=self._log_alert_rules, + dashboards=self._dashboards, + metrics_scrape_jobs=self._scrape_jobs, + log_slots=self._log_slots, + subordinate=self._charm.meta.subordinate, + ) + relation.data[self._charm.unit][data.KEY] = data.json() + except ( + pydantic.ValidationError, + json.decoder.JSONDecodeError, + ) as e: + logger.error("Invalid relation data provided: %s", e) + + @property + def _scrape_jobs(self) -> List[Dict]: + """Return a prometheus_scrape-like data structure for jobs. 
+ + https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config + """ + if callable(self._scrape_configs): + scrape_configs = self._scrape_configs() + else: + # Create a copy of the user scrape_configs, since we will mutate this object + scrape_configs = self._scrape_configs.copy() + + # Convert "metrics_endpoints" to standard scrape_configs, and add them in + for endpoint in self._metrics_endpoints: + scrape_configs.append( + { + "metrics_path": endpoint["path"], + "static_configs": [{"targets": [f"localhost:{endpoint['port']}"]}], + } + ) + + scrape_configs = scrape_configs or [DEFAULT_SCRAPE_CONFIG] + + # Augment job name to include the app name and a unique id (index) + for idx, scrape_config in enumerate(scrape_configs): + scrape_config["job_name"] = "_".join( + [self._charm.app.name, str(idx), scrape_config.get("job_name", "default")] + ) + + return scrape_configs + + @property + def _metrics_alert_rules(self) -> Dict: + """Use (for now) the prometheus_scrape AlertRules to initialize this.""" + alert_rules = AlertRules( + query_type="promql", topology=JujuTopology.from_charm(self._charm) + ) + alert_rules.add_path(self._metrics_rules, recursive=self._recursive) + return alert_rules.as_dict() + + @property + def _log_alert_rules(self) -> Dict: + """Use (for now) the loki_push_api AlertRules to initialize this.""" + alert_rules = AlertRules(query_type="logql", topology=JujuTopology.from_charm(self._charm)) + alert_rules.add_path(self._logs_rules, recursive=self._recursive) + return alert_rules.as_dict() + + @property + def _dashboards(self) -> List[GrafanaDashboard]: + dashboards: List[GrafanaDashboard] = [] + for d in self._dashboard_dirs: + for path in Path(d).glob("*"): + dashboard = GrafanaDashboard._serialize(path.read_bytes()) + dashboards.append(dashboard) + return dashboards + + +class COSAgentDataChanged(EventBase): + """Event emitted by `COSAgentRequirer` when relation data changes.""" + + +class COSAgentValidationError(EventBase): + """Event emitted by `COSAgentRequirer` when there is an error in the relation data.""" + + def __init__(self, handle, message: str = ""): + super().__init__(handle) + self.message = message + + def snapshot(self) -> Dict: + """Save COSAgentValidationError source information.""" + return {"message": self.message} + + def restore(self, snapshot): + """Restore COSAgentValidationError source information.""" + self.message = snapshot["message"] + + +class COSAgentRequirerEvents(ObjectEvents): + """`COSAgentRequirer` events.""" + + data_changed = EventSource(COSAgentDataChanged) + validation_error = EventSource(COSAgentValidationError) + + +class MultiplePrincipalsError(Exception): + """Custom exception for when there are multiple principal applications.""" + + pass + + +class COSAgentRequirer(Object): + """Integration endpoint wrapper for the Requirer side of the cos_agent interface.""" + + on = COSAgentRequirerEvents() # pyright: ignore + + def __init__( + self, + charm: CharmType, + *, + relation_name: str = DEFAULT_RELATION_NAME, + peer_relation_name: str = DEFAULT_PEER_RELATION_NAME, + refresh_events: Optional[List[str]] = None, + ): + """Create a COSAgentRequirer instance. + + Args: + charm: The `CharmBase` instance that is instantiating this object. + relation_name: The name of the relation to communicate over. + peer_relation_name: The name of the peer relation to communicate over. + refresh_events: List of events on which to refresh relation data. 
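+
+        A minimal usage sketch (the relation and peer relation names shown are
+        the defaults, and must exist in the charm's metadata.yaml)::
+
+            self._cos = COSAgentRequirer(
+                self,
+                relation_name="cos-agent",
+                peer_relation_name="peers",
+            )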
+ """ + super().__init__(charm, relation_name) + self._charm = charm + self._relation_name = relation_name + self._peer_relation_name = peer_relation_name + self._refresh_events = refresh_events or [self._charm.on.config_changed] + + events = self._charm.on[relation_name] + self.framework.observe( + events.relation_joined, self._on_relation_data_changed + ) # TODO: do we need this? + self.framework.observe(events.relation_changed, self._on_relation_data_changed) + for event in self._refresh_events: + self.framework.observe(event, self.trigger_refresh) # pyright: ignore + + # Peer relation events + # A peer relation is needed as it is the only mechanism for exchanging data across + # subordinate units. + # self.framework.observe( + # self.on[self._peer_relation_name].relation_joined, self._on_peer_relation_joined + # ) + peer_events = self._charm.on[peer_relation_name] + self.framework.observe(peer_events.relation_changed, self._on_peer_relation_changed) + + @property + def peer_relation(self) -> Optional["Relation"]: + """Helper function for obtaining the peer relation object. + + Returns: peer relation object + (NOTE: would return None if called too early, e.g. during install). + """ + return self.model.get_relation(self._peer_relation_name) + + def _on_peer_relation_changed(self, _): + # Peer data is used for forwarding data from principal units to the grafana agent + # subordinate leader, for updating the app data of the outgoing o11y relations. + if self._charm.unit.is_leader(): + self.on.data_changed.emit() # pyright: ignore + + def _on_relation_data_changed(self, event: RelationChangedEvent): + # Peer data is the only means of communication between subordinate units. + if not self.peer_relation: + event.defer() + return + + cos_agent_relation = event.relation + if not event.unit or not cos_agent_relation.data.get(event.unit): + return + principal_unit = event.unit + + # Coherence check + units = cos_agent_relation.units + if len(units) > 1: + # should never happen + raise ValueError( + f"unexpected error: subordinate relation {cos_agent_relation} " + f"should have exactly one unit" + ) + + if not (raw := cos_agent_relation.data[principal_unit].get(CosAgentProviderUnitData.KEY)): + return + + if not (provider_data := self._validated_provider_data(raw)): + return + + # Copy data from the principal relation to the peer relation, so the leader could + # follow up. + # Save the originating unit name, so it could be used for topology later on by the leader. + data = CosAgentPeersUnitData( # peer relation databag model + principal_unit_name=event.unit.name, + principal_relation_id=str(event.relation.id), + principal_relation_name=event.relation.name, + metrics_alert_rules=provider_data.metrics_alert_rules, + log_alert_rules=provider_data.log_alert_rules, + dashboards=provider_data.dashboards, + ) + self.peer_relation.data[self._charm.unit][ + f"{CosAgentPeersUnitData.KEY}-{event.unit.name}" + ] = data.json() + + # We can't easily tell if the data that was changed is limited to only the data + # that goes into peer relation (in which case, if this is not a leader unit, we wouldn't + # need to emit `on.data_changed`), so we're emitting `on.data_changed` either way. 
+ self.on.data_changed.emit() # pyright: ignore + + def _validated_provider_data(self, raw) -> Optional[CosAgentProviderUnitData]: + try: + return CosAgentProviderUnitData(**json.loads(raw)) + except (pydantic.ValidationError, json.decoder.JSONDecodeError) as e: + self.on.validation_error.emit(message=str(e)) # pyright: ignore + return None + + def trigger_refresh(self, _): + """Trigger a refresh of relation data.""" + # FIXME: Figure out what we should do here + self.on.data_changed.emit() # pyright: ignore + + @property + def _principal_unit(self) -> Optional[Unit]: + """Return the principal unit for a relation. + + Assumes that the relation is of type subordinate. + Relies on the fact that, for subordinate relations, the only remote unit visible to + *this unit* is the principal unit that this unit is attached to. + """ + if relations := self._principal_relations: + # Technically it's a list, but for subordinates there can only be one relation + principal_relation = next(iter(relations)) + if units := principal_relation.units: + # Technically it's a list, but for subordinates there can only be one + return next(iter(units)) + + return None + + @property + def _principal_relations(self): + relations = [] + for relation in self._charm.model.relations[self._relation_name]: + if not json.loads(relation.data[next(iter(relation.units))]["config"]).get( + "subordinate", False + ): + relations.append(relation) + if len(relations) > 1: + logger.error( + "Multiple applications claiming to be principal. Update the cos-agent library in the client application charms." + ) + raise MultiplePrincipalsError("Multiple principal applications.") + return relations + + @property + def _remote_data(self) -> List[CosAgentProviderUnitData]: + """Return a list of remote data from each of the related units. + + Assumes that the relation is of type subordinate. + Relies on the fact that, for subordinate relations, the only remote unit visible to + *this unit* is the principal unit that this unit is attached to. + """ + all_data = [] + + for relation in self._charm.model.relations[self._relation_name]: + if not relation.units: + continue + unit = next(iter(relation.units)) + if not (raw := relation.data[unit].get(CosAgentProviderUnitData.KEY)): + continue + if not (provider_data := self._validated_provider_data(raw)): + continue + all_data.append(provider_data) + + return all_data + + def _gather_peer_data(self) -> List[CosAgentPeersUnitData]: + """Collect data from the peers. + + Returns a trimmed-down list of CosAgentPeersUnitData. + """ + relation = self.peer_relation + + # Ensure that whatever context we're running this in, we take the necessary precautions: + if not relation or not relation.data or not relation.app: + return [] + + # Iterate over all peer unit data and only collect every principal once. + peer_data: List[CosAgentPeersUnitData] = [] + app_names: Set[str] = set() + + for unit in chain((self._charm.unit,), relation.units): + if not relation.data.get(unit): + continue + + for unit_name in relation.data.get(unit): # pyright: ignore + if not unit_name.startswith(CosAgentPeersUnitData.KEY): + continue + raw = relation.data[unit].get(unit_name) + if raw is None: + continue + data = CosAgentPeersUnitData(**json.loads(raw)) + # Have we already seen this principal app? 
+ if (app_name := data.app_name) in app_names: + continue + peer_data.append(data) + app_names.add(app_name) + + return peer_data + + @property + def metrics_alerts(self) -> Dict[str, Any]: + """Fetch metrics alerts.""" + alert_rules = {} + + seen_apps: List[str] = [] + for data in self._gather_peer_data(): + if rules := data.metrics_alert_rules: + app_name = data.app_name + if app_name in seen_apps: + continue # dedup! + seen_apps.append(app_name) + # This is only used for naming the file, so be as specific as we can be + identifier = JujuTopology( + model=self._charm.model.name, + model_uuid=self._charm.model.uuid, + application=app_name, + # For the topology unit, we could use `data.principal_unit_name`, but that unit + # name may not be very stable: `_gather_peer_data` de-duplicates by app name so + # the exact unit name that turns up first in the iterator may vary from time to + # time. So using the grafana-agent unit name instead. + unit=self._charm.unit.name, + ).identifier + + alert_rules[identifier] = rules + + return alert_rules + + @property + def metrics_jobs(self) -> List[Dict]: + """Parse the relation data contents and extract the metrics jobs.""" + scrape_jobs = [] + for data in self._remote_data: + for job in data.metrics_scrape_jobs: + # In #220, relation schema changed from a simplified dict to the standard + # `scrape_configs`. + # This is to ensure backwards compatibility with Providers older than v0.5. + if "path" in job and "port" in job and "job_name" in job: + job = { + "job_name": job["job_name"], + "metrics_path": job["path"], + "static_configs": [{"targets": [f"localhost:{job['port']}"]}], + } + + scrape_jobs.append(job) + + return scrape_jobs + + @property + def snap_log_endpoints(self) -> List[SnapEndpoint]: + """Fetch logging endpoints exposed by related snaps.""" + plugs = [] + for data in self._remote_data: + targets = data.log_slots + if targets: + for target in targets: + if target in plugs: + logger.warning( + f"plug {target} already listed. " + "The same snap is being passed from multiple " + "endpoints; this should not happen." + ) + else: + plugs.append(target) + + endpoints = [] + for plug in plugs: + if ":" not in plug: + logger.error(f"invalid plug definition received: {plug}. Ignoring...") + else: + endpoint = SnapEndpoint(*plug.split(":")) + endpoints.append(endpoint) + return endpoints + + @property + def logs_alerts(self) -> Dict[str, Any]: + """Fetch log alerts.""" + alert_rules = {} + seen_apps: List[str] = [] + + for data in self._gather_peer_data(): + if rules := data.log_alert_rules: + # This is only used for naming the file, so be as specific as we can be + app_name = data.app_name + if app_name in seen_apps: + continue # dedup! + seen_apps.append(app_name) + + identifier = JujuTopology( + model=self._charm.model.name, + model_uuid=self._charm.model.uuid, + application=app_name, + # For the topology unit, we could use `data.principal_unit_name`, but that unit + # name may not be very stable: `_gather_peer_data` de-duplicates by app name so + # the exact unit name that turns up first in the iterator may vary from time to + # time. So using the grafana-agent unit name instead. + unit=self._charm.unit.name, + ).identifier + + alert_rules[identifier] = rules + + return alert_rules + + @property + def dashboards(self) -> List[Dict[str, str]]: + """Fetch dashboards as encoded content. + + Dashboards are assumed not to vary across units of the same primary. 
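+
+        Each element of the returned list is shaped roughly as follows (the
+        values are illustrative)::
+
+            {
+                "relation_id": "24",
+                "charm": "cos-agent-my-app",
+                "content": {...},  # the decoded dashboard JSON
+                "title": "My Dashboard",
+            }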
+ """ + dashboards: List[Dict[str, Any]] = [] + + seen_apps: List[str] = [] + for data in self._gather_peer_data(): + app_name = data.app_name + if app_name in seen_apps: + continue # dedup! + seen_apps.append(app_name) + + for encoded_dashboard in data.dashboards or (): + content = GrafanaDashboard(encoded_dashboard)._deserialize() + + title = content.get("title", "no_title") + + dashboards.append( + { + "relation_id": data.principal_relation_id, + # We have the remote charm name - use it for the identifier + "charm": f"{data.principal_relation_name}-{app_name}", + "content": content, + "title": title, + } + ) + + return dashboards diff --git a/ceph-mon/lib/charms/observability_libs/v0/juju_topology.py b/ceph-mon/lib/charms/observability_libs/v0/juju_topology.py new file mode 100644 index 00000000..c985b1e7 --- /dev/null +++ b/ceph-mon/lib/charms/observability_libs/v0/juju_topology.py @@ -0,0 +1,306 @@ +# Copyright 2022 Canonical Ltd. +# See LICENSE file for licensing details. +"""## Overview. + +This document explains how to use the `JujuTopology` class to +create and consume topology information from Juju in a consistent manner. + +The goal of the Juju topology is to uniquely identify a piece +of software running across any of your Juju-managed deployments. +This is achieved by combining the following four elements: + +- Model name +- Model UUID +- Application name +- Unit identifier + + +For a more in-depth description of the concept, as well as a +walk-through of it's use-case in observability, see +[this blog post](https://juju.is/blog/model-driven-observability-part-2-juju-topology-metrics) +on the Juju blog. + +## Library Usage + +This library may be used to create and consume `JujuTopology` objects. +The `JujuTopology` class provides three ways to create instances: + +### Using the `from_charm` method + +Enables instantiation by supplying the charm as an argument. When +creating topology objects for the current charm, this is the recommended +approach. + +```python +topology = JujuTopology.from_charm(self) +``` + +### Using the `from_dict` method + +Allows for instantion using a dictionary of relation data, like the +`scrape_metadata` from Prometheus or the labels of an alert rule. When +creating topology objects for remote charms, this is the recommended +approach. + +```python +scrape_metadata = json.loads(relation.data[relation.app].get("scrape_metadata", "{}")) +topology = JujuTopology.from_dict(scrape_metadata) +``` + +### Using the class constructor + +Enables instantiation using whatever values you want. While this +is useful in some very specific cases, this is almost certainly not +what you are looking for as setting these values manually may +result in observability metrics which do not uniquely identify a +charm in order to provide accurate usage reporting, alerting, +horizontal scaling, or other use cases. 
+ +```python +topology = JujuTopology( + model="some-juju-model", + model_uuid="00000000-0000-0000-0000-000000000001", + application="fancy-juju-application", + unit="fancy-juju-application/0", + charm_name="fancy-juju-application-k8s", +) +``` + +""" + +import re +from collections import OrderedDict +from typing import Dict, List, Optional + +# The unique Charmhub library identifier, never change it +LIBID = "bced1658f20f49d28b88f61f83c2d232" + +LIBAPI = 0 + +LIBPATCH = 2 + + +class InvalidUUIDError(Exception): + """Invalid UUID was provided.""" + + def __init__(self, uuid: str): + self.message = "'{}' is not a valid UUID.".format(uuid) + super().__init__(self.message) + + +class JujuTopology: + """JujuTopology is used for storing, generating and formatting juju topology information.""" + + def __init__( + self, + model: str, + model_uuid: str, + application: str, + unit: Optional[str] = None, + charm_name: Optional[str] = None, + ): + """Build a JujuTopology object. + + A `JujuTopology` object is used for storing and transforming + Juju topology information. This information is used to + annotate Prometheus scrape jobs and alert rules. Such + annotation when applied to scrape jobs helps in identifying + the source of the scraped metrics. On the other hand when + applied to alert rules topology information ensures that + evaluation of alert expressions is restricted to the source + (charm) from which the alert rules were obtained. + + Args: + model: a string name of the Juju model + model_uuid: a globally unique string identifier for the Juju model + application: an application name as a string + unit: a unit name as a string + charm_name: name of charm as a string + """ + if not self.is_valid_uuid(model_uuid): + raise InvalidUUIDError(model_uuid) + + self._model = model + self._model_uuid = model_uuid + self._application = application + self._charm_name = charm_name + self._unit = unit + + def is_valid_uuid(self, uuid): + """Validate the supplied UUID against the Juju Model UUID pattern.""" + # TODO: + # Harness is hardcoding a UUID that is v1, not v4: f2c1b2a6-e006-11eb-ba80-0242ac130004 + # See: https://github.com/canonical/operator/issues/779 + # + # >>> uuid.UUID("f2c1b2a6-e006-11eb-ba80-0242ac130004").version + # 1 + # + # we changed the validation of the 3rd UUID block: 4[a-f0-9]{3} -> [a-f0-9]{4} + # See: https://github.com/canonical/operator/blob/main/ops/testing.py#L1094 + # + # Juju in fact generates a UUID v4: https://github.com/juju/utils/blob/master/uuid.go#L62 + # but does not validate it is actually v4: + # See: + # - https://github.com/juju/utils/blob/master/uuid.go#L22 + # - https://github.com/juju/schema/blob/master/strings.go#L79 + # + # Once Harness fixes this, we should remove this comment and refactor the regex or + # the entire method using the uuid module to validate UUIDs + regex = re.compile( + "^[a-f0-9]{8}-?[a-f0-9]{4}-?[a-f0-9]{4}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}$" + ) + return bool(regex.match(uuid)) + + @classmethod + def from_charm(cls, charm): + """Creates a JujuTopology instance by using the model data available on a charm object. + + Args: + charm: a `CharmBase` object for which the `JujuTopology` will be constructed + Returns: + a `JujuTopology` object. + """ + return cls( + model=charm.model.name, + model_uuid=charm.model.uuid, + application=charm.model.app.name, + unit=charm.model.unit.name, + charm_name=charm.meta.name, + ) + + @classmethod + def from_dict(cls, data: dict): + """Factory method for creating `JujuTopology` children from a dictionary. 
+ + Args: + data: a dictionary with five keys providing topology information. The keys are + - "model" + - "model_uuid" + - "application" + - "unit" + - "charm_name" + `unit` and `charm_name` may be empty, but will result in more limited + labels. However, this allows us to support charms without workloads. + + Returns: + a `JujuTopology` object. + """ + return cls( + model=data["model"], + model_uuid=data["model_uuid"], + application=data["application"], + unit=data.get("unit", ""), + charm_name=data.get("charm_name", ""), + ) + + def as_dict( + self, *, remapped_keys: Optional[Dict[str, str]] = None, excluded_keys: Optional[List[str]] = None + ) -> OrderedDict: + """Format the topology information into an ordered dict. + + Keeping the dictionary ordered is important to be able to + compare dicts without having to resort to deep comparisons. + + Args: + remapped_keys: A dictionary mapping old key names to new key names, + which will be substituted when invoked. + excluded_keys: A list of key names to exclude from the returned dict. + """ + ret = OrderedDict( + [ + ("model", self.model), + ("model_uuid", self.model_uuid), + ("application", self.application), + ("unit", self.unit), + ("charm_name", self.charm_name), + ] + ) + if excluded_keys: + ret = OrderedDict({k: v for k, v in ret.items() if k not in excluded_keys}) + + if remapped_keys: + ret = OrderedDict( + (remapped_keys.get(k), v) if remapped_keys.get(k) else (k, v) for k, v in ret.items() # type: ignore + ) + + return ret + + @property + def identifier(self) -> str: + """Format the topology information into a terse string. + + This crops the model UUID, making it unsuitable for comparisons against + anything but other identifiers. Mainly to be used as a display name or file + name where long strings might become an issue. + + >>> JujuTopology( \ + model = "a-model", \ + model_uuid = "00000000-0000-4000-8000-000000000000", \ + application = "some-app", \ + unit = "some-app/1" \ + ).identifier + 'a-model_00000000_some-app' + """ + parts = self.as_dict( + excluded_keys=["unit", "charm_name"], + ) + + parts["model_uuid"] = self.model_uuid_short + values = parts.values() + + return "_".join([str(val) for val in values]).replace("/", "_") + + @property + def label_matcher_dict(self) -> Dict[str, str]: + """Format the topology information into a dict with keys having 'juju_' as prefix. + + Relabelled topology never includes the unit as it would then only match + the leader unit (i.e. the unit that produced the dict). + """ + items = self.as_dict( + remapped_keys={"charm_name": "charm"}, + excluded_keys=["unit"], + ).items() + + return {"juju_{}".format(key): value for key, value in items if value} + + @property + def label_matchers(self) -> str: + """Format the topology information into a promql/logql label matcher string. + + Topology label matchers should never include the unit as it + would then only match the leader unit (i.e. the unit that + produced the matchers). 
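+
+        Example output (the values are illustrative)::
+
+            juju_model="a-model", juju_model_uuid="00000000-0000-4000-8000-000000000000", juju_application="some-app", juju_charm="some-charm"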
+ """ + items = self.label_matcher_dict.items() + return ", ".join(['{}="{}"'.format(key, value) for key, value in items if value]) + + @property + def model(self) -> str: + """Getter for the juju model value.""" + return self._model + + @property + def model_uuid(self) -> str: + """Getter for the juju model uuid value.""" + return self._model_uuid + + @property + def model_uuid_short(self) -> str: + """Getter for the juju model value, truncated to the first eight letters.""" + return self._model_uuid[:8] + + @property + def application(self) -> str: + """Getter for the juju application value.""" + return self._application + + @property + def charm_name(self) -> Optional[str]: + """Getter for the juju charm name value.""" + return self._charm_name + + @property + def unit(self) -> Optional[str]: + """Getter for the juju unit value.""" + return self._unit diff --git a/ceph-mon/lib/charms/operator_libs_linux/v0/apt.py b/ceph-mon/lib/charms/operator_libs_linux/v0/apt.py new file mode 100644 index 00000000..2b5c8f2e --- /dev/null +++ b/ceph-mon/lib/charms/operator_libs_linux/v0/apt.py @@ -0,0 +1,1329 @@ +# Copyright 2021 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Abstractions for the system's Debian/Ubuntu package information and repositories. + +This module contains abstractions and wrappers around Debian/Ubuntu-style repositories and +packages, in order to easily provide an idiomatic and Pythonic mechanism for adding packages and/or +repositories to systems for use in machine charms. + +A sane default configuration is attainable through nothing more than instantiation of the +appropriate classes. `DebianPackage` objects provide information about the architecture, version, +name, and status of a package. + +`DebianPackage` will try to look up a package either from `dpkg -L` or from `apt-cache` when +provided with a string indicating the package name. If it cannot be located, `PackageNotFoundError` +will be returned, as `apt` and `dpkg` otherwise return `100` for all errors, and a meaningful error +message if the package is not known is desirable. + +To install packages with convenience methods: + +```python +try: + # Run `apt-get update` + apt.update() + apt.add_package("zsh") + apt.add_package(["vim", "htop", "wget"]) +except PackageNotFoundError: + logger.error("a specified package not found in package cache or on system") +except PackageError as e: + logger.error("could not install package. Reason: %s", e.message) +```` + +To find details of a specific package: + +```python +try: + vim = apt.DebianPackage.from_system("vim") + + # To find from the apt cache only + # apt.DebianPackage.from_apt_cache("vim") + + # To find from installed packages only + # apt.DebianPackage.from_installed_package("vim") + + vim.ensure(PackageState.Latest) + logger.info("updated vim to version: %s", vim.fullversion) +except PackageNotFoundError: + logger.error("a specified package not found in package cache or on system") +except PackageError as e: + logger.error("could not install package. 
Reason: %s", e.message) +``` + + +`RepositoryMapping` will return a dict-like object containing enabled system repositories +and their properties (available groups, baseuri. gpg key). This class can add, disable, or +manipulate repositories. Items can be retrieved as `DebianRepository` objects. + +In order add a new repository with explicit details for fields, a new `DebianRepository` can +be added to `RepositoryMapping` + +`RepositoryMapping` provides an abstraction around the existing repositories on the system, +and can be accessed and iterated over like any `Mapping` object, to retrieve values by key, +iterate, or perform other operations. + +Keys are constructed as `{repo_type}-{}-{release}` in order to uniquely identify a repository. + +Repositories can be added with explicit values through a Python constructor. + +Example: + +```python +repositories = apt.RepositoryMapping() + +if "deb-example.com-focal" not in repositories: + repositories.add(DebianRepository(enabled=True, repotype="deb", + uri="https://example.com", release="focal", groups=["universe"])) +``` + +Alternatively, any valid `sources.list` line may be used to construct a new +`DebianRepository`. + +Example: + +```python +repositories = apt.RepositoryMapping() + +if "deb-us.archive.ubuntu.com-xenial" not in repositories: + line = "deb http://us.archive.ubuntu.com/ubuntu xenial main restricted" + repo = DebianRepository.from_repo_line(line) + repositories.add(repo) +``` +""" + +import fileinput +import glob +import logging +import os +import re +import subprocess +from collections.abc import Mapping +from enum import Enum +from subprocess import PIPE, CalledProcessError, check_call, check_output +from typing import Iterable, List, Optional, Tuple, Union +from urllib.parse import urlparse + +logger = logging.getLogger(__name__) + +# The unique Charmhub library identifier, never change it +LIBID = "7c3dbc9c2ad44a47bd6fcb25caa270e5" + +# Increment this major API version when introducing breaking changes +LIBAPI = 0 + +# Increment this PATCH version before using `charmcraft publish-lib` or reset +# to 0 if you are raising the major API version +LIBPATCH = 7 + + +VALID_SOURCE_TYPES = ("deb", "deb-src") +OPTIONS_MATCHER = re.compile(r"\[.*?\]") + + +class Error(Exception): + """Base class of most errors raised by this library.""" + + def __repr__(self): + """String representation of Error.""" + return "<{}.{} {}>".format(type(self).__module__, type(self).__name__, self.args) + + @property + def name(self): + """Return a string representation of the model plus class.""" + return "<{}.{}>".format(type(self).__module__, type(self).__name__) + + @property + def message(self): + """Return the message passed as an argument.""" + return self.args[0] + + +class PackageError(Error): + """Raised when there's an error installing or removing a package.""" + + +class PackageNotFoundError(Error): + """Raised when a requested package is not known to the system.""" + + +class PackageState(Enum): + """A class to represent possible package states.""" + + Present = "present" + Absent = "absent" + Latest = "latest" + Available = "available" + + +class DebianPackage: + """Represents a traditional Debian package and its utility functions. + + `DebianPackage` wraps information and functionality around a known package, whether installed + or available. The version, epoch, name, and architecture can be easily queried and compared + against other `DebianPackage` objects to determine the latest version or to install a specific + version. 
+ + The representation of this object as a string mimics the output from `dpkg` for familiarity. + + Installation and removal of packages are handled through the `state` property or `ensure` + method, with the following options: + + apt.PackageState.Absent + apt.PackageState.Available + apt.PackageState.Present + apt.PackageState.Latest + + When `DebianPackage` is initialized, the state of a given `DebianPackage` object will be set to + `Available`, `Present`, or `Latest`, with `Absent` implemented as a convenience for removal + (though it operates essentially the same as `Available`). + """ + + def __init__( + self, name: str, version: str, epoch: str, arch: str, state: PackageState + ) -> None: + self._name = name + self._arch = arch + self._state = state + self._version = Version(version, epoch) + + def __eq__(self, other) -> bool: + """Equality for comparison. + + Args: + other: a `DebianPackage` object for comparison + + Returns: + A boolean reflecting equality + """ + return isinstance(other, self.__class__) and ( + self._name, + self._version.number, + ) == (other._name, other._version.number) + + def __hash__(self): + """A basic hash so this class can be used in Mappings and dicts.""" + return hash((self._name, self._version.number)) + + def __repr__(self): + """A representation of the package.""" + return "<{}.{}: {}>".format(self.__module__, self.__class__.__name__, self.__dict__) + + def __str__(self): + """A human-readable representation of the package.""" + return "<{}: {}-{}.{} -- {}>".format( + self.__class__.__name__, + self._name, + self._version, + self._arch, + str(self._state), + ) + + @staticmethod + def _apt( + command: str, + package_names: Union[str, List], + optargs: Optional[List[str]] = None, + ) -> None: + """Wrap package management commands for Debian/Ubuntu systems. + + Args: + command: the command given to `apt-get` + package_names: a package name or list of package names to operate on + optargs: an (Optional) list of additional arguments + + Raises: + PackageError if an error is encountered + """ + optargs = optargs if optargs is not None else [] + if isinstance(package_names, str): + package_names = [package_names] + _cmd = ["apt-get", "-y", *optargs, command, *package_names] + try: + check_call(_cmd, stderr=PIPE, stdout=PIPE) + except CalledProcessError as e: + raise PackageError( + "Could not {} package(s) [{}]: {}".format(command, [*package_names], e.output) + ) from None + + def _add(self) -> None: + """Add a package to the system.""" + self._apt( + "install", + "{}={}".format(self.name, self.version), + optargs=["--option=Dpkg::Options::=--force-confold"], + ) + + def _remove(self) -> None: + """Removes a package from the system. Implementation-specific.""" + return self._apt("remove", "{}={}".format(self.name, self.version)) + + @property + def name(self) -> str: + """Returns the name of the package.""" + return self._name + + def ensure(self, state: PackageState): + """Ensures that a package is in a given state. 
+ + Args: + state: a `PackageState` to reconcile the package to + + Raises: + PackageError from the underlying call to apt + """ + if self._state is not state: + if state not in (PackageState.Present, PackageState.Latest): + self._remove() + else: + self._add() + self._state = state + + @property + def present(self) -> bool: + """Returns whether or not a package is present.""" + return self._state in (PackageState.Present, PackageState.Latest) + + @property + def latest(self) -> bool: + """Returns whether the package is the most recent version.""" + return self._state is PackageState.Latest + + @property + def state(self) -> PackageState: + """Returns the current package state.""" + return self._state + + @state.setter + def state(self, state: PackageState) -> None: + """Sets the package state to a given value. + + Args: + state: a `PackageState` to reconcile the package to + + Raises: + PackageError from the underlying call to apt + """ + if state in (PackageState.Latest, PackageState.Present): + self._add() + else: + self._remove() + self._state = state + + @property + def version(self) -> "Version": + """Returns the version for a package.""" + return self._version + + @property + def epoch(self) -> str: + """Returns the epoch for a package. May be unset.""" + return self._version.epoch + + @property + def arch(self) -> str: + """Returns the architecture for a package.""" + return self._arch + + @property + def fullversion(self) -> str: + """Returns the version and architecture for a package.""" + return "{}.{}".format(self._version, self._arch) + + @staticmethod + def _get_epoch_from_version(version: str) -> Tuple[str, str]: + """Pull the epoch, if any, out of a version string.""" + epoch_matcher = re.compile(r"^((?P<epoch>\d+):)?(?P<version>.*)") + matches = epoch_matcher.search(version).groupdict() + return matches.get("epoch", ""), matches.get("version") + + @classmethod + def from_system( + cls, package: str, version: Optional[str] = "", arch: Optional[str] = "" + ) -> "DebianPackage": + """Locates a package, either on the system or known to apt, and serializes the information. + + Args: + package: a string representing the package + version: an optional string if a specific version is requested + arch: an optional architecture, defaulting to `dpkg --print-architecture`. If an + architecture is not specified, this will be used for selection. + + """ + try: + return DebianPackage.from_installed_package(package, version, arch) + except PackageNotFoundError: + logger.debug( + "package '%s' is not currently installed or has the wrong architecture.", package + ) + + # Ok, try `apt-cache ...` + try: + return DebianPackage.from_apt_cache(package, version, arch) + except (PackageNotFoundError, PackageError): + # If we get here, it's not known to the system. + # This seems unnecessary, but virtually all `apt` commands have a return code of `100`, + # and providing meaningful error messages without this is ugly. + raise PackageNotFoundError( + "Package '{}{}' could not be found on the system or in the apt cache!".format( + package, ".{}".format(arch) if arch else "" + ) + ) from None + + @classmethod + def from_installed_package( + cls, package: str, version: Optional[str] = "", arch: Optional[str] = "" + ) -> "DebianPackage": + """Check whether the package is already installed and return an instance. + + Args: + package: a string representing the package + version: an optional string if a specific version is requested + arch: an optional architecture, defaulting to `dpkg --print-architecture`. 
+ If an architecture is not specified, this will be used for selection. + """ + system_arch = check_output( + ["dpkg", "--print-architecture"], universal_newlines=True + ).strip() + arch = arch if arch else system_arch + + # Regexps are a really terrible way to do this. Thanks dpkg + output = "" + try: + output = check_output(["dpkg", "-l", package], stderr=PIPE, universal_newlines=True) + except CalledProcessError: + raise PackageNotFoundError("Package is not installed: {}".format(package)) from None + + # Pop off the header output from `dpkg -l` because there's no flag to + # omit it + lines = str(output).splitlines()[5:] + + dpkg_matcher = re.compile( + r""" + ^(?P<package_status>\w+?)\s+ + (?P<package_name>.*?)(?P<throwaway>:\w+?)?\s+ + (?P<version>.*?)\s+ + (?P<arch>\w+?)\s+ + (?P<description>.*) + """, + re.VERBOSE, + ) + + for line in lines: + try: + matches = dpkg_matcher.search(line).groupdict() + package_status = matches["package_status"] + + if not package_status.endswith("i"): + logger.debug( + "package '%s' in dpkg output but not installed, status: '%s'", + package, + package_status, + ) + break + + epoch, split_version = DebianPackage._get_epoch_from_version(matches["version"]) + pkg = DebianPackage( + matches["package_name"], + split_version, + epoch, + matches["arch"], + PackageState.Present, + ) + if (pkg.arch == "all" or pkg.arch == arch) and ( + version == "" or str(pkg.version) == version + ): + return pkg + except AttributeError: + logger.warning("dpkg matcher could not parse line: %s", line) + + # If we didn't find it, fail through + raise PackageNotFoundError("Package {}.{} is not installed!".format(package, arch)) + + @classmethod + def from_apt_cache( + cls, package: str, version: Optional[str] = "", arch: Optional[str] = "" + ) -> "DebianPackage": + """Check whether the package is already installed and return an instance. + + Args: + package: a string representing the package + version: an optional string if a specific version is requested + arch: an optional architecture, defaulting to `dpkg --print-architecture`. + If an architecture is not specified, this will be used for selection. + """ + system_arch = check_output( + ["dpkg", "--print-architecture"], universal_newlines=True + ).strip() + arch = arch if arch else system_arch + + # Regexps are a really terrible way to do this. Thanks dpkg + keys = ("Package", "Architecture", "Version") + + try: + output = check_output( + ["apt-cache", "show", package], stderr=PIPE, universal_newlines=True + ) + except CalledProcessError as e: + raise PackageError( + "Could not list packages in apt-cache: {}".format(e.output) + ) from None + + pkg_groups = output.strip().split("\n\n") + + for pkg_raw in pkg_groups: + lines = str(pkg_raw).splitlines() + vals = {} + for line in lines: + if line.startswith(keys): + items = line.split(":", 1) + vals[items[0]] = items[1].strip() + else: + continue + + epoch, split_version = DebianPackage._get_epoch_from_version(vals["Version"]) + pkg = DebianPackage( + vals["Package"], + split_version, + epoch, + vals["Architecture"], + PackageState.Available, + ) + + if (pkg.arch == "all" or pkg.arch == arch) and ( + version == "" or str(pkg.version) == version + ): + return pkg + + # If we didn't find it, fail through + raise PackageNotFoundError("Package {}.{} is not in the apt cache!".format(package, arch)) + + +class Version: + """An abstraction around package versions. 
+ + This seems like it should be strictly unnecessary, except that `apt_pkg` is not usable inside a + venv, and wedging version comparisons into `DebianPackage` would overcomplicate it. + + This class implements the algorithm found here: + https://www.debian.org/doc/debian-policy/ch-controlfields.html#version + """ + + def __init__(self, version: str, epoch: str): + self._version = version + self._epoch = epoch or "" + + def __repr__(self): + """A representation of the package.""" + return "<{}.{}: {}>".format(self.__module__, self.__class__.__name__, self.__dict__) + + def __str__(self): + """A human-readable representation of the package.""" + return "{}{}".format("{}:".format(self._epoch) if self._epoch else "", self._version) + + @property + def epoch(self): + """Returns the epoch for a package. May be empty.""" + return self._epoch + + @property + def number(self) -> str: + """Returns the version number for a package.""" + return self._version + + def _get_parts(self, version: str) -> Tuple[str, str]: + """Separate the version into component upstream and Debian pieces.""" + try: + version.rindex("-") + except ValueError: + # No hyphens means no Debian version + return version, "0" + + upstream, debian = version.rsplit("-", 1) + return upstream, debian + + def _listify(self, revision: str) -> List[str]: + """Split a revision string into a list. + + The list alternates between strings and numbers, + padded on either end to always be "str, int, str, int..." and + always be of even length. This allows us to trivially implement the + comparison algorithm described. + """ + result = [] + while revision: + rev_1, remains = self._get_alphas(revision) + rev_2, remains = self._get_digits(remains) + result.extend([rev_1, rev_2]) + revision = remains + return result + + def _get_alphas(self, revision: str) -> Tuple[str, str]: + """Return a tuple of the first non-digit characters of a revision.""" + # get the index of the first digit + for i, char in enumerate(revision): + if char.isdigit(): + if i == 0: + return "", revision + return revision[0:i], revision[i:] + # string is entirely alphas + return revision, "" + + def _get_digits(self, revision: str) -> Tuple[int, str]: + """Return a tuple of the first integer characters of a revision.""" + # If the string is empty, return (0,'') + if not revision: + return 0, "" + # get the index of the first non-digit + for i, char in enumerate(revision): + if not char.isdigit(): + if i == 0: + return 0, revision + return int(revision[0:i]), revision[i:] + # string is entirely digits + return int(revision), "" + + def _dstringcmp(self, a, b): # noqa: C901 + """Debian package version string section lexical sort algorithm. + + The lexical comparison is a comparison of ASCII values modified so + that all the letters sort earlier than all the non-letters and so that + a tilde sorts before anything, even the end of a part. 
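+
+        For example, per the Debian policy rules implemented here, these
+        version string sections sort in ascending order::
+
+            "~~" < "~~a" < "~" < "" < "a"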
+ """ + if a == b: + return 0 + try: + for i, char in enumerate(a): + if char == b[i]: + continue + # "a tilde sorts before anything, even the end of a part" + # (emptyness) + if char == "~": + return -1 + if b[i] == "~": + return 1 + # "all the letters sort earlier than all the non-letters" + if char.isalpha() and not b[i].isalpha(): + return -1 + if not char.isalpha() and b[i].isalpha(): + return 1 + # otherwise lexical sort + if ord(char) > ord(b[i]): + return 1 + if ord(char) < ord(b[i]): + return -1 + except IndexError: + # a is longer than b but otherwise equal, greater unless there are tildes + if char == "~": + return -1 + return 1 + # if we get here, a is shorter than b but otherwise equal, so check for tildes... + if b[len(a)] == "~": + return 1 + return -1 + + def _compare_revision_strings(self, first: str, second: str): # noqa: C901 + """Compare two debian revision strings.""" + if first == second: + return 0 + + # listify pads results so that we will always be comparing ints to ints + # and strings to strings (at least until we fall off the end of a list) + first_list = self._listify(first) + second_list = self._listify(second) + if first_list == second_list: + return 0 + try: + for i, item in enumerate(first_list): + # explicitly raise IndexError if we've fallen off the edge of list2 + if i >= len(second_list): + raise IndexError + # if the items are equal, next + if item == second_list[i]: + continue + # numeric comparison + if isinstance(item, int): + if item > second_list[i]: + return 1 + if item < second_list[i]: + return -1 + else: + # string comparison + return self._dstringcmp(item, second_list[i]) + except IndexError: + # rev1 is longer than rev2 but otherwise equal, hence greater + # ...except for goddamn tildes + if first_list[len(second_list)][0][0] == "~": + return 1 + return 1 + # rev1 is shorter than rev2 but otherwise equal, hence lesser + # ...except for goddamn tildes + if second_list[len(first_list)][0][0] == "~": + return -1 + return -1 + + def _compare_version(self, other) -> int: + if (self.number, self.epoch) == (other.number, other.epoch): + return 0 + + if self.epoch < other.epoch: + return -1 + if self.epoch > other.epoch: + return 1 + + # If none of these are true, follow the algorithm + upstream_version, debian_version = self._get_parts(self.number) + other_upstream_version, other_debian_version = self._get_parts(other.number) + + upstream_cmp = self._compare_revision_strings(upstream_version, other_upstream_version) + if upstream_cmp != 0: + return upstream_cmp + + debian_cmp = self._compare_revision_strings(debian_version, other_debian_version) + if debian_cmp != 0: + return debian_cmp + + return 0 + + def __lt__(self, other) -> bool: + """Less than magic method impl.""" + return self._compare_version(other) < 0 + + def __eq__(self, other) -> bool: + """Equality magic method impl.""" + return self._compare_version(other) == 0 + + def __gt__(self, other) -> bool: + """Greater than magic method impl.""" + return self._compare_version(other) > 0 + + def __le__(self, other) -> bool: + """Less than or equal to magic method impl.""" + return self.__eq__(other) or self.__lt__(other) + + def __ge__(self, other) -> bool: + """Greater than or equal to magic method impl.""" + return self.__gt__(other) or self.__eq__(other) + + def __ne__(self, other) -> bool: + """Not equal to magic method impl.""" + return not self.__eq__(other) + + +def add_package( + package_names: Union[str, List[str]], + version: Optional[str] = "", + arch: Optional[str] = "", + 
update_cache: Optional[bool] = False, +) -> Union[DebianPackage, List[DebianPackage]]: + """Add a package or list of packages to the system. + + Args: + name: the name(s) of the package(s) + version: an (Optional) version as a string. Defaults to the latest known + arch: an optional architecture for the package + update_cache: whether or not to run `apt-get update` prior to operating + + Raises: + PackageNotFoundError if the package is not in the cache. + """ + cache_refreshed = False + if update_cache: + update() + cache_refreshed = True + + packages = {"success": [], "retry": [], "failed": []} + + package_names = [package_names] if type(package_names) is str else package_names + if not package_names: + raise TypeError("Expected at least one package name to add, received zero!") + + if len(package_names) != 1 and version: + raise TypeError( + "Explicit version should not be set if more than one package is being added!" + ) + + for p in package_names: + pkg, success = _add(p, version, arch) + if success: + packages["success"].append(pkg) + else: + logger.warning("failed to locate and install/update '%s'", pkg) + packages["retry"].append(p) + + if packages["retry"] and not cache_refreshed: + logger.info("updating the apt-cache and retrying installation of failed packages.") + update() + + for p in packages["retry"]: + pkg, success = _add(p, version, arch) + if success: + packages["success"].append(pkg) + else: + packages["failed"].append(p) + + if packages["failed"]: + raise PackageError("Failed to install packages: {}".format(", ".join(packages["failed"]))) + + return packages["success"] if len(packages["success"]) > 1 else packages["success"][0] + + +def _add( + name: str, + version: Optional[str] = "", + arch: Optional[str] = "", +) -> Tuple[Union[DebianPackage, str], bool]: + """Adds a package. + + Args: + name: the name(s) of the package(s) + version: an (Optional) version as a string. Defaults to the latest known + arch: an optional architecture for the package + + Returns: a tuple of `DebianPackage` if found, or a :str: if it is not, and + a boolean indicating success + """ + try: + pkg = DebianPackage.from_system(name, version, arch) + pkg.ensure(state=PackageState.Present) + return pkg, True + except PackageNotFoundError: + return name, False + + +def remove_package( + package_names: Union[str, List[str]] +) -> Union[DebianPackage, List[DebianPackage]]: + """Removes a package from the system. + + Args: + package_names: the name of a package + + Raises: + PackageNotFoundError if the package is not found. 
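+
+ A usage sketch (the package names are hypothetical):
+
+ removed = remove_package("vim")
+ removed = remove_package(["vim", "curl"])  # returns a list of DebianPackage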
+ """ + packages = [] + + package_names = [package_names] if type(package_names) is str else package_names + if not package_names: + raise TypeError("Expected at least one package name to add, received zero!") + + for p in package_names: + try: + pkg = DebianPackage.from_installed_package(p) + pkg.ensure(state=PackageState.Absent) + packages.append(pkg) + except PackageNotFoundError: + logger.info("package '%s' was requested for removal, but it was not installed.", p) + + # the list of packages will be empty when no package is removed + logger.debug("packages: '%s'", packages) + return packages[0] if len(packages) == 1 else packages + + +def update() -> None: + """Updates the apt cache via `apt-get update`.""" + check_call(["apt-get", "update"], stderr=PIPE, stdout=PIPE) + + +class InvalidSourceError(Error): + """Exceptions for invalid source entries.""" + + +class GPGKeyError(Error): + """Exceptions for GPG keys.""" + + +class DebianRepository: + """An abstraction to represent a repository.""" + + def __init__( + self, + enabled: bool, + repotype: str, + uri: str, + release: str, + groups: List[str], + filename: Optional[str] = "", + gpg_key_filename: Optional[str] = "", + options: Optional[dict] = None, + ): + self._enabled = enabled + self._repotype = repotype + self._uri = uri + self._release = release + self._groups = groups + self._filename = filename + self._gpg_key_filename = gpg_key_filename + self._options = options + + @property + def enabled(self): + """Return whether or not the repository is enabled.""" + return self._enabled + + @property + def repotype(self): + """Return whether it is binary or source.""" + return self._repotype + + @property + def uri(self): + """Return the URI.""" + return self._uri + + @property + def release(self): + """Return which Debian/Ubuntu releases it is valid for.""" + return self._release + + @property + def groups(self): + """Return the enabled package groups.""" + return self._groups + + @property + def filename(self): + """Returns the filename for a repository.""" + return self._filename + + @filename.setter + def filename(self, fname: str) -> None: + """Sets the filename used when a repo is written back to diskself. + + Args: + fname: a filename to write the repository information to. + """ + if not fname.endswith(".list"): + raise InvalidSourceError("apt source filenames should end in .list!") + + self._filename = fname + + @property + def gpg_key(self): + """Returns the path to the GPG key for this repository.""" + return self._gpg_key_filename + + @property + def options(self): + """Returns any additional repo options which are set.""" + return self._options + + def make_options_string(self) -> str: + """Generate the complete options string for a a repository. + + Combining `gpg_key`, if set, and the rest of the options to find + a complex repo string. 
+ """ + options = self._options if self._options else {} + if self._gpg_key_filename: + options["signed-by"] = self._gpg_key_filename + + return ( + "[{}] ".format(" ".join(["{}={}".format(k, v) for k, v in options.items()])) + if options + else "" + ) + + @staticmethod + def prefix_from_uri(uri: str) -> str: + """Get a repo list prefix from the uri, depending on whether a path is set.""" + uridetails = urlparse(uri) + path = ( + uridetails.path.lstrip("/").replace("/", "-") if uridetails.path else uridetails.netloc + ) + return "/etc/apt/sources.list.d/{}".format(path) + + @staticmethod + def from_repo_line(repo_line: str, write_file: Optional[bool] = True) -> "DebianRepository": + """Instantiate a new `DebianRepository` a `sources.list` entry line. + + Args: + repo_line: a string representing a repository entry + write_file: boolean to enable writing the new repo to disk + """ + repo = RepositoryMapping._parse(repo_line, "UserInput") + fname = "{}-{}.list".format( + DebianRepository.prefix_from_uri(repo.uri), repo.release.replace("/", "-") + ) + repo.filename = fname + + options = repo.options if repo.options else {} + if repo.gpg_key: + options["signed-by"] = repo.gpg_key + + # For Python 3.5 it's required to use sorted in the options dict in order to not have + # different results in the order of the options between executions. + options_str = ( + "[{}] ".format(" ".join(["{}={}".format(k, v) for k, v in sorted(options.items())])) + if options + else "" + ) + + if write_file: + with open(fname, "wb") as f: + f.write( + ( + "{}".format("#" if not repo.enabled else "") + + "{} {}{} ".format(repo.repotype, options_str, repo.uri) + + "{} {}\n".format(repo.release, " ".join(repo.groups)) + ).encode("utf-8") + ) + + return repo + + def disable(self) -> None: + """Remove this repository from consideration. + + Disable it instead of removing from the repository file. + """ + searcher = "{} {}{} {}".format( + self.repotype, self.make_options_string(), self.uri, self.release + ) + for line in fileinput.input(self._filename, inplace=True): + if re.match(r"^{}\s".format(re.escape(searcher)), line): + print("# {}".format(line), end="") + else: + print(line, end="") + + def import_key(self, key: str) -> None: + """Import an ASCII Armor key. + + A Radix64 format keyid is also supported for backwards + compatibility. In this case Ubuntu keyserver will be + queried for a key via HTTPS by its keyid. This method + is less preferrable because https proxy servers may + require traffic decryption which is equivalent to a + man-in-the-middle attack (a proxy server impersonates + keyserver TLS certificates and has to be explicitly + trusted by the system). + + Args: + key: A GPG key in ASCII armor format, + including BEGIN and END markers or a keyid. + + Raises: + GPGKeyError if the key could not be imported + """ + key = key.strip() + if "-" in key or "\n" in key: + # Send everything not obviously a keyid to GPG to import, as + # we trust its validation better than our own. eg. handling + # comments before the key. 
+ logger.debug("PGP key found (looks like ASCII Armor format)") + if ( + "-----BEGIN PGP PUBLIC KEY BLOCK-----" in key + and "-----END PGP PUBLIC KEY BLOCK-----" in key + ): + logger.debug("Writing provided PGP key in the binary format") + key_bytes = key.encode("utf-8") + key_name = self._get_keyid_by_gpg_key(key_bytes) + key_gpg = self._dearmor_gpg_key(key_bytes) + self._gpg_key_filename = "/etc/apt/trusted.gpg.d/{}.gpg".format(key_name) + self._write_apt_gpg_keyfile(key_name=self._gpg_key_filename, key_material=key_gpg) + else: + raise GPGKeyError("ASCII armor markers missing from GPG key") + else: + logger.warning( + "PGP key found (looks like Radix64 format). " + "SECURELY importing PGP key from keyserver; " + "full key not provided." + ) + # as of bionic add-apt-repository uses curl with an HTTPS keyserver URL + # to retrieve GPG keys. `apt-key adv` command is deprecated as is + # apt-key in general as noted in its manpage. See lp:1433761 for more + # history. Instead, /etc/apt/trusted.gpg.d is used directly to drop + # gpg + key_asc = self._get_key_by_keyid(key) + # write the key in GPG format so that apt-key list shows it + key_gpg = self._dearmor_gpg_key(key_asc.encode("utf-8")) + self._gpg_key_filename = "/etc/apt/trusted.gpg.d/{}.gpg".format(key) + self._write_apt_gpg_keyfile(key_name=key, key_material=key_gpg) + + @staticmethod + def _get_keyid_by_gpg_key(key_material: bytes) -> str: + """Get a GPG key fingerprint by GPG key material. + + Gets a GPG key fingerprint (40-digit, 160-bit) by the ASCII armor-encoded + or binary GPG key material. Can be used, for example, to generate file + names for keys passed via charm options. + """ + # Use the same gpg command for both Xenial and Bionic + cmd = ["gpg", "--with-colons", "--with-fingerprint"] + ps = subprocess.run( + cmd, + stdout=PIPE, + stderr=PIPE, + input=key_material, + ) + out, err = ps.stdout.decode(), ps.stderr.decode() + if "gpg: no valid OpenPGP data found." in err: + raise GPGKeyError("Invalid GPG key material provided") + # from gnupg2 docs: fpr :: Fingerprint (fingerprint is in field 10) + return re.search(r"^fpr:{9}([0-9A-F]{40}):$", out, re.MULTILINE).group(1) + + @staticmethod + def _get_key_by_keyid(keyid: str) -> str: + """Get a key via HTTPS from the Ubuntu keyserver. + + Different key ID formats are supported by SKS keyservers (the longer ones + are more secure, see "dead beef attack" and https://evil32.com/). Since + HTTPS is used, if SSLBump-like HTTPS proxies are in place, they will + impersonate keyserver.ubuntu.com and generate a certificate with + keyserver.ubuntu.com in the CN field or in SubjAltName fields of a + certificate. If such proxy behavior is expected it is necessary to add the + CA certificate chain containing the intermediate CA of the SSLBump proxy to + every machine that this code runs on via ca-certs cloud-init directive (via + cloudinit-userdata model-config) or via other means (such as through a + custom charm option). Also note that DNS resolution for the hostname in a + URL is done at a proxy server - not at the client side. 
+ 8-digit (32 bit) key ID
+ https://keyserver.ubuntu.com/pks/lookup?search=0x4652B4E6
+ 16-digit (64 bit) key ID
+ https://keyserver.ubuntu.com/pks/lookup?search=0x6E85A86E4652B4E6
+ 40-digit key ID:
+ https://keyserver.ubuntu.com/pks/lookup?search=0x35F77D63B5CEC106C577ED856E85A86E4652B4E6
+
+ Args:
+ keyid: An 8, 16 or 40 hex digit keyid to find a key for
+
+ Returns:
+ A string containing key material for the specified GPG key id
+
+ Raises:
+ subprocess.CalledProcessError
+ """
+ # options=mr - machine-readable output (disables html wrappers)
+ keyserver_url = (
+ "https://keyserver.ubuntu.com" "/pks/lookup?op=get&options=mr&exact=on&search=0x{}"
+ )
+ curl_cmd = ["curl", keyserver_url.format(keyid)]
+ # use proxy server settings in order to retrieve the key
+ return check_output(curl_cmd).decode()
+
+ @staticmethod
+ def _dearmor_gpg_key(key_asc: bytes) -> bytes:
+ """Converts a GPG key in the ASCII armor format to the binary format.
+
+ Args:
+ key_asc: A GPG key in ASCII armor format.
+
+ Returns:
+ A GPG key in binary format as a string
+
+ Raises:
+ GPGKeyError
+ """
+ ps = subprocess.run(["gpg", "--dearmor"], stdout=PIPE, stderr=PIPE, input=key_asc)
+ out, err = ps.stdout, ps.stderr.decode()
+ if "gpg: no valid OpenPGP data found." in err:
+ raise GPGKeyError(
+ "Invalid GPG key material. Check your network setup"
+ " (MTU, routing, DNS) and/or proxy server settings"
+ " as well as destination keyserver status."
+ )
+ else:
+ return out
+
+ @staticmethod
+ def _write_apt_gpg_keyfile(key_name: str, key_material: bytes) -> None:
+ """Writes GPG key material into a file at a provided path.
+
+ Args:
+ key_name: A key name to use for a key file (could be a fingerprint)
+ key_material: A GPG key material (binary)
+ """
+ with open(key_name, "wb") as keyf:
+ keyf.write(key_material)
+
+
+class RepositoryMapping(Mapping):
+ """A representation of known repositories.
+
+ Instantiation of `RepositoryMapping` will iterate through the
+ filesystem, parse out repository files in `/etc/apt/...`, and create
+ `DebianRepository` objects in this list.
+
+ Typical usage:
+
+ repositories = apt.RepositoryMapping()
+ repositories.add(DebianRepository(
+ enabled=True, repotype="deb", uri="https://example.com", release="focal",
+ groups=["universe"]
+ ))
+ """
+
+ def __init__(self):
+ self._repository_map = {}
+ # Repositories that we're adding -- used to implement mode param
+ self.default_file = "/etc/apt/sources.list"
+
+ # read sources.list if it exists
+ if os.path.isfile(self.default_file):
+ self.load(self.default_file)
+
+ # read sources.list.d
+ for file in glob.iglob("/etc/apt/sources.list.d/*.list"):
+ self.load(file)
+
+ def __contains__(self, key: str) -> bool:
+ """Magic method for checking presence of repo in mapping."""
+ return key in self._repository_map
+
+ def __len__(self) -> int:
+ """Return number of repositories in map."""
+ return len(self._repository_map)
+
+ def __iter__(self) -> Iterable[DebianRepository]:
+ """Iterator magic method for RepositoryMapping."""
+ return iter(self._repository_map.values())
+
+ def __getitem__(self, repository_uri: str) -> DebianRepository:
+ """Return a given `DebianRepository`."""
+ return self._repository_map[repository_uri]
+
+ def __setitem__(self, repository_uri: str, repository: DebianRepository) -> None:
+ """Add a `DebianRepository` to the cache."""
+ self._repository_map[repository_uri] = repository
+
+ def load(self, filename: str):
+ """Load a repository source file into the cache.
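+
+ As a sketch (hypothetical entry), a line such as
+ "deb http://archive.ubuntu.com/ubuntu focal main" is parsed and stored
+ under the key "deb-http://archive.ubuntu.com/ubuntu-focal".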
+
+ Args:
+ filename: the path to the repository file
+ """
+ parsed = []
+ skipped = []
+ with open(filename, "r") as f:
+ for n, line in enumerate(f):
+ try:
+ repo = self._parse(line, filename)
+ except InvalidSourceError:
+ skipped.append(n)
+ else:
+ repo_identifier = "{}-{}-{}".format(repo.repotype, repo.uri, repo.release)
+ self._repository_map[repo_identifier] = repo
+ parsed.append(n)
+ logger.debug("parsed repo: '%s'", repo_identifier)
+
+ if skipped:
+ skip_list = ", ".join(str(s) for s in skipped)
+ logger.debug("skipped the following lines in file '%s': %s", filename, skip_list)
+
+ if parsed:
+ logger.info("parsed %d apt package repositories", len(parsed))
+ else:
+ raise InvalidSourceError("all repository lines in '{}' were invalid!".format(filename))
+
+ @staticmethod
+ def _parse(line: str, filename: str) -> DebianRepository:
+ """Parse a line in a sources.list file.
+
+ Args:
+ line: a single line from `load` to parse
+ filename: the filename being read
+
+ Raises:
+ InvalidSourceError if the source type is unknown
+ """
+ enabled = True
+ repotype = uri = release = gpg_key = ""
+ options = {}
+ groups = []
+
+ line = line.strip()
+ if line.startswith("#"):
+ enabled = False
+ line = line[1:]
+
+ # Check for "#" in the line; treat the part after it as a comment, then strip it off.
+ i = line.find("#")
+ if i > 0:
+ line = line[:i]
+
+ # Split a source into substrings to initialize a new repo.
+ source = line.strip()
+ if source:
+ # Match any repo options, and get a dict representation.
+ for v in re.findall(OPTIONS_MATCHER, source):
+ opts = dict(o.split("=") for o in v.strip("[]").split())
+ # Extract the 'signed-by' option for the gpg_key
+ gpg_key = opts.pop("signed-by", "")
+ options = opts
+
+ # Remove any options from the source string and split the string into chunks
+ source = re.sub(OPTIONS_MATCHER, "", source)
+ chunks = source.split()
+
+ # Check we've got a valid list of chunks
+ if len(chunks) < 3 or chunks[0] not in VALID_SOURCE_TYPES:
+ raise InvalidSourceError("An invalid sources line was found in {}!".format(filename))
+
+ repotype = chunks[0]
+ uri = chunks[1]
+ release = chunks[2]
+ groups = chunks[3:]
+
+ return DebianRepository(
+ enabled, repotype, uri, release, groups, filename, gpg_key, options
+ )
+ else:
+ raise InvalidSourceError("An invalid sources line was found in {}!".format(filename))
+
+ def add(self, repo: DebianRepository, default_filename: Optional[bool] = False) -> None:
+ """Add a new repository to the system.
+
+ Args:
+ repo: a `DebianRepository` object
+ default_filename: an (Optional) bool to request the default generated filename
+ """
+ new_filename = "{}-{}.list".format(
+ DebianRepository.prefix_from_uri(repo.uri), repo.release.replace("/", "-")
+ )
+
+ fname = repo.filename or new_filename
+
+ options = repo.options if repo.options else {}
+ if repo.gpg_key:
+ options["signed-by"] = repo.gpg_key
+
+ with open(fname, "wb") as f:
+ f.write(
+ (
+ "{}".format("#" if not repo.enabled else "")
+ + "{} {}{} ".format(repo.repotype, repo.make_options_string(), repo.uri)
+ + "{} {}\n".format(repo.release, " ".join(repo.groups))
+ ).encode("utf-8")
+ )
+
+ self._repository_map["{}-{}-{}".format(repo.repotype, repo.uri, repo.release)] = repo
+
+ def disable(self, repo: DebianRepository) -> None:
+ """Disable a repository by commenting it out in its file (it is not removed).
+ + Args: + repo: a `DebianRepository` to disable + """ + searcher = "{} {}{} {}".format( + repo.repotype, repo.make_options_string(), repo.uri, repo.release + ) + + for line in fileinput.input(repo.filename, inplace=True): + if re.match(r"^{}\s".format(re.escape(searcher)), line): + print("# {}".format(line), end="") + else: + print(line, end="") + + self._repository_map["{}-{}-{}".format(repo.repotype, repo.uri, repo.release)] = repo diff --git a/ceph-mon/lib/charms/operator_libs_linux/v1/systemd.py b/ceph-mon/lib/charms/operator_libs_linux/v1/systemd.py new file mode 100644 index 00000000..5be34c17 --- /dev/null +++ b/ceph-mon/lib/charms/operator_libs_linux/v1/systemd.py @@ -0,0 +1,219 @@ +# Copyright 2021 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Abstractions for stopping, starting and managing system services via systemd. + +This library assumes that your charm is running on a platform that uses systemd. E.g., +Centos 7 or later, Ubuntu Xenial (16.04) or later. + +For the most part, we transparently provide an interface to a commonly used selection of +systemd commands, with a few shortcuts baked in. For example, service_pause and +service_resume with run the mask/unmask and enable/disable invocations. + +Example usage: +```python +from charms.operator_libs_linux.v0.systemd import service_running, service_reload + +# Start a service +if not service_running("mysql"): + success = service_start("mysql") + +# Attempt to reload a service, restarting if necessary +success = service_reload("nginx", restart_on_failure=True) +``` + +""" + +import logging +import subprocess + +__all__ = [ # Don't export `_systemctl`. (It's not the intended way of using this lib.) + "service_pause", + "service_reload", + "service_restart", + "service_resume", + "service_running", + "service_start", + "service_stop", + "daemon_reload", +] + +logger = logging.getLogger(__name__) + +# The unique Charmhub library identifier, never change it +LIBID = "045b0d179f6b4514a8bb9b48aee9ebaf" + +# Increment this major API version when introducing breaking changes +LIBAPI = 1 + +# Increment this PATCH version before using `charmcraft publish-lib` or reset +# to 0 if you are raising the major API version +LIBPATCH = 0 + + +class SystemdError(Exception): + pass + + +def _popen_kwargs(): + return dict( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + bufsize=1, + universal_newlines=True, + encoding="utf-8", + ) + + +def _systemctl( + sub_cmd: str, service_name: str = None, now: bool = None, quiet: bool = None +) -> bool: + """Control a system service. + + Args: + sub_cmd: the systemctl subcommand to issue + service_name: the name of the service to perform the action on + now: passes the --now flag to the shell invocation. + quiet: passes the --quiet flag to the shell invocation. 
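+
+ For example (hypothetical unit name), `_systemctl("enable", "nginx",
+ now=True)` runs the command `["systemctl", "enable", "nginx", "--now"]`.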
+ """ + cmd = ["systemctl", sub_cmd] + + if service_name is not None: + cmd.append(service_name) + if now is not None: + cmd.append("--now") + if quiet is not None: + cmd.append("--quiet") + if sub_cmd != "is-active": + logger.debug("Attempting to {} '{}' with command {}.".format(cmd, service_name, cmd)) + else: + logger.debug("Checking if '{}' is active".format(service_name)) + + proc = subprocess.Popen(cmd, **_popen_kwargs()) + last_line = "" + for line in iter(proc.stdout.readline, ""): + last_line = line + logger.debug(line) + + proc.wait() + + if sub_cmd == "is-active": + # If we are just checking whether a service is running, return True/False, rather + # than raising an error. + if proc.returncode < 1: + return True + if proc.returncode == 3: # Code returned when service is not active. + return False + + if proc.returncode < 1: + return True + + raise SystemdError( + "Could not {}{}: systemd output: {}".format( + sub_cmd, " {}".format(service_name) if service_name else "", last_line + ) + ) + + +def service_running(service_name: str) -> bool: + """Determine whether a system service is running. + + Args: + service_name: the name of the service + """ + return _systemctl("is-active", service_name, quiet=True) + + +def service_start(service_name: str) -> bool: + """Start a system service. + + Args: + service_name: the name of the service to stop + """ + return _systemctl("start", service_name) + + +def service_stop(service_name: str) -> bool: + """Stop a system service. + + Args: + service_name: the name of the service to stop + """ + return _systemctl("stop", service_name) + + +def service_restart(service_name: str) -> bool: + """Restart a system service. + + Args: + service_name: the name of the service to restart + """ + return _systemctl("restart", service_name) + + +def service_reload(service_name: str, restart_on_failure: bool = False) -> bool: + """Reload a system service, optionally falling back to restart if reload fails. + + Args: + service_name: the name of the service to reload + restart_on_failure: boolean indicating whether to fallback to a restart if the + reload fails. + """ + try: + return _systemctl("reload", service_name) + except SystemdError: + if restart_on_failure: + return _systemctl("restart", service_name) + else: + raise + + +def service_pause(service_name: str) -> bool: + """Pause a system service. + + Stop it, and prevent it from starting again at boot. + + Args: + service_name: the name of the service to pause + """ + _systemctl("disable", service_name, now=True) + _systemctl("mask", service_name) + + if not service_running(service_name): + return True + + raise SystemdError("Attempted to pause '{}', but it is still running.".format(service_name)) + + +def service_resume(service_name: str) -> bool: + """Resume a system service. + + Re-enable starting again at boot. Start the service. 
+ + Args: + service_name: the name of the service to resume + """ + _systemctl("unmask", service_name) + _systemctl("enable", service_name, now=True) + + if service_running(service_name): + return True + + raise SystemdError("Attempted to resume '{}', but it is not running.".format(service_name)) + + +def daemon_reload() -> bool: + """Reload systemd manager configuration.""" + return _systemctl("daemon-reload") diff --git a/ceph-mon/lib/charms/prometheus_k8s/v0/prometheus_scrape.py b/ceph-mon/lib/charms/prometheus_k8s/v0/prometheus_scrape.py new file mode 100644 index 00000000..6d8b9f8c --- /dev/null +++ b/ceph-mon/lib/charms/prometheus_k8s/v0/prometheus_scrape.py @@ -0,0 +1,2287 @@ +# Copyright 2021 Canonical Ltd. +# See LICENSE file for licensing details. +"""## Overview. + +This document explains how to integrate with the Prometheus charm +for the purpose of providing a metrics endpoint to Prometheus. It +also explains how alternative implementations of the Prometheus charms +may maintain the same interface and be backward compatible with all +currently integrated charms. Finally this document is the +authoritative reference on the structure of relation data that is +shared between Prometheus charms and any other charm that intends to +provide a scrape target for Prometheus. + +## Provider Library Usage + +This Prometheus charm interacts with its scrape targets using its +charm library. Charms seeking to expose metric endpoints for the +Prometheus charm, must do so using the `MetricsEndpointProvider` +object from this charm library. For the simplest use cases, using the +`MetricsEndpointProvider` object only requires instantiating it, +typically in the constructor of your charm (the one which exposes a +metrics endpoint). The `MetricsEndpointProvider` constructor requires +the name of the relation over which a scrape target (metrics endpoint) +is exposed to the Prometheus charm. This relation must use the +`prometheus_scrape` interface. By default address of the metrics +endpoint is set to the unit IP address, by each unit of the +`MetricsEndpointProvider` charm. These units set their address in +response to the `PebbleReady` event of each container in the unit, +since container restarts of Kubernetes charms can result in change of +IP addresses. The default name for the metrics endpoint relation is +`metrics-endpoint`. It is strongly recommended to use the same +relation name for consistency across charms and doing so obviates the +need for an additional constructor argument. The +`MetricsEndpointProvider` object may be instantiated as follows + + from charms.prometheus_k8s.v0.prometheus_scrape import MetricsEndpointProvider + + def __init__(self, *args): + super().__init__(*args) + ... + self.metrics_endpoint = MetricsEndpointProvider(self) + ... + +Note that the first argument (`self`) to `MetricsEndpointProvider` is +always a reference to the parent (scrape target) charm. + +An instantiated `MetricsEndpointProvider` object will ensure that each +unit of its parent charm, is a scrape target for the +`MetricsEndpointConsumer` (Prometheus) charm. By default +`MetricsEndpointProvider` assumes each unit of the consumer charm +exports its metrics at a path given by `/metrics` on port 80. These +defaults may be changed by providing the `MetricsEndpointProvider` +constructor an optional argument (`jobs`) that represents a +Prometheus scrape job specification using Python standard data +structures. 
This job specification is a subset of Prometheus' own +[scrape +configuration](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config) +format but represented using Python data structures. More than one job +may be provided using the `jobs` argument. Hence `jobs` accepts a list +of dictionaries where each dictionary represents one `` +object as described in the Prometheus documentation. The currently +supported configuration subset is: `job_name`, `metrics_path`, +`static_configs` + +Suppose it is required to change the port on which scraped metrics are +exposed to 8000. This may be done by providing the following data +structure as the value of `jobs`. + +``` +[ + { + "static_configs": [ + { + "targets": ["*:8000"] + } + ] + } +] +``` + +The wildcard ("*") host specification implies that the scrape targets +will automatically be set to the host addresses advertised by each +unit of the consumer charm. + +It is also possible to change the metrics path and scrape multiple +ports, for example + +``` +[ + { + "metrics_path": "/my-metrics-path", + "static_configs": [ + { + "targets": ["*:8000", "*:8081"], + } + ] + } +] +``` + +More complex scrape configurations are possible. For example + +``` +[ + { + "static_configs": [ + { + "targets": ["10.1.32.215:7000", "*:8000"], + "labels": { + "some-key": "some-value" + } + } + ] + } +] +``` + +This example scrapes the target "10.1.32.215" at port 7000 in addition +to scraping each unit at port 8000. There is however one difference +between wildcard targets (specified using "*") and fully qualified +targets (such as "10.1.32.215"). The Prometheus charm automatically +associates labels with metrics generated by each target. These labels +localise the source of metrics within the Juju topology by specifying +its "model name", "model UUID", "application name" and "unit +name". However unit name is associated only with wildcard targets but +not with fully qualified targets. + +Multiple jobs with different metrics paths and labels are allowed, but +each job must be given a unique name: + +``` +[ + { + "job_name": "my-first-job", + "metrics_path": "one-path", + "static_configs": [ + { + "targets": ["*:7000"], + "labels": { + "some-key": "some-value" + } + } + ] + }, + { + "job_name": "my-second-job", + "metrics_path": "another-path", + "static_configs": [ + { + "targets": ["*:8000"], + "labels": { + "some-other-key": "some-other-value" + } + } + ] + } +] +``` + +**Important:** `job_name` should be a fixed string (e.g. hardcoded literal). +For instance, if you include variable elements, like your `unit.name`, it may break +the continuity of the metrics time series gathered by Prometheus when the leader unit +changes (e.g. on upgrade or rescale). + +Additionally, it is also technically possible, but **strongly discouraged**, to +configure the following scrape-related settings, which behave as described by the +[Prometheus documentation](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config): + +- `static_configs` +- `scrape_interval` +- `scrape_timeout` +- `proxy_url` +- `relabel_configs` +- `metrics_relabel_configs` +- `sample_limit` +- `label_limit` +- `label_name_length_limit` +- `label_value_length_limit` + +The settings above are supported by the `prometheus_scrape` library only for the sake of +specialized facilities like the [Prometheus Scrape Config](https://charmhub.io/prometheus-scrape-config-k8s) +charm. 
Virtually no charms should use these settings, and charmers definitely **should not** +expose them to the Juju administrator via configuration options. + +## Consumer Library Usage + +The `MetricsEndpointConsumer` object may be used by Prometheus +charms to manage relations with their scrape targets. For this +purposes a Prometheus charm needs to do two things + +1. Instantiate the `MetricsEndpointConsumer` object by providing it a +reference to the parent (Prometheus) charm and optionally the name of +the relation that the Prometheus charm uses to interact with scrape +targets. This relation must confirm to the `prometheus_scrape` +interface and it is strongly recommended that this relation be named +`metrics-endpoint` which is its default value. + +For example a Prometheus charm may instantiate the +`MetricsEndpointConsumer` in its constructor as follows + + from charms.prometheus_k8s.v0.prometheus_scrape import MetricsEndpointConsumer + + def __init__(self, *args): + super().__init__(*args) + ... + self.metrics_consumer = MetricsEndpointConsumer(self) + ... + +2. A Prometheus charm also needs to respond to the +`TargetsChangedEvent` event of the `MetricsEndpointConsumer` by adding itself as +an observer for these events, as in + + self.framework.observe( + self.metrics_consumer.on.targets_changed, + self._on_scrape_targets_changed, + ) + +In responding to the `TargetsChangedEvent` event the Prometheus +charm must update the Prometheus configuration so that any new scrape +targets are added and/or old ones removed from the list of scraped +endpoints. For this purpose the `MetricsEndpointConsumer` object +exposes a `jobs()` method that returns a list of scrape jobs. Each +element of this list is the Prometheus scrape configuration for that +job. In order to update the Prometheus configuration, the Prometheus +charm needs to replace the current list of jobs with the list provided +by `jobs()` as follows + + def _on_scrape_targets_changed(self, event): + ... + scrape_jobs = self.metrics_consumer.jobs() + for job in scrape_jobs: + prometheus_scrape_config.append(job) + ... + +## Alerting Rules + +This charm library also supports gathering alerting rules from all +related `MetricsEndpointProvider` charms and enabling corresponding alerts within the +Prometheus charm. Alert rules are automatically gathered by `MetricsEndpointProvider` +charms when using this library, from a directory conventionally named +`prometheus_alert_rules`. This directory must reside at the top level +in the `src` folder of the consumer charm. Each file in this directory +is assumed to be in one of two formats: +- the official prometheus alert rule format, conforming to the +[Prometheus docs](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) +- a single rule format, which is a simplified subset of the official format, +comprising a single alert rule per file, using the same YAML fields. + +The file name must have the `.rule` extension. + +An example of the contents of such a file in the custom single rule +format is shown below. + +``` +alert: HighRequestLatency +expr: job:request_latency_seconds:mean5m{my_key=my_value} > 0.5 +for: 10m +labels: + severity: Medium + type: HighLatency +annotations: + summary: High request latency for {{ $labels.instance }}. +``` + +The `MetricsEndpointProvider` will read all available alert rules and +also inject "filtering labels" into the alert expressions. 
The +filtering labels ensure that alert rules are localised to the metrics +provider charm's Juju topology (application, model and its UUID). Such +a topology filter is essential to ensure that alert rules submitted by +one provider charm generates alerts only for that same charm. When +alert rules are embedded in a charm, and the charm is deployed as a +Juju application, the alert rules from that application have their +expressions automatically updated to filter for metrics coming from +the units of that application alone. This remove risk of spurious +evaluation, e.g., when you have multiple deployments of the same charm +monitored by the same Prometheus. + +Not all alerts one may want to specify can be embedded in a +charm. Some alert rules will be specific to a user's use case. This is +the case, for example, of alert rules that are based on business +constraints, like expecting a certain amount of requests to a specific +API every five minutes. Such alert rules can be specified via the +[COS Config Charm](https://charmhub.io/cos-configuration-k8s), +which allows importing alert rules and other settings like dashboards +from a Git repository. + +Gathering alert rules and generating rule files within the Prometheus +charm is easily done using the `alerts()` method of +`MetricsEndpointConsumer`. Alerts generated by Prometheus will +automatically include Juju topology labels in the alerts. These labels +indicate the source of the alert. The following labels are +automatically included with each alert + +- `juju_model` +- `juju_model_uuid` +- `juju_application` + +## Relation Data + +The Prometheus charm uses both application and unit relation data to +obtain information regarding its scrape jobs, alert rules and scrape +targets. This relation data is in JSON format and it closely resembles +the YAML structure of Prometheus [scrape configuration] +(https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config). + +Units of Metrics provider charms advertise their names and addresses +over unit relation data using the `prometheus_scrape_unit_name` and +`prometheus_scrape_unit_address` keys. While the `scrape_metadata`, +`scrape_jobs` and `alert_rules` keys in application relation data +of Metrics provider charms hold eponymous information. 
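+
+As an illustrative sketch (all field values below are hypothetical), the
+relation data described above might look as follows:
+
+```
+# unit relation data
+prometheus_scrape_unit_name: "my-charm/0"
+prometheus_scrape_unit_address: "10.1.32.215"
+
+# application relation data (JSON-encoded strings)
+scrape_metadata: '{"model": "my-model", "model_uuid": "...", "application": "my-charm"}'
+scrape_jobs: '[{"metrics_path": "/metrics", "static_configs": [{"targets": ["*:80"]}]}]'
+alert_rules: '{"groups": [...]}'
+```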
+ +""" # noqa: W505 + +import copy +import hashlib +import ipaddress +import json +import logging +import os +import platform +import re +import socket +import subprocess +import tempfile +from pathlib import Path +from typing import Dict, List, Optional, Tuple, Union + +import yaml +from charms.observability_libs.v0.juju_topology import JujuTopology +from ops.charm import CharmBase, RelationRole +from ops.framework import BoundEvent, EventBase, EventSource, Object, ObjectEvents + +# The unique Charmhub library identifier, never change it +LIBID = "bc84295fef5f4049878f07b131968ee2" + +# Increment this major API version when introducing breaking changes +LIBAPI = 0 + +# Increment this PATCH version before using `charmcraft publish-lib` or reset +# to 0 if you are raising the major API version +LIBPATCH = 22 + +logger = logging.getLogger(__name__) + + +ALLOWED_KEYS = { + "job_name", + "metrics_path", + "static_configs", + "scrape_interval", + "scrape_timeout", + "proxy_url", + "relabel_configs", + "metrics_relabel_configs", + "sample_limit", + "label_limit", + "label_name_length_limit", + "label_value_length_limit", + "scheme", + "basic_auth", + "tls_config", +} +DEFAULT_JOB = { + "metrics_path": "/metrics", + "static_configs": [{"targets": ["*:80"]}], +} + + +DEFAULT_RELATION_NAME = "metrics-endpoint" +RELATION_INTERFACE_NAME = "prometheus_scrape" + +DEFAULT_ALERT_RULES_RELATIVE_PATH = "./src/prometheus_alert_rules" + + +class RelationNotFoundError(Exception): + """Raised if there is no relation with the given name is found.""" + + def __init__(self, relation_name: str): + self.relation_name = relation_name + self.message = "No relation named '{}' found".format(relation_name) + + super().__init__(self.message) + + +class RelationInterfaceMismatchError(Exception): + """Raised if the relation with the given name has a different interface.""" + + def __init__( + self, + relation_name: str, + expected_relation_interface: str, + actual_relation_interface: str, + ): + self.relation_name = relation_name + self.expected_relation_interface = expected_relation_interface + self.actual_relation_interface = actual_relation_interface + self.message = ( + "The '{}' relation has '{}' as interface rather than the expected '{}'".format( + relation_name, actual_relation_interface, expected_relation_interface + ) + ) + + super().__init__(self.message) + + +class RelationRoleMismatchError(Exception): + """Raised if the relation with the given name has a different role.""" + + def __init__( + self, + relation_name: str, + expected_relation_role: RelationRole, + actual_relation_role: RelationRole, + ): + self.relation_name = relation_name + self.expected_relation_interface = expected_relation_role + self.actual_relation_role = actual_relation_role + self.message = "The '{}' relation has role '{}' rather than the expected '{}'".format( + relation_name, repr(actual_relation_role), repr(expected_relation_role) + ) + + super().__init__(self.message) + + +class InvalidAlertRuleEvent(EventBase): + """Event emitted when alert rule files are not parsable. + + Enables us to set a clear status on the provider. 
+ """ + + def __init__(self, handle, errors: str = "", valid: bool = False): + super().__init__(handle) + self.errors = errors + self.valid = valid + + def snapshot(self) -> Dict: + """Save alert rule information.""" + return { + "valid": self.valid, + "errors": self.errors, + } + + def restore(self, snapshot): + """Restore alert rule information.""" + self.valid = snapshot["valid"] + self.errors = snapshot["errors"] + + +class MetricsEndpointProviderEvents(ObjectEvents): + """Events raised by :class:`InvalidAlertRuleEvent`s.""" + + alert_rule_status_changed = EventSource(InvalidAlertRuleEvent) + + +def _validate_relation_by_interface_and_direction( + charm: CharmBase, + relation_name: str, + expected_relation_interface: str, + expected_relation_role: RelationRole, +): + """Verifies that a relation has the necessary characteristics. + + Verifies that the `relation_name` provided: (1) exists in metadata.yaml, + (2) declares as interface the interface name passed as `relation_interface` + and (3) has the right "direction", i.e., it is a relation that `charm` + provides or requires. + + Args: + charm: a `CharmBase` object to scan for the matching relation. + relation_name: the name of the relation to be verified. + expected_relation_interface: the interface name to be matched by the + relation named `relation_name`. + expected_relation_role: whether the `relation_name` must be either + provided or required by `charm`. + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the same relation interface + as specified via the `expected_relation_interface` argument. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the same role as specified + via the `expected_relation_role` argument. + """ + if relation_name not in charm.meta.relations: + raise RelationNotFoundError(relation_name) + + relation = charm.meta.relations[relation_name] + + actual_relation_interface = relation.interface_name + if actual_relation_interface != expected_relation_interface: + raise RelationInterfaceMismatchError( + relation_name, expected_relation_interface, actual_relation_interface + ) + + if expected_relation_role == RelationRole.provides: + if relation_name not in charm.meta.provides: + raise RelationRoleMismatchError( + relation_name, RelationRole.provides, RelationRole.requires + ) + elif expected_relation_role == RelationRole.requires: + if relation_name not in charm.meta.requires: + raise RelationRoleMismatchError( + relation_name, RelationRole.requires, RelationRole.provides + ) + else: + raise Exception("Unexpected RelationDirection: {}".format(expected_relation_role)) + + +def _sanitize_scrape_configuration(job) -> dict: + """Restrict permissible scrape configuration options. + + If job is empty then a default job is returned. The + default job is + + ``` + { + "metrics_path": "/metrics", + "static_configs": [{"targets": ["*:80"]}], + } + ``` + + Args: + job: a dict containing a single Prometheus job + specification. + + Returns: + a dictionary containing a sanitized job specification. 
+ """ + sanitized_job = DEFAULT_JOB.copy() + sanitized_job.update({key: value for key, value in job.items() if key in ALLOWED_KEYS}) + return sanitized_job + + +class InvalidAlertRulePathError(Exception): + """Raised if the alert rules folder cannot be found or is otherwise invalid.""" + + def __init__( + self, + alert_rules_absolute_path: Path, + message: str, + ): + self.alert_rules_absolute_path = alert_rules_absolute_path + self.message = message + + super().__init__(self.message) + + +def _is_official_alert_rule_format(rules_dict: dict) -> bool: + """Are alert rules in the upstream format as supported by Prometheus. + + Alert rules in dictionary format are in "official" form if they + contain a "groups" key, since this implies they contain a list of + alert rule groups. + + Args: + rules_dict: a set of alert rules in Python dictionary format + + Returns: + True if alert rules are in official Prometheus file format. + """ + return "groups" in rules_dict + + +def _is_single_alert_rule_format(rules_dict: dict) -> bool: + """Are alert rules in single rule format. + + The Prometheus charm library supports reading of alert rules in a + custom format that consists of a single alert rule per file. This + does not conform to the official Prometheus alert rule file format + which requires that each alert rules file consists of a list of + alert rule groups and each group consists of a list of alert + rules. + + Alert rules in dictionary form are considered to be in single rule + format if in the least it contains two keys corresponding to the + alert rule name and alert expression. + + Returns: + True if alert rule is in single rule file format. + """ + # one alert rule per file + return set(rules_dict) >= {"alert", "expr"} + + +class AlertRules: + """Utility class for amalgamating prometheus alert rule files and injecting juju topology. + + An `AlertRules` object supports aggregating alert rules from files and directories in both + official and single rule file formats using the `add_path()` method. All the alert rules + read are annotated with Juju topology labels and amalgamated into a single data structure + in the form of a Python dictionary using the `as_dict()` method. Such a dictionary can be + easily dumped into JSON format and exchanged over relation data. The dictionary can also + be dumped into YAML format and written directly into an alert rules file that is read by + Prometheus. Note that multiple `AlertRules` objects must not be written into the same file, + since Prometheus allows only a single list of alert rule groups per alert rules file. + + The official Prometheus format is a YAML file conforming to the Prometheus documentation + (https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/). + The custom single rule format is a subsection of the official YAML, having a single alert + rule, effectively "one alert per file". + """ + + # This class uses the following terminology for the various parts of a rule file: + # - alert rules file: the entire groups[] yaml, including the "groups:" key. + # - alert groups (plural): the list of groups[] (a list, i.e. no "groups:" key) - it is a list + # of dictionaries that have the "name" and "rules" keys. + # - alert group (singular): a single dictionary that has the "name" and "rules" keys. + # - alert rules (plural): all the alerts in a given alert group - a list of dictionaries with + # the "alert" and "expr" keys. + # - alert rule (singular): a single dictionary that has the "alert" and "expr" keys. 
+ + def __init__(self, topology: Optional[JujuTopology] = None): + """Build and alert rule object. + + Args: + topology: an optional `JujuTopology` instance that is used to annotate all alert rules. + """ + self.topology = topology + self.tool = CosTool(None) + self.alert_groups = [] # type: List[dict] + + def _from_file(self, root_path: Path, file_path: Path) -> List[dict]: + """Read a rules file from path, injecting juju topology. + + Args: + root_path: full path to the root rules folder (used only for generating group name) + file_path: full path to a *.rule file. + + Returns: + A list of dictionaries representing the rules file, if file is valid (the structure is + formed by `yaml.safe_load` of the file); an empty list otherwise. + """ + with file_path.open() as rf: + # Load a list of rules from file then add labels and filters + try: + rule_file = yaml.safe_load(rf) + + except Exception as e: + logger.error("Failed to read alert rules from %s: %s", file_path.name, e) + return [] + + if not rule_file: + logger.warning("Empty rules file: %s", file_path.name) + return [] + if not isinstance(rule_file, dict): + logger.error("Invalid rules file (must be a dict): %s", file_path.name) + return [] + if _is_official_alert_rule_format(rule_file): + alert_groups = rule_file["groups"] + elif _is_single_alert_rule_format(rule_file): + # convert to list of alert groups + # group name is made up from the file name + alert_groups = [{"name": file_path.stem, "rules": [rule_file]}] + else: + # invalid/unsupported + logger.error("Invalid rules file: %s", file_path.name) + return [] + + # update rules with additional metadata + for alert_group in alert_groups: + # update group name with topology and sub-path + alert_group["name"] = self._group_name( + str(root_path), + str(file_path), + alert_group["name"], + ) + + # add "juju_" topology labels + for alert_rule in alert_group["rules"]: + if "labels" not in alert_rule: + alert_rule["labels"] = {} + + if self.topology: + alert_rule["labels"].update(self.topology.label_matcher_dict) + # insert juju topology filters into a prometheus alert rule + alert_rule["expr"] = self.tool.inject_label_matchers( + re.sub(r"%%juju_topology%%,?", "", alert_rule["expr"]), + self.topology.label_matcher_dict, + ) + + return alert_groups + + def _group_name(self, root_path: str, file_path: str, group_name: str) -> str: + """Generate group name from path and topology. + + The group name is made up of the relative path between the root dir_path, the file path, + and topology identifier. + + Args: + root_path: path to the root rules dir. + file_path: path to rule file. + group_name: original group name to keep as part of the new augmented group name + + Returns: + New group name, augmented by juju topology and relative path. + """ + rel_path = os.path.relpath(os.path.dirname(file_path), root_path) + rel_path = "" if rel_path == "." else rel_path.replace(os.path.sep, "_") + + # Generate group name: + # - name, from juju topology + # - suffix, from the relative path of the rule file; + group_name_parts = [self.topology.identifier] if self.topology else [] + group_name_parts.extend([rel_path, group_name, "alerts"]) + # filter to remove empty strings + return "_".join(filter(None, group_name_parts)) + + @classmethod + def _multi_suffix_glob( + cls, dir_path: Path, suffixes: List[str], recursive: bool = True + ) -> list: + """Helper function for getting all files in a directory that have a matching suffix. + + Args: + dir_path: path to the directory to glob from. 
+ suffixes: list of suffixes to include in the glob (items should begin with a period). + recursive: a flag indicating whether a glob is recursive (nested) or not. + + Returns: + List of files in `dir_path` that have one of the suffixes specified in `suffixes`. + """ + all_files_in_dir = dir_path.glob("**/*" if recursive else "*") + return list(filter(lambda f: f.is_file() and f.suffix in suffixes, all_files_in_dir)) + + def _from_dir(self, dir_path: Path, recursive: bool) -> List[dict]: + """Read all rule files in a directory. + + All rules from files for the same directory are loaded into a single + group. The generated name of this group includes juju topology. + By default, only the top directory is scanned; for nested scanning, pass `recursive=True`. + + Args: + dir_path: directory containing *.rule files (alert rules without groups). + recursive: flag indicating whether to scan for rule files recursively. + + Returns: + a list of dictionaries representing prometheus alert rule groups, each dictionary + representing an alert group (structure determined by `yaml.safe_load`). + """ + alert_groups = [] # type: List[dict] + + # Gather all alerts into a list of groups + for file_path in self._multi_suffix_glob(dir_path, [".rule", ".rules"], recursive): + alert_groups_from_file = self._from_file(dir_path, file_path) + if alert_groups_from_file: + logger.debug("Reading alert rule from %s", file_path) + alert_groups.extend(alert_groups_from_file) + + return alert_groups + + def add_path(self, path: str, *, recursive: bool = False) -> None: + """Add rules from a dir path. + + All rules from files are aggregated into a data structure representing a single rule file. + All group names are augmented with juju topology. + + Args: + path: either a rules file or a dir of rules files. + recursive: whether to read files recursively or not (no impact if `path` is a file). + + Returns: + True if path was added else False. + """ + path = Path(path) # type: Path + if path.is_dir(): + self.alert_groups.extend(self._from_dir(path, recursive)) + elif path.is_file(): + self.alert_groups.extend(self._from_file(path.parent, path)) + else: + logger.debug("Alert rules path does not exist: %s", path) + + def as_dict(self) -> dict: + """Return standard alert rules file in dict representation. + + Returns: + a dictionary containing a single list of alert rule groups. + The list of alert rule groups is provided as value of the + "groups" dictionary key. + """ + return {"groups": self.alert_groups} if self.alert_groups else {} + + +class TargetsChangedEvent(EventBase): + """Event emitted when Prometheus scrape targets change.""" + + def __init__(self, handle, relation_id): + super().__init__(handle) + self.relation_id = relation_id + + def snapshot(self): + """Save scrape target relation information.""" + return {"relation_id": self.relation_id} + + def restore(self, snapshot): + """Restore scrape target relation information.""" + self.relation_id = snapshot["relation_id"] + + +class MonitoringEvents(ObjectEvents): + """Event descriptor for events raised by `MetricsEndpointConsumer`.""" + + targets_changed = EventSource(TargetsChangedEvent) + + +class MetricsEndpointConsumer(Object): + """A Prometheus based Monitoring service.""" + + on = MonitoringEvents() + + def __init__(self, charm: CharmBase, relation_name: str = DEFAULT_RELATION_NAME): + """A Prometheus based Monitoring service. + + Args: + charm: a `CharmBase` instance that manages this + instance of the Prometheus service. 
+            relation_name: an optional string name of the relation between `charm`
+                and the Prometheus charmed service. The default is "metrics-endpoint".
+                It is strongly advised not to change the default, so that people
+                deploying your charm will have a consistent experience with all
+                other charms that consume metrics endpoints.
+
+        Raises:
+            RelationNotFoundError: If there is no relation in the charm's metadata.yaml
+                with the same name as provided via `relation_name` argument.
+            RelationInterfaceMismatchError: The relation with the same name as provided
+                via `relation_name` argument does not have the `prometheus_scrape` relation
+                interface.
+            RelationRoleMismatchError: If the relation with the same name as provided
+                via `relation_name` argument does not have the `RelationRole.requires`
+                role.
+        """
+        _validate_relation_by_interface_and_direction(
+            charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.requires
+        )
+
+        super().__init__(charm, relation_name)
+        self._charm = charm
+        self._relation_name = relation_name
+        self._tool = CosTool(self._charm)
+        events = self._charm.on[relation_name]
+        self.framework.observe(events.relation_changed, self._on_metrics_provider_relation_changed)
+        self.framework.observe(
+            events.relation_departed, self._on_metrics_provider_relation_departed
+        )
+
+    def _on_metrics_provider_relation_changed(self, event):
+        """Handle changes with related metrics providers.
+
+        Anytime there are changes in relations between Prometheus
+        and metrics provider charms, the Prometheus charm is informed
+        through a `TargetsChangedEvent` event. The Prometheus charm can
+        then choose to update its scrape configuration.
+
+        Args:
+            event: a `CharmEvent` in response to which the Prometheus
+                charm must update its scrape configuration.
+        """
+        rel_id = event.relation.id
+
+        self.on.targets_changed.emit(relation_id=rel_id)
+
+    def _on_metrics_provider_relation_departed(self, event):
+        """Update job config when a metrics provider departs.
+
+        When a metrics provider departs, the Prometheus charm is informed
+        through a `TargetsChangedEvent` event so that it can update its
+        scrape configuration to ensure that the departed metrics provider
+        is removed from the list of scrape jobs.
+
+        Args:
+            event: a `CharmEvent` that indicates a metrics provider
+                unit has departed.
+        """
+        rel_id = event.relation.id
+        self.on.targets_changed.emit(relation_id=rel_id)
+
+    def jobs(self) -> list:
+        """Fetch the list of scrape jobs.
+
+        Returns:
+            A list consisting of all the static scrape configurations
+            for each related `MetricsEndpointProvider` that has specified
+            its scrape targets.
+        """
+        scrape_jobs = []
+
+        for relation in self._charm.model.relations[self._relation_name]:
+            static_scrape_jobs = self._static_scrape_config(relation)
+            if static_scrape_jobs:
+                scrape_jobs.extend(static_scrape_jobs)
+
+        scrape_jobs = _dedupe_job_names(scrape_jobs)
+
+        return scrape_jobs
+
+    def alerts(self) -> dict:
+        """Fetch alerts for all relations.
+
+        A Prometheus alert rules file consists of a list of "groups". Each
+        group consists of a list of alerts (`rules`) that are sequentially
+        evaluated. This method returns all the alert rules provided by each
+        related metrics provider charm. These rules may be used to generate a
+        separate alert rules file for each relation since the returned list
+        of alert groups is indexed by that relation's Juju topology identifier.
+ The Juju topology identifier string includes substrings that identify + alert rule related metadata such as the Juju model, model UUID and the + application name from where the alert rule originates. Since this + topology identifier is globally unique, it may be used for instance as + the name for the file into which the list of alert rule groups are + written. For each relation, the structure of data returned is a dictionary + representation of a standard prometheus rules file: + + {"groups": [{"name": ...}, ...]} + + per official prometheus documentation + https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/ + + The value of the `groups` key is such that it may be used to generate + a Prometheus alert rules file directly using `yaml.dump` but the + `groups` key itself must be included as this is required by Prometheus. + + For example the list of alert rule groups returned by this method may + be written into files consumed by Prometheus as follows + + ``` + for topology_identifier, alert_rule_groups in self.metrics_consumer.alerts().items(): + filename = "juju_" + topology_identifier + ".rules" + path = os.path.join(PROMETHEUS_RULES_DIR, filename) + rules = yaml.safe_dump(alert_rule_groups) + container.push(path, rules, make_dirs=True) + ``` + + Returns: + A dictionary mapping the Juju topology identifier of the source charm to + its list of alert rule groups. + """ + alerts = {} # type: Dict[str, dict] # mapping b/w juju identifiers and alert rule files + for relation in self._charm.model.relations[self._relation_name]: + if not relation.units or not relation.app: + continue + + alert_rules = json.loads(relation.data[relation.app].get("alert_rules", "{}")) + if not alert_rules: + continue + + try: + scrape_metadata = json.loads(relation.data[relation.app]["scrape_metadata"]) + identifier = JujuTopology.from_dict(scrape_metadata).identifier + alerts[identifier] = self._tool.apply_label_matchers(alert_rules) + + except KeyError as e: + logger.debug( + "Relation %s has no 'scrape_metadata': %s", + relation.id, + e, + ) + identifier = self._get_identifier_by_alert_rules(alert_rules) + + if not identifier: + logger.error( + "Alert rules were found but no usable group or identifier was present" + ) + continue + + alerts[identifier] = alert_rules + + _, errmsg = self._tool.validate_alert_rules(alert_rules) + if errmsg: + if alerts[identifier]: + del alerts[identifier] + relation.data[self._charm.app]["event"] = json.dumps({"errors": errmsg}) + continue + + return alerts + + def _get_identifier_by_alert_rules(self, rules: dict) -> Union[str, None]: + """Determine an appropriate dict key for alert rules. + + The key is used as the filename when writing alerts to disk, so the structure + and uniqueness is important. + + Args: + rules: a dict of alert rules + """ + if "groups" not in rules: + logger.debug("No alert groups were found in relation data") + return None + + # Construct an ID based on what's in the alert rules if they have labels + for group in rules["groups"]: + try: + labels = group["rules"][0]["labels"] + identifier = "{}_{}_{}".format( + labels["juju_model"], + labels["juju_model_uuid"], + labels["juju_application"], + ) + return identifier + except KeyError: + logger.debug("Alert rules were found but no usable labels were present") + continue + + logger.warning( + "No labeled alert rules were found, and no 'scrape_metadata' " + "was available. Using the alert group name as filename." 
+ ) + try: + for group in rules["groups"]: + return group["name"] + except KeyError: + logger.debug("No group name was found to use as identifier") + + return None + + def _static_scrape_config(self, relation) -> list: + """Generate the static scrape configuration for a single relation. + + If the relation data includes `scrape_metadata` then the value + of this key is used to annotate the scrape jobs with Juju + Topology labels before returning them. + + Args: + relation: an `ops.model.Relation` object whose static + scrape configuration is required. + + Returns: + A list (possibly empty) of scrape jobs. Each job is a + valid Prometheus scrape configuration for that job, + represented as a Python dictionary. + """ + if not relation.units: + return [] + + scrape_jobs = json.loads(relation.data[relation.app].get("scrape_jobs", "[]")) + + if not scrape_jobs: + return [] + + scrape_metadata = json.loads(relation.data[relation.app].get("scrape_metadata", "{}")) + + if not scrape_metadata: + return scrape_jobs + + job_name_prefix = "juju_{}_prometheus_scrape".format( + JujuTopology.from_dict(scrape_metadata).identifier + ) + hosts = self._relation_hosts(relation) + + labeled_job_configs = [] + for job in scrape_jobs: + config = self._labeled_static_job_config( + _sanitize_scrape_configuration(job), + job_name_prefix, + hosts, + scrape_metadata, + ) + labeled_job_configs.append(config) + + return labeled_job_configs + + def _relation_hosts(self, relation) -> dict: + """Fetch unit names and address of all metrics provider units for a single relation. + + Args: + relation: An `ops.model.Relation` object for which the unit name to + address mapping is required. + + Returns: + A dictionary that maps unit names to unit addresses for + the specified relation. + """ + hosts = {} + for unit in relation.units: + # TODO deprecate and remove unit.name + unit_name = relation.data[unit].get("prometheus_scrape_unit_name") or unit.name + # TODO deprecate and remove "prometheus_scrape_host" + unit_address = relation.data[unit].get( + "prometheus_scrape_unit_address" + ) or relation.data[unit].get("prometheus_scrape_host") + if unit_name and unit_address: + hosts.update({unit_name: unit_address}) + + return hosts + + def _labeled_static_job_config(self, job, job_name_prefix, hosts, scrape_metadata) -> dict: + """Construct labeled job configuration for a single job. + + Args: + + job: a dictionary representing the job configuration as obtained from + `MetricsEndpointProvider` over relation data. + job_name_prefix: a string that may either be used as the + job name if the job has no associated name or used as a prefix for + the job if it does have a job name. + hosts: a dictionary mapping host names to host address for + all units of the relation for which this job configuration + must be constructed. + scrape_metadata: scrape configuration metadata obtained + from `MetricsEndpointProvider` from the same relation for + which this job configuration is being constructed. + + Returns: + A dictionary representing a Prometheus job configuration + for a single job. 
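+
+        As an illustrative sketch (all names, addresses and hashes below are
+        hypothetical, not actual relation data), a job labeled by this method
+        may look like:
+
+        ```
+        {
+            "job_name": "juju_mymodel_1a2b3c4_myapp_prometheus_scrape",
+            "static_configs": [
+                {
+                    "targets": ["10.1.2.3:8080"],
+                    "labels": {
+                        "juju_model": "mymodel",
+                        "juju_unit": "myapp/0",
+                    },
+                }
+            ],
+            "relabel_configs": [
+                {
+                    "source_labels": ["juju_model", "juju_model_uuid", "juju_application"],
+                    "separator": "_",
+                    "target_label": "instance",
+                    "regex": "(.*)",
+                }
+            ],
+        }
+        ```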
+ """ + name = job.get("job_name") + job_name = "{}_{}".format(job_name_prefix, name) if name else job_name_prefix + + labeled_job = job.copy() + labeled_job["job_name"] = job_name + + static_configs = job.get("static_configs") + labeled_job["static_configs"] = [] + + # relabel instance labels so that instance identifiers are globally unique + # stable over unit recreation + instance_relabel_config = { + "source_labels": ["juju_model", "juju_model_uuid", "juju_application"], + "separator": "_", + "target_label": "instance", + "regex": "(.*)", + } + + # label all static configs in the Prometheus job + # labeling inserts Juju topology information and + # sets a relabeling config for instance labels + for static_config in static_configs: + labels = static_config.get("labels", {}) if static_configs else {} + all_targets = static_config.get("targets", []) + + # split all targets into those which will have unit labels + # and those which will not + ports = [] + unitless_targets = [] + for target in all_targets: + host, port = self._target_parts(target) + if host.strip() == "*": + ports.append(port.strip()) + else: + unitless_targets.append(target) + + # label scrape targets that do not have unit labels + if unitless_targets: + unitless_config = self._labeled_unitless_config( + unitless_targets, labels, scrape_metadata + ) + labeled_job["static_configs"].append(unitless_config) + + # label scrape targets that do have unit labels + for host_name, host_address in hosts.items(): + static_config = self._labeled_unit_config( + host_name, host_address, ports, labels, scrape_metadata + ) + labeled_job["static_configs"].append(static_config) + if "juju_unit" not in instance_relabel_config["source_labels"]: + instance_relabel_config["source_labels"].append("juju_unit") # type: ignore + + # ensure topology relabeling of instance label is last in order of relabelings + relabel_configs = job.get("relabel_configs", []) + relabel_configs.append(instance_relabel_config) + labeled_job["relabel_configs"] = relabel_configs + + return labeled_job + + def _target_parts(self, target) -> list: + """Extract host and port from a wildcard target. + + Args: + target: a string specifying a scrape target. A + scrape target is expected to have the format + "host:port". The host part may be a wildcard + "*" and the port part can be missing (along + with ":") in which case port is set to 80. + + Returns: + a list with target host and port as in [host, port] + """ + if ":" in target: + parts = target.split(":") + else: + parts = [target, "80"] + + return parts + + def _set_juju_labels(self, labels, scrape_metadata) -> dict: + """Create a copy of metric labels with Juju topology information. + + Args: + labels: a dictionary containing Prometheus metric labels. + scrape_metadata: scrape related metadata provided by + `MetricsEndpointProvider`. + + Returns: + a copy of the `labels` dictionary augmented with Juju + topology information except for unit name. + """ + juju_labels = labels.copy() # deep copy not needed + juju_labels.update(JujuTopology.from_dict(scrape_metadata).label_matcher_dict) + + return juju_labels + + def _labeled_unitless_config(self, targets, labels, scrape_metadata) -> dict: + """Static scrape configuration for fully qualified host addresses. + + Fully qualified hosts are those scrape targets for which the + address are specified by the `MetricsEndpointProvider` as part + of the scrape job specification set in application relation data. 
+        The address specified need not belong to any unit of the
+        `MetricsEndpointProvider` charm. As a result there is no reliable
+        way to determine the name (Juju topology unit name) for such a
+        target.
+
+        Args:
+            targets: a list of addresses of fully qualified hosts.
+            labels: labels specified by `MetricsEndpointProvider` clients
+                which are associated with `targets`.
+            scrape_metadata: scrape related metadata provided by `MetricsEndpointProvider`.
+
+        Returns:
+            A dictionary containing the static scrape configuration
+            for a list of fully qualified hosts.
+        """
+        juju_labels = self._set_juju_labels(labels, scrape_metadata)
+        unitless_config = {"targets": targets, "labels": juju_labels}
+        return unitless_config
+
+    def _labeled_unit_config(
+        self, unit_name, host_address, ports, labels, scrape_metadata
+    ) -> dict:
+        """Static scrape configuration for a wildcard host.
+
+        Wildcard hosts are those scrape targets whose name (Juju unit
+        name) and address (unit IP address) are set into unit relation
+        data by the `MetricsEndpointProvider` charm, which sets this
+        data for ALL its units.
+
+        Args:
+            unit_name: a string representing the unit name of the wildcard host.
+            host_address: a string representing the address of the wildcard host.
+            ports: list of ports on which this wildcard host exposes its metrics.
+            labels: a dictionary of labels provided by
+                `MetricsEndpointProvider` intended to be associated with
+                this wildcard host.
+            scrape_metadata: scrape related metadata provided by `MetricsEndpointProvider`.
+
+        Returns:
+            A dictionary containing the static scrape configuration
+            for a single wildcard host.
+        """
+        juju_labels = self._set_juju_labels(labels, scrape_metadata)
+
+        juju_labels["juju_unit"] = unit_name
+
+        static_config = {"labels": juju_labels}
+
+        if ports:
+            targets = []
+            for port in ports:
+                targets.append("{}:{}".format(host_address, port))
+            static_config["targets"] = targets  # type: ignore
+        else:
+            static_config["targets"] = [host_address]  # type: ignore
+
+        return static_config
+
+
+def _dedupe_job_names(jobs: List[dict]):
+    """Deduplicate a list of dicts by appending a hash to the value of the 'job_name' key.
+
+    Additionally, fully de-duplicate any identical jobs.
+
+    Args:
+        jobs: A list of prometheus scrape jobs
+    """
+    jobs_copy = copy.deepcopy(jobs)
+
+    # Convert to a dict with job names as keys
+    # I think this line is O(n^2) but it should be okay given the list sizes
+    jobs_dict = {
+        job["job_name"]: list(filter(lambda x: x["job_name"] == job["job_name"], jobs_copy))
+        for job in jobs_copy
+    }
+
+    # If multiple jobs have the same name, convert the name to "name_<hash-of-job>"
+    for key in jobs_dict:
+        if len(jobs_dict[key]) > 1:
+            for job in jobs_dict[key]:
+                job_json = json.dumps(job)
+                hashed = hashlib.sha256(job_json.encode()).hexdigest()
+                job["job_name"] = "{}_{}".format(job["job_name"], hashed)
+    new_jobs = []
+    for key in jobs_dict:
+        new_jobs.extend([i for i in jobs_dict[key]])
+
+    # Deduplicate jobs which are equal
+    # Again this is O(n^2) but it should be okay
+    deduped_jobs = []
+    seen = []
+    for job in new_jobs:
+        job_json = json.dumps(job)
+        hashed = hashlib.sha256(job_json.encode()).hexdigest()
+        if hashed in seen:
+            continue
+        seen.append(hashed)
+        deduped_jobs.append(job)
+
+    return deduped_jobs
+
+
+def _resolve_dir_against_charm_path(charm: CharmBase, *path_elements: str) -> str:
+    """Resolve the provided path items against the directory of the main file.
+
+    Look up the directory of the `main.py` file being executed. This is normally
+    going to be the charm.py file of the charm including this library. Then, resolve
+    the provided path elements and, if the resulting path exists and is a directory,
+    return its absolute path; otherwise, raise an exception.
+
+    Raises:
+        InvalidAlertRulePathError, if the path does not exist or is not a directory.
+    """
+    charm_dir = Path(str(charm.charm_dir))
+    if not charm_dir.exists() or not charm_dir.is_dir():
+        # Operator Framework does not currently expose a robust
+        # way to determine the top level charm source directory
+        # that is consistent across deployed charms and unit tests
+        # Hence for unit tests the current working directory is used
+        # TODO: update this logic when the following ticket is resolved
+        # https://github.com/canonical/operator/issues/643
+        charm_dir = Path(os.getcwd())
+
+    alerts_dir_path = charm_dir.absolute().joinpath(*path_elements)
+
+    if not alerts_dir_path.exists():
+        raise InvalidAlertRulePathError(alerts_dir_path, "directory does not exist")
+    if not alerts_dir_path.is_dir():
+        raise InvalidAlertRulePathError(alerts_dir_path, "is not a directory")
+
+    return str(alerts_dir_path)
+
+
+class MetricsEndpointProvider(Object):
+    """A metrics endpoint for Prometheus."""
+
+    on = MetricsEndpointProviderEvents()
+
+    def __init__(
+        self,
+        charm,
+        relation_name: str = DEFAULT_RELATION_NAME,
+        jobs=None,
+        alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH,
+        refresh_event: Optional[Union[BoundEvent, List[BoundEvent]]] = None,
+        external_hostname: Optional[str] = None,
+    ):
+        """Construct a metrics provider for a Prometheus charm.
+
+        If your charm exposes a Prometheus metrics endpoint, the
+        `MetricsEndpointProvider` object enables your charm to easily
+        communicate how to reach that metrics endpoint.
+
+        By default, a charm instantiating this object has the metrics
+        endpoints of each of its units scraped by the related Prometheus
+        charms. The scraped metrics are automatically tagged by the
+        Prometheus charms with Juju topology data via the
+        `juju_model_name`, `juju_model_uuid`, `juju_application_name`
+        and `juju_unit` labels. To support such tagging `MetricsEndpointProvider`
+        automatically forwards scrape metadata to a `MetricsEndpointConsumer`
+        (Prometheus charm).
+
+        Scrape targets provided by `MetricsEndpointProvider` can be
+        customized when instantiating this object. For example in the
+        case of a charm exposing the metrics endpoint for each of its
+        units on port 8080 and the `/metrics` path, the
+        `MetricsEndpointProvider` can be instantiated as follows:
+
+            self.metrics_endpoint_provider = MetricsEndpointProvider(
+                self,
+                jobs=[{
+                    "static_configs": [{"targets": ["*:8080"]}],
+                }])
+
+        The notation `*:<port>` means "scrape each unit of this charm on port
+        `<port>`".
+
+        In case the metrics endpoints are not on the standard `/metrics` path,
+        a custom path can be specified as follows:
+
+            self.metrics_endpoint_provider = MetricsEndpointProvider(
+                self,
+                jobs=[{
+                    "metrics_path": "/my/strange/metrics/path",
+                    "static_configs": [{"targets": ["*:8080"]}],
+                }])
+
+        Note how the `jobs` argument is a list: this allows you to expose multiple
+        combinations of paths "metrics_path" and "static_configs" in case your charm
+        exposes multiple endpoints, which could happen, for example, when you have
+        multiple workload containers, with applications in each needing to be scraped.
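+
+        For instance, a charm with two workload containers exposing metrics on
+        different ports might pass (an illustrative sketch; the ports and the
+        custom path are hypothetical):
+
+            self.metrics_endpoint_provider = MetricsEndpointProvider(
+                self,
+                jobs=[
+                    {"static_configs": [{"targets": ["*:8080"]}]},
+                    {
+                        "metrics_path": "/worker/metrics",
+                        "static_configs": [{"targets": ["*:9090"]}],
+                    },
+                ])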
+        The structure of the objects in the `jobs` list is one-to-one with the
+        `scrape_config` configuration item of Prometheus' own configuration (see
+        https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config
+        ), but with only a subset of the fields allowed. The permitted fields are
+        listed in the `ALLOWED_KEYS` object in this charm library module.
+
+        It is also possible to specify alert rules. By default, this library will look
+        into the `<charm_parent_dir>/prometheus_alert_rules`, which in a standard charm
+        layout resolves to `src/prometheus_alert_rules`. Each alert rule goes into a
+        separate `*.rule` file. If the syntax of a rule is invalid,
+        the `MetricsEndpointProvider` logs an error and does not load the particular
+        rule.
+
+        To avoid false positives and negatives in the evaluation of alert rules,
+        all ingested alert rule expressions are automatically qualified using Juju
+        Topology filters. This ensures that alert rules provided by your charm trigger
+        alerts based only on data scraped from your charm. For example, an alert rule
+        such as the following
+
+            alert: UnitUnavailable
+            expr: up < 1
+            for: 0m
+
+        will be automatically transformed into something along the lines of the following
+
+            alert: UnitUnavailable
+            expr: up{juju_model=<model>, juju_model_uuid=<uuid>, juju_application=<application>} < 1
+            for: 0m
+
+        An attempt will be made to validate alert rules prior to loading them into Prometheus.
+        If they are invalid, an event will be emitted from this object which charms can respond
+        to in order to set a meaningful status for administrators.
+
+        This can be observed via `consumer.on.alert_rule_status_changed` which contains:
+            - The error(s) encountered when validating as `errors`
+            - A `valid` attribute, which can be used to reset the state of charms if alert rules
+              are updated via another mechanism (e.g. `cos-config`) and refreshed.
+
+        Args:
+            charm: a `CharmBase` object that manages this
+                `MetricsEndpointProvider` object. Typically, this is
+                `self` in the instantiating class.
+            relation_name: an optional string name of the relation between `charm`
+                and the Prometheus charmed service. The default is "metrics-endpoint".
+                It is strongly advised not to change the default, so that people
+                deploying your charm will have a consistent experience with all
+                other charms that provide metrics endpoints.
+            jobs: an optional list of dictionaries where each
+                dictionary represents the Prometheus scrape
+                configuration for a single job. When not provided, a
+                default scrape configuration is provided for the
+                `/metrics` endpoint polling all units of the charm on port `80`
+                using the `MetricsEndpointProvider` object.
+            alert_rules_path: an optional path for the location of alert rules
+                files. Defaults to "./prometheus_alert_rules",
+                resolved relative to the directory hosting the charm entry file.
+                The alert rules are automatically updated on charm upgrade.
+            refresh_event: an optional bound event or list of bound events which
+                will be observed to re-set scrape job data (IP address and others)
+            external_hostname: an optional argument that represents an external hostname that
+                can be generated by an Ingress or a Proxy.
+
+        Raises:
+            RelationNotFoundError: If there is no relation in the charm's metadata.yaml
+                with the same name as provided via `relation_name` argument.
+            RelationInterfaceMismatchError: The relation with the same name as provided
+                via `relation_name` argument does not have the `prometheus_scrape` relation
+                interface.
+ RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the `RelationRole.provides` + role. + """ + _validate_relation_by_interface_and_direction( + charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.provides + ) + + try: + alert_rules_path = _resolve_dir_against_charm_path(charm, alert_rules_path) + except InvalidAlertRulePathError as e: + logger.debug( + "Invalid Prometheus alert rules folder at %s: %s", + e.alert_rules_absolute_path, + e.message, + ) + + super().__init__(charm, relation_name) + self.topology = JujuTopology.from_charm(charm) + + self._charm = charm + self._alert_rules_path = alert_rules_path + self._relation_name = relation_name + # sanitize job configurations to the supported subset of parameters + jobs = [] if jobs is None else jobs + self._jobs = [_sanitize_scrape_configuration(job) for job in jobs] + self.external_hostname = external_hostname + events = self._charm.on[self._relation_name] + self.framework.observe(events.relation_joined, self._set_scrape_job_spec) + self.framework.observe(events.relation_changed, self._on_relation_changed) + + if not refresh_event: + if len(self._charm.meta.containers) == 1: + if "kubernetes" in self._charm.meta.series: + # This is a podspec charm + refresh_event = [self._charm.on.update_status] + else: + # This is a sidecar/pebble charm + container = list(self._charm.meta.containers.values())[0] + refresh_event = [self._charm.on[container.name.replace("-", "_")].pebble_ready] + else: + logger.warning( + "%d containers are present in metadata.yaml and " + "refresh_event was not specified. Defaulting to update_status. " + "Metrics IP may not be set in a timely fashion.", + len(self._charm.meta.containers), + ) + refresh_event = [self._charm.on.update_status] + + else: + if not isinstance(refresh_event, list): + refresh_event = [refresh_event] + + for ev in refresh_event: + self.framework.observe(ev, self._set_scrape_job_spec) + + self.framework.observe(self._charm.on.upgrade_charm, self._set_scrape_job_spec) + + # If there is no leader during relation_joined we will still need to set alert rules. + self.framework.observe(self._charm.on.leader_elected, self._set_scrape_job_spec) + + def _on_relation_changed(self, event): + """Check for alert rule messages in the relation data before moving on.""" + if self._charm.unit.is_leader(): + ev = json.loads(event.relation.data[event.app].get("event", "{}")) + + if ev: + valid = bool(ev.get("valid", True)) + errors = ev.get("errors", "") + + if valid and not errors: + self.on.alert_rule_status_changed.emit(valid=valid) + else: + self.on.alert_rule_status_changed.emit(valid=valid, errors=errors) + + self._set_scrape_job_spec(event) + + def _set_scrape_job_spec(self, event): + """Ensure scrape target information is made available to prometheus. + + When a metrics provider charm is related to a prometheus charm, the + metrics provider sets specification and metadata related to its own + scrape configuration. This information is set using Juju application + data. In addition, each of the consumer units also sets its own + host address in Juju unit relation data. 
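+
+        As a sketch of the keys involved (the values shown are hypothetical),
+        the leader sets application relation data such as:
+
+            relation.data[app]["scrape_metadata"] = '{"model": "mymodel", ...}'
+            relation.data[app]["scrape_jobs"] = '[{"metrics_path": "/metrics", ...}]'
+            relation.data[app]["alert_rules"] = '{"groups": [...]}'
+
+        while each unit sets its own address in unit relation data:
+
+            relation.data[unit]["prometheus_scrape_unit_address"] = "10.1.2.3"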
+        """
+        self._set_unit_ip(event)
+
+        if not self._charm.unit.is_leader():
+            return
+
+        alert_rules = AlertRules(topology=self.topology)
+        alert_rules.add_path(self._alert_rules_path, recursive=True)
+        alert_rules_as_dict = alert_rules.as_dict()
+
+        for relation in self._charm.model.relations[self._relation_name]:
+            relation.data[self._charm.app]["scrape_metadata"] = json.dumps(self._scrape_metadata)
+            relation.data[self._charm.app]["scrape_jobs"] = json.dumps(self._scrape_jobs)
+
+            if alert_rules_as_dict:
+                # Update relation data with the string representation of the rule file.
+                # Juju topology is already included in the "scrape_metadata" field above.
+                # The consumer side of the relation uses this information to name the rules file
+                # that is written to the filesystem.
+                relation.data[self._charm.app]["alert_rules"] = json.dumps(alert_rules_as_dict)
+
+    def _set_unit_ip(self, _):
+        """Set unit host address.
+
+        Each time a metrics provider charm container is restarted it updates its own
+        host address in the unit relation data for the prometheus charm.
+
+        The only argument specified is an event, and it is ignored. This is for expediency
+        to be able to use this method as an event handler, although no access to the
+        event is actually needed.
+        """
+        for relation in self._charm.model.relations[self._relation_name]:
+            unit_ip = str(self._charm.model.get_binding(relation).network.bind_address)
+
+            if self.external_hostname:
+                unit_address = self.external_hostname
+            elif self._is_valid_unit_address(unit_ip):
+                unit_address = unit_ip
+            else:
+                unit_address = socket.getfqdn()
+
+            relation.data[self._charm.unit]["prometheus_scrape_unit_address"] = unit_address
+            relation.data[self._charm.unit]["prometheus_scrape_unit_name"] = str(
+                self._charm.model.unit.name
+            )
+
+    def _is_valid_unit_address(self, address: str) -> bool:
+        """Validate a unit address.
+
+        At present only IP address validation is supported, but
+        this may be extended to DNS addresses also, as needed.
+
+        Args:
+            address: a string representing a unit address
+        """
+        try:
+            _ = ipaddress.ip_address(address)
+        except ValueError:
+            return False
+
+        return True
+
+    @property
+    def _scrape_jobs(self) -> list:
+        """Fetch list of scrape jobs.
+
+        Returns:
+            A list of dictionaries, where each dictionary specifies a
+            single scrape job for Prometheus.
+        """
+        return self._jobs if self._jobs else [DEFAULT_JOB]
+
+    @property
+    def _scrape_metadata(self) -> dict:
+        """Generate scrape metadata.
+
+        Returns:
+            Scrape configuration metadata for this metrics provider charm.
+        """
+        return self.topology.as_dict()
+
+
+class PrometheusRulesProvider(Object):
+    """Forward rules to Prometheus.
+
+    This object may be used to forward rules to Prometheus. At present it only supports
+    forwarding alert rules. This is unlike :class:`MetricsEndpointProvider`, which
+    is used for forwarding both scrape targets and associated alert rules. This object
+    is typically used when there is a desire to forward rules that apply globally (across
+    all deployed charms and units) rather than to a single charm. All rule files are
+    forwarded using the same 'prometheus_scrape' interface that is also used by
+    `MetricsEndpointProvider`.
+
+    Args:
+        charm: A charm instance that `provides` a relation with the `prometheus_scrape` interface.
+        relation_name: Name of the relation in `metadata.yaml` that
+            has the `prometheus_scrape` interface.
+        dir_path: Root directory for the collection of rule files.
+        recursive: Whether to scan for rule files recursively.
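+
+    Example (a minimal sketch; the relation and directory names are assumptions
+    that must match the charm's own `metadata.yaml` and source layout):
+
+        self.rules_provider = PrometheusRulesProvider(
+            self,
+            relation_name="metrics-endpoint",
+            dir_path="src/prometheus_alert_rules",
+            recursive=True,
+        )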
+ """ + + def __init__( + self, + charm: CharmBase, + relation_name: str = DEFAULT_RELATION_NAME, + dir_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH, + recursive=True, + ): + super().__init__(charm, relation_name) + self._charm = charm + self._relation_name = relation_name + self._recursive = recursive + + try: + dir_path = _resolve_dir_against_charm_path(charm, dir_path) + except InvalidAlertRulePathError as e: + logger.debug( + "Invalid Prometheus alert rules folder at %s: %s", + e.alert_rules_absolute_path, + e.message, + ) + self.dir_path = dir_path + + events = self._charm.on[self._relation_name] + event_sources = [ + events.relation_joined, + events.relation_changed, + self._charm.on.leader_elected, + self._charm.on.upgrade_charm, + ] + + for event_source in event_sources: + self.framework.observe(event_source, self._update_relation_data) + + def _reinitialize_alert_rules(self): + """Reloads alert rules and updates all relations.""" + self._update_relation_data(None) + + def _update_relation_data(self, _): + """Update application relation data with alert rules for all relations.""" + if not self._charm.unit.is_leader(): + return + + alert_rules = AlertRules() + alert_rules.add_path(self.dir_path, recursive=self._recursive) + alert_rules_as_dict = alert_rules.as_dict() + + logger.info("Updating relation data with rule files from disk") + for relation in self._charm.model.relations[self._relation_name]: + relation.data[self._charm.app]["alert_rules"] = json.dumps( + alert_rules_as_dict, + sort_keys=True, # sort, to prevent unnecessary relation_changed events + ) + + +class MetricsEndpointAggregator(Object): + """Aggregate metrics from multiple scrape targets. + + `MetricsEndpointAggregator` collects scrape target information from one + or more related charms and forwards this to a `MetricsEndpointConsumer` + charm, which may be in a different Juju model. However, it is + essential that `MetricsEndpointAggregator` itself resides in the same + model as its scrape targets, as this is currently the only way to + ensure in Juju that the `MetricsEndpointAggregator` will be able to + determine the model name and uuid of the scrape targets. + + `MetricsEndpointAggregator` should be used in place of + `MetricsEndpointProvider` in the following two use cases: + + 1. Integrating one or more scrape targets that do not support the + `prometheus_scrape` interface. + + 2. Integrating one or more scrape targets through cross model + relations. Although the [Scrape Config Operator](https://charmhub.io/cos-configuration-k8s) + may also be used for the purpose of supporting cross model + relations. + + Using `MetricsEndpointAggregator` to build a Prometheus charm client + only requires instantiating it. Instantiating + `MetricsEndpointAggregator` is similar to `MetricsEndpointProvider` except + that it requires specifying the names of three relations: the + relation with scrape targets, the relation for alert rules, and + that with the Prometheus charms. For example + + ```python + self._aggregator = MetricsEndpointAggregator( + self, + { + "prometheus": "monitoring", + "scrape_target": "prometheus-target", + "alert_rules": "prometheus-rules" + } + ) + ``` + + `MetricsEndpointAggregator` assumes that each unit of a scrape target + sets in its unit-level relation data two entries with keys + "hostname" and "port". 
If it is required to integrate with charms
+    that do not honor these assumptions, it is always possible to
+    derive from `MetricsEndpointAggregator`, overriding the `_get_targets()`
+    method, which is responsible for aggregating the unit name, host
+    address ("hostname") and port of the scrape target.
+
+    `MetricsEndpointAggregator` also assumes that each unit of a
+    scrape target sets in its unit-level relation data a key named
+    "groups". The value of this key is expected to be the string
+    representation of a list of Prometheus alert rules in YAML format.
+    An example of a single such alert rule is
+
+    ```yaml
+    - alert: HighRequestLatency
+      expr: job:request_latency_seconds:mean5m{job="myjob"} > 0.5
+      for: 10m
+      labels:
+        severity: page
+      annotations:
+        summary: High request latency
+    ```
+
+    Once again, if it is required to integrate with charms that do not
+    honor these assumptions about alert rules, then an object derived
+    from `MetricsEndpointAggregator` may be used by overriding the
+    `_get_alert_rules()` method.
+
+    `MetricsEndpointAggregator` ensures that Prometheus scrape job
+    specifications and alert rules are annotated with Juju topology
+    information, just like `MetricsEndpointProvider` and
+    `MetricsEndpointConsumer` do.
+
+    By default, `MetricsEndpointAggregator` ensures that Prometheus
+    "instance" labels refer to Juju topology. This ensures that
+    instance labels are stable over unit recreation. While it is not
+    advisable to change this option, if required it can be done by
+    setting the "relabel_instance" keyword argument to `False` when
+    constructing an aggregator object.
+    """
+
+    def __init__(self, charm, relation_names, relabel_instance=True):
+        """Construct a `MetricsEndpointAggregator`.
+
+        Args:
+            charm: a `CharmBase` object that manages this
+                `MetricsEndpointAggregator` object. Typically, this is
+                `self` in the instantiating class.
+            relation_names: a dictionary with three keys. The values
+                of the "scrape_target" and "alert_rules" keys are
+                the relation names over which scrape job and alert rule
+                information is gathered by this `MetricsEndpointAggregator`.
+                The value of the "prometheus" key is the name of
+                the relation with a `MetricsEndpointConsumer` such as
+                the Prometheus charm.
+            relabel_instance: A boolean flag indicating if Prometheus
+                scrape job "instance" labels must refer to Juju Topology.
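+
+        For example (a sketch mirroring the class-level example above, with
+        hypothetical relation names):
+
+            self._aggregator = MetricsEndpointAggregator(
+                self,
+                relation_names={
+                    "prometheus": "monitoring",
+                    "scrape_target": "prometheus-target",
+                    "alert_rules": "prometheus-rules",
+                },
+                relabel_instance=True,
+            )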
+        """
+        super().__init__(charm, relation_names["prometheus"])
+
+        self._charm = charm
+        self._target_relation = relation_names["scrape_target"]
+        self._prometheus_relation = relation_names["prometheus"]
+        self._alert_rules_relation = relation_names["alert_rules"]
+        self._relabel_instance = relabel_instance
+
+        # manage Prometheus charm relation events
+        prometheus_events = self._charm.on[self._prometheus_relation]
+        self.framework.observe(prometheus_events.relation_joined, self._set_prometheus_data)
+
+        # manage list of Prometheus scrape jobs from related scrape targets
+        target_events = self._charm.on[self._target_relation]
+        self.framework.observe(target_events.relation_changed, self._update_prometheus_jobs)
+        self.framework.observe(target_events.relation_departed, self._remove_prometheus_jobs)
+
+        # manage alert rules for Prometheus from related scrape targets
+        alert_rule_events = self._charm.on[self._alert_rules_relation]
+        self.framework.observe(alert_rule_events.relation_changed, self._update_alert_rules)
+        self.framework.observe(alert_rule_events.relation_departed, self._remove_alert_rules)
+
+    def _set_prometheus_data(self, event):
+        """Ensure every new Prometheus instance is updated.
+
+        Any time a new Prometheus unit joins the relation with
+        `MetricsEndpointAggregator`, that Prometheus unit is provided
+        with the complete set of existing scrape jobs and alert rules.
+        """
+        jobs = []  # list of scrape jobs, one per relation
+        for relation in self.model.relations[self._target_relation]:
+            targets = self._get_targets(relation)
+            if targets and relation.app:
+                jobs.append(self._static_scrape_job(targets, relation.app.name))
+
+        groups = []  # list of alert rule groups, one group per relation
+        for relation in self.model.relations[self._alert_rules_relation]:
+            unit_rules = self._get_alert_rules(relation)
+            if unit_rules and relation.app:
+                appname = relation.app.name
+                rules = self._label_alert_rules(unit_rules, appname)
+                group = {"name": self._group_name(appname), "rules": rules}
+                groups.append(group)
+
+        event.relation.data[self._charm.app]["scrape_jobs"] = json.dumps(jobs)
+        event.relation.data[self._charm.app]["alert_rules"] = json.dumps({"groups": groups})
+
+    def _set_target_job_data(self, targets: dict, app_name: str, **kwargs) -> None:
+        """Update scrape jobs in response to scrape target changes.
+
+        When there is any change in relation data with any scrape
+        target, the Prometheus scrape job for that specific target is
+        updated. Additionally, if this method is called manually, do the
+        same.
+
+        Args:
+            targets: a `dict` containing target information
+            app_name: a `str` identifying the application
+        """
+        # new scrape job for the relation that has changed
+        updated_job = self._static_scrape_job(targets, app_name, **kwargs)
+
+        for relation in self.model.relations[self._prometheus_relation]:
+            jobs = json.loads(relation.data[self._charm.app].get("scrape_jobs", "[]"))
+            # list of scrape jobs that have not changed
+            jobs = [job for job in jobs if updated_job["job_name"] != job["job_name"]]
+            jobs.append(updated_job)
+            relation.data[self._charm.app]["scrape_jobs"] = json.dumps(jobs)
+
+    def _update_prometheus_jobs(self, event):
+        """Update scrape jobs in response to scrape target changes.
+
+        When there is any change in relation data with any scrape
+        target, the Prometheus scrape job for that specific target is
+        updated.
+        """
+        targets = self._get_targets(event.relation)
+        if not targets:
+            return
+
+        # new scrape job for the relation that has changed
+        updated_job = self._static_scrape_job(targets, event.relation.app.name)
+
+        for relation in self.model.relations[self._prometheus_relation]:
+            jobs = json.loads(relation.data[self._charm.app].get("scrape_jobs", "[]"))
+            # list of scrape jobs that have not changed
+            jobs = [job for job in jobs if updated_job["job_name"] != job["job_name"]]
+            jobs.append(updated_job)
+            relation.data[self._charm.app]["scrape_jobs"] = json.dumps(jobs)
+
+    def _remove_prometheus_jobs(self, event):
+        """Remove scrape jobs when a target departs.
+
+        Any time a scrape target departs, any Prometheus scrape job
+        associated with that specific scrape target is removed.
+        """
+        job_name = self._job_name(event.relation.app.name)
+        unit_name = event.unit.name
+
+        for relation in self.model.relations[self._prometheus_relation]:
+            jobs = json.loads(relation.data[self._charm.app].get("scrape_jobs", "[]"))
+            if not jobs:
+                continue
+
+            changed_job = [j for j in jobs if j.get("job_name") == job_name]
+            if not changed_job:
+                continue
+            changed_job = changed_job[0]
+
+            # list of scrape jobs that have not changed
+            jobs = [job for job in jobs if job.get("job_name") != job_name]
+
+            # list of scrape jobs for units of the same application that still exist
+            configs_kept = [
+                config
+                for config in changed_job["static_configs"]  # type: ignore
+                if config.get("labels", {}).get("juju_unit") != unit_name
+            ]
+
+            if configs_kept:
+                changed_job["static_configs"] = configs_kept  # type: ignore
+                jobs.append(changed_job)
+
+            relation.data[self._charm.app]["scrape_jobs"] = json.dumps(jobs)
+
+    def _update_alert_rules(self, event):
+        """Update alert rules in response to scrape target changes.
+
+        When there is any change in alert rule relation data for any
+        scrape target, the list of alert rules for that specific
+        target is updated.
+        """
+        unit_rules = self._get_alert_rules(event.relation)
+        if not unit_rules:
+            return
+
+        appname = event.relation.app.name
+        rules = self._label_alert_rules(unit_rules, appname)
+        # the alert rule group that has changed
+        updated_group = {"name": self._group_name(appname), "rules": rules}
+
+        for relation in self.model.relations[self._prometheus_relation]:
+            alert_rules = json.loads(relation.data[self._charm.app].get("alert_rules", "{}"))
+            groups = alert_rules.get("groups", [])
+            # list of alert rule groups that have not changed
+            groups = [group for group in groups if updated_group["name"] != group["name"]]
+            groups.append(updated_group)
+            relation.data[self._charm.app]["alert_rules"] = json.dumps({"groups": groups})
+
+    def _remove_alert_rules(self, event):
+        """Remove alert rules for departed targets.
+
+        Any time a scrape target departs, any alert rules associated
+        with that specific scrape target are removed.
+        """
+        group_name = self._group_name(event.relation.app.name)
+        unit_name = event.unit.name
+
+        for relation in self.model.relations[self._prometheus_relation]:
+            alert_rules = json.loads(relation.data[self._charm.app].get("alert_rules", "{}"))
+            if not alert_rules:
+                continue
+
+            groups = alert_rules.get("groups", [])
+            if not groups:
+                continue
+
+            changed_group = [group for group in groups if group["name"] == group_name]
+            if not changed_group:
+                continue
+            changed_group = changed_group[0]
+
+            # list of alert rule groups that have not changed
+            groups = [group for group in groups if group["name"] != group_name]
+
+            # list of alert rules not associated with departing unit
+            rules_kept = [
+                rule
+                for rule in changed_group.get("rules")  # type: ignore
+                if rule.get("labels").get("juju_unit") != unit_name
+            ]
+
+            if rules_kept:
+                changed_group["rules"] = rules_kept  # type: ignore
+                groups.append(changed_group)
+
+            relation.data[self._charm.app]["alert_rules"] = (
+                json.dumps({"groups": groups}) if groups else "{}"
+            )
+
+    def _get_targets(self, relation) -> dict:
+        """Fetch scrape targets for a relation.
+
+        Scrape target information is returned for each unit in the
+        relation. This information contains the unit name, network
+        hostname (or address) for that unit, and port on which a
+        metrics endpoint is exposed in that unit.
+
+        Args:
+            relation: an `ops.model.Relation` object for which scrape
+                targets are required.
+
+        Returns:
+            a dictionary whose keys are names of the units in the
+            relation. The value associated with each key is itself
+            a dictionary of the form
+            ```
+            {"hostname": hostname, "port": port}
+            ```
+        """
+        targets = {}
+        for unit in relation.units:
+            port = relation.data[unit].get("port", 80)
+            hostname = relation.data[unit].get("hostname")
+            if hostname:
+                targets.update({unit.name: {"hostname": hostname, "port": port}})
+
+        return targets
+
+    def _get_alert_rules(self, relation) -> dict:
+        """Fetch alert rules for a relation.
+
+        Each unit of the related scrape target may have its own
+        associated alert rules. Alert rules for all units are returned
+        indexed by unit name.
+
+        Args:
+            relation: an `ops.model.Relation` object for which alert
+                rules are required.
+
+        Returns:
+            a dictionary whose keys are names of the units in the
+            relation. The value associated with each key is a list
+            of alert rules, each in dictionary format; each such
+            "rule dictionary" corresponds to a single Prometheus
+            alert rule.
+        """
+        rules = {}
+        for unit in relation.units:
+            unit_rules = yaml.safe_load(relation.data[unit].get("groups", ""))
+            if unit_rules:
+                rules.update({unit.name: unit_rules})
+
+        return rules
+
+    def _job_name(self, appname) -> str:
+        """Construct a scrape job name.
+
+        Each relation has its own unique scrape job name. All units in
+        the relation are scraped as part of the same scrape job.
+
+        Args:
+            appname: string name of a related application.
+
+        Returns:
+            a string Prometheus scrape job name for the application.
+        """
+        return "juju_{}_{}_{}_prometheus_scrape".format(
+            self.model.name, self.model.uuid[:7], appname
+        )
+
+    def _group_name(self, appname) -> str:
+        """Construct name for an alert rule group.
+
+        Each unit in a relation may define its own alert rules. All
+        rules for all units in a relation are grouped together and
+        given a single alert rule group name.
+
+        Args:
+            appname: string name of a related application.
+
+        Returns:
+            a string Prometheus alert rules group name for the application.
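+
+        For example, for an application named "node-exporter" in a model "lma"
+        whose uuid begins with "1a2b3c4" (hypothetical values), the group name
+        would be:
+
+            juju_lma_1a2b3c4_node-exporter_alert_rules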
+ """ + return "juju_{}_{}_{}_alert_rules".format(self.model.name, self.model.uuid[:7], appname) + + def _label_alert_rules(self, unit_rules, appname) -> list: + """Apply juju topology labels to alert rules. + + Args: + unit_rules: a list of alert rules, where each rule is in + dictionary format. + appname: a string name of the application to which the + alert rules belong. + + Returns: + a list of alert rules with Juju topology labels. + """ + labeled_rules = [] + for unit_name, rules in unit_rules.items(): + for rule in rules: + # the new JujuTopology removed this, so build it up by hand + matchers = { + "juju_{}".format(k): v + for k, v in JujuTopology(self.model.name, self.model.uuid, appname, unit_name) + .as_dict(excluded_keys=["charm_name"]) + .items() + } + rule["labels"].update(matchers.items()) + labeled_rules.append(rule) + + return labeled_rules + + def _static_scrape_job(self, targets, application_name, **kwargs) -> dict: + """Construct a static scrape job for an application. + + Args: + targets: a dictionary providing hostname and port for all + scrape target. The keys of this dictionary are unit + names. Values corresponding to these keys are + themselves a dictionary with keys "hostname" and + "port". + application_name: a string name of the application for + which this static scrape job is being constructed. + + Returns: + A dictionary corresponding to a Prometheus static scrape + job configuration for one application. The returned + dictionary may be transformed into YAML and appended to + the list of any existing list of Prometheus static configs. + """ + juju_model = self.model.name + juju_model_uuid = self.model.uuid + job = { + "job_name": self._job_name(application_name), + "static_configs": [ + { + "targets": ["{}:{}".format(target["hostname"], target["port"])], + "labels": { + "juju_model": juju_model, + "juju_model_uuid": juju_model_uuid, + "juju_application": application_name, + "juju_unit": unit_name, + "host": target["hostname"], + }, + } + for unit_name, target in targets.items() + ], + "relabel_configs": self._relabel_configs + kwargs.get("relabel_configs", []), + } + job.update(kwargs.get("updates", {})) + + return job + + @property + def _relabel_configs(self) -> list: + """Create Juju topology relabeling configuration. + + Using Juju topology for instance labels ensures that these + labels are stable across unit recreation. + + Returns: + a list of Prometheus relabeling configurations. Each item in + this list is one relabel configuration. 
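+
+        With relabeling enabled, the "instance" label of each scraped series
+        takes the form (illustrative):
+
+            <juju_model>_<juju_model_uuid>_<juju_application>_<juju_unit>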
+ """ + return ( + [ + { + "source_labels": [ + "juju_model", + "juju_model_uuid", + "juju_application", + "juju_unit", + ], + "separator": "_", + "target_label": "instance", + "regex": "(.*)", + } + ] + if self._relabel_instance + else [] + ) + + +class CosTool: + """Uses cos-tool to inject label matchers into alert rule expressions and validate rules.""" + + _path = None + _disabled = False + + def __init__(self, charm): + self._charm = charm + + @property + def path(self): + """Lazy lookup of the path of cos-tool.""" + if self._disabled: + return None + if not self._path: + self._path = self._get_tool_path() + if not self._path: + logger.debug("Skipping injection of juju topology as label matchers") + self._disabled = True + return self._path + + def apply_label_matchers(self, rules) -> dict: + """Will apply label matchers to the expression of all alerts in all supplied groups.""" + if not self.path: + return rules + for group in rules["groups"]: + rules_in_group = group.get("rules", []) + for rule in rules_in_group: + topology = {} + # if the user for some reason has provided juju_unit, we'll need to honor it + # in most cases, however, this will be empty + for label in [ + "juju_model", + "juju_model_uuid", + "juju_application", + "juju_charm", + "juju_unit", + ]: + if label in rule["labels"]: + topology[label] = rule["labels"][label] + + rule["expr"] = self.inject_label_matchers(rule["expr"], topology) + return rules + + def validate_alert_rules(self, rules: dict) -> Tuple[bool, str]: + """Will validate correctness of alert rules, returning a boolean and any errors.""" + if not self.path: + logger.debug("`cos-tool` unavailable. Not validating alert correctness.") + return True, "" + + with tempfile.TemporaryDirectory() as tmpdir: + rule_path = Path(tmpdir + "/validate_rule.yaml") + rule_path.write_text(yaml.dump(rules)) + + args = [str(self.path), "validate", str(rule_path)] + # noinspection PyBroadException + try: + self._exec(args) + return True, "" + except subprocess.CalledProcessError as e: + logger.debug("Validating the rules failed: %s", e.output) + return False, ", ".join( + [ + line + for line in e.output.decode("utf8").splitlines() + if "error validating" in line + ] + ) + + def inject_label_matchers(self, expression, topology) -> str: + """Add label matchers to an expression.""" + if not topology: + return expression + if not self.path: + logger.debug("`cos-tool` unavailable. 
Leaving expression unchanged: %s", expression) + return expression + args = [str(self.path), "transform"] + args.extend( + ["--label-matcher={}={}".format(key, value) for key, value in topology.items()] + ) + + args.extend(["{}".format(expression)]) + # noinspection PyBroadException + try: + return self._exec(args) + except subprocess.CalledProcessError as e: + logger.debug('Applying the expression failed: "%s", falling back to the original', e) + return expression + + def _get_tool_path(self) -> Optional[Path]: + arch = platform.machine() + arch = "amd64" if arch == "x86_64" else arch + res = "cos-tool-{}".format(arch) + try: + path = Path(res).resolve() + path.chmod(0o777) + return path + except NotImplementedError: + logger.debug("System lacks support for chmod") + except FileNotFoundError: + logger.debug('Could not locate cos-tool at: "{}"'.format(res)) + return None + + def _exec(self, cmd) -> str: + result = subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + return result.stdout.decode("utf-8").strip() diff --git a/ceph-mon/metadata.yaml b/ceph-mon/metadata.yaml new file mode 100644 index 00000000..ec30ebdd --- /dev/null +++ b/ceph-mon/metadata.yaml @@ -0,0 +1,54 @@ +name: ceph-mon +summary: Highly scalable distributed storage +maintainer: OpenStack Charmers +description: | + Ceph is a distributed storage and network file system designed to provide + excellent performance, reliability, and scalability. +docs: https://discourse.charmhub.io/t/ceph-mon-docs-index/10544 +tags: +- openstack +- storage +- file-servers +- misc +series: +- focal +- jammy +peers: + mon: + interface: ceph +extra-bindings: + public: + cluster: +provides: + nrpe-external-master: + interface: nrpe-external-master + scope: container + mds: + interface: ceph-mds + admin: + interface: ceph-admin + client: + interface: ceph-client + osd: + interface: ceph-osd + radosgw: + interface: ceph-radosgw + rbd-mirror: + interface: ceph-rbd-mirror + prometheus: + interface: http + metrics-endpoint: + interface: prometheus_scrape + dashboard: + interface: ceph-dashboard + cos-agent: + interface: cos_agent + +requires: + bootstrap-source: + interface: ceph-bootstrap +resources: + alert-rules: + type: file + filename: alert.yaml.rules + description: Alerting rules diff --git a/ceph-mon/osci.yaml b/ceph-mon/osci.yaml new file mode 100644 index 00000000..c02b3896 --- /dev/null +++ b/ceph-mon/osci.yaml @@ -0,0 +1,9 @@ +- project: + templates: + - charm-unit-jobs-py310 + - charm-functional-jobs + vars: + needs_charm_build: true + charm_build_name: ceph-mon + build_type: charmcraft + charmcraft_channel: 2.x/stable diff --git a/ceph-mon/rename.sh b/ceph-mon/rename.sh new file mode 100755 index 00000000..d0c35c97 --- /dev/null +++ b/ceph-mon/rename.sh @@ -0,0 +1,13 @@ +#!/bin/bash +charm=$(grep "charm_build_name" osci.yaml | awk '{print $2}') +echo "renaming ${charm}_*.charm to ${charm}.charm" +echo -n "pwd: " +pwd +ls -al +echo "Removing bad downloaded charm maybe?" +if [[ -e "${charm}.charm" ]]; +then + rm "${charm}.charm" +fi +echo "Renaming charm here." 
+mv ${charm}_*.charm ${charm}.charm diff --git a/ceph-mon/requirements.txt b/ceph-mon/requirements.txt new file mode 100644 index 00000000..64959dc5 --- /dev/null +++ b/ceph-mon/requirements.txt @@ -0,0 +1,14 @@ +importlib-resources +ops >= 1.2.0 +tenacity +pyudev +dnspython +netaddr +netifaces +pyyaml +git+https://github.com/openstack/charms.ceph#egg=charms_ceph +git+https://opendev.org/openstack/charm-ops-openstack#egg=ops_openstack +git+https://opendev.org/openstack/charm-ops-interface-tls-certificates#egg=interface_tls_certificates +git+https://github.com/openstack-charmers/ops-interface-ceph-iscsi-admin-access#egg=interface_ceph_iscsi_admin_access +git+https://github.com/openstack-charmers/ops-interface-openstack-loadbalancer#egg=interface_openstack_loadbalancer +git+https://github.com/juju/charm-helpers#egg=charmhelpers diff --git a/ceph-mon/setup.cfg b/ceph-mon/setup.cfg new file mode 100644 index 00000000..37083b62 --- /dev/null +++ b/ceph-mon/setup.cfg @@ -0,0 +1,5 @@ +[nosetests] +verbosity=2 +with-coverage=1 +cover-erase=1 +cover-package=hooks diff --git a/ceph-mon/src/ceph_client.py b/ceph-mon/src/ceph_client.py new file mode 100644 index 00000000..e29e6a11 --- /dev/null +++ b/ceph-mon/src/ceph_client.py @@ -0,0 +1,203 @@ +"""Ceph client library +""" + +import json +import logging + +from ops.framework import Object +from ops.framework import StoredState + +from charmhelpers.contrib.storage.linux.ceph import ( + send_osd_settings, +) +import charms_ceph.utils as ceph + + +from utils import ( + get_public_addr, + get_rbd_features, +) + + +logger = logging.getLogger(__name__) + + +class CephClientProvides(Object): + """ + Encapsulate the Provides side of the Ceph Client relation. + + Hook events observed: + - relation-joined + - relation-changed + """ + + charm = None + _stored = StoredState() + + def __init__(self, charm, relation_name='client'): + super().__init__(charm, relation_name) + + self._stored.set_default(processed=[], processed_map={}) + self.charm = charm + self.this_unit = self.model.unit + self.relation_name = relation_name + self.framework.observe( + charm.on[self.relation_name].relation_joined, + self._on_relation_changed + ) + self.framework.observe( + charm.on[self.relation_name].relation_changed, + self._on_relation_changed + ) + + self._stored.processed_map = {} + + def notify_all(self): + send_osd_settings() + if not self.charm.ready_for_service(): + return + for relation in self.model.relations[self.relation_name]: + for unit in relation.units: + self._handle_client_relation(relation, unit) + + def _on_relation_changed(self, event): + """Prepare relation for data from requiring side.""" + send_osd_settings() + if not self.charm.ready_for_service(): + return + self._handle_client_relation(event.relation, event.unit) + + def _get_ceph_info_from_configs(self): + """ + Create dictionary of ceph information required to set client relation. + + :returns: Dictionary of ceph configurations needed for client relation + :rtype: dict + """ + public_addr = get_public_addr() + rbd_features = get_rbd_features() + data = { + 'auth': 'cephx', + 'ceph-public-address': public_addr + } + if rbd_features: + data['rbd-features'] = rbd_features + return data + + def _get_custom_relation_init_data(self): + """Information required for specialised relation. 
+ + :returns: Ceph configurations needed for specialised relation + :rtype: dict + """ + return {} + + def _get_client_application_name(self, relation, unit): + """Retrieve client application name from relation data.""" + return relation.data[unit].get( + 'application-name', + relation.app.name) + + def _handle_client_relation(self, relation, unit): + """Handle broker request and set the relation data + + :param relation: Operator relation + :type relation: Relation + :param unit: Unit to handle + :type unit: Unit + """ + + # if is_unsupported_cmr(unit): + # return + + logger.debug( + 'mon cluster in quorum and osds bootstrapped ' + '- providing client with keys, processing broker requests') + + service_name = self._get_client_application_name(relation, unit) + data = self._get_ceph_info_from_configs() + data.update(self._get_custom_relation_init_data()) + data.update({'key': ceph.get_named_key(service_name)}) + + data.update( + self._handle_broker_request( + relation, unit, add_legacy_response=True)) + for k, v in data.items(): + relation.data[self.this_unit][k] = str(v) + + def _handle_broker_request( + self, relation, unit, add_legacy_response=False, force=False): + """ + Retrieve broker request from relation, process, return response data. + + :param relation: Operator relation + :type relation: Relation + :param unit: Unit making the broker request + :type unit: Unit + :param add_legacy_response: (Optional) Adds the legacy ``broker_rsp`` + key to the response in addition to the + new way. + :type add_legacy_response: bool + :param force: Whether to re-process broker requests. + :type force: bool + :returns: Dictionary of response data ready for use with relation_set. + :rtype: dict + """ + def _get_broker_req_id(request): + try: + if isinstance(request, str): + try: + req_key = json.loads(request)['request-id'] + except (TypeError, json.decoder.JSONDecodeError): + logger.warning( + 'Not able to decode request ' + 'id for broker request {}'. + format(request)) + req_key = None + else: + req_key = request['request-id'] + except KeyError: + logger.warning( + 'Not able to decode request id for broker request {}'. + format(request)) + req_key = None + + return req_key + + response = {} + + settings = relation.data[unit] + if 'broker_req' in settings: + broker_req_id = _get_broker_req_id(settings['broker_req']) + if broker_req_id is None: + return {} + + if not ceph.is_leader(): + logger.debug( + "Not leader - ignoring broker request {}".format( + broker_req_id)) + return {} + + unit_id = settings.get( + 'unit-name', unit.name).replace('/', '-') + unit_response_key = 'broker-rsp-' + unit_id + prev_result = self._stored.processed_map.get(broker_req_id) + if prev_result is not None and not force: + # The broker request has been processed already and we have + # stored the result. Log it so that the users may know and + # return the cached value, with the unit key.
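+ # (Serving the cached response avoids re-running the broker + # action while still returning the data the caller writes + # back onto the relation.)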
+ logger.debug( + "Ignoring already executed broker request {}".format( + broker_req_id)) + rsp = {unit_response_key: prev_result} + if add_legacy_response: + rsp.update({'broker_rsp': prev_result}) + return rsp + + rsp = self.charm.process_broker_request( + broker_req_id, settings['broker_req']) + response.update({unit_response_key: rsp}) + if add_legacy_response: + response.update({'broker_rsp': rsp}) + self._stored.processed_map[broker_req_id] = rsp + else: + logger.warn('broker_req not in settings: {}'.format(settings)) + return response diff --git a/ceph-mon/src/ceph_hooks.py b/ceph-mon/src/ceph_hooks.py new file mode 100755 index 00000000..8eabbaaa --- /dev/null +++ b/ceph-mon/src/ceph_hooks.py @@ -0,0 +1,1355 @@ +#!/usr/bin/env python3 +# +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import ast +import json +import os +import subprocess +import sys +import uuid +import pathlib + +import tenacity + +sys.path.append('lib') +import charms_ceph.utils as ceph +from charms_ceph.broker import ( + process_requests +) + +from charmhelpers.core import hookenv +from charmhelpers.core.hookenv import ( + log, + DEBUG, + ERROR, + INFO, + WARNING, + config, + relation_ids, + related_units, + is_relation_made, + relation_get, + relation_set, + leader_set, leader_get, + is_leader, + remote_unit, + Hooks, + service_name, + relations_of_type, + status_set, + local_unit, +) +from charmhelpers.core.host import ( + service_pause, + mkdir, + write_file, + rsync, + cmp_pkgrevno) +from charmhelpers.fetch import ( + apt_install, + filter_installed_packages, + add_source, +) +from charmhelpers.contrib.openstack.alternatives import install_alternative +from charmhelpers.contrib.openstack.utils import ( + clear_unit_paused, + clear_unit_upgrading, + get_os_codename_install_source, + set_unit_paused, + set_unit_upgrading, +) +from charmhelpers.contrib.network.ip import ( + get_ipv6_addr, + format_ipv6_addr, + get_relation_ip, +) +from charmhelpers.core.sysctl import create as create_sysctl +from charmhelpers.core.templating import render +from charmhelpers.contrib.storage.linux.ceph import ( + CephBrokerRq, + CephConfContext, + enable_pg_autoscale, + send_osd_settings, +) +from utils import ( + assert_charm_supports_ipv6, + get_cluster_addr, + get_networks, + get_public_addr, + get_rbd_features, + get_ceph_osd_releases, + execute_post_osd_upgrade_steps, + mgr_config_set_rbd_stats_pools, + mgr_disable_module, + mgr_enable_module, + is_mgr_module_enabled, + set_balancer_mode, + try_disable_insecure_reclaim, +) + +from charmhelpers.contrib.charmsupport import nrpe +from charmhelpers.contrib.hardening.harden import harden + +hooks = Hooks() + +NAGIOS_PLUGINS = '/usr/local/lib/nagios/plugins' +NAGIOS_FILE_FOLDER = '/var/lib/nagios' +SCRIPTS_DIR = '/usr/local/bin' +STATUS_FILE = '{}/cat-ceph-status.txt'.format(NAGIOS_FILE_FOLDER) +RADOSGW_STATUS_FILE = ('{}/current-radosgw-admin-sync-status.raw' + .format(NAGIOS_FILE_FOLDER)) +STATUS_CRONFILE = '/etc/cron.d/cat-ceph-health' 
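+# JSON report mapping each host to the OSD ids expected on it; written by +# update_host_osd_count_report() below and read by the +# check_ceph_osd_count.py NRPE plugin.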
+HOST_OSD_COUNT_REPORT = '{}/host-osd-report.json'.format(NAGIOS_FILE_FOLDER) + + +def get_current_ceph_version(): + try: + out = subprocess.check_output(['ceph-mon', '-v']).decode('utf-8') + except subprocess.CalledProcessError as exc: + log(("failed to get ceph version: %s. check that the ceph-mon " + "binary is installed and runs correctly") % str(exc), + level=ERROR) + return '' + + # ceph version X.Y.Z (HASH) version-name (stable) + return out.split()[4] + + +def check_for_upgrade(): + if not ceph.is_bootstrapped(): + log("Ceph is not bootstrapped, skipping upgrade checks.") + return + + c = hookenv.config() + old_version = ceph.resolve_ceph_version(c.previous('source') or + 'distro') + + if not old_version: + old_version = get_current_ceph_version() + if not old_version: + log(("failed to get ceph version. check that the ceph-mon " + "binary is installed and runs correctly"), level=ERROR) + return + + log('old_version: {}'.format(old_version)) + + new_version = ceph.resolve_ceph_version(hookenv.config('source')) + if not new_version: + log(("new version not found. make sure the 'source' option has " + "been set and try again (using 'distro' may help)"), level=WARNING) + return + + old_version_os = get_os_codename_install_source(c.previous('source') or + 'distro') + new_version_os = get_os_codename_install_source(hookenv.config('source')) + + log('new_version: {}'.format(new_version)) + + if (old_version in ceph.UPGRADE_PATHS and + new_version == ceph.UPGRADE_PATHS[old_version]): + log("{} to {} is a valid upgrade path. Proceeding.".format( + old_version, new_version)) + ceph.roll_monitor_cluster(new_version=new_version, + upgrade_key='admin') + elif (old_version == new_version and + old_version_os is not None and + new_version_os is not None and + old_version_os < new_version_os): + # See LP: #1778823 + add_source(hookenv.config('source'), hookenv.config('key')) + log(("The installation source has changed yet there is no new major " + "version of Ceph in this new source. As a result no package " + "upgrade will take effect. Please upgrade manually if you need " + "to."), level=INFO) + else: + # Log a helpful error message + log("Invalid upgrade path from {} to {}. 
" + "Valid paths are: {}".format(old_version, + new_version, + ceph.pretty_print_upgrade_paths()), + level=ERROR) + + +def get_ceph_context(): + networks = get_networks('ceph-public-network') + public_network = ', '.join(networks) + + networks = get_networks('ceph-cluster-network') + cluster_network = ', '.join(networks) + + cephcontext = { + 'auth_supported': 'cephx', + 'mon_hosts': config('monitor-hosts') or ' '.join(get_mon_hosts()), + 'fsid': leader_get('fsid'), + 'old_auth': cmp_pkgrevno('ceph', "0.51") < 0, + 'use_syslog': str(config('use-syslog')).lower(), + 'ceph_public_network': public_network, + 'ceph_cluster_network': cluster_network, + 'loglevel': config('loglevel'), + 'dio': str(config('use-direct-io')).lower(), + 'mon_data_avail_warn': int(config('monitor-data-available-warning')), + 'mon_data_avail_crit': int(config('monitor-data-available-critical')), + } + + if config('prefer-ipv6'): + cephcontext['ms_bind_ipv4'] = False + cephcontext['ms_bind_ipv6'] = True + + local_addresses = get_ipv6_addr(dynamic_only=False) + public_addr = get_public_addr() + cluster_addr = get_cluster_addr() + # try binding to the address that juju will pass to other charms + if public_addr not in local_addresses: + log(f"Couldn't find a match for our assigned " + f"public ip {public_addr} " + f"out of {local_addresses}, " + f"using default {local_addresses[0]}", + level=WARNING) + public_addr = local_addresses[0] + + if cluster_addr not in local_addresses: + log(f"Couldn't find a match for our assigned " + f"cluster ip {cluster_addr} " + f"out of {local_addresses}, " + f"using default {local_addresses[0]}", + level=WARNING) + cluster_addr = local_addresses[0] + + if not public_network: + cephcontext['public_addr'] = public_addr + if not cluster_network: + cephcontext['cluster_addr'] = cluster_addr + else: + cephcontext['public_addr'] = get_public_addr() + cephcontext['cluster_addr'] = get_cluster_addr() + + rbd_features = get_rbd_features() + if rbd_features: + cephcontext['rbd_features'] = rbd_features + + if config('disable-pg-max-object-skew'): + cephcontext['disable_object_skew'] = config( + 'disable-pg-max-object-skew') + + # NOTE(dosaboy): these sections must correspond to what is supported in the + # config template. + sections = ['global', 'mds', 'mon'] + cephcontext.update(CephConfContext(permitted_sections=sections)()) + return cephcontext + + +def emit_cephconf(): + # Install ceph.conf as an alternative to support + # co-existence with other charms that write this file + charm_ceph_conf = "/var/lib/charm/{}/ceph.conf".format(service_name()) + mkdir(os.path.dirname(charm_ceph_conf), owner=ceph.ceph_user(), + group=ceph.ceph_user()) + render('ceph.conf', charm_ceph_conf, get_ceph_context(), perms=0o644) + install_alternative('ceph.conf', '/etc/ceph/ceph.conf', + charm_ceph_conf, 100) + + +JOURNAL_ZAPPED = '/var/lib/ceph/journal_zapped' + + +def update_host_osd_count_report(reset=False): + """Update report showing hosts->osds. Used for monitoring.""" + current_osd_tree = ceph.get_osd_tree('admin') + + # Convert [CrushLocation,...] 
-> {host: [osdid, ...], ...} for easy comparison + current_host_osd_map = {} + for osd in current_osd_tree: + osd_list = current_host_osd_map.get(osd.host, []) + osd_list.append(osd.identifier) + current_host_osd_map[osd.host] = osd_list + + pathlib.Path(NAGIOS_FILE_FOLDER).mkdir(parents=True, exist_ok=True) + if not os.path.isfile(HOST_OSD_COUNT_REPORT) or reset: + write_file(HOST_OSD_COUNT_REPORT, '{}') + + with open(HOST_OSD_COUNT_REPORT, "r") as f: + expected_host_osd_map = json.load(f) + + if current_host_osd_map == expected_host_osd_map: + return + + for host, osd_list in current_host_osd_map.items(): + if host not in expected_host_osd_map: + expected_host_osd_map[host] = osd_list + + if len(osd_list) > len(expected_host_osd_map[host]): + # osd list is growing, add them to the expected + expected_host_osd_map[host] = osd_list + + if len(osd_list) == len(expected_host_osd_map[host]) and \ + osd_list != expected_host_osd_map[host]: + # different osd ids, maybe hdd swap, refresh + expected_host_osd_map[host] = osd_list + + write_file(HOST_OSD_COUNT_REPORT, + json.dumps(expected_host_osd_map)) + + +@hooks.hook('config-changed') +@harden() +def config_changed(): + ''' + Handle config-changed + + :returns: Whether or not relations should be notified after completion. + :rtype: bool + ''' + # Get the cfg object so we can see if the no-bootstrap value has changed + # and triggered this hook invocation + cfg = config() + if config('prefer-ipv6'): + assert_charm_supports_ipv6() + + check_for_upgrade() + set_balancer_mode(config('balancer-mode')) + + log('Monitor hosts are ' + repr(get_mon_hosts())) + + sysctl_dict = config('sysctl') + if sysctl_dict: + create_sysctl(sysctl_dict, '/etc/sysctl.d/50-ceph-charm.conf') + if relations_of_type('nrpe-external-master'): + update_nrpe_config() + + if is_leader(): + if not config('no-bootstrap'): + if not leader_get('fsid') or not leader_get('monitor-secret'): + if config('fsid'): + fsid = config('fsid') + else: + fsid = "{}".format(uuid.uuid1()) + if config('monitor-secret'): + mon_secret = config('monitor-secret') + else: + mon_secret = "{}".format(ceph.generate_monitor_secret()) + opts = { + 'fsid': fsid, + 'monitor-secret': mon_secret, + } + try: + leader_set(opts) + status_set('maintenance', + 'Created FSID and Monitor Secret') + log("Settings for the cluster are: {}".format(opts)) + except Exception as e: + # we're probably not the leader and an exception occurred; + # let's log it anyway. + log("leader_set failed: {}".format(str(e))) + elif (cfg.changed('no-bootstrap') and + is_relation_made('bootstrap-source')): + # User changed the no-bootstrap config option, we're the leader, + # and the bootstrap-source relation has been made. The charm should + # be in a blocked state indicating that the no-bootstrap option + # must be set. This block is invoked when the user is trying to + # get out of that scenario by enabling no-bootstrap.
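+ # Re-running the handler below lets the leader settings be populated + # from the remote cluster's relation data instead of generated locally.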
+ bootstrap_source_relation_changed() + + # This will only ensure the autoscaler is enabled if the 'pg-autotune' + # option is explicitly set to 'true', and not if it is 'auto' or 'false' + if (config('pg-autotune') == 'true' and + cmp_pkgrevno('ceph', '14.2.0') >= 0): + # The return value of the enable_module call will tell us if the + # module was already enabled, in which case, we don't need to + # re-configure the already configured pools + if mgr_enable_module('pg_autoscaler'): + ceph.monitor_key_set('admin', 'autotune', 'true') + for pool in ceph.list_pools(): + enable_pg_autoscale('admin', pool) + # unconditionally verify that the fsid and monitor-secret are set now + # otherwise we exit until a leader does this. + if leader_get('fsid') is None or leader_get('monitor-secret') is None: + log('still waiting for leader to setup keys') + status_set('waiting', 'Waiting for leader to setup keys') + return + + emit_cephconf() + + # Support use of single node ceph + if (not ceph.is_bootstrapped() and int(config('monitor-count')) == 1 and + is_leader()): + status_set('maintenance', 'Bootstrapping single Ceph MON') + # the following call raises an exception if it can't add the keyring + try: + ceph.bootstrap_monitor_cluster(leader_get('monitor-secret')) + except FileNotFoundError as e: # NOQA -- PEP8 is still PY2 + log("Couldn't bootstrap the monitor yet: {}".format(str(e))) + return + ceph.wait_for_bootstrap() + ceph.wait_for_quorum() + ceph.create_keyrings() + if cmp_pkgrevno('ceph', '12.0.0') >= 0: + status_set('maintenance', 'Bootstrapping single Ceph MGR') + ceph.bootstrap_manager() + try_disable_insecure_reclaim() + for relid in relation_ids('dashboard'): + dashboard_relation(relid) + + mgr_config_set_rbd_stats_pools() + + return True + + +def get_mon_hosts(): + hosts = [] + addr = get_public_addr() + hosts.append(format_ipv6_addr(addr) or addr) + + rel_ids = relation_ids('mon') + if config('no-bootstrap'): + rel_ids += relation_ids('bootstrap-source') + + for relid in rel_ids: + for unit in related_units(relid): + addr = relation_get('ceph-public-address', unit, relid) + if addr is not None: + hosts.append(format_ipv6_addr(addr) or addr) + + return sorted(hosts) + + +def get_peer_units(): + """ + Returns a dictionary of unit names from the mon peer relation with + a flag indicating whether the unit has presented its address + """ + units = {} + units[local_unit()] = True + for relid in relation_ids('mon'): + for unit in related_units(relid): + addr = relation_get('ceph-public-address', unit, relid) + units[unit] = addr is not None + return units + + +@hooks.hook('mon-relation-joined') +def mon_relation_joined(): + public_addr = get_public_addr() + for relid in relation_ids('mon'): + relation_set(relation_id=relid, + relation_settings={'ceph-public-address': public_addr}) + + +@hooks.hook('bootstrap-source-relation-changed') +def bootstrap_source_relation_changed(): + """Handles relation data changes on the bootstrap-source relation. + + The bootstrap-source relation is used to share remote bootstrap + information with the ceph-mon charm: it exchanges the remote + ceph-public-addresses used for the mons, the fsid, and the + monitor-secret. + + :returns: Whether or not relations should be notified after completion.
+ :rtype: bool + """ + if not config('no-bootstrap'): + status_set('blocked', 'Cannot join the bootstrap-source relation when ' + 'no-bootstrap is False') + return + + if not is_leader(): + log('Deferring leader-setting updates to the leader unit') + return + + curr_fsid = leader_get('fsid') + curr_secret = leader_get('monitor-secret') + for relid in relation_ids('bootstrap-source'): + for unit in related_units(relid=relid): + mon_secret = relation_get('monitor-secret', unit, relid) + fsid = relation_get('fsid', unit, relid) + + if not (mon_secret and fsid): + log('Relation data is not ready as the fsid or the ' + 'monitor-secret are missing from the relation: ' + 'mon_secret = {} and fsid = {} '.format(mon_secret, fsid)) + continue + + if not (curr_fsid or curr_secret): + curr_fsid = fsid + curr_secret = mon_secret + else: + # The fsids and secrets need to match or the local monitors + # will fail to join the mon cluster. If they don't, + # bail because something needs to be investigated. + assert curr_fsid == fsid, \ + "bootstrap fsid '{}' != current fsid '{}'".format( + fsid, curr_fsid) + assert curr_secret == mon_secret, \ + "bootstrap secret '{}' != current secret '{}'".format( + mon_secret, curr_secret) + opts = { + 'fsid': fsid, + 'monitor-secret': mon_secret, + } + try: + leader_set(opts) + log('Updating leader settings for fsid and monitor-secret ' + 'from remote relation data: {}'.format(opts)) + except Exception as e: + # we're probably not the leader and an exception occurred; + # let's log it anyway. + log("leader_set failed: {}".format(str(e))) + + # The leader unit needs to bootstrap itself as it won't receive the + # leader-settings-changed hook elsewhere. + if curr_fsid: + return mon_relation() + + +@hooks.hook('prometheus-relation-joined', + 'prometheus-relation-changed') +def prometheus_relation(relid=None, unit=None, prometheus_permitted=None, + module_enabled=None): + log("DEPRECATION warning: relating to the prometheus2 machine charm is " + "deprecated in favor of COS Lite", level=INFO) + if not ceph.is_bootstrapped(): + return + if prometheus_permitted is None: + prometheus_permitted = cmp_pkgrevno('ceph', '12.2.0') >= 0 + + if module_enabled is None: + module_enabled = (is_mgr_module_enabled('prometheus') or + mgr_enable_module('prometheus')) + log("checking if prometheus module is enabled") + if prometheus_permitted and module_enabled: + mgr_config_set_rbd_stats_pools() + log("Updating prometheus") + data = { + 'hostname': get_relation_ip('prometheus'), + 'port': 9283, + } + relation_set(relation_id=relid, + relation_settings=data) + else: + log("Couldn't enable prometheus, but units are related. " + "Prometheus is available in Ceph version: {} ; " + "Prometheus Module is enabled: {}".format( + prometheus_permitted, module_enabled), level=WARNING) + + +@hooks.hook('prometheus-relation-departed') +def prometheus_left(): + mgr_disable_module('prometheus') + + +@hooks.hook('mon-relation-departed', + 'mon-relation-changed', + 'leader-settings-changed', + 'bootstrap-source-relation-departed') +def mon_relation(): + ''' + Handle the mon relation + + :returns: Whether or not relations should be notified after completion.
+ :rtype: bool + ''' + if leader_get('monitor-secret') is None: + log('still waiting for leader to setup keys') + status_set('waiting', 'Waiting for leader to setup keys') + return + emit_cephconf() + + moncount = int(config('monitor-count')) + if len(get_mon_hosts()) >= moncount: + if ceph.is_bootstrapped(): + # The ceph-mon unit chosen for handling broker requests is based on + # internal Ceph MON leadership and not Juju leadership. To update + # the relations on all ceph-mon units after pool creation + # the unit handling the broker request will update a nonce on the + # mon relation. + notify_relations() + return True + else: + if attempt_mon_cluster_bootstrap(): + notify_relations() + return True + else: + log('Not enough mons ({}), punting.' + .format(len(get_mon_hosts()))) + + +def attempt_mon_cluster_bootstrap(): + status_set('maintenance', 'Bootstrapping MON cluster') + # the following call raises an exception + # if it can't add the keyring + try: + ceph.bootstrap_monitor_cluster(leader_get('monitor-secret')) + except FileNotFoundError as e: # NOQA -- PEP8 is still PY2 + log("Couldn't bootstrap the monitor yet: {}".format(str(e))) + return False + ceph.wait_for_bootstrap() + ceph.wait_for_quorum() + ceph.create_keyrings() + if cmp_pkgrevno('ceph', '12.0.0') >= 0: + status_set('maintenance', 'Bootstrapping Ceph MGR') + ceph.bootstrap_manager() + if ceph.monitor_key_exists('admin', 'autotune'): + autotune = ceph.monitor_key_get('admin', 'autotune') + else: + ceph.wait_for_manager() + autotune = config('pg-autotune') + if (cmp_pkgrevno('ceph', '14.2.0') >= 0 and + (autotune == 'true' or + autotune == 'auto')): + ceph.monitor_key_set('admin', 'autotune', 'true') + else: + ceph.monitor_key_set('admin', 'autotune', 'false') + if ceph.monitor_key_get('admin', 'autotune') == 'true': + try: + mgr_enable_module('pg_autoscaler') + except subprocess.CalledProcessError: + log("Failed to initialize autoscaler, it must be " + "initialized on the last monitor", level='info') + + try_disable_insecure_reclaim() + # If we can and want to + if is_leader() and config('customize-failure-domain'): + # But only if the environment supports it + if os.environ.get('JUJU_AVAILABILITY_ZONE'): + cmds = [ + "ceph osd getcrushmap -o /tmp/crush.map", + "crushtool -d /tmp/crush.map| " + "sed 's/step chooseleaf firstn 0 type host/step " + "chooseleaf firstn 0 type rack/' > " + "/tmp/crush.decompiled", + "crushtool -c /tmp/crush.decompiled -o /tmp/crush.map", + "crushtool -i /tmp/crush.map --test " + "--num-rep 3 --show-statistics", + "ceph osd setcrushmap -i /tmp/crush.map" + ] + for cmd in cmds: + try: + subprocess.check_call(cmd, shell=True) + except subprocess.CalledProcessError as e: + log("Failed to modify crush map:", level='error') + log("Cmd: {}".format(cmd), level='error') + log("Error: {}".format(e.output), level='error') + break + else: + log( + "Your Juju environment doesn't " + "have support for Availability Zones" + ) + return True + + +def notify_relations(reprocess_broker_requests=False): + notify_osds(reprocess_broker_requests=reprocess_broker_requests) + notify_radosgws(reprocess_broker_requests=reprocess_broker_requests) + notify_rbd_mirrors(reprocess_broker_requests=reprocess_broker_requests) + notify_prometheus() + + +def notify_prometheus(): + if relation_ids('prometheus') and ceph.is_bootstrapped(): + prometheus_permitted = cmp_pkgrevno('ceph', '12.2.0') >= 0 + module_enabled = (is_mgr_module_enabled('prometheus') or + mgr_enable_module('prometheus')) + for relid in 
relation_ids('prometheus'): + for unit in related_units(relid): + prometheus_relation(relid=relid, unit=unit, + prometheus_permitted=prometheus_permitted, + module_enabled=module_enabled) + + +def notify_osds(reprocess_broker_requests=False): + for relid in relation_ids('osd'): + for unit in related_units(relid): + osd_relation( + relid=relid, unit=unit, + reprocess_broker_requests=reprocess_broker_requests) + + +def notify_radosgws(reprocess_broker_requests=False): + for relid in relation_ids('radosgw'): + for unit in related_units(relid): + radosgw_relation( + relid=relid, unit=unit, + reprocess_broker_requests=reprocess_broker_requests) + + +def notify_rbd_mirrors(reprocess_broker_requests=False): + for relid in relation_ids('rbd-mirror'): + for unit in related_units(relid): + rbd_mirror_relation( + relid=relid, unit=unit, + recurse=False, + reprocess_broker_requests=reprocess_broker_requests) + + +def req_already_treated(request_id, relid, req_unit): + """Check if broker request already handled. + + The local relation data holds all the broker request/responses that + are handled as a dictionary. There will be a single entry for each + unit that makes a broker request, in the form of + broker-rsp-<unit name>: {request-id: <id>, ..}. Verify if request_id + exists in the relation data broker response for the requested unit. + + :param request_id: Request ID + :type request_id: str + :param relid: Relation ID + :type relid: str + :param req_unit: Requested unit name + :type req_unit: str + :returns: Whether request is already handled + :rtype: bool + """ + status = relation_get(rid=relid, unit=local_unit()) + response_key = 'broker-rsp-' + req_unit.replace('/', '-') + if not status.get(response_key): + return False + data = None + # relation_get returns the value of response key as a dict or json + # encoded string + if isinstance(status[response_key], str): + try: + data = json.loads(status[response_key]) + except (TypeError, json.decoder.JSONDecodeError): + log('Not able to decode broker response for relid {} requested ' + 'unit {}'.format(relid, req_unit), level=WARNING) + return False + else: + data = status[response_key] + if data.get('request-id') == request_id: + return True + return False + + +def notify_mons(): + """Update a nonce on the ``mon`` relation. + + This is useful for flagging that our peer mon units should update some of + their client relations. + + Normally we would have handled this with leader storage, but for the Ceph + case, the unit handling the broker requests is the Ceph MON leader and not + necessarily the Juju leader. + + A non-leader unit has no way of changing data in leader-storage. + """ + nonce = uuid.uuid4() + for relid in relation_ids('mon'): + for unit in related_units(relid): + relation_set(relation_id=relid, + relation_settings={'nonce': nonce}) + + +def get_client_application_name(relid, unit): + """Retrieve client application name from relation data. + + :param relid: Relation ID + :type relid: str + :param unit: Remote unit name + :type unit: str + """ + if not unit: + unit = remote_unit() + app_name = relation_get(rid=relid, unit=unit).get( + 'application-name', + hookenv.remote_service_name(relid=relid)) + return app_name + + +def retrieve_client_broker_requests(): + """Retrieve broker requests from client-type relations. + + :returns: Map of broker requests by request-id.
+ :rtype: List[CephBrokerRq] + """ + def _get_request(relation_data): + if 'broker_req' in relation_data: + rq = CephBrokerRq(raw_request_data=relation_data['broker_req']) + yield rq.request_id, rq + # Note that empty return from generator produces empty generator and + # not None, ref PEP 479 + return + + # we use a dictionary with request_id as key to deduplicate the list. + # we cannot use the list(set([])) trick here as CephBrokerRq is an + # unhashable type. We also cannot just pass on the raw request either + # as we need to intelligently compare them to avoid false negatives + # due to reordering of keys + return { + request_id: request + # NOTE(fnordahl): the ``rbd-mirror`` endpoint is omitted here as it is + # typically a consumer of the output of this function + for endpoint in ('client', 'mds', 'radosgw') + for relid in relation_ids(endpoint) + for unit in related_units(relid) + for request_id, request in _get_request( + relation_get(rid=relid, unit=unit)) + }.values() + + +def handle_broker_request(relid, unit, add_legacy_response=False, + recurse=True, force=False): + """Retrieve broker request from relation, process, return response data. + + :param relid: Relation ID + :type relid: str + :param unit: Remote unit name + :type unit: str + :param add_legacy_response: (Optional) Adds the legacy ``broker_rsp`` key + to the response in addition to the new way. + :type add_legacy_response: bool + :param recurse: Whether we should call out to update relation functions or + not. Mainly used to handle recursion when called from + notify_rbd_mirrors() + :type recurse: bool + :param force: Process broker requests even if they have already been + processed. + :type force: bool + :returns: Dictionary of response data ready for use with relation_set. + :rtype: dict + """ + def _get_broker_req_id(request): + if isinstance(request, str): + try: + req_key = json.loads(request)['request-id'] + except (TypeError, json.decoder.JSONDecodeError): + log('Not able to decode request id for broker request {}'. + format(request), + level=WARNING) + req_key = None + else: + req_key = request['request-id'] + + return req_key + + response = {} + if not unit: + unit = remote_unit() + settings = relation_get(rid=relid, unit=unit) + if 'broker_req' in settings: + broker_req_id = _get_broker_req_id(settings['broker_req']) + if broker_req_id is None: + return {} + + if not ceph.is_leader(): + log("Not leader - ignoring broker request {}".format( + broker_req_id), + level=DEBUG) + return {} + + if req_already_treated(broker_req_id, relid, unit) and not force: + log("Ignoring already executed broker request {}".format( + broker_req_id), + level=DEBUG) + return {} + + rsp = process_requests(settings['broker_req']) + unit_id = settings.get('unit-name', unit).replace('/', '-') + unit_response_key = 'broker-rsp-' + unit_id + response.update({unit_response_key: rsp}) + if add_legacy_response: + response.update({'broker_rsp': rsp}) + + if relation_ids('rbd-mirror'): + # NOTE(fnordahl): juju relation level data candidate + # notify mons to flag that the other mon units should update + # their ``rbd-mirror`` relations with information about new + # pools. + log('Notifying peers after processing broker request {}.'.format( + broker_req_id), + level=DEBUG) + notify_mons() + + if recurse: + # update ``rbd-mirror`` relations for this unit with + # information about new pools.
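+ # (notify_rbd_mirrors() calls back into rbd_mirror_relation() + # with recurse=False, so this cannot loop forever.)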
+ log('Notifying rbd-mirror relations for this unit after ' + 'processing broker request {}.'.format(broker_req_id), + level=DEBUG) + notify_rbd_mirrors() + + return response + + +@hooks.hook('osd-relation-joined') +@hooks.hook('osd-relation-changed') +def osd_relation(relid=None, unit=None, reprocess_broker_requests=False): + if ceph.is_quorum(): + log('mon cluster in quorum - providing fsid & keys') + public_addr = get_public_addr() + data = { + 'pending_key': '', + 'fsid': leader_get('fsid'), + 'osd_bootstrap_key': ceph.get_osd_bootstrap_key(), + 'auth': 'cephx', + 'ceph-public-address': public_addr, + 'osd_upgrade_key': ceph.get_named_key('osd-upgrade', + caps=ceph.osd_upgrade_caps), + 'osd_disk_removal_key': ceph.get_named_key( + 'osd-removal', + caps={ + 'mgr': ['allow *'], + 'mon': [ + 'allow r', + 'allow command "osd crush reweight"', + 'allow command "osd purge"', + 'allow command "osd destroy"', + ] + } + ), + # Provide a key to the osd for use by the crash module: + # https://docs.ceph.com/en/latest/mgr/crash/ + 'client_crash_key': ceph.create_named_keyring( + 'client', + 'crash', + caps={ + 'mon': ['profile crash'], + 'mgr': ['profile crash'], + } + ) + } + + data.update(handle_broker_request( + relid, unit, force=reprocess_broker_requests)) + relation_set(relation_id=relid, + relation_settings=data) + + if is_leader(): + ceph_osd_releases = get_ceph_osd_releases() + if len(ceph_osd_releases) == 1: + execute_post_osd_upgrade_steps(ceph_osd_releases[0]) + + # NOTE: radosgw key provision is gated on presence of OSD + # units so ensure that any deferred hooks are processed + notify_radosgws() + notify_rbd_mirrors() + send_osd_settings() + + for dashboard_relid in relation_ids('dashboard'): + dashboard_relation(dashboard_relid) + + if ready_for_service(): + update_host_osd_count_report() + + if is_leader(): + osd_host = relation_get(rid=relid, unit=unit, attribute='osd-host') + osd = f"osd/host:{osd_host}" + osd_memory_target = relation_get( + rid=relid, unit=unit, attribute='osd-memory-target' + ) + if all([osd_host, osd_memory_target]): + ceph.ceph_config_set( + "osd_memory_target", + osd_memory_target, + osd, + ) + + else: + log('mon cluster not in quorum - deferring fsid provision') + + +def related_osds(num_units=3): + ''' + Determine whether there are OSD units currently related + + @param num_units: The minimum number of units required + @return: boolean indicating whether the required number of + units were detected. + ''' + units = 0 + for r_id in relation_ids('osd'): + units += len(related_units(r_id)) + if units >= num_units: + return True + return False + + +def sufficient_osds(minimum_osds=3): + ''' + Determine if the minimum number of OSDs have been + bootstrapped into the cluster. + + @param minimum_osds: The minimum number of OSDs required + @return: boolean indicating whether the required number of + OSDs were detected. + ''' + bootstrapped_osds = 0 + for r_id in relation_ids('osd'): + for unit in related_units(r_id): + unit_osds = relation_get( + attribute='bootstrapped-osds', + unit=unit, rid=r_id + ) + if unit_osds is not None: + bootstrapped_osds += int(unit_osds) + if bootstrapped_osds >= minimum_osds: + return True + return False + + +def ready_for_service(): + ''' + Determine whether the Ceph cluster is ready to service + storage traffic from clients + + @return: boolean indicating whether the Ceph cluster is + ready for pool creation/client usage.
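+ + Side effect: on the leader, once enough OSDs have bootstrapped, the + 'bootstrapped-osds' flag is persisted in leader settings so follower + units can answer without re-counting.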
+ ''' + if not ceph.is_quorum(): + log('mon cluster is not in quorum', level=DEBUG) + return False + if is_leader(): + if leader_get('bootstrapped-osds') is None and \ + not sufficient_osds(config('expected-osd-count') or 3): + log('insufficient osds bootstrapped', level=DEBUG) + return False + leader_set({'bootstrapped-osds': True}) + else: + if leader_get('bootstrapped-osds') is None: + return False + return True + + +@hooks.hook('dashboard-relation-joined') +def dashboard_relation(relid=None): + """Inform dashboard that mons are ready""" + if not ready_for_service(): + log("mon cluster is not in quorum, dashboard notification skipped", + level=WARNING) + return + + relation_set(relation_id=relid, relation_settings={'mon-ready': True}) + + +@hooks.hook('radosgw-relation-changed') +@hooks.hook('radosgw-relation-joined') +def radosgw_relation(relid=None, unit=None, reprocess_broker_requests=False): + # Install radosgw for admin tools + apt_install(packages=filter_installed_packages(['radosgw'])) + if not unit: + unit = remote_unit() + if is_unsupported_cmr(unit): + return + + # NOTE: radosgw needs some OSD storage to be usable, so defer key + # provision until OSD units are detected. + if ready_for_service(): + log('mon cluster in quorum and osds bootstrapped ' + '- providing radosgw with keys') + public_addr = get_public_addr() + data = { + 'fsid': leader_get('fsid'), + 'auth': 'cephx', + 'ceph-public-address': public_addr, + } + key_name = relation_get('key_name', unit=unit, rid=relid) + if key_name: + # New style, per unit keys + data['{}_key'.format(key_name)] = ( + ceph.get_radosgw_key(name=key_name) + ) + else: + # Old style global radosgw key + data['radosgw_key'] = ceph.get_radosgw_key() + + data.update(handle_broker_request( + relid, unit, force=reprocess_broker_requests)) + relation_set(relation_id=relid, relation_settings=data) + + +@hooks.hook('rbd-mirror-relation-joined') +@hooks.hook('rbd-mirror-relation-changed') +def rbd_mirror_relation( + relid=None, unit=None, recurse=True, + reprocess_broker_requests=False): + ''' + Handle the rbd mirror relation + + :param recurse: Whether we should call out to update relation functions or + not. Mainly used to handle recursion when called from + notify_rbd_mirrors() + :type recurse: bool + :returns: Whether or not relations should be notified after completion.
+ :rtype: bool + ''' + if ready_for_service(): + log('mon cluster in quorum and osds bootstrapped ' + '- providing rbd-mirror client with keys') + if not unit: + unit = remote_unit() + if is_unsupported_cmr(unit): + return + + # Add some tenacity in getting pool details + @tenacity.retry(wait=tenacity.wait_exponential(max=20), + reraise=True) + def get_pool_details(): + return ceph.list_pools_detail() + + # handle broker requests first to get an updated pool map + data = (handle_broker_request( + relid, unit, recurse=recurse, force=reprocess_broker_requests)) + data.update({ + 'auth': 'cephx', + 'ceph-public-address': get_public_addr(), + 'pools': json.dumps(get_pool_details(), sort_keys=True), + 'broker_requests': json.dumps( + [rq.request for rq in retrieve_client_broker_requests()], + sort_keys=True), + }) + cluster_addr = get_cluster_addr() + if cluster_addr: + data['ceph-cluster-address'] = cluster_addr + # handle both classic and reactive Endpoint peers + try: + unique_id = json.loads( + relation_get('unique_id', unit=unit, rid=relid)) + except (TypeError, json.decoder.JSONDecodeError): + unique_id = relation_get('unique_id', unit=unit, rid=relid) + if unique_id: + data['{}_key'.format(unique_id)] = ceph.get_rbd_mirror_key( + 'rbd-mirror.{}'.format(unique_id)) + + relation_set(relation_id=relid, relation_settings=data) + + # make sure clients are updated with the appropriate RBD features + # bitmap. + if recurse: + return True + + +@hooks.hook('mds-relation-changed') +@hooks.hook('mds-relation-joined') +def mds_relation_joined( + relid=None, unit=None, reprocess_broker_requests=False): + if ready_for_service(): + log('mon cluster in quorum and osds bootstrapped ' + '- providing mds client with keys') + mds_name = relation_get(attribute='mds-name', + rid=relid, unit=unit) + if not unit: + unit = remote_unit() + if is_unsupported_cmr(unit): + return + public_addr = get_public_addr() + data = { + 'fsid': leader_get('fsid'), + '{}_mds_key'.format(mds_name): + ceph.get_mds_key(name=mds_name), + 'auth': 'cephx', + 'ceph-public-address': public_addr} + data.update( + handle_broker_request( + relid, unit, force=reprocess_broker_requests)) + relation_set(relation_id=relid, relation_settings=data) + + +@hooks.hook('admin-relation-changed') +@hooks.hook('admin-relation-joined') +def admin_relation_joined(relid=None): + if is_unsupported_cmr(remote_unit()): + return + if ceph.is_quorum(): + name = relation_get('keyring-name') + if name is None: + name = 'admin' + log('mon cluster in quorum - providing admin client with keys') + mon_hosts = config('monitor-hosts') or ' '.join(get_mon_hosts()) + data = {'key': ceph.get_named_key(name=name, caps=ceph.admin_caps), + 'fsid': leader_get('fsid'), + 'auth': 'cephx', + 'mon_hosts': mon_hosts, + } + relation_set(relation_id=relid, + relation_settings=data) + + +@hooks.hook('upgrade-charm.real') +@harden() +def upgrade_charm(): + emit_cephconf() + apt_install(packages=filter_installed_packages( + ceph.determine_packages()), fatal=True) + try: + # we defer and explicitly run `ceph-create-keys` from + # add_keyring_to_ceph() as part of bootstrap process + # LP: #1719436.
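+ # (ValueError from service_pause is tolerated below; newer Ceph + # releases no longer ship a ceph-create-keys service to pause.)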
+ service_pause('ceph-create-keys') + except ValueError: + pass + ceph.update_monfs() + mon_relation_joined() + if is_relation_made("nrpe-external-master"): + update_nrpe_config() + if not ceph.monitor_key_exists('admin', 'autotune'): + autotune = config('pg-autotune') + if (cmp_pkgrevno('ceph', '14.2.0') >= 0 and + (autotune == 'true' or + autotune == 'auto')): + ceph.monitor_key_set('admin', 'autotune', 'true') + else: + ceph.monitor_key_set('admin', 'autotune', 'false') + + # NOTE(jamespage): + # Reprocess broker requests to ensure that any cephx + # key permission changes are applied + notify_relations(reprocess_broker_requests=True) + + +@hooks.hook('nrpe-external-master-relation-joined') +@hooks.hook('nrpe-external-master-relation-changed') +def update_nrpe_config(): + apt_install('lockfile-progs', fatal=True) + log('Refreshing nagios checks') + if os.path.isdir(NAGIOS_PLUGINS): + rsync(os.path.join(os.getenv('CHARM_DIR'), 'files', 'nagios', + 'check_ceph_status.py'), + os.path.join(NAGIOS_PLUGINS, 'check_ceph_status.py')) + + rsync(os.path.join(os.getenv('CHARM_DIR'), 'files', 'nagios', + 'check_ceph_osd_count.py'), + os.path.join(NAGIOS_PLUGINS, 'check_ceph_osd_count.py')) + + rsync(os.path.join(os.getenv('CHARM_DIR'), 'files', 'nagios', + 'check_radosgw_sync_status.py'), + os.path.join(NAGIOS_PLUGINS, 'check_radosgw_sync_status.py')) + + script = os.path.join(SCRIPTS_DIR, 'collect_ceph_status.sh') + rsync(os.path.join(os.getenv('CHARM_DIR'), 'files', + 'nagios', 'collect_ceph_status.sh'), + script) + cronjob = "{} root {}\n".format('*/5 * * * *', script) + write_file(STATUS_CRONFILE, cronjob) + + # Find out if nrpe set nagios_hostname + hostname = nrpe.get_nagios_hostname() + current_unit = nrpe.get_nagios_unit_name() + nrpe_setup = nrpe.NRPE(hostname=hostname) + check_cmd = 'check_ceph_status.py -f {} --degraded_thresh {}' \ + ' --misplaced_thresh {}' \ + ' --recovery_rate {}'.format(STATUS_FILE, + config('nagios_degraded_thresh'), + config('nagios_misplaced_thresh'), + config('nagios_recovery_rate')) + if config('nagios_raise_nodeepscrub'): + check_cmd = check_cmd + ' --raise_nodeepscrub' + nrpe_setup.add_check( + shortname="ceph", + description='Check Ceph health {{{}}}'.format(current_unit), + check_cmd=check_cmd + ) + + check_cmd = 'check_ceph_osd_count.py {} '.format( + HOST_OSD_COUNT_REPORT) + nrpe_setup.add_check( + shortname='ceph_osd_count', + description='Check if osd count matches expected count', + check_cmd=check_cmd + ) + + if config('nagios_additional_checks'): + additional_critical = config('nagios_additional_checks_critical') + x = ast.literal_eval(config('nagios_additional_checks')) + + for key, value in x.items(): + name = "ceph-{}".format(key.replace(" ", "")) + log("Adding check {}".format(name)) + check_cmd = 'check_ceph_status.py -f {}' \ + ' --additional_check \"{}\"' \ + ' {}'.format(STATUS_FILE, value, + "--additional_check_critical" + if additional_critical is True else "") + nrpe_setup.add_check( + shortname=name, + description='Additional Ceph checks {{{}}}'.format( + current_unit), + check_cmd=check_cmd + ) + if config('nagios_check_num_osds'): + check_cmd = 'check_ceph_status.py -f {} --check_num_osds'.format( + STATUS_FILE) + nrpe_setup.add_check( + shortname='ceph_num_osds', + description='Check whether all OSDs are up and in', + check_cmd=check_cmd + ) + + check_cmd = ('check_radosgw_sync_status.py -f {}' + .format(RADOSGW_STATUS_FILE)) + if config('nagios_rgw_zones'): + check_cmd += ' --zones "{}"'.format(config('nagios_rgw_zones')) + if 
config('nagios_rgw_additional_checks'): + x = ast.literal_eval(config('nagios_rgw_additional_checks')) + for check in x: + check_cmd += ' --additional_check \"{}\"'.format(check) + nrpe_setup.add_check( + shortname='radosgw_multizone', + description='Check multizone radosgw health', + check_cmd=check_cmd + ) + + nrpe_setup.write() + + +VERSION_PACKAGE = 'ceph-common' + + +def is_cmr_unit(unit_name): + '''Is the remote unit connected via a cross model relation. + + :param unit_name: Name of unit + :type unit_name: str + :returns: Whether unit is connected via cmr + :rtype: bool + ''' + return unit_name.startswith('remote-') + + +def is_unsupported_cmr(unit_name): + '''If unit is connected via CMR and if that is supported. + + :param unit_name: Name of unit + :type unit_name: str + :returns: Whether unit is supported + :rtype: bool + ''' + unsupported = False + if unit_name and is_cmr_unit(unit_name): + unsupported = not config('permit-insecure-cmr') + if unsupported: + log("CMR detected and not supported", "ERROR") + return unsupported + + +@hooks.hook('pre-series-upgrade') +def pre_series_upgrade(): + log("Running prepare series upgrade hook", "INFO") + # NOTE: The Ceph packages handle the series upgrade gracefully. + # In order to indicate the step of the series upgrade process for + # administrators and automated scripts, the charm sets the paused and + # upgrading states. + set_unit_paused() + set_unit_upgrading() + + +@hooks.hook('post-series-upgrade') +def post_series_upgrade(): + log("Running complete series upgrade hook", "INFO") + # In order to indicate the step of the series upgrade process for + # administrators and automated scripts, the charm clears the paused and + # upgrading states. + clear_unit_paused() + clear_unit_upgrading() diff --git a/ceph-mon/src/ceph_mds.py b/ceph-mon/src/ceph_mds.py new file mode 100644 index 00000000..30d66f89 --- /dev/null +++ b/ceph-mon/src/ceph_mds.py @@ -0,0 +1,66 @@ +"""Ceph mds library +""" + +import logging +from typing import Dict + +from charmhelpers.core.hookenv import leader_get +from ops import model + +import charms_ceph.utils as ceph + + +logger = logging.getLogger(__name__) + +import ceph_client + + +class CephMdsProvides(ceph_client.CephClientProvides): + """Encapsulate the provides side of the Ceph MDS relation. + + Observes the mds-relation-joined hook event + """ + + charm = None + _mds_name = None + + def __init__(self, charm): + super().__init__(charm, "mds") + self.charm = charm + + def _get_mds_name(self, relation: model.Relation, unit: model.Unit) -> str: + """Retrieve mds-name from relation data.""" + unit_data = relation.data[unit] + return unit_data.get("mds-name", relation.app.name) + + def _get_custom_relation_init_data(self) -> Dict: + """Information required for the mds relation. 
+ + :returns: Ceph configuration needed for the mds relation + :rtype: dict + """ + return { + "fsid": leader_get("fsid"), + "{}_mds_key".format(self._mds_name): ceph.get_mds_key( + name=self._mds_name + ), + } + + def _handle_client_relation( + self, relation: model.Relation, unit: model.Unit + ) -> None: + """Handle broker request and set the relation data + + :param relation: Operator relation + :type relation: Relation + :param unit: Unit to handle + :type unit: Unit + """ + + self._mds_name = self._get_mds_name(relation, unit) + + logger.debug( + "mon cluster in quorum and osds bootstrapped" + " - providing mds client with keys" + ) + super()._handle_client_relation(relation, unit) diff --git a/ceph-mon/src/ceph_metrics.py b/ceph-mon/src/ceph_metrics.py new file mode 100644 index 00000000..363bdc81 --- /dev/null +++ b/ceph-mon/src/ceph_metrics.py @@ -0,0 +1,231 @@ +# Copyright 2022 Canonical Ltd. +# See LICENSE file for licensing details. + +"""Provide ceph metrics to prometheus + +Configure prometheus scrape jobs via the metrics-endpoint relation. +""" +import json +import logging +import os.path +import pathlib +import socket + +from typing import Optional, Union, List, TYPE_CHECKING + +import ops.model + +if TYPE_CHECKING: + import charm + +from charms.prometheus_k8s.v0 import prometheus_scrape +from charms.grafana_agent.v0 import cos_agent +from charms_ceph import utils as ceph_utils +from ops.framework import BoundEvent +from utils import mgr_config_set_rbd_stats_pools + + +logger = logging.getLogger(__name__) + +DEFAULT_CEPH_JOB = { + "metrics_path": "/metrics", + "static_configs": [{"targets": ["*:9283"]}], +} +DEFAULT_CEPH_METRICS_ENDPOINT = { + "path": "/metrics", + "port": 9283, +} +DEFAULT_ALERT_RULES_RELATIVE_PATH = "files/prometheus_alert_rules" + + +class CephMetricsEndpointProvider(prometheus_scrape.MetricsEndpointProvider): + def __init__( + self, + charm: "charm.CephMonCharm", + relation_name: str = prometheus_scrape.DEFAULT_RELATION_NAME, + jobs=None, + alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH, + refresh_event: Optional[Union[BoundEvent, List[BoundEvent]]] = None, + ): + if jobs is None: + jobs = [DEFAULT_CEPH_JOB] + super().__init__( + charm, + relation_name=relation_name, + jobs=jobs, + alert_rules_path=alert_rules_path, + refresh_event=refresh_event, + ) + events = charm.on[relation_name] + self.framework.observe( + events.relation_departed, self._on_relation_departed + ) + self.framework.observe( + self.on.alert_rule_status_changed, + self._on_alert_rule_status_changed, + ) + charm._stored.set_default(alert_rule_errors=None) + + def _on_relation_changed(self, event): + """Enable prometheus on relation change""" + if not self._charm.unit.is_leader(): + return + + if not ceph_utils.is_bootstrapped(): + logger.debug("not bootstrapped, defer rel changed: %s", event) + event.defer() + return + + logger.debug( + "is_leader and is_bootstrapped, running rel changed: %s", event + ) + mgr_config_set_rbd_stats_pools() + ceph_utils.mgr_enable_module("prometheus") + logger.debug("module_enabled") + self.update_alert_rules() + super()._on_relation_changed(event) + + def _on_relation_departed(self, event): + """Disable prometheus on depart of relation""" + if self._charm.unit.is_leader() and ceph_utils.is_bootstrapped(): + logger.debug( + "is_leader and is_bootstrapped, running rel departed: %s", + event, + ) + ceph_utils.mgr_disable_module("prometheus") + logger.debug("module_disabled") + # We're not related to prom, don't care about alert rules + 
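+ # so clear any stale errors here rather than leaving the unit + # blocked by the check_alert_rule_errors() status check: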
self._charm._stored.alert_rule_errors = None + + def have_alert_rule_errors(self): + return bool(self._charm._stored.alert_rule_errors) + + def _on_alert_rule_status_changed(self, event): + logger.debug( + "alert rule status changed: %s, %s, %s", + event, + event.valid, + event.errors, + ) + if event.errors: + logger.warning("invalid alert rules: %s", event.errors) + self._charm._stored.alert_rule_errors = event.errors + else: + self._charm._stored.alert_rule_errors = None + + def get_alert_rules_resource(self): + try: + return self._charm.model.resources.fetch("alert-rules") + except ops.model.ModelError as e: + logger.warning("can't get alert-rules resource: %s", e) + + def _set_alert_rules(self, rules_dict): + logger.debug("set alert rules: %s", rules_dict) + # alert rules seem ok locally, clear any errors + # prometheus may still signal alert rule errors + # via the relation though + self._charm._stored.alert_rule_errors = None + + for relation in self._charm.model.relations[self._relation_name]: + relation.data[self._charm.app]["alert_rules"] = json.dumps( + rules_dict + ) + + def update_alert_rules(self): + if self._charm.unit.is_leader() and ceph_utils.is_bootstrapped(): + resource = self.get_alert_rules_resource() + if resource is None or not os.path.getsize(resource): + logger.debug("empty rules resource, clearing alert rules") + self._set_alert_rules({}) + return + sink = pathlib.Path(self._alert_rules_path) / "alert.yaml.rules" + if sink.exists() or sink.is_symlink(): + sink.unlink() + sink.symlink_to(resource) + alert_rules = prometheus_scrape.AlertRules(topology=self.topology) + alert_rules.add_path(str(sink), recursive=True) + alert_rules_as_dict = alert_rules.as_dict() + if not alert_rules_as_dict: + msg = "invalid alert rules: {}".format(sink.open().read()) + logger.warning(msg) + self._charm._stored.alert_rule_errors = msg + return + self._set_alert_rules(alert_rules_as_dict) + + +class CephCOSAgentProvider(cos_agent.COSAgentProvider): + + def __init__(self, charm): + super().__init__( + charm, + metrics_rules_dir="./files/prometheus_alert_rules", + dashboard_dirs=["./files/grafana_dashboards"], + scrape_configs=self._custom_scrape_configs, + ) + events = self._charm.on[cos_agent.DEFAULT_RELATION_NAME] + self.framework.observe( + events.relation_departed, self._on_relation_departed + ) + + def _on_refresh(self, event): + """Enable prometheus on relation change""" + if not ceph_utils.is_bootstrapped(): + logger.debug("not bootstrapped, defer _on_refresh: %s", event) + event.defer() + return + logger.debug("refreshing cos_agent relation") + if self._charm.unit.is_leader(): + mgr_config_set_rbd_stats_pools() + ceph_utils.mgr_enable_module("prometheus") + super()._on_refresh(event) + + def _on_relation_departed(self, event): + """Disable prometheus on depart of relation""" + if self._charm.unit.is_leader() and ceph_utils.is_bootstrapped(): + logger.debug( + "is_leader and is_bootstrapped, running rel departed: %s", + event, + ) + ceph_utils.mgr_disable_module("prometheus") + logger.debug("module_disabled") + + def _custom_scrape_configs(self): + fqdn = socket.getfqdn() + fqdn_parts = fqdn.split('.') + domain = '.'.join(fqdn_parts[1:]) if len(fqdn_parts) > 1 else fqdn + return [ + { + "metrics_path": "/metrics", + "static_configs": [{"targets": ["localhost:9283"]}], + "honor_labels": True, + "metric_relabel_configs": [ + { + # localhost:9283 is the generic default instance label + # added by grafana-agent which is kinda useless. 
+ # Replace it with a somewhat more meaningful label + "source_labels": ["instance"], + "regex": "^localhost:9283$", + "target_label": "instance", + "action": "replace", + "replacement": "ceph_cluster", + }, + { # if we have a non-empty hostname label, use it as the + # instance label + "source_labels": ["hostname"], + "regex": "(.+)", + "target_label": "instance", + "action": "replace", + "replacement": "${1}", + }, + { # tack on the domain to the instance label to make it + # conform to grafana-agent's node-exporter expectations + "source_labels": ["instance"], + "regex": "(.*)", + "target_label": "instance", + "action": "replace", + "replacement": "${1}." + domain, + }, + ] + }, + + ] diff --git a/ceph-mon/src/ceph_shared.py b/ceph-mon/src/ceph_shared.py new file mode 100644 index 00000000..4b52d372 --- /dev/null +++ b/ceph-mon/src/ceph_shared.py @@ -0,0 +1,88 @@ +# Copyright 2022 Canonical Ltd. +# See LICENSE file for licensing details. + +"""Shared operator framework code + +Provide helpers for querying current status of ceph-mon units +""" +import logging +from typing import Mapping, List, Dict, TYPE_CHECKING + +from ops import model, framework + + +logger = logging.getLogger(__name__) + +if TYPE_CHECKING: + import charm + + +class CephMonInfo(framework.Object): + """Provide status information about ceph-mon. + + Information about + - Relations + - Peer information + - CMR units + """ + + def __init__(self, charm: "charm.CephMonCharm"): + super().__init__(charm, "moninfo") + self.charm = charm + + @property + def relations(self) -> Mapping[str, List[model.Relation]]: + return self.charm.model.relations + + def get_peer_mons(self) -> Dict[model.Unit, model.RelationDataContent]: + """Retrieve information about ceph-mon peer units.""" + return self._get_related_unit_data("mon") + + def get_osd_units(self) -> Dict[model.Unit, model.RelationDataContent]: + """Retrieve information about related osd units.""" + return self._get_related_unit_data("osd") + + def _get_related_unit_data( + self, reltype: str + ) -> Dict[model.Unit, model.RelationDataContent]: + rel_data = {} + for rel in self.relations[reltype]: + for unit in rel.units: + rel_data[unit] = rel.data.get(unit, {}) + return rel_data + + def remote_units(self) -> List[model.Unit]: + """Retrieve related CMR units.""" + remotes = [ + unit + for reltype in self.relations.values() + for rel in reltype + for unit in rel.units + if unit.name.startswith("remote-") + ] + return remotes + + def sufficient_osds(self, minimum_osds: int = 3) -> bool: + """ + Determine if the minimum number of OSDs have been + bootstrapped into the cluster. + + :param minimum_osds: The minimum number of OSDs required + :return: boolean indicating whether the required number of + OSDs were detected. + """ + osds = self.get_osd_units() + bootstrapped_osds = sum( + int(osd.get("bootstrapped-osds")) + for osd in osds.values() + if osd.get("bootstrapped-osds") + ) + if bootstrapped_osds >= minimum_osds: + return True + return False + + def have_osd_relation(self) -> bool: + return bool(self.relations["osd"]) diff --git a/ceph-mon/src/ceph_status.py b/ceph-mon/src/ceph_status.py new file mode 100644 index 00000000..e854b20f --- /dev/null +++ b/ceph-mon/src/ceph_status.py @@ -0,0 +1,147 @@ +# Copyright 2022 Canonical Ltd. +# See LICENSE file for licensing details.
+ +"""Provide status checking for the ceph-mon charm""" + +import logging +from typing import Union, TYPE_CHECKING + +from charmhelpers.core.hookenv import ( + application_version_set, + is_relation_made, +) +from charmhelpers.fetch import get_upstream_version +from ops import model + +import utils + +if TYPE_CHECKING: + import charm + +from charmhelpers.contrib.storage.linux import ceph as ch_ceph + +import charms_ceph.utils as ceph_utils +import ceph_shared + +logger = logging.getLogger(__name__) + +VERSION_PACKAGE = "ceph-common" + + +class StatusAssessor(ceph_shared.CephMonInfo): + """Status checking for ceph-mon charms + + Takes a ceph-mon charm object as a client, registers checking methods for + the charm object and updates status. + """ + + def __init__(self, charm: "charm.CephMonCharm"): + super().__init__(charm) + self.framework.observe( + self.framework.on.commit, self.assess_status + ) + self.register_checks() + + def config(self, key) -> Union[str, int, float, bool, None]: + return self.charm.model.config.get(key) + + def check_insecure_cmr(self) -> model.StatusBase: + if not self.config("permit-insecure-cmr") and self.remote_units(): + return model.BlockedStatus("Unsupported CMR relation") + return model.ActiveStatus() + + def check_bootstrap_source(self) -> model.StatusBase: + if not self.config("no-bootstrap") and is_relation_made( + "bootstrap-source" + ): + return model.BlockedStatus( + "Cannot join the bootstrap-source relation when " + "no-bootstrap is False", + ) + return model.ActiveStatus() + + def check_moncount(self) -> model.StatusBase: + moncount = self.config("monitor-count") + if ( + len(self.get_peer_mons()) + 1 < moncount + ): # we're including ourselves + return model.BlockedStatus( + "Insufficient peer units to bootstrap" + " cluster (require {})".format(moncount) + ) + return model.ActiveStatus() + + def check_ready_mons(self) -> model.StatusBase: + moncount = self.config("monitor-count") + mons = self.get_peer_mons() + ready = sum( + 1 for mon in mons.values() if mon.get("ceph-public-address") + ) + if ready + 1 < moncount: # "this" mon is ready presumably + return model.WaitingStatus( + "Peer units detected, waiting for addresses" + ) + return model.ActiveStatus() + + def check_rbd_features(self) -> model.StatusBase: + configured_rbd_features = self.config("default-rbd-features") + if utils.has_rbd_mirrors() and configured_rbd_features: + if ( + utils.add_rbd_mirror_features(configured_rbd_features) + != configured_rbd_features + ): + # The configured RBD features bitmap does not contain the + # features required for RBD Mirroring + return model.BlockedStatus( + "Configuration mismatch: RBD Mirroring " + "enabled but incorrect value set for " + "``default-rbd-features``", + ) + return model.ActiveStatus() + + def check_get_osd_settings(self): + try: + ch_ceph.get_osd_settings("client") + except ch_ceph.OSD_SETTING_EXCEPTIONS as e: + return model.BlockedStatus(str(e)) + return model.ActiveStatus() + + def check_alert_rule_errors(self): + if self.charm.metrics_endpoint.have_alert_rule_errors(): + return model.BlockedStatus("invalid alert rules, check unit logs") + return model.ActiveStatus() + + def check_expected_osd_count(self): + if ceph_utils.is_bootstrapped() and ceph_utils.is_quorum(): + expected_osd_count = self.config("expected-osd-count") or 3 + if self.sufficient_osds(expected_osd_count): + return model.ActiveStatus("Unit is ready and clustered") + elif not self.have_osd_relation(): + return model.BlockedStatus("Missing relation: OSD") + else: + 
return model.WaitingStatus(
+                    "Monitor bootstrapped but waiting for number of"
+                    " OSDs to reach expected-osd-count ({})".format(
+                        expected_osd_count
+                    )
+                )
+        else:
+            return model.BlockedStatus("Unit not clustered (no quorum)")
+
+    def register_checks(self):
+        checkers = [
+            self.check_insecure_cmr,
+            self.check_bootstrap_source,
+            self.check_moncount,
+            self.check_ready_mons,
+            self.check_rbd_features,
+            self.check_alert_rule_errors,
+            self.check_expected_osd_count,
+        ]
+        for check in checkers:
+            self.charm.register_status_check(check)
+
+    def assess_status(self, _event):
+        logger.debug("Running assess_status() for %s", self.charm)
+        application_version_set(get_upstream_version(VERSION_PACKAGE))
+        self.charm.update_status()
diff --git a/ceph-mon/src/charm.py b/ceph-mon/src/charm.py
new file mode 100755
index 00000000..e433f071
--- /dev/null
+++ b/ceph-mon/src/charm.py
@@ -0,0 +1,319 @@
+#! /usr/bin/python3
+import logging
+import os
+import shutil
+
+from ops.main import main
+
+import ceph_status
+import ceph_mds
+
+import charms.operator_libs_linux.v0.apt as apt
+import charms.operator_libs_linux.v1.systemd as systemd
+
+from ops.charm import CharmEvents
+from ops.framework import EventBase, EventSource
+
+import ops_openstack.core
+import charms_ceph.utils as ceph
+from charms_ceph.broker import (
+    process_requests
+)
+import ceph_hooks as hooks
+import ceph_client
+import ceph_metrics
+
+import ops_actions
+
+logger = logging.getLogger(__name__)
+
+
+class NotifyClientEvent(EventBase):
+    def __init__(self, handle):
+        super().__init__(handle)
+
+
+class CephCharmEvents(CharmEvents):
+    """Custom charm events."""
+
+    notify_clients = EventSource(NotifyClientEvent)
+
+
+class CephMonCharm(ops_openstack.core.OSBaseCharm):
+
+    release = 'quincy'
+
+    PACKAGES = [
+        'ceph', 'gdisk',
+        'radosgw', 'lvm2', 'parted', 'smartmontools',
+    ]
+
+    NEW_DB_PATH = '.charmhelpers-unit-state.db'
+
+    on = CephCharmEvents()
+
+    # General charm control callbacks.
+
+    # TODO: Figure out how to do hardening in an operator-framework
+    # world
+
+    def _initialise_config(self):
+        # The following two lines are a horrible hack to deal with the
+        # lifecycle of a charm changing compared to the classic charm.
+        # The previous (classic) version of the charm initialised a
+        # Config object in the install hook and let it go out of scope.
+        # As a result of this, the config_changed processing attempts
+        # to upgrade Ceph from distro to the configured release when it
+        # runs during the install or upgrade-charm hooks.
+        c = hooks.config()
+        c.save()
+
+    def on_install(self, event):
+        self._initialise_config()
+        self.install_pkgs()
+        rm_packages = ceph.determine_packages_to_remove()
+        if rm_packages:
+            apt.remove_package(packages=rm_packages, fatal=True)
+        try:
+            # we defer and explicitly run `ceph-create-keys` from
+            # add_keyring_to_ceph() as part of bootstrap process
+            # LP: #1719436.
+            systemd.service_pause('ceph-create-keys')
+        except systemd.SystemdError:
+            pass
+
+    def on_config(self, event):
+        if hooks.config_changed():
+            self.on.notify_clients.emit()
+
+    def make_db_path(self, suffix):
+        return os.path.join(os.environ.get('CHARM_DIR', ''), suffix)
+
+    def migrate_db(self):
+        """
+        Migrate the Key/Value database into a new location.
+        This is done to avoid conflicts between charmhelpers and
+        the ops library, since they both use the same path with
+        exclusive locking semantics.
+ """ + db_path = self.make_db_path('.unit-state.db') + new_db_path = self.make_db_path(self.NEW_DB_PATH) + if os.path.exists(db_path) and not os.path.exists(new_db_path): + # The new DB doesn't exist yet. Copy it over. + shutil.copy(db_path, new_db_path) + + def on_pre_series_upgrade(self, event): + hooks.pre_series_upgrade() + + def on_upgrade(self, event): + self._initialise_config() + self.metrics_endpoint.update_alert_rules() + self.migrate_db() + hooks.upgrade_charm() + self.on.notify_clients.emit() + + def on_post_series_upgrade(self, event): + hooks.post_series_upgrade() + + # Relations. + def on_mon_relation_joined(self, event): + hooks.mon_relation_joined() + + def on_bootstrap_source_relation_changed(self, event): + if hooks.bootstrap_source_relation_changed(): + self.on.notify_clients.emit() + + def on_prometheus_relation_joined_or_changed(self, event): + hooks.prometheus_relation() + + def on_prometheus_relation_departed(self, event): + hooks.prometheus_left() + + def on_mon_relation(self, event): + if hooks.mon_relation(): + self.on.notify_clients.emit() + + def on_osd_relation(self, event): + hooks.osd_relation() + self.on.notify_clients.emit() + + def on_dashboard_relation_joined(self, event): + hooks.dashboard_relation() + + def on_radosgw_relation(self, event): + hooks.radosgw_relation() + + def on_rbd_mirror_relation(self, event): + if hooks.rbd_mirror_relation(): + self.on.notify_clients.emit() + + def on_admin_relation(self, event): + hooks.admin_relation_joined() + + def on_nrpe_relation(self, event): + hooks.update_nrpe_config() + + def on_commit(self, _event): + self.ceph_status.assess_status() + + def on_pre_commit(self, _event): + # Fix bug: https://bugs.launchpad.net/charm-ceph-mon/+bug/2007976 + # The persistent config file doesn't update because the config save + # function handled by atexit is not triggered. + # Trigger it manually here. + hooks.hookenv._run_atexit() + + # Actions. + + def _observe_action(self, on_action, callable): + def _make_method(fn): + return lambda _, event: fn(event) + + method_name = 'on_' + str(on_action.event_kind) + method = _make_method(callable) + # In addition to being a method, the action callbacks _must_ have + # the same '__name__' as their attribute name (this is how lookups + # work in the operator framework world). + method.__name__ = method_name + + inst = type(self) + setattr(inst, method_name, method) + self.framework.observe(on_action, getattr(self, method_name)) + + def is_blocked_insecure_cmr(self): + remote_block = False + remote_unit_name = hooks.remote_unit() + if remote_unit_name and hooks.is_cmr_unit(remote_unit_name): + remote_block = not self.config['permit-insecure-cmr'] + return remote_block + + def notify_clients(self, _event): + self.clients.notify_all() + self.mds.notify_all() + for relation in self.model.relations['admin']: + hooks.admin_relation_joined(str(relation.id)) + + def on_rotate_key_action(self, event): + ops_actions.rotate_key.rotate_key( + event, self.framework.model + ) + + def __init__(self, *args): + super().__init__(*args) + self._stored.is_started = True + + if self.is_blocked_insecure_cmr(): + logging.error( + "Not running hook, CMR detected and not supported") + return + + # Make the charmhelpers lib use a different DB path. This is done + # so as to avoid conflicts with what the ops framework uses. 
+ # See: https://bugs.launchpad.net/charm-ceph-mon/+bug/2005137 + os.environ['UNIT_STATE_DB'] = self.make_db_path(self.NEW_DB_PATH) + + fw = self.framework + + self.clients = ceph_client.CephClientProvides(self) + self.metrics_endpoint = ceph_metrics.CephMetricsEndpointProvider(self) + self.cos_agent = ceph_metrics.CephCOSAgentProvider(self) + self.ceph_status = ceph_status.StatusAssessor(self) + self.mds = ceph_mds.CephMdsProvides(self) + + self._observe_action(self.on.change_osd_weight_action, + ops_actions.change_osd_weight.change_osd_weight) + self._observe_action(self.on.copy_pool_action, + ops_actions.copy_pool.copy_pool) + self._observe_action(self.on.create_crush_rule_action, + ops_actions.create_crush_rule.create_crush_rule) + self._observe_action( + self.on.create_erasure_profile_action, + ops_actions.create_erasure_profile.create_erasure_profile_action) + self._observe_action(self.on.get_health_action, + ops_actions.get_health.get_health_action) + self._observe_action(self.on.get_erasure_profile_action, + ops_actions.get_erasure_profile.erasure_profile) + self._observe_action(self.on.list_entities_action, + ops_actions.list_entities.list_entities) + self._observe_action(self.on.rotate_key_action, + self.on_rotate_key_action) + + fw.observe(self.on.install, self.on_install) + fw.observe(self.on.config_changed, self.on_config) + fw.observe(self.on.pre_series_upgrade, self.on_pre_series_upgrade) + fw.observe(self.on.upgrade_charm, self.on_upgrade) + fw.observe(self.on.post_series_upgrade, self.on_post_series_upgrade) + + fw.observe(self.on.mon_relation_joined, self.on_mon_relation_joined) + fw.observe(self.on.bootstrap_source_relation_changed, + self.on_bootstrap_source_relation_changed) + fw.observe(self.on.prometheus_relation_joined, + self.on_prometheus_relation_joined_or_changed) + fw.observe(self.on.prometheus_relation_changed, + self.on_prometheus_relation_joined_or_changed) + fw.observe(self.on.prometheus_relation_departed, + self.on_prometheus_relation_departed) + + for key in ('mon_relation_departed', 'mon_relation_changed', + 'leader_settings_changed', + 'bootstrap_source_relation_departed'): + fw.observe(getattr(self.on, key), self.on_mon_relation) + + fw.observe(self.on.osd_relation_joined, + self.on_osd_relation) + fw.observe(self.on.osd_relation_changed, + self.on_osd_relation) + + fw.observe(self.on.dashboard_relation_joined, + self.on_dashboard_relation_joined) + + fw.observe(self.on.radosgw_relation_changed, + self.on_radosgw_relation) + fw.observe(self.on.radosgw_relation_joined, + self.on_radosgw_relation) + + fw.observe(self.on.rbd_mirror_relation_changed, + self.on_rbd_mirror_relation) + fw.observe(self.on.rbd_mirror_relation_joined, + self.on_rbd_mirror_relation) + + fw.observe(self.on.admin_relation_changed, + self.on_admin_relation) + fw.observe(self.on.admin_relation_joined, + self.on_admin_relation) + + fw.observe(self.on.nrpe_external_master_relation_joined, + self.on_nrpe_relation) + fw.observe(self.on.nrpe_external_master_relation_changed, + self.on_nrpe_relation) + + fw.observe(self.on.notify_clients, self.notify_clients) + + fw.observe(self.on.framework.on.pre_commit, self.on_pre_commit) + + def ready_for_service(self): + return hooks.ready_for_service() + + def process_broker_request(self, broker_req_id, requests, recurse=True): + broker_result = process_requests(requests) + if hooks.relation_ids('rbd-mirror'): + # NOTE(fnordahl): juju relation level data candidate + # notify mons to flag that the other mon units should update + # their 
``rbd-mirror`` relations with information about new
+            # pools.
+            logger.debug('Notifying peers after processing broker '
+                         'request {}.'.format(broker_req_id))
+            hooks.notify_mons()
+        # notify_rbd_mirrors is the only case where this is False
+        if recurse:
+            # update ``rbd-mirror`` relations for this unit with
+            # information about new pools.
+            logger.debug(
+                "Notifying this unit's rbd-mirror relations after "
+                'processing broker request {}.'.format(broker_req_id))
+            hooks.notify_rbd_mirrors()
+        return broker_result
+
+
+if __name__ == '__main__':
+    main(CephMonCharm)
diff --git a/ceph-mon/src/ops_actions/__init__.py b/ceph-mon/src/ops_actions/__init__.py
new file mode 100644
index 00000000..3a2c227a
--- /dev/null
+++ b/ceph-mon/src/ops_actions/__init__.py
@@ -0,0 +1,24 @@
+# Copyright 2022 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import (  # noqa: F401
+    change_osd_weight,
+    copy_pool,
+    create_crush_rule,
+    create_erasure_profile,
+    get_health,
+    get_erasure_profile,
+    list_entities,
+    rotate_key,
+)
diff --git a/ceph-mon/src/ops_actions/change_osd_weight.py b/ceph-mon/src/ops_actions/change_osd_weight.py
new file mode 100644
index 00000000..cc12cf94
--- /dev/null
+++ b/ceph-mon/src/ops_actions/change_osd_weight.py
@@ -0,0 +1,41 @@
+#! /usr/bin/env python3
+#
+# Copyright 2020 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Changes the crush weight of an OSD."""
+
+import charms_ceph.utils as ceph_utils
+import logging
+
+
+logger = logging.getLogger(__name__)
+
+
+def change_osd_weight(event) -> None:
+    """Run reweight_osd to change OSD weight."""
+    osd_num = event.params.get("osd")
+    new_weight = event.params.get("weight")
+    try:
+        result = ceph_utils.reweight_osd(str(osd_num), str(new_weight))
+    except Exception as e:
+        logger.warning(e)
+        event.fail("Reweight failed due to exception")
+        return
+
+    if not result:
+        event.fail("Reweight failed to complete")
+        return
+
+    event.set_results({'message': 'success'})
diff --git a/ceph-mon/src/ops_actions/copy_pool.py b/ceph-mon/src/ops_actions/copy_pool.py
new file mode 100644
index 00000000..722b167f
--- /dev/null
+++ b/ceph-mon/src/ops_actions/copy_pool.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python3
+#
+# Copyright 2019 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import subprocess
+
+
+def copy_pool(event) -> None:
+    try:
+        source = event.params.get("source")
+        target = event.params.get("target")
+        subprocess.check_call([
+            'rados', 'cppool',
+            source, target
+        ])
+    except subprocess.CalledProcessError as e:
+        event.fail("Error copying pool: {}".format(str(e)))
diff --git a/ceph-mon/src/ops_actions/create_crush_rule.py b/ceph-mon/src/ops_actions/create_crush_rule.py
new file mode 100644
index 00000000..453ac1ef
--- /dev/null
+++ b/ceph-mon/src/ops_actions/create_crush_rule.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+#
+# Copyright 2022 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Creates a new CRUSH rule."""
+
+import logging
+import subprocess
+
+logger = logging.getLogger(__name__)
+
+
+def create_crush_rule(event) -> None:
+    """Create a new CRUSH rule."""
+
+    rule_name = event.params.get('name')
+    failure_domain = event.params.get('failure-domain')
+    device_class = event.params.get('device-class')
+
+    cmd = [
+        'ceph', 'osd', 'crush', 'rule',
+        'create-replicated',
+        rule_name,
+        'default',
+        failure_domain
+    ]
+    if device_class:
+        cmd.append(device_class)
+    try:
+        subprocess.check_call(cmd)
+    except subprocess.CalledProcessError as e:
+        logger.warning(e)
+        event.fail("Rule creation failed due to exception")
+        return
+
+    event.set_results({'message': 'success'})
diff --git a/ceph-mon/src/ops_actions/create_erasure_profile.py b/ceph-mon/src/ops_actions/create_erasure_profile.py
new file mode 100755
index 00000000..d84285be
--- /dev/null
+++ b/ceph-mon/src/ops_actions/create_erasure_profile.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+#
+# Copyright 2022 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
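A quick, illustrative check (not part of the charm) of how `create_crush_rule` above maps action parameters onto the ceph CLI; the `FakeEvent` class is a hypothetical stand-in for the real ops action event:

```python
from unittest import mock

from ops_actions.create_crush_rule import create_crush_rule


class FakeEvent:
    """Minimal stand-in for an ops ActionEvent."""
    def __init__(self, params):
        self.params = params
        self.results = None

    def set_results(self, results):
        self.results = results

    def fail(self, msg):
        raise AssertionError(msg)


with mock.patch("ops_actions.create_crush_rule.subprocess.check_call") as cc:
    event = FakeEvent({"name": "fast", "failure-domain": "host",
                       "device-class": "ssd"})
    create_crush_rule(event)
    # The optional device class is appended after the failure domain.
    cc.assert_called_once_with(
        ["ceph", "osd", "crush", "rule", "create-replicated",
         "fast", "default", "host", "ssd"])
    assert event.results == {"message": "success"}
```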
+
+from subprocess import CalledProcessError
+
+from charmhelpers.contrib.storage.linux.ceph import create_erasure_profile
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def create_erasure_profile_action(event):
+    name = event.params.get("name")
+    plugin = event.params.get("plugin")
+    failure_domain = event.params.get("failure-domain")
+    device_class = event.params.get("device-class")
+    k = event.params.get("data-chunks")
+    m = event.params.get("coding-chunks")
+
+    # jerasure requires k+m
+    # isa requires k+m
+    # lrc requires k+m+l
+    # shec requires k+m+c
+    # clay requires k+m+d
+
+    kwargs = {
+        'service': 'admin',
+        'erasure_plugin_name': plugin,
+        'profile_name': name,
+        'data_chunks': k,
+        'coding_chunks': m,
+        'failure_domain': failure_domain,
+        'device_class': device_class,
+    }
+
+    if plugin in ("jerasure", "isa"):
+        pass
+    elif plugin == "lrc":
+        kwargs['locality'] = event.params.get("locality-chunks")
+        kwargs['crush_locality'] = event.params.get('crush-locality')
+    elif plugin == "shec":
+        kwargs['durability_estimator'] = (
+            event.params.get("durability-estimator"))
+    elif plugin == "clay":
+        kwargs['helper_chunks'] = event.params.get("helper-chunks")
+        kwargs['scalar_mds'] = event.params.get('scalar-mds')
+    else:
+        # Unknown erasure plugin
+        event.fail("Unknown erasure-plugin type of {}. "
+                   "Only jerasure, isa, lrc, shec or clay are "
+                   "allowed".format(plugin))
+        return
+
+    try:
+        create_erasure_profile(**kwargs)
+    except CalledProcessError as e:
+        logger.warning(e)
+        event.fail("Create erasure profile failed with "
+                   "message: {}".format(str(e)))
diff --git a/ceph-mon/src/ops_actions/get_erasure_profile.py b/ceph-mon/src/ops_actions/get_erasure_profile.py
new file mode 100755
index 00000000..e53fdfaf
--- /dev/null
+++ b/ceph-mon/src/ops_actions/get_erasure_profile.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python3
+#
+# Copyright 2022 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Get an erasure profile given a profile name.""" + +from charmhelpers.contrib.storage.linux import ceph + + +def erasure_profile(event) -> None: + profile_name = event.params.get("name") + out = ceph.get_erasure_profile(service="admin", name=profile_name) + event.set_results({"message": out}) diff --git a/ceph-mon/src/ops_actions/get_health.py b/ceph-mon/src/ops_actions/get_health.py new file mode 100755 index 00000000..b148c954 --- /dev/null +++ b/ceph-mon/src/ops_actions/get_health.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 +# +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from subprocess import check_output, CalledProcessError +import logging + + +logger = logging.getLogger(__name__) + + +def get_health_action(event): + try: + event.set_results( + {'message': check_output(['ceph', 'health']).decode('UTF-8')}) + except CalledProcessError as e: + logger.warning(e) + event.fail( + "ceph health failed with message: {}".format(str(e))) diff --git a/ceph-mon/src/ops_actions/list_entities.py b/ceph-mon/src/ops_actions/list_entities.py new file mode 100644 index 00000000..8726a9c2 --- /dev/null +++ b/ceph-mon/src/ops_actions/list_entities.py @@ -0,0 +1,53 @@ +#! /usr/bin/env python3 +# +# Copyright 2024 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Retrieve a list of entities recognized by the Ceph cluster.""" + +import json +import logging +import subprocess +import yaml + + +logger = logging.getLogger(__name__) + + +def list_entities(event): + try: + # NOTE(lmlg): Don't bother passing --format=json or the likes, + # since it sometimes contain escaped strings that are incompatible + # with python's json module. This method of fetching entities is + # simple enough and portable across Ceph versions. 
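+        # Illustrative (assumed) shape of ``ceph auth ls`` output: entity
+        # names start at column 0 and key/caps details are indented, e.g.:
+        #   osd.0
+        #       key: AQD...
+        #       caps: [mon] allow profile osd
+        # The parsing below keeps only the non-indented entity lines.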
+        out = subprocess.check_output(['sudo', 'ceph', 'auth', 'ls'])
+        ret = []
+
+        for line in out.decode('utf-8').split('\n'):
+            if line and not line.startswith((' ', '\t', '\n')):
+                ret.append(line)
+
+        fmt = event.params.get('format', 'text')
+        if fmt == 'json':
+            msg = json.dumps(ret)
+        elif fmt == 'yaml':
+            msg = yaml.safe_dump(ret)
+        else:
+            msg = '\n'.join(ret)
+
+        event.set_results({'message': msg})
+    except Exception as e:
+        logger.warning(e)
+        event.fail('failed to list entities: {}'.format(str(e)))
diff --git a/ceph-mon/src/ops_actions/rotate_key.py b/ceph-mon/src/ops_actions/rotate_key.py
new file mode 100644
index 00000000..1e4ba829
--- /dev/null
+++ b/ceph-mon/src/ops_actions/rotate_key.py
@@ -0,0 +1,267 @@
+#! /usr/bin/env python3
+#
+# Copyright 2024 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Rotate the key of one or more entities."""
+
+import configparser
+import json
+import logging
+import os
+import subprocess
+
+import charms.operator_libs_linux.v1.systemd as systemd
+
+
+logger = logging.getLogger(__name__)
+MGR_DIR = "/var/lib/ceph/mgr"
+
+
+def _find_mgr_path(base):
+    name = "ceph-" + base
+    try:
+        if name in os.listdir(MGR_DIR):
+            return MGR_DIR + "/" + name
+    except FileNotFoundError as exc:
+        logger.exception(exc)
+    return None
+
+
+def _create_key(entity, event):
+    try:
+        cmd = ["sudo", "ceph", "auth", "get-or-create-pending",
+               entity, "--format=json"]
+        out = subprocess.check_output(cmd).decode("utf-8")
+        return json.loads(out)[0]["pending_key"]
+    except (subprocess.SubprocessError, json.decoder.JSONDecodeError) as exc:
+        logger.exception(exc)
+        event.fail("Failed to create key: %s" % str(exc))
+        raise
+
+
+def _replace_keyring_file(path, entity, key, event):
+    path += "/keyring"
+    try:
+        c = configparser.ConfigParser(default_section=None)
+        c.read(path)
+        c[entity]["key"] = key
+
+        with open(path, "w") as file:
+            c.write(file)
+    except (KeyError, IOError) as exc:
+        logger.exception(exc)
+        event.fail("Failed to replace keyring file: %s" % str(exc))
+        raise
+
+
+def _restart_daemon(entity, event):
+    try:
+        systemd.service_restart(entity)
+    except systemd.SystemdError as exc:
+        logger.exception(exc)
+        event.fail("Failed to restart daemon: %s" % str(exc))
+        raise
+
+
+def _handle_rgw_key_rotation(entity, event, model):
+    rgw_name = entity[7:]  # Skip 'client.'
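+    # e.g. entity "client.rgw.host-1" yields rgw_name "rgw.host-1", which
+    # is matched against the "key_name" the radosgw unit published below.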
+ relations = model.relations.get('radosgw') + if not relations: + event.fail('No RadosGW relations found') + return + + for relation in relations: + for unit in relation.units: + try: + data = relation.data + if data[unit]["key_name"] != rgw_name: + continue + except KeyError: + logger.exception('key name not found in relation data bag') + continue + + data[model.unit][rgw_name + "_key"] = _create_key(entity, event) + event.set_results({"message": "success"}) + return + + event.fail("Entity %s not found" % entity) + + +def _find_mds_unit(relations, mds_name): + for relation in relations: + for unit in relation.units: + try: + if mds_name == relation.data[unit]['mds-name']: + return relation.data + except KeyError: + logger.exception('mds name not found in relation data bag') + + +def _handle_mds_key_rotation(entity, event, model): + mds_name = entity[4:] + relations = model.relations.get('mds') + if not relations: + event.fail('No mds relations found') + return + + bag = _find_mds_unit(relations, mds_name) + if bag is None: + event.fail('No unit found for entity: %s' % entity) + return + + pending_key = _create_key(entity, event) + bag[model.unit][mds_name + "_mds_key"] = pending_key + event.set_results({'message': 'success'}) + + +def _get_osd_tree(): + out = subprocess.check_output(["sudo", "ceph", "osd", "dump", + "--format=json"]) + return json.loads(out.decode("utf8")).get("osds", ()) + + +def _clean_address(addr): + ix = addr.find(":") + return addr if ix < 0 else addr[0:ix] + + +def _get_osd_addrs(osd_id, tree=None): + if tree is None: + tree = _get_osd_tree() + + for osd in tree: + if osd.get("osd") != osd_id: + continue + + return [_clean_address(osd[x]) + for x in ("public_addr", "cluster_addr") + if x in osd] + + +def _get_unit_addr(unit, rel_id): + out = subprocess.check_output(["relation-get", "--format=json", + "-r", str(rel_id), "private-address", unit]) + return out.decode("utf8").replace('"', '').strip() + + +def _find_osd_unit(relations, model, osd_id, tree): + addrs = _get_osd_addrs(osd_id, tree) + if not addrs: + return None + + for relation in relations: + for unit in relation.units: + if _get_unit_addr(unit.name, relation.id) in addrs: + return relation.data[model.unit] + + +def _handle_osd_key_rotation(entity, event, model, tree=None): + osd_rels = model.relations.get("osd") + if not osd_rels: + event.fail("No OSD relations found") + return + + if tree is None: + tree = _get_osd_tree() + + osd_id = int(entity[4:]) + bag = _find_osd_unit(osd_rels, model, osd_id, tree) + if bag is not None: + key = _create_key(entity, event) + bag["pending_key"] = json.dumps({osd_id: key}) + event.set_results({"message": "success"}) + else: + event.fail("No OSD matching entity %s found" % entity) + + +def _add_osd_rotation(rotations, new_bag, osd_id, new_key): + # NOTE(lmlg): We can't use sets or dicts for relation databags, as they + # are mutable and don't implement a __hash__ method. So we use a simple + # (bag, dict) array to map the rotations. 
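+    # e.g. rotations == [(bag_a, {"0": key0, "2": key2}),
+    #                    (bag_b, {"1": key1})]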
+ elem = {osd_id: new_key} + for bag, data in rotations: + if bag is new_bag: + data.update(elem) + return + + rotations.append((new_bag, elem)) + + +def _get_osd_ids(): + ret = subprocess.check_output(["sudo", "ceph", "osd", "ls"]) + return ret.decode("utf8").split("\n") + + +def _rotate_all_osds(event, model): + tree = _get_osd_tree() + osd_rels = model.relations.get("osd") + ret = [] + + if not osd_rels: + event.fail("No OSD relations found") + return + + for osd_id in _get_osd_ids(): + osd_id = osd_id.strip() + if not osd_id: + continue + + bag = _find_osd_unit(osd_rels, model, int(osd_id), tree) + if bag is None: + continue + + key = _create_key("osd." + osd_id, event) + _add_osd_rotation(ret, bag, osd_id, key) + + for bag, elem in ret: + bag["pending_key"] = json.dumps(elem) + + event.set_results({"message": "success"}) + + +def rotate_key(event, model=None) -> None: + """Rotate the key of the specified entity.""" + entity = event.params.get("entity") + if entity.startswith("mgr"): + if len(entity) > 3: + if entity[3] != '.': + event.fail("Invalid entity name: %s" % entity) + return + path = _find_mgr_path(entity[4:]) + if path is None: + event.fail("Entity %s not found" % entity) + return + else: # just 'mgr' + try: + path = MGR_DIR + "/" + os.listdir(MGR_DIR)[0] + entity = "mgr." + os.path.basename(path)[5:] # skip 'ceph-' + except Exception: + event.fail("No managers found") + return + + key = _create_key(entity, event) + _replace_keyring_file(path, entity, key, event) + _restart_daemon("ceph-mgr@%s.service" % entity[4:], event) + event.set_results({"message": "success"}) + elif entity.startswith("client.rgw."): + _handle_rgw_key_rotation(entity, event, model) + elif entity.startswith('mds.'): + _handle_mds_key_rotation(entity, event, model) + elif entity == "osd": + _rotate_all_osds(event, model) + elif entity.startswith("osd."): + _handle_osd_key_rotation(entity, event, model) + else: + event.fail("Unknown entity: %s" % entity) diff --git a/ceph-mon/src/utils.py b/ceph-mon/src/utils.py new file mode 100644 index 00000000..1829f9c8 --- /dev/null +++ b/ceph-mon/src/utils.py @@ -0,0 +1,428 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
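The rotation bookkeeping above keeps one pending-key dict per relation data bag. A minimal sanity check of the two pure helpers, as an illustration only; it assumes the module is importable as `ops_actions.rotate_key`:

```python
from ops_actions.rotate_key import _add_osd_rotation, _clean_address

# Port suffixes are stripped from OSD addresses before comparison.
assert _clean_address("10.0.0.5:6800") == "10.0.0.5"
assert _clean_address("10.0.0.5") == "10.0.0.5"

# Rotations accumulate per data bag: an already-seen bag gets its dict
# updated in place, a new bag gets a new (bag, dict) entry appended.
bag_a, bag_b = {}, {}
rotations = []
_add_osd_rotation(rotations, bag_a, "0", "key0")
_add_osd_rotation(rotations, bag_a, "2", "key2")
_add_osd_rotation(rotations, bag_b, "1", "key1")
assert rotations == [(bag_a, {"0": "key0", "2": "key2"}),
                     (bag_b, {"1": "key1"})]
```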
+
+import json
+import re
+import socket
+import subprocess
+import errno
+
+import tenacity
+from charms_ceph import utils as ceph_utils
+from charmhelpers.core.hookenv import (
+    DEBUG,
+    cached,
+    config,
+    goal_state,
+    is_leader,
+    log,
+    network_get_primary_address,
+    related_units,
+    relation_ids,
+    relation_get,
+    status_set,
+    unit_get,
+)
+
+from charmhelpers.core.host import (
+    lsb_release,
+    CompareHostReleases,
+    cmp_pkgrevno,
+)
+from charmhelpers.contrib.network.ip import (
+    get_address_in_network,
+    get_ipv6_addr
+)
+from charmhelpers.contrib.storage.linux import ceph
+
+import dns.resolver
+
+
+class OsdPostUpgradeError(Exception):
+    """Error class for OSD post-upgrade operations."""
+    pass
+
+
+def enable_pocket(pocket):
+    apt_sources = "/etc/apt/sources.list"
+    with open(apt_sources, "r") as sources:
+        lines = sources.readlines()
+    with open(apt_sources, "w") as sources:
+        for line in lines:
+            if pocket in line:
+                sources.write(re.sub('^# deb', 'deb', line))
+            else:
+                sources.write(line)
+
+
+def is_mgr_module_enabled(module):
+    """Is a given manager module enabled.
+
+    :param module:
+    :type module: str
+    :returns: Whether the named module is enabled
+    :rtype: bool
+    """
+    return module in ceph.enabled_manager_modules()
+
+
+def mgr_enable_module(module):
+    """Enable a Ceph Manager Module.
+
+    :param module: The module name to enable
+    :type module: str
+
+    :raises: subprocess.CalledProcessError
+    """
+    if not is_mgr_module_enabled(module):
+        subprocess.check_call(['ceph', 'mgr', 'module', 'enable', module])
+        return True
+    return False
+
+
+def mgr_disable_module(module):
+    """Disable a Ceph Manager Module.
+
+    :param module: The module name to disable
+    :type module: str
+
+    :raises: subprocess.CalledProcessError
+    """
+    if is_mgr_module_enabled(module):
+        subprocess.check_call(['ceph', 'mgr', 'module', 'disable', module])
+        return True
+    return False
+
+
+def set_balancer_mode(mode):
+    '''Set the balancer mode used by the Ceph manager.'''
+    if not mode:
+        return
+    elif cmp_pkgrevno('ceph-common', '12.0.0') < 0:
+        log('Luminous or later is required to set the balancer mode')
+        return
+    elif not is_mgr_module_enabled('balancer'):
+        log("Balancer module is disabled")
+        return
+
+    try:
+        # Pass the argument list directly; combining a list with
+        # shell=True would execute only 'ceph' and drop the arguments.
+        subprocess.check_call(['ceph', 'balancer', 'mode', mode])
+    except subprocess.CalledProcessError:
+        log('Failed to set balancer mode {}'.format(mode), level='ERROR')
+
+
+@cached
+def get_unit_hostname():
+    return socket.gethostname()
+
+
+@cached
+def get_host_ip(hostname=None):
+    if config('prefer-ipv6'):
+        return get_ipv6_addr(dynamic_only=False)[0]
+
+    hostname = hostname or unit_get('private-address')
+    try:
+        # Test to see if already an IPv4 address
+        socket.inet_aton(hostname)
+        return hostname
+    except socket.error:
+        # This may throw an NXDOMAIN exception; in which case
+        # things are badly broken so just let it kill the hook
+        answers = dns.resolver.query(hostname, 'A')
+        if answers:
+            return answers[0].address
+
+
+@cached
+def get_public_addr():
+    if config('ceph-public-network'):
+        return get_network_addrs('ceph-public-network')[0]
+
+    try:
+        return network_get_primary_address('public')
+    except NotImplementedError:
+        log("network-get not supported", DEBUG)
+
+    return get_host_ip()
+
+
+@cached
+def get_cluster_addr():
+    if config('ceph-cluster-network'):
+        return get_network_addrs('ceph-cluster-network')[0]
+
+    try:
+        return network_get_primary_address('cluster')
+    except NotImplementedError:
+        log("network-get not supported", DEBUG)
+
+    return get_host_ip()
+
+
+def
get_networks(config_opt='ceph-public-network'):
+    """Get all configured networks from provided config option.
+
+    If public network(s) are provided, go through them and return those for
+    which we have an address configured.
+    """
+    networks = config(config_opt)
+    if networks:
+        networks = networks.split()
+        return [n for n in networks if get_address_in_network(n)]
+
+    return []
+
+
+def get_network_addrs(config_opt):
+    """Get all configured public network addresses.
+
+    If public network(s) are provided, go through them and return the
+    addresses we have configured on any of those networks.
+    """
+    addrs = []
+    networks = config(config_opt)
+    if networks:
+        networks = networks.split()
+        addrs = [get_address_in_network(n) for n in networks]
+        addrs = [a for a in addrs if a]
+
+    if not addrs:
+        if networks:
+            msg = ("Could not find an address on any of '%s' - resolve this "
+                   "error to retry" % (networks))
+            status_set('blocked', msg)
+            raise Exception(msg)
+        else:
+            return [get_host_ip()]
+
+    return addrs
+
+
+def assert_charm_supports_ipv6():
+    """Check whether the charm is able to support IPv6."""
+    _release = lsb_release()['DISTRIB_CODENAME'].lower()
+    if CompareHostReleases(_release) < "trusty":
+        raise Exception("IPv6 is not supported in the charms for Ubuntu "
+                        "versions less than Trusty 14.04")
+
+
+def has_rbd_mirrors():
+    """Determine if we have or will have ``rbd-mirror`` charms related.
+
+    :returns: True or False
+    :rtype: bool
+    """
+    try:
+        # NOTE(fnordahl): This optimization will not be useful until we get a
+        # resolution on LP: #1818245
+        raise NotImplementedError
+        gs = goal_state()
+        return 'rbd-mirror' in gs.get('relations', {})
+    except NotImplementedError:
+        for relid in relation_ids('rbd-mirror'):
+            if related_units(relid):
+                return True
+
+
+def get_default_rbd_features():
+    """Get default value for ``rbd_default_features``.
+
+    This is retrieved by asking the installed Ceph binary to show its runtime
+    config when using an empty configuration file.
+
+    :returns: The installed Ceph's default value for ``rbd_default_features``
+    :rtype: int
+    :raises: IndexError, json.JSONDecodeError, subprocess.CalledProcessError
+    """
+    ceph_conf = json.loads(subprocess.check_output(
+        ['ceph-conf', '-c', '/dev/null', '-D', '--format', 'json'],
+        universal_newlines=True))
+    return int(ceph_conf['rbd_default_features'])
+
+
+def add_rbd_mirror_features(rbd_features):
+    """Take an RBD features bitmap and add the features required for Mirroring.
+
+    :param rbd_features: Input bitmap
+    :type rbd_features: int
+    :returns: Bitmap bitwise OR'ed with the features required for Mirroring.
+    :rtype: int
+    """
+    RBD_FEATURE_EXCLUSIVE_LOCK = 4
+    RBD_FEATURE_JOURNALING = 64
+    return rbd_features | RBD_FEATURE_EXCLUSIVE_LOCK | RBD_FEATURE_JOURNALING
+
+
+def get_rbd_features():
+    """Determine if we should set, and what the rbd default features should be.
+
+    :returns: None or the appropriate value to use
+    :rtype: Optional[int]
+    """
+    rbd_feature_config = config('default-rbd-features')
+    if rbd_feature_config:
+        return int(rbd_feature_config)
+    elif has_rbd_mirrors():
+        return add_rbd_mirror_features(get_default_rbd_features())
+
+
+def get_ceph_osd_releases():
+    ceph_osd_releases = set()
+    for r_id in relation_ids('osd'):
+        for unit in related_units(r_id):
+            ceph_osd_release = relation_get(
+                attribute='ceph_release',
+                unit=unit, rid=r_id
+            )
+            if ceph_osd_release is not None:
+                ceph_osd_releases.add(ceph_osd_release)
+    return list(ceph_osd_releases)
+
+
+def try_disable_insecure_reclaim():
+    """Disable insecure global-id reclaim on supported versions.
+
+    This function will disable insecure global-id reclaim on versions
+    of ceph that are supported. Running this on a healthy cluster or
+    a cluster that doesn't support the option won't have any effect.
+    """
+    if is_leader():
+        try:
+            subprocess.check_call([
+                'ceph', '--id', 'admin',
+                'config', 'set', 'mon',
+                'auth_allow_insecure_global_id_reclaim', 'false'])
+        except subprocess.CalledProcessError as e:
+            log("Could not disable insecure reclaim: {}".format(e),
+                level='ERROR')
+
+
+def execute_post_osd_upgrade_steps(ceph_osd_release):
+    """Executes post-upgrade steps.
+
+    Allows execution of any steps that need to be taken after osd upgrades
+    have finished (often specified in ceph upgrade docs).
+
+    :param str ceph_osd_release: the new ceph-osd release.
+    """
+    log('Executing post-ceph-osd upgrade commands.')
+    try:
+        if (_all_ceph_versions_same() and
+                not _is_required_osd_release(ceph_osd_release)):
+            log('Setting require_osd_release to {}.'.format(ceph_osd_release))
+            _set_require_osd_release(ceph_osd_release)
+    except OsdPostUpgradeError as upgrade_error:
+        msg = 'OSD post-upgrade steps failed: {}'.format(
+            upgrade_error)
+        log(message=msg, level='ERROR')
+
+
+def _get_versions():
+    """Gets the ceph versions.
+
+    Retry if the command fails, to give the cluster time to settle.
+
+    :return tuple: (bool, dict): True if successful, False if not, plus
+        the parsed versions dict
+    """
+    try:
+        versions_command = 'ceph versions'
+        versions_str = subprocess.check_output(
+            versions_command.split()).decode('UTF-8')
+    except subprocess.CalledProcessError as call_error:
+        if call_error.returncode == errno.EINVAL:
+            log('Calling "ceph versions" failed. Command requires '
+                'luminous and above.', level='WARNING')
+            return False, {}
+        else:
+            log('Calling "ceph versions" failed.', level='ERROR')
+            raise OsdPostUpgradeError(call_error)
+    log('Versions: {}'.format(versions_str), level='DEBUG')
+    versions_dict = json.loads(versions_str)
+    return True, versions_dict
+
+
+def _all_ceph_versions_same():
+    """Checks that ceph-mon and ceph-osd have converged to the same version.
+
+    :return boolean: True if all same, false if not or command failed.
+    """
+    ok, versions_dict = _get_versions()
+    if not ok:
+        return False
+    if len(versions_dict['overall']) > 1:
+        log('Not all mon and osd upgrades have completed.')
+        return False
+    if len(versions_dict.get('osd', [])) < 1:
+        log('Monitors have converged but no osd versions found.',
+            level='WARNING')
+        return False
+    return True
+
+
+def _is_required_osd_release(release):
+    """Checks to see if require_osd_release is set to the input release.
+
+    Runs and parses the ceph osd dump command to determine if
+    require_osd_release is set to the input release. If so, return
+    True. Else, return False.
+ + :param str release: the release to check against + :return bool: True if releases match, else False. + :raises: OsdPostUpgradeError + """ + try: + dump_command = 'ceph osd dump -f json' + osd_dump_str = subprocess.check_output( + dump_command.split()).decode('UTF-8') + osd_dump_dict = json.loads(osd_dump_str) + except subprocess.CalledProcessError as cmd_error: + log(message='Command {} failed.'.format(cmd_error.cmd), + level='ERROR') + raise OsdPostUpgradeError(cmd_error) + except json.JSONDecodeError as decode_error: + log(message='Failed to decode JSON.', + level='ERROR') + raise OsdPostUpgradeError(decode_error) + return osd_dump_dict.get('require_osd_release') == release + + +def _set_require_osd_release(release): + """Attempts to set the required_osd_release osd config option. + + :param str release: The release to set option to + :raises: OsdPostUpgradeError + """ + try: + command = 'ceph osd require-osd-release {} ' \ + '--yes-i-really-mean-it'.format(release) + subprocess.check_call(command.split()) + except subprocess.CalledProcessError as call_error: + msg = 'Unable to execute command <{}>'.format(call_error.cmd) + log(message=msg, level='ERROR') + raise OsdPostUpgradeError(call_error) + + +@tenacity.retry( + wait=tenacity.wait_exponential(multiplier=1, max=10), + reraise=True, + stop=tenacity.stop_after_attempt(30)) +def mgr_config_set_rbd_stats_pools(): + """Update ceph mgr config with the value from rbd-status-pools config + """ + if is_leader() and ceph_utils.is_bootstrapped(): + ceph_utils.mgr_config_set( + 'mgr/prometheus/rbd_stats_pools', + config('rbd-stats-pools') + ) diff --git a/ceph-mon/templates/ceph.conf b/ceph-mon/templates/ceph.conf new file mode 100644 index 00000000..52c1cb8f --- /dev/null +++ b/ceph-mon/templates/ceph.conf @@ -0,0 +1,81 @@ +[global] +{%- if old_auth %} +auth supported = {{ auth_supported }} +{%- else %} +auth cluster required = {{ auth_supported }} +auth service required = {{ auth_supported }} +auth client required = {{ auth_supported }} +{%- endif %} + +mon host = {{ mon_hosts }} +fsid = {{ fsid }} + +log to syslog = {{ use_syslog }} +err to syslog = {{ use_syslog }} +clog to syslog = {{ use_syslog }} +mon cluster log to syslog = {{ use_syslog }} +debug mon = {{ loglevel }}/5 +debug osd = {{ loglevel }}/5 + +# NOTE(jamespage): +# Disable object skew warnings as these only use +# the number of objects and not their size in the +# skew calculation. +mon pg warn max object skew = -1 + +{% if ms_bind_ipv6 %} +ms_bind_ipv6 = true +{%- endif %} +{%- if ms_bind_ipv4 == false %} +ms_bind_ipv4 = false +{% endif %} +{% if ceph_public_network is string %} +public network = {{ ceph_public_network }} +{%- endif %} +{%- if ceph_cluster_network is string %} +cluster network = {{ ceph_cluster_network }} +{%- endif %} +{%- if public_addr %} +public addr = {{ public_addr }} +{%- endif %} +{%- if cluster_addr %} +cluster addr = {{ cluster_addr }} +{%- endif %} +{% if global -%} +# The following are user-provided options provided via the config-flags charm option. +# User-provided [global] section config +{% for key in global -%} +{{ key }} = {{ global[key] }} +{% endfor %} +{% endif %} + +{% if rbd_features %} +rbd default features = {{ rbd_features }} +{% endif %} + +[mon] +keyring = /var/lib/ceph/mon/$cluster-$id/keyring +{% if mon -%} +# The following are user-provided options provided via the config-flags charm option. 
+# User-provided [mon] section config
+{% for key in mon -%}
+{{ key }} = {{ mon[key] }}
+{% endfor %}
+{% endif %}
+{% if disable_object_skew and "mon pg warn max object skew" not in mon %}
+mon pg warn max object skew = 0
+{% endif %}
+
+mon data avail warn = {{ mon_data_avail_warn }}
+mon data avail crit = {{ mon_data_avail_crit }}
+
+[mds]
+keyring = /var/lib/ceph/mds/$cluster-$id/keyring
+{% if mds -%}
+# The following are user-provided options provided via the config-flags charm option.
+# User-provided [mds] section config
+{% for key in mds -%}
+{{ key }} = {{ mds[key] }}
+{% endfor %}
+{% endif %}
+
diff --git a/ceph-mon/terraform/README.md b/ceph-mon/terraform/README.md
new file mode 100644
index 00000000..88248606
--- /dev/null
+++ b/ceph-mon/terraform/README.md
@@ -0,0 +1,94 @@
+# Terraform Manifest Module
+
+This module reads a yaml configuration file and exports the values into terraform variables that
+can be passed down into other modules. It is specifically tailored for working with
+modules for charms defined with the
+[juju terraform provider](https://registry.terraform.io/providers/juju/juju/latest/docs). It
+avoids having to pass every individual charm input as a separate variable in the
+product-level module for a given product.
+
+## Inputs
+
+| Name       | Type   | Description                                                             | Required |
+|------------|--------|-------------------------------------------------------------------------|----------|
+| `manifest` | string | Absolute path to the yaml file with the config for a Juju application. | true     |
+| `app`      | string | Name of the application to load the config for.                        | true     |
+
+## Outputs
+
+All outputs are under `config` as a map of the values below:
+
+| Name          | Description                                                                         |
+|---------------|--------------------------------------------------------------------------------------|
+| `app_name`    | Name of the application in Juju.                                                    |
+| `base`        | Base to deploy the charm on, e.g. ubuntu@24.04.                                     |
+| `channel`     | Channel of the application being deployed.                                          |
+| `config`      | Map of the config for the charm; available options are documented per charm.       |
+| `constraints` | String of constraints to apply when deploying the charm, e.g. `cores=2 mem=4096M`. |
+| `resources`   | List of resources to deploy with the charm.                                         |
+| `revision`    | Specific revision of this charm to deploy.                                          |
+| `units`       | Number of units of a charm to deploy.                                               |
+| `storage`     | Storage configuration of a charm to deploy.                                         |
+
+## Usage
+
+This module is meant to be used as a helper for product modules. It lets the user
+keep a single manifest yaml file holding all the configuration for a solution or
+deployment, while sparing the developer from maintaining the configuration
+separately for each charm and the overall product.
+
+### Defining a `manifest` in terraform
+
+The manifest module will have to be defined for each charm in question. Terraform will
+load the config under the app key and output the values. If the key is not found in the
+manifest, then the module will return `null` and terraform will ignore the configuration.
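The lookup the manifest module performs is roughly equivalent to the Python sketch below (an illustration only; the real module is Terraform, and `manifest.yaml`/`ceph_mon` are assumed names). The actual Terraform usage follows after it.

```python
import yaml  # assumes PyYAML is available


def load_app_config(manifest_path, app):
    """Return the app's config block from the manifest, or None."""
    with open(manifest_path) as f:
        manifest = yaml.safe_load(f) or {}
    # A missing app key yields None, mirroring the module returning
    # `null` so that terraform ignores the configuration.
    return manifest.get(app)


config = load_app_config("manifest.yaml", "ceph_mon")
print(config)  # e.g. {'channel': 'quincy/stable', 'units': 1, ...}
```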
+
+```
+module "ceph_mon_config" {
+  source   = "git::https://github.com/canonical/k8s-bundles//terraform/manifest?ref=main"
+  manifest = var.manifest_yaml
+  app      = "ceph_mon"
+}
+```
+
+These values can then be passed into a resource for a specific charm:
+
+```
+module "ceph_mon" {
+  source      = "git::https://github.com/canonical/ceph-charms//ceph-mon/terraform?ref=main"
+  app_name    = module.ceph_mon_config.config.app_name
+  channel     = module.ceph_mon_config.config.channel
+  config      = module.ceph_mon_config.config.config
+  constraints = module.ceph_mon_config.config.constraints
+  model       = var.model
+  resources   = module.ceph_mon_config.config.resources
+  revision    = module.ceph_mon_config.config.revision
+  base        = module.ceph_mon_config.config.base
+  units       = module.ceph_mon_config.config.units
+}
+```
+
+### Defining a manifest.yaml
+
+In the implementation of the product module, the user can specify their configuration using
+a single manifest file similar to the one below:
+
+``` yaml
+ceph_mon:
+  channel: quincy/stable
+  constraints: arch=amd64 cores=2 mem=8192M root-disk=16384M virt-type=virtual-machine
+  units: 1
+  config:
+    monitor-count: 1
+    expected-osd-count: 2
+ceph_osd:
+  channel: quincy/stable
+  constraints: arch=amd64 cores=2 mem=8192M root-disk=16384M virt-type=virtual-machine
+  units: 2
+  storage:
+    osd-devices: 1G,1
+    osd-journals: 1G,1
+```
+
+Using the terraform in the above section, the `units`, `base`, `constraints`, and `channel`
+values are forwarded into the `ceph-mon` deployment.
diff --git a/ceph-mon/terraform/main.tf b/ceph-mon/terraform/main.tf
new file mode 100644
index 00000000..c800e77a
--- /dev/null
+++ b/ceph-mon/terraform/main.tf
@@ -0,0 +1,19 @@
+# Copyright 2025 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+resource "juju_application" "ceph_mon" {
+  name  = var.app_name
+  model = var.model
+
+  charm {
+    name     = "ceph-mon"
+    channel  = var.channel
+    revision = var.revision
+    base     = var.base
+  }
+
+  config      = var.config
+  constraints = var.constraints
+  units       = var.units
+  resources   = var.resources
+}
diff --git a/ceph-mon/terraform/outputs.tf b/ceph-mon/terraform/outputs.tf
new file mode 100644
index 00000000..a994bec7
--- /dev/null
+++ b/ceph-mon/terraform/outputs.tf
@@ -0,0 +1,14 @@
+# Copyright 2025 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+output "app_name" {
+  description = "Name of the deployed application."
+  value       = juju_application.ceph_mon.name
+}
+
+output "provides" {
+  value = {
+    osd    = "osd"
+    client = "client"
+  }
+}
diff --git a/ceph-mon/terraform/variables.tf b/ceph-mon/terraform/variables.tf
new file mode 100644
index 00000000..322aa49f
--- /dev/null
+++ b/ceph-mon/terraform/variables.tf
@@ -0,0 +1,55 @@
+# Copyright 2025 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+variable "app_name" {
+  description = "Name of the application in the Juju model."
+  type        = string
+  default     = "ceph-mon"
+}
+
+variable "base" {
+  description = "Ubuntu base to deploy the charm onto."
+  type        = string
+  default     = "ubuntu@24.04"
+}
+
+variable "channel" {
+  description = "The channel to use when deploying a charm."
+  type        = string
+  default     = "squid/beta"
+}
+
+variable "resources" {
+  description = "Resources to use with the application."
+  type        = map(string)
+  default     = {}
+}
+
+variable "revision" {
+  description = "Revision number of the charm."
+  type        = number
+  default     = null
+}
+
+variable "units" {
+  description = "Number of units to deploy."
+  type        = number
+  default     = 1
+}
+
+variable "config" {
+  description = "Application config.
Details about available options can be found at https://charmhub.io/ceph-mon/configurations." + type = map(string) + default = {} +} + +variable "constraints" { + description = "Juju constraints to apply for this application." + type = string + default = "arch=amd64" +} + +variable "model" { + description = "Reference to a `juju_model`." + type = string +} diff --git a/ceph-mon/terraform/versions.tf b/ceph-mon/terraform/versions.tf new file mode 100644 index 00000000..f5324296 --- /dev/null +++ b/ceph-mon/terraform/versions.tf @@ -0,0 +1,12 @@ +# Copyright 2025 Canonical Ltd. +# See LICENSE file for licensing details. + +terraform { + required_version = ">= 1.6" + required_providers { + juju = { + source = "juju/juju" + version = ">= 0.14.0, < 1.0.0" + } + } +} diff --git a/ceph-mon/test-requirements.txt b/ceph-mon/test-requirements.txt new file mode 100644 index 00000000..43248e4c --- /dev/null +++ b/ceph-mon/test-requirements.txt @@ -0,0 +1,50 @@ +# This file is managed centrally by release-tools and should not be modified +# within individual charm repos. See the 'global' dir contents for available +# choices of *requirements.txt files for OpenStack Charms: +# https://github.com/openstack-charmers/release-tools + +# NOTE: newer versions of cryptography require a Rust compiler to build, +# see +# * https://github.com/openstack-charmers/zaza/issues/421 +# * https://mail.python.org/pipermail/cryptography-dev/2021-January/001003.html +# +cryptography<3.4 + +requests>=2.18.4 + +stestr>=2.2.0 + +# Dependency of stestr. Workaround for +# https://github.com/mtreinish/stestr/issues/145 +cliff<3.0.0 + +# Dependencies of stestr. Newer versions use keywords that didn't exist in +# python 3.5 yet (e.g. "ModuleNotFoundError") +importlib-metadata<3.0.0; python_version < '3.6' +importlib-resources<3.0.0; python_version < '3.6' + +# Some Zuul nodes sometimes pull newer versions of these dependencies which +# dropped support for python 3.5: +osprofiler<2.7.0;python_version<'3.6' +stevedore<1.31.0;python_version<'3.6' +debtcollector<1.22.0;python_version<'3.6' +oslo.utils<=3.41.0;python_version<'3.6' + +coverage>=4.5.2 +pyudev # for ceph-* charm unit tests (need to fix the ceph-* charm unit tests/mocking) +git+https://github.com/openstack-charmers/zaza.git#egg=zaza +git+https://github.com/openstack-charmers/zaza-openstack-tests.git#egg=zaza.openstack + +# Needed for charm-glance: +git+https://opendev.org/openstack/tempest.git#egg=tempest;python_version>='3.8' +tempest<30.0.0;python_version<'3.8' and python_version >= '3.6' +tempest<24.0.0;python_version<'3.6' + +croniter # needed for charm-rabbitmq-server unit tests + +# icey: pyopenssl 22 introduces a requirement on newer OpenSSL which causes test +# failures. Pin pyopenssl to resolve the failure. +pyopenssl<=22.0.0 + +pydantic < 2 +cosl diff --git a/ceph-mon/tests/bundles/jammy-antelope.yaml b/ceph-mon/tests/bundles/jammy-antelope.yaml new file mode 100644 index 00000000..1be00105 --- /dev/null +++ b/ceph-mon/tests/bundles/jammy-antelope.yaml @@ -0,0 +1,261 @@ +variables: + openstack-origin: &openstack-origin cloud:jammy-antelope + +local_overlay_enabled: False + +series: jammy + +comment: +- 'machines section to decide order of deployment. 
database sooner = faster' +machines: + '0': + constraints: mem=3072M + '1': + constraints: mem=3072M + '2': + constraints: mem=3072M + '3': + '4': + '5': + '6': + '7': + '8': + '9': + '10': + '11': + '12': + '13': + '14': + '15': + '16': + '17': + + +applications: + + keystone-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + glance-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + cinder-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + nova-cloud-controller-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + placement-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + + mysql-innodb-cluster: + charm: ch:mysql-innodb-cluster + num_units: 3 + to: + - '0' + - '1' + - '2' + channel: 8.0/edge + + rabbitmq-server: + charm: ch:rabbitmq-server + num_units: 1 + to: + - '9' + channel: 3.9/edge + + ceph-osd: + charm: ch:ceph-osd + num_units: 3 + storage: + osd-devices: '10G' + options: + source: *openstack-origin + osd-devices: '/dev/test-non-existent' + to: + - '3' + - '4' + - '5' + channel: quincy/edge + + ceph-mon: + charm: ch:ceph-mon + channel: quincy/edge + num_units: 3 + options: + source: *openstack-origin + monitor-count: '3' + to: + - '6' + - '7' + - '8' + + ceph-fs: + charm: ch:ceph-fs + num_units: 1 + options: + source: *openstack-origin + channel: quincy/edge + to: + - '17' + + keystone: + expose: True + charm: ch:keystone + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '10' + channel: 2023.1/edge + + nova-compute: + charm: ch:nova-compute + num_units: 1 + options: + openstack-origin: *openstack-origin + libvirt-image-backend: rbd + to: + - '11' + channel: 2023.1/edge + + glance: + expose: True + charm: ch:glance + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '12' + channel: 2023.1/edge + + cinder: + expose: True + charm: ch:cinder + num_units: 1 + options: + block-device: 'None' + glance-api-version: '2' + openstack-origin: *openstack-origin + to: + - '13' + channel: 2023.1/edge + + cinder-ceph: + charm: ch:cinder-ceph + channel: 2023.1/edge + + nova-cloud-controller: + expose: True + charm: ch:nova-cloud-controller + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '14' + channel: 2023.1/edge + + placement: + charm: ch:placement + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '15' + channel: 2023.1/edge + + prometheus2: + charm: ch:prometheus2 + num_units: 1 + to: + - '16' + +relations: + - - 'nova-compute:amqp' + - 'rabbitmq-server:amqp' + + - - 'nova-compute:image-service' + - 'glance:image-service' + + - - 'nova-compute:ceph' + - 'ceph-mon:client' + + - - nova-compute:ceph-access + - cinder-ceph:ceph-access + + - - 'keystone:shared-db' + - 'keystone-mysql-router:shared-db' + - - 'keystone-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'glance:shared-db' + - 'glance-mysql-router:shared-db' + - - 'glance-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'glance:identity-service' + - 'keystone:identity-service' + + - - 'glance:amqp' + - 'rabbitmq-server:amqp' + + - - 'glance:ceph' + - 'ceph-mon:client' + + - - 'cinder:shared-db' + - 'cinder-mysql-router:shared-db' + - - 'cinder-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'cinder:identity-service' + - 'keystone:identity-service' + + - - 'cinder:amqp' + - 'rabbitmq-server:amqp' + + - - 'cinder:image-service' + - 'glance:image-service' + + - - 'cinder-ceph:storage-backend' + - 'cinder:storage-backend' + + - - 
'cinder-ceph:ceph' + - 'ceph-mon:client' + + - - 'ceph-osd:mon' + - 'ceph-mon:osd' + + - - 'ceph-mon:mds' + - 'ceph-fs:ceph-mds' + + - - 'nova-cloud-controller:shared-db' + - 'nova-cloud-controller-mysql-router:shared-db' + - - 'nova-cloud-controller-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'nova-cloud-controller:identity-service' + - 'keystone:identity-service' + + - - 'nova-cloud-controller:amqp' + - 'rabbitmq-server:amqp' + + - - 'nova-cloud-controller:cloud-compute' + - 'nova-compute:cloud-compute' + + - - 'nova-cloud-controller:image-service' + - 'glance:image-service' + + - - 'placement:shared-db' + - 'placement-mysql-router:shared-db' + - - 'placement-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'placement' + - 'keystone' + + - - 'placement' + - 'nova-cloud-controller' + + - - 'ceph-mon:prometheus' + - 'prometheus2:target' diff --git a/ceph-mon/tests/bundles/jammy-bobcat.yaml b/ceph-mon/tests/bundles/jammy-bobcat.yaml new file mode 100644 index 00000000..b9c1033f --- /dev/null +++ b/ceph-mon/tests/bundles/jammy-bobcat.yaml @@ -0,0 +1,261 @@ +variables: + openstack-origin: &openstack-origin cloud:jammy-bobcat + +local_overlay_enabled: False + +series: jammy + +comment: +- 'machines section to decide order of deployment. database sooner = faster' +machines: + '0': + constraints: mem=3072M + '1': + constraints: mem=3072M + '2': + constraints: mem=3072M + '3': + '4': + '5': + '6': + '7': + '8': + '9': + '10': + '11': + '12': + '13': + '14': + '15': + '16': + '17': + + +applications: + + keystone-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + glance-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + cinder-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + nova-cloud-controller-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + placement-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + + mysql-innodb-cluster: + charm: ch:mysql-innodb-cluster + num_units: 3 + to: + - '0' + - '1' + - '2' + channel: 8.0/edge + + rabbitmq-server: + charm: ch:rabbitmq-server + num_units: 1 + to: + - '9' + channel: 3.9/edge + + ceph-osd: + charm: ch:ceph-osd + num_units: 3 + storage: + osd-devices: '10G' + options: + source: *openstack-origin + osd-devices: '/dev/test-non-existent' + to: + - '3' + - '4' + - '5' + channel: reef/edge + + ceph-mon: + charm: ch:ceph-mon + channel: reef/edge + num_units: 3 + options: + source: *openstack-origin + monitor-count: '3' + to: + - '6' + - '7' + - '8' + + ceph-fs: + charm: ch:ceph-fs + num_units: 1 + options: + source: *openstack-origin + channel: reef/edge + to: + - '17' + + keystone: + expose: True + charm: ch:keystone + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '10' + channel: 2023.2/edge + + nova-compute: + charm: ch:nova-compute + num_units: 1 + options: + openstack-origin: *openstack-origin + libvirt-image-backend: rbd + to: + - '11' + channel: 2023.2/edge + + glance: + expose: True + charm: ch:glance + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '12' + channel: 2023.2/edge + + cinder: + expose: True + charm: ch:cinder + num_units: 1 + options: + block-device: 'None' + glance-api-version: '2' + openstack-origin: *openstack-origin + to: + - '13' + channel: 2023.2/edge + + cinder-ceph: + charm: ch:cinder-ceph + channel: 2023.2/edge + + nova-cloud-controller: + expose: True + charm: ch:nova-cloud-controller + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '14' + channel: 
2023.2/edge + + placement: + charm: ch:placement + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '15' + channel: 2023.2/edge + + prometheus2: + charm: ch:prometheus2 + num_units: 1 + to: + - '16' + +relations: + - - 'nova-compute:amqp' + - 'rabbitmq-server:amqp' + + - - 'nova-compute:image-service' + - 'glance:image-service' + + - - 'nova-compute:ceph' + - 'ceph-mon:client' + + - - nova-compute:ceph-access + - cinder-ceph:ceph-access + + - - 'keystone:shared-db' + - 'keystone-mysql-router:shared-db' + - - 'keystone-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'glance:shared-db' + - 'glance-mysql-router:shared-db' + - - 'glance-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'glance:identity-service' + - 'keystone:identity-service' + + - - 'glance:amqp' + - 'rabbitmq-server:amqp' + + - - 'glance:ceph' + - 'ceph-mon:client' + + - - 'cinder:shared-db' + - 'cinder-mysql-router:shared-db' + - - 'cinder-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'cinder:identity-service' + - 'keystone:identity-service' + + - - 'cinder:amqp' + - 'rabbitmq-server:amqp' + + - - 'cinder:image-service' + - 'glance:image-service' + + - - 'cinder-ceph:storage-backend' + - 'cinder:storage-backend' + + - - 'cinder-ceph:ceph' + - 'ceph-mon:client' + + - - 'ceph-osd:mon' + - 'ceph-mon:osd' + + - - 'ceph-mon:mds' + - 'ceph-fs:ceph-mds' + + - - 'nova-cloud-controller:shared-db' + - 'nova-cloud-controller-mysql-router:shared-db' + - - 'nova-cloud-controller-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'nova-cloud-controller:identity-service' + - 'keystone:identity-service' + + - - 'nova-cloud-controller:amqp' + - 'rabbitmq-server:amqp' + + - - 'nova-cloud-controller:cloud-compute' + - 'nova-compute:cloud-compute' + + - - 'nova-cloud-controller:image-service' + - 'glance:image-service' + + - - 'placement:shared-db' + - 'placement-mysql-router:shared-db' + - - 'placement-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'placement' + - 'keystone' + + - - 'placement' + - 'nova-cloud-controller' + + - - 'ceph-mon:prometheus' + - 'prometheus2:target' diff --git a/ceph-mon/tests/bundles/jammy-caracal.yaml b/ceph-mon/tests/bundles/jammy-caracal.yaml new file mode 100644 index 00000000..4209121d --- /dev/null +++ b/ceph-mon/tests/bundles/jammy-caracal.yaml @@ -0,0 +1,45 @@ +variables: + openstack-origin: &openstack-origin cloud:jammy-caracal + +series: &series jammy + +machines: + '0': + '1': + '2': + '3': + constraints: cores=2 mem=6G root-disk=40G virt-type=virtual-machine + '4': + constraints: cores=2 mem=6G root-disk=40G virt-type=virtual-machine + '5': + constraints: cores=2 mem=6G root-disk=40G virt-type=virtual-machine + +applications: + ceph-mon: + charm: ch:ceph-mon + channel: latest/edge + num_units: 3 + options: + monitor-count: 3 + source: *openstack-origin + to: + - '0' + - '1' + - '2' + + ceph-osd: + charm: ch:ceph-osd + num_units: 3 + channel: latest/edge + options: + source: *openstack-origin + storage: + osd-devices: 'loop,10G' + to: + - '3' + - '4' + - '5' + +relations: + - - 'ceph-osd:mon' + - 'ceph-mon:osd' diff --git a/ceph-mon/tests/target.py b/ceph-mon/tests/target.py new file mode 100644 index 00000000..c3b0da6d --- /dev/null +++ b/ceph-mon/tests/target.py @@ -0,0 +1,2091 @@ +# Copyright 2018 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Ceph Testing.""" + +import unittest +import json +import logging +from os import ( + listdir, + path +) +import requests +import tempfile +import boto3 +import botocore.exceptions +import urllib3 + +import tenacity + +import zaza.charm_lifecycle.utils as lifecycle_utils +import zaza.openstack.charm_tests.test_utils as test_utils +import zaza.model as zaza_model +import zaza.openstack.utilities.ceph as zaza_ceph +import zaza.openstack.utilities.exceptions as zaza_exceptions +import zaza.openstack.utilities.generic as zaza_utils +import zaza.utilities.juju as juju_utils +import zaza.openstack.utilities.openstack as zaza_openstack +import zaza.openstack.utilities.generic as generic_utils + +# Disable warnings for ssl_verify=false +urllib3.disable_warnings( + urllib3.exceptions.InsecureRequestWarning +) + + +class CephLowLevelTest(test_utils.BaseCharmTest): + """Ceph Low Level Test Class.""" + + @classmethod + def setUpClass(cls): + """Run class setup for running ceph low level tests.""" + super(CephLowLevelTest, cls).setUpClass() + + def test_processes(self): + """Verify Ceph processes. + + Verify that the expected service processes are running + on each ceph unit. + """ + logging.info('Checking ceph-mon and ceph-osd processes...') + # Process name and quantity of processes to expect on each unit + ceph_mon_processes = { + 'ceph-mon': 1, + 'ceph-mgr': 1, + } + + ceph_osd_processes = { + 'ceph-osd': [1, 2, 3] + } + + # Units with process names and PID quantities expected + expected_processes = { + 'ceph-mon/0': ceph_mon_processes, + 'ceph-mon/1': ceph_mon_processes, + 'ceph-mon/2': ceph_mon_processes, + 'ceph-osd/0': ceph_osd_processes, + 'ceph-osd/1': ceph_osd_processes, + 'ceph-osd/2': ceph_osd_processes + } + + actual_pids = zaza_utils.get_unit_process_ids(expected_processes) + ret = zaza_utils.validate_unit_process_ids(expected_processes, + actual_pids) + self.assertTrue(ret) + + def test_services(self): + """Verify the ceph services. + + Verify the expected services are running on the service units. 
+
+        """
+        logging.info('Checking ceph-osd and ceph-mon services...')
+        services = {}
+        ceph_services = ['ceph-mon', 'ceph-mgr']
+        services['ceph-osd/0'] = ['ceph-osd']
+
+        services['ceph-mon/0'] = ceph_services
+        services['ceph-mon/1'] = ceph_services
+        services['ceph-mon/2'] = ceph_services
+
+        for unit_name, unit_services in services.items():
+            zaza_model.block_until_service_status(
+                unit_name=unit_name,
+                services=unit_services,
+                target_status='running'
+            )
+
+    @test_utils.skipUntilVersion('ceph-mon', 'ceph', '14.2.0')
+    def test_pg_tuning(self):
+        """Verify that auto PG tuning is enabled for Nautilus+."""
+        unit_name = 'ceph-mon/0'
+        cmd = "ceph osd pool autoscale-status --format=json"
+        result = zaza_model.run_on_unit(unit_name, cmd)
+        self.assertEqual(result['Code'], '0')
+        for pool in json.loads(result['Stdout']):
+            self.assertEqual(pool['pg_autoscale_mode'], 'on')
+
+
+class CephTest(test_utils.BaseCharmTest):
+    """Ceph common functional tests."""
+
+    @classmethod
+    def setUpClass(cls):
+        """Run Ceph's common class setup."""
+        super(CephTest, cls).setUpClass()
+
+    def osd_out_in(self, services):
+        """Run OSD out and OSD in tests.
+
+        Remove OSDs and then add them back in on a unit, checking that
+        services are in the required state after each action.
+
+        :param services: Services expected to be running after each action.
+        :type services: list
+        """
+        zaza_model.block_until_service_status(
+            self.lead_unit,
+            services,
+            'running',
+            model_name=self.model_name)
+        zaza_model.block_until_unit_wl_status(
+            self.lead_unit,
+            'active',
+            model_name=self.model_name)
+        zaza_model.run_action(
+            self.lead_unit,
+            'osd-out',
+            model_name=self.model_name)
+        zaza_model.block_until_unit_wl_status(
+            self.lead_unit,
+            'maintenance',
+            model_name=self.model_name)
+        zaza_model.block_until_all_units_idle(model_name=self.model_name)
+        zaza_model.run_action(
+            self.lead_unit,
+            'osd-in',
+            model_name=self.model_name)
+        zaza_model.block_until_unit_wl_status(
+            self.lead_unit,
+            'active',
+            model_name=self.model_name)
+        zaza_model.block_until_all_units_idle(model_name=self.model_name)
+        zaza_model.block_until_service_status(
+            self.lead_unit,
+            services,
+            'running',
+            model_name=self.model_name)
+
+    def test_ceph_check_osd_pools(self):
+        """Check OSD pools.
+
+        Check osd pools on all ceph units, expect them to be
+        identical, and expect specific pools to be present.
+        """
+        try:
+            zaza_model.get_application('cinder-ceph')
+        except KeyError:
+            raise unittest.SkipTest("Skipping OpenStack dependent test")
+        logging.info('Checking pools on ceph units...')
+
+        expected_pools = zaza_ceph.get_expected_pools()
+        results = []
+        unit_name = 'ceph-mon/0'
+
+        # Check for presence of expected pools on each unit
+        logging.debug('Expected pools: {}'.format(expected_pools))
+        pools = zaza_ceph.get_ceph_pools(unit_name)
+        results.append(pools)
+
+        for expected_pool in expected_pools:
+            if expected_pool not in pools:
+                msg = ('{} does not have pool: '
+                       '{}'.format(unit_name, expected_pool))
+                raise zaza_exceptions.CephPoolNotFound(msg)
+        logging.debug('{} has (at least) the expected '
+                      'pools.'.format(unit_name))
+
+        # Check that all units returned the same pool name:id data
+        for i, result in enumerate(results):
+            for other in results[i+1:]:
+                logging.debug('result: {}, other: {}'.format(result, other))
+                self.assertEqual(result, other)
+
+    def test_ceph_pool_creation_with_text_file(self):
+        """Check the creation of a pool and a text file. 
+
+        Create a pool, add a text file to it and retrieve its content.
+        Verify that the content matches the original file.
+        """
+        unit_name = 'ceph-mon/0'
+        cmd = 'sudo ceph osd pool create test {PG_NUM}; \
+            echo 123456789 > /tmp/input.txt; \
+            rados put -p test test_input /tmp/input.txt; \
+            rados get -p test test_input /dev/stdout'
+        cmd = cmd.format(PG_NUM=32)
+        logging.debug('Creating test pool and putting test file in pool...')
+        result = zaza_model.run_on_unit(unit_name, cmd)
+        code = result.get('Code')
+        if code != '0':
+            raise zaza_model.CommandRunFailed(cmd, result)
+        output = result.get('Stdout').strip()
+        logging.debug('Output received: {}'.format(output))
+        self.assertEqual(output, '123456789')
+
+    def test_ceph_encryption(self):
+        """Test Ceph encryption.
+
+        Verify that the new disk is added with encryption by checking for
+        Ceph's encryption keys directory.
+        """
+        current_release = zaza_openstack.get_os_release(application='ceph-mon')
+        trusty_mitaka = zaza_openstack.get_os_release('trusty_mitaka')
+        if current_release >= trusty_mitaka:
+            logging.warn("Skipping encryption test for Mitaka and higher")
+            return
+        unit_name = 'ceph-osd/0'
+        set_default = {
+            'osd-encrypt': 'False',
+            'osd-devices': '/dev/vdb /srv/ceph',
+        }
+        set_alternate = {
+            'osd-encrypt': 'True',
+            'osd-devices': '/dev/vdb /srv/ceph /srv/ceph_encrypted',
+        }
+        juju_service = 'ceph-osd'
+        logging.info('Making config change on {}...'.format(juju_service))
+        mtime = zaza_model.get_unit_time(unit_name)
+
+        file_mtime = None
+
+        folder_name = '/etc/ceph/dmcrypt-keys/'
+        with self.config_change(set_default, set_alternate,
+                                application_name=juju_service):
+            with tempfile.TemporaryDirectory() as tempdir:
+                # Creating a temp dir to copy keys
+                temp_folder = '/tmp/dmcrypt-keys'
+                cmd = 'mkdir {}'.format(temp_folder)
+                ret = zaza_model.run_on_unit(unit_name, cmd)
+                logging.debug('Ret for cmd {} is {}'.format(cmd, ret))
+                # Copy keys from /etc to /tmp
+                cmd = 'sudo cp {}* {}'.format(folder_name, temp_folder)
+                ret = zaza_model.run_on_unit(unit_name, cmd)
+                logging.debug('Ret for cmd {} is {}'.format(cmd, ret))
+                # Changing permissions to be able to SCP the files
+                cmd = 'sudo chown -R ubuntu:ubuntu {}'.format(temp_folder)
+                ret = zaza_model.run_on_unit(unit_name, cmd)
+                logging.debug('Ret for cmd {} is {}'.format(cmd, ret))
+                # SCP to retrieve all files in folder
+                # -p: preserve timestamps
+                source = '/tmp/dmcrypt-keys/*'
+                zaza_model.scp_from_unit(unit_name=unit_name,
+                                         source=source,
+                                         destination=tempdir,
+                                         scp_opts='-p')
+                for elt in listdir(tempdir):
+                    file_path = '/'.join([tempdir, elt])
+                    if path.isfile(file_path):
+                        file_mtime = path.getmtime(file_path)
+                        if file_mtime:
+                            break
+
+        if not file_mtime:
+            logging.warn('Could not determine mtime, assuming '
+                         'folder does not exist')
+            raise FileNotFoundError('folder does not exist')
+
+        if file_mtime >= mtime:
+            logging.info('Folder mtime is newer than provided mtime '
+                         '(%s >= %s) on %s (OK)' % (file_mtime,
+                                                    mtime, unit_name))
+        else:
+            logging.warn('Folder mtime is older than provided mtime '
+                         '(%s < %s) on %s' % (file_mtime,
+                                              mtime, unit_name))
+            raise Exception('Folder mtime is older than provided mtime')
+
+    def test_blocked_when_non_pristine_disk_appears(self):
+        """Test blocked state with non-pristine disk.
+
+        Validate that charm goes into blocked state when it is presented with
+        new block devices that have foreign data on them.
+        Instances used in UOSCI have a flavour with ephemeral storage in
+        addition to the bootable instance storage. 
The ephemeral storage + device is partitioned, formatted and mounted early in the boot process + by cloud-init. + As long as the device is mounted the charm will not attempt to use it. + If we unmount it and trigger the config-changed hook the block device + will appear as a new and previously untouched device for the charm. + One of the first steps of device eligibility checks should be to make + sure we are seeing a pristine and empty device before doing any + further processing. + As the ephemeral device will have data on it we can use it to validate + that these checks work as intended. + """ + current_release = zaza_openstack.get_os_release(application='ceph-mon') + focal_ussuri = zaza_openstack.get_os_release('focal_ussuri') + if current_release >= focal_ussuri: + # NOTE(ajkavanagh) - focal (on ServerStack) is broken for /dev/vdb + # and so this test can't pass: LP#1842751 discusses the issue, but + # basically the snapd daemon along with lxcfs results in /dev/vdb + # being mounted in the lxcfs process namespace. If the charm + # 'tries' to umount it, it can (as root), but the mount is still + # 'held' by lxcfs and thus nothing else can be done with it. This + # is only a problem in serverstack with images with a default + # /dev/vdb ephemeral + logging.warn("Skipping pristine disk test for focal and higher") + return + logging.info('Checking behaviour when non-pristine disks appear...') + logging.info('Configuring ephemeral-unmount...') + alternate_conf = { + 'ephemeral-unmount': '/mnt', + 'osd-devices': '/dev/vdb' + } + juju_service = 'ceph-osd' + zaza_model.set_application_config(juju_service, alternate_conf) + ceph_osd_states = { + 'ceph-osd': { + 'workload-status': 'blocked', + 'workload-status-message': 'Non-pristine' + } + } + zaza_model.wait_for_application_states(states=ceph_osd_states) + logging.info('Units now in blocked state, running zap-disk action...') + unit_names = ['ceph-osd/0', 'ceph-osd/1', 'ceph-osd/2'] + for unit_name in unit_names: + zap_disk_params = { + 'devices': '/dev/vdb', + 'i-really-mean-it': True, + } + action_obj = zaza_model.run_action( + unit_name=unit_name, + action_name='zap-disk', + action_params=zap_disk_params + ) + logging.debug('Result of action: {}'.format(action_obj)) + + logging.info('Running add-disk action...') + for unit_name in unit_names: + add_disk_params = { + 'osd-devices': '/dev/vdb', + } + action_obj = zaza_model.run_action( + unit_name=unit_name, + action_name='add-disk', + action_params=add_disk_params + ) + logging.debug('Result of action: {}'.format(action_obj)) + + logging.info('Wait for idle/ready status...') + zaza_model.wait_for_application_states() + + logging.info('OK') + + set_default = { + 'ephemeral-unmount': '', + 'osd-devices': '/dev/vdb', + } + + bionic_train = zaza_openstack.get_os_release('bionic_train') + if current_release < bionic_train: + set_default['osd-devices'] = '/dev/vdb /srv/ceph' + + logging.info('Restoring to default configuration...') + zaza_model.set_application_config(juju_service, set_default) + + zaza_model.wait_for_application_states() + + def test_pause_and_resume(self): + """The services can be paused and resumed.""" + logging.info('Checking pause and resume actions...') + self.pause_resume(['ceph-osd']) + + def get_device_for_blacklist(self, unit): + """Return a device to be used by the blacklist tests.""" + cmd = "mount | grep 'on / ' | awk '{print $1}'" + obj = zaza_model.run_on_unit(unit, cmd) + return obj.get('Stdout').strip() + + def test_blacklist(self): + """Check the blacklist 
action.
+
+        The blacklist actions execute and behave as expected.
+        """
+        logging.info('Checking blacklist-add-disk and '
+                     'blacklist-remove-disk actions...')
+        unit_name = 'ceph-osd/0'
+
+        zaza_model.block_until_unit_wl_status(
+            unit_name,
+            'active'
+        )
+
+        # Attempt to add device with non-absolute path should fail
+        action_obj = zaza_model.run_action(
+            unit_name=unit_name,
+            action_name='blacklist-add-disk',
+            action_params={'osd-devices': 'vda'}
+        )
+        self.assertTrue(action_obj.status != 'completed')
+        zaza_model.block_until_unit_wl_status(
+            unit_name,
+            'active'
+        )
+
+        # Attempt to add device with non-existent path should fail
+        action_obj = zaza_model.run_action(
+            unit_name=unit_name,
+            action_name='blacklist-add-disk',
+            action_params={'osd-devices': '/non-existent'}
+        )
+        self.assertTrue(action_obj.status != 'completed')
+        zaza_model.block_until_unit_wl_status(
+            unit_name,
+            'active'
+        )
+
+        # Attempt to add device with existent path should succeed
+        device = self.get_device_for_blacklist(unit_name)
+        if not device:
+            raise unittest.SkipTest(
+                "Skipping test because no device was found")
+
+        action_obj = zaza_model.run_action(
+            unit_name=unit_name,
+            action_name='blacklist-add-disk',
+            action_params={'osd-devices': device}
+        )
+        self.assertEqual('completed', action_obj.status)
+        zaza_model.block_until_unit_wl_status(
+            unit_name,
+            'active'
+        )
+
+        # Attempt to remove listed device should always succeed
+        action_obj = zaza_model.run_action(
+            unit_name=unit_name,
+            action_name='blacklist-remove-disk',
+            action_params={'osd-devices': device}
+        )
+        self.assertEqual('completed', action_obj.status)
+        zaza_model.block_until_unit_wl_status(
+            unit_name,
+            'active'
+        )
+        logging.debug('OK')
+
+    def test_list_disks(self):
+        """Test the list-disks action.
+
+        The list-disks action executes.
+        """
+        logging.info('Checking list-disks action...')
+        unit_name = 'ceph-osd/0'
+
+        zaza_model.block_until_unit_wl_status(
+            unit_name,
+            'active'
+        )
+
+        action_obj = zaza_model.run_action(
+            unit_name=unit_name,
+            action_name='list-disks',
+        )
+        self.assertEqual('completed', action_obj.status)
+        zaza_model.block_until_unit_wl_status(
+            unit_name,
+            'active'
+        )
+        logging.debug('OK')
+
+    def get_local_osd_id(self, unit):
+        """Get the OSD id for a unit."""
+        ret = zaza_model.run_on_unit(unit,
+                                     'ceph-volume lvm list --format=json')
+        local = list(json.loads(ret['Stdout']))[-1]
+        return local if local.startswith('osd.') else 'osd.' + local
+
+    def get_num_osds(self, osd, is_up_only=False):
+        """Compute the number of active OSDs."""
+        result = zaza_model.run_on_unit(osd, 'ceph osd stat --format=json')
+        result = json.loads(result['Stdout'])
+        if is_up_only:
+            return int(result['num_up_osds'])
+        else:
+            return int(result['num_osds'])
+
+    def get_osd_devices_on_unit(self, unit_name):
+        """Get information for osd devices present on a particular unit.
+
+        :param unit_name: Unit name to be queried for osd device info.
+        :type unit_name: str
+        """
+        osd_devices = json.loads(
+            zaza_model.run_on_unit(
+                unit_name, 'ceph-volume lvm list --format=json'
+            ).get('Stdout', '')
+        )
+
+        return osd_devices
+
+    def remove_disk_from_osd_unit(self, unit, osd_id, is_purge=False):
+        """Remove osd device with provided osd_id from unit.
+
+        :param unit: Unit name where the osd device is to be removed from.
+        :type unit: str
+
+        :param osd_id: osd-id for the osd device to be removed.
+        
:type osd_id: str
+
+        :param is_purge: whether to purge the osd device
+        :type is_purge: bool
+        """
+        action_obj = zaza_model.run_action(
+            unit_name=unit,
+            action_name='remove-disk',
+            action_params={
+                'osd-ids': osd_id,
+                'timeout': 10,
+                'format': 'json',
+                'purge': is_purge
+            }
+        )
+        zaza_utils.assertActionRanOK(action_obj)
+        results = json.loads(action_obj.data['results']['message'])
+        results = results[next(iter(results))]
+        self.assertEqual(results['osd-ids'], osd_id)
+        zaza_model.run_on_unit(unit, 'partprobe')
+
+    def remove_one_osd(self, unit, block_devs):
+        """Remove one device from osd unit.
+
+        :param unit: Unit name where the osd device is to be removed from.
+        :type unit: str
+        :param block_devs: list of block devices on the specified unit
+        :type block_devs: list[str]
+        """
+        # Should have more than 1 OSD to take one out and test.
+        self.assertGreater(len(block_devs), 1)
+
+        # Get complete device details for an OSD.
+        key = list(block_devs)[-1]
+        device = {
+            'osd-id': key if key.startswith('osd.') else 'osd.' + key,
+            'block-device': block_devs[key][0]['devices'][0]
+        }
+
+        self.remove_disk_from_osd_unit(unit, device['osd-id'], is_purge=True)
+        return device
+
+    def test_cache_device(self):
+        """Test replacing a disk in use."""
+        logging.info('Running add-disk action with a caching device')
+        mon = next(iter(zaza_model.get_units('ceph-mon'))).entity_id
+        osds = [x.entity_id for x in zaza_model.get_units('ceph-osd')]
+        osd_info = dict()
+
+        # Remove one of the two disks.
+        logging.info('Removing single disk from each OSD')
+        for unit in osds:
+            block_devs = self.get_osd_devices_on_unit(unit)
+            if len(block_devs) < 2:
+                continue
+            device_info = self.remove_one_osd(unit, block_devs)
+            block_dev = device_info['block-device']
+            logging.info("Removing device %s from unit %s" % (block_dev, unit))
+            osd_info[unit] = device_info
+        if not osd_info:
+            raise unittest.SkipTest(
+                'Skipping OSD replacement Test, no spare devices added')
+
+        logging.debug('Removed OSD Info: {}'.format(osd_info))
+        zaza_model.wait_for_application_states()
+
+        logging.info('Recycling previously removed disks')
+        for unit, device_info in osd_info.items():
+            osd_id = device_info['osd-id']
+            block_dev = device_info['block-device']
+            logging.info("Found device %s on unit %s" % (block_dev, unit))
+            self.assertNotEqual(block_dev, None)
+            action_obj = zaza_model.run_action(
+                unit_name=unit,
+                action_name='add-disk',
+                action_params={'osd-devices': block_dev,
+                               'osd-ids': osd_id,
+                               'partition-size': 5}
+            )
+            zaza_utils.assertActionRanOK(action_obj)
+        zaza_model.wait_for_application_states()
+
+        logging.info('Removing previously added OSDs')
+        for unit, device_info in osd_info.items():
+            osd_id = device_info['osd-id']
+            block_dev = device_info['block-device']
+            logging.info(
+                "Removing block device %s from unit %s" %
+                (block_dev, unit)
+            )
+            self.remove_disk_from_osd_unit(unit, osd_id, is_purge=False)
+        zaza_model.wait_for_application_states()
+
+        logging.info('Finally adding back OSDs')
+        for unit, device_info in osd_info.items():
+            block_dev = device_info['block-device']
+            action_obj = zaza_model.run_action(
+                unit_name=unit,
+                action_name='add-disk',
+                action_params={'osd-devices': block_dev,
+                               'partition-size': 5}
+            )
+            zaza_utils.assertActionRanOK(action_obj)
+        zaza_model.wait_for_application_states()
+
+        for attempt in tenacity.Retrying(
+            wait=tenacity.wait_exponential(multiplier=2, max=32),
+            reraise=True, stop=tenacity.stop_after_attempt(10),
+            
retry=tenacity.retry_if_exception_type(AssertionError)
+        ):
+            with attempt:
+                self.assertEqual(
+                    len(osds) * 2, self.get_num_osds(mon, is_up_only=True)
+                )
+
+
+class CephRGWTest(test_utils.BaseCharmTest):
+    """Ceph RADOS Gateway Daemons Test Class.
+
+    This test set is not idempotent, because we don't support scale down from
+    multisite to singlesite (yet). Tests can be performed independently.
+    However, if test_100 has completed migration, retriggering the test set
+    would cause a time-out in test_003.
+    """
+
+    # String Resources
+    primary_rgw_app = 'ceph-radosgw'
+    primary_rgw_unit = 'ceph-radosgw/0'
+    secondary_rgw_app = 'secondary-ceph-radosgw'
+    secondary_rgw_unit = 'secondary-ceph-radosgw/0'
+
+    @classmethod
+    def setUpClass(cls):
+        """Run class setup for running ceph RGW tests."""
+        super(CephRGWTest, cls).setUpClass(application_name='ceph-radosgw')
+
+    @property
+    def expected_apps(self):
+        """Determine application names for ceph-radosgw apps."""
+        _apps = [
+            self.primary_rgw_app
+        ]
+        try:
+            zaza_model.get_application(self.secondary_rgw_app)
+            _apps.append(self.secondary_rgw_app)
+        except KeyError:
+            pass
+        return _apps
+
+    @property
+    def multisite(self):
+        """Determine whether deployment is multi-site."""
+        try:
+            zaza_model.get_application(self.secondary_rgw_app)
+            return True
+        except KeyError:
+            return False
+
+    def get_rgwadmin_cmd_skeleton(self, unit_name):
+        """
+        Get a radosgw-admin command skeleton with the rgw.<hostname> key populated.
+
+        :param unit_name: Unit on which the complete command would be run.
+        :type unit_name: str
+        :returns: hostname-filled basic command skeleton
+        :rtype: str
+        """
+        app_name = unit_name.split('/')[0]
+        juju_units = zaza_model.get_units(app_name)
+        unit_hostnames = generic_utils.get_unit_hostnames(juju_units)
+        hostname = unit_hostnames[unit_name]
+        return 'radosgw-admin --id=rgw.{} '.format(hostname)
+
+    def purge_bucket(self, application, bucket_name):
+        """Remove a bucket and all its objects.
+
+        :param application: RGW application name
+        :type application: str
+        :param bucket_name: Name for RGW bucket to be deleted
+        :type bucket_name: str
+        """
+        juju_units = zaza_model.get_units(application)
+        unit_hostnames = generic_utils.get_unit_hostnames(juju_units)
+        for unit_name, hostname in unit_hostnames.items():
+            key_name = "rgw.{}".format(hostname)
+            cmd = 'radosgw-admin --id={} bucket rm --bucket={}' \
+                  ' --purge-objects'.format(key_name, bucket_name)
+            zaza_model.run_on_unit(unit_name, cmd)
+
+    def wait_for_status(self, application,
+                        is_primary=False, sync_expected=True):
+        """Wait for required RGW endpoint to finish sync for data and metadata. 
+ + :param application: RGW application which has to be waited for + :type application: str + :param is_primary: whether RGW application is primary or secondary + :type is_primary: boolean + :param sync_expected: whether sync details should be expected in status + :type sync_expected: boolean + """ + juju_units = zaza_model.get_units(application) + unit_hostnames = generic_utils.get_unit_hostnames(juju_units) + data_check = 'data is caught up with source' + meta_primary = 'metadata sync no sync (zone is master)' + meta_secondary = 'metadata is caught up with master' + meta_check = meta_primary if is_primary else meta_secondary + + for attempt in tenacity.Retrying( + wait=tenacity.wait_exponential(multiplier=10, max=300), + reraise=True, stop=tenacity.stop_after_attempt(12), + retry=tenacity.retry_if_exception_type(AssertionError) + ): + with attempt: + for unit_name, hostname in unit_hostnames.items(): + key_name = "rgw.{}".format(hostname) + cmd = 'radosgw-admin --id={} sync status'.format(key_name) + stdout = zaza_model.run_on_unit( + unit_name, cmd + ).get('Stdout', '') + if sync_expected: + # Both data and meta sync. + self.assertIn(data_check, stdout) + self.assertIn(meta_check, stdout) + else: + # ExpectPrimary's Meta Status and no Data sync status + self.assertIn(meta_primary, stdout) + self.assertNotIn(data_check, stdout) + + def fetch_rgw_object(self, target_client, container_name, object_name): + """Fetch RGW object content. + + :param target_client: boto3 client object configured for an endpoint. + :type target_client: str + :param container_name: RGW bucket name for desired object. + :type container_name: str + :param object_name: Object name for desired object. + :type object_name: str + """ + for attempt in tenacity.Retrying( + wait=tenacity.wait_exponential(multiplier=1, max=60), + reraise=True, stop=tenacity.stop_after_attempt(12) + ): + with attempt: + return target_client.Object( + container_name, object_name + ).get()['Body'].read().decode('UTF-8') + + def promote_rgw_to_primary(self, app_name: str): + """Promote provided app to Primary and update period at new secondary. + + :param app_name: Secondary site rgw Application to be promoted. + :type app_name: str + """ + if app_name is self.primary_rgw_app: + new_secondary = self.secondary_rgw_unit + else: + new_secondary = self.primary_rgw_unit + + # Promote to Primary + zaza_model.run_action_on_leader( + app_name, + 'promote', + action_params={}, + ) + + # Period Update Commit new secondary. + cmd = self.get_rgwadmin_cmd_skeleton(new_secondary) + zaza_model.run_on_unit( + new_secondary, cmd + 'period update --commit' + ) + + def get_client_keys(self, rgw_app_name=None): + """Create access_key and secret_key for boto3 client. + + :param rgw_app_name: RGW application for which keys are required. + :type rgw_app_name: str + """ + unit_name = self.primary_rgw_unit + if rgw_app_name is not None: + unit_name = rgw_app_name + '/0' + user_name = 'botoclient' + cmd = self.get_rgwadmin_cmd_skeleton(unit_name) + users = json.loads(zaza_model.run_on_unit( + unit_name, cmd + 'user list' + ).get('Stdout', '')) + # Fetch boto3 user keys if user exists. + if user_name in users: + output = json.loads(zaza_model.run_on_unit( + unit_name, cmd + 'user info --uid={}'.format(user_name) + ).get('Stdout', '')) + keys = output['keys'][0] + return keys['access_key'], keys['secret_key'] + # Create boto3 user if it does not exist. 
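+        # Note: 'radosgw-admin user create' prints the new user's JSON
+        # document, including a generated access/secret key pair in its
+        # 'keys' list, which the parsing below relies on.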
+ create_cmd = cmd + 'user create --uid={} --display-name={}'.format( + user_name, user_name + ) + output = json.loads( + zaza_model.run_on_unit(unit_name, create_cmd).get('Stdout', '') + ) + keys = output['keys'][0] + return keys['access_key'], keys['secret_key'] + + @tenacity.retry( + retry=tenacity.retry_if_result(lambda ret: ret is None), + wait=tenacity.wait_fixed(10), + stop=tenacity.stop_after_attempt(5) + ) + def get_rgw_endpoint(self, unit_name: str): + """Fetch Application endpoint for RGW unit. + + :param unit_name: Unit name for which RGW endpoint is required. + :type unit_name: str + """ + # Get address "public" network binding. + unit_address = zaza_model.run_on_unit( + unit_name, "network-get public --bind-address" + ).get('Stdout', '').strip() + + logging.info("Unit: {}, Endpoint: {}".format(unit_name, unit_address)) + if unit_address is None: + return None + # Evaluate port + try: + zaza_model.get_application("vault") + return "https://{}:443".format(unit_address) + except KeyError: + return "http://{}:80".format(unit_address) + + def configure_rgw_apps_for_multisite(self): + """Configure Multisite values on primary and secondary apps.""" + realm = 'zaza_realm' + zonegroup = 'zaza_zg' + + zaza_model.set_application_config( + self.primary_rgw_app, + { + 'realm': realm, + 'zonegroup': zonegroup, + 'zone': 'zaza_primary' + } + ) + zaza_model.set_application_config( + self.secondary_rgw_app, + { + 'realm': realm, + 'zonegroup': zonegroup, + 'zone': 'zaza_secondary' + } + ) + + def configure_rgw_multisite_relation(self): + """Configure multi-site relation between primary and secondary apps.""" + multisite_relation = zaza_model.get_relation_id( + self.primary_rgw_app, self.secondary_rgw_app, + remote_interface_name='secondary' + ) + if multisite_relation is None: + logging.info('Configuring Multisite') + self.configure_rgw_apps_for_multisite() + zaza_model.add_relation( + self.primary_rgw_app, + self.primary_rgw_app + ":primary", + self.secondary_rgw_app + ":secondary" + ) + zaza_model.block_until_unit_wl_status( + self.secondary_rgw_unit, "waiting" + ) + + zaza_model.block_until_unit_wl_status( + self.secondary_rgw_unit, "active" + ) + zaza_model.block_until_unit_wl_status( + self.primary_rgw_unit, "active" + ) + zaza_model.wait_for_unit_idle(self.secondary_rgw_unit) + zaza_model.wait_for_unit_idle(self.primary_rgw_unit) + + def clean_rgw_multisite_config(self, app_name): + """Clear Multisite Juju config values to default. + + :param app_name: App for which config values are to be cleared + :type app_name: str + """ + unit_name = app_name + "/0" + zaza_model.set_application_config( + app_name, + { + 'realm': "", + 'zonegroup': "", + 'zone': "default" + } + ) + # Commit changes to period. 
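+        # A 'period update --commit' publishes the staged realm/zone
+        # changes so that all radosgw daemons observe the restored defaults.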
+        cmd = self.get_rgwadmin_cmd_skeleton(unit_name)
+        zaza_model.run_on_unit(
+            unit_name, cmd + 'period update --commit --rgw-zone=default '
+            '--rgw-zonegroup=default'
+        )
+
+    def enable_virtual_hosted_bucket(self):
+        """Enable virtual hosted bucket on primary rgw app."""
+        zaza_model.set_application_config(
+            self.primary_rgw_app,
+            {
+                'virtual-hosted-bucket-enabled': "true"
+            }
+        )
+
+    def set_os_public_hostname(self):
+        """Set os-public-hostname on primary rgw app."""
+        zaza_model.set_application_config(
+            self.primary_rgw_app,
+            {
+                'os-public-hostname': "rgw.example.com",
+            }
+        )
+
+    def clean_virtual_hosted_bucket(self):
+        """Clear virtual hosted bucket on primary app."""
+        zaza_model.set_application_config(
+            self.primary_rgw_app,
+            {
+                'os-public-hostname': "",
+                'virtual-hosted-bucket-enabled': "false"
+            }
+        )
+
+    def test_001_processes(self):
+        """Verify Ceph processes.
+
+        Verify that the expected service processes are running
+        on each ceph unit.
+        """
+        logging.info('Checking radosgw processes...')
+        # Process name and quantity of processes to expect on each unit
+        ceph_radosgw_processes = {
+            'radosgw': 1,
+        }
+
+        # Units with process names and PID quantities expected
+        expected_processes = {}
+        for app in self.expected_apps:
+            for unit in zaza_model.get_units(app):
+                expected_processes[unit.entity_id] = ceph_radosgw_processes
+
+        actual_pids = zaza_utils.get_unit_process_ids(expected_processes)
+        ret = zaza_utils.validate_unit_process_ids(expected_processes,
+                                                   actual_pids)
+        self.assertTrue(ret)
+
+    def test_002_services(self):
+        """Verify the ceph services.
+
+        Verify the expected services are running on the service units.
+        """
+        logging.info('Checking radosgw services...')
+        services = ['radosgw', 'haproxy']
+        for app in self.expected_apps:
+            for unit in zaza_model.get_units(app):
+                zaza_model.block_until_service_status(
+                    unit_name=unit.entity_id,
+                    services=services,
+                    target_status='running'
+                )
+
+    def test_003_object_storage_and_secondary_block(self):
+        """Verify Object Storage API and Secondary Migration block."""
+        container_name = 'zaza-container'
+        obj_data = 'Test data from Zaza'
+        obj_name = 'prefile'
+
+        logging.info('Checking Object Storage API for Primary Cluster')
+        # 1. Fetch Primary Endpoint Details
+        primary_endpoint = self.get_rgw_endpoint(self.primary_rgw_unit)
+        self.assertNotEqual(primary_endpoint, None)
+
+        # 2. Create RGW Client and perform IO
+        access_key, secret_key = self.get_client_keys()
+        primary_client = boto3.resource("s3",
+                                        verify=False,
+                                        endpoint_url=primary_endpoint,
+                                        aws_access_key_id=access_key,
+                                        aws_secret_access_key=secret_key)
+        primary_client.Bucket(container_name).create()
+        primary_object_one = primary_client.Object(
+            container_name,
+            obj_name
+        )
+        primary_object_one.put(Body=obj_data)
+
+        # 3. Fetch Object and Perform Data Integrity check.
+        content = primary_object_one.get()['Body'].read().decode('UTF-8')
+        self.assertEqual(content, obj_data)
+
+        # Skip multisite tests if not compatible with bundle.
+        if not self.multisite:
+            logging.info('Skipping Secondary Object gateway verification')
+            return
+
+        logging.info('Checking Object Storage API for Secondary Cluster')
+        # 1. Fetch Secondary Endpoint Details
+        secondary_endpoint = self.get_rgw_endpoint(self.secondary_rgw_unit)
+        self.assertNotEqual(secondary_endpoint, None)
+
+        # 2. 
Create RGW Client and perform IO + access_key, secret_key = self.get_client_keys(self.secondary_rgw_app) + secondary_client = boto3.resource("s3", + verify=False, + endpoint_url=secondary_endpoint, + aws_access_key_id=access_key, + aws_secret_access_key=secret_key) + secondary_client.Bucket(container_name).create() + secondary_object = secondary_client.Object( + container_name, + obj_name + ) + secondary_object.put(Body=obj_data) + + # 3. Fetch Object and Perform Data Integrity check. + content = secondary_object.get()['Body'].read().decode('UTF-8') + self.assertEqual(content, obj_data) + + logging.info('Checking Secondary Migration Block') + # 1. Migrate to multisite + if zaza_model.get_relation_id( + self.primary_rgw_app, self.secondary_rgw_app, + remote_interface_name='secondary' + ) is not None: + logging.info('Skipping Test, Multisite relation already present.') + return + + logging.info('Configuring Multisite') + self.configure_rgw_apps_for_multisite() + zaza_model.add_relation( + self.primary_rgw_app, + self.primary_rgw_app + ":primary", + self.secondary_rgw_app + ":secondary" + ) + + # 2. Verify secondary fails migration due to existing Bucket. + assert_state = { + self.secondary_rgw_app: { + "workload-status": "blocked", + "workload-status-message-prefix": + "Non-Pristine RGW site can't be used as secondary" + } + } + zaza_model.wait_for_application_states(states=assert_state, + timeout=900) + + # 3. Perform Secondary Cleanup + logging.info('Perform cleanup at secondary') + self.clean_rgw_multisite_config(self.secondary_rgw_app) + zaza_model.remove_relation( + self.primary_rgw_app, + self.primary_rgw_app + ":primary", + self.secondary_rgw_app + ":secondary" + ) + + # Make secondary pristine. + self.purge_bucket(self.secondary_rgw_app, container_name) + + zaza_model.block_until_unit_wl_status(self.secondary_rgw_unit, + 'active') + + def test_004_multisite_directional_sync_policy(self): + """Verify Multisite Directional Sync Policy.""" + # Skip multisite tests if not compatible with bundle. + if not self.multisite: + logging.info('Skipping multisite sync policy verification') + return + + container_name = 'zaza-container' + primary_obj_name = 'primary-testfile' + primary_obj_data = 'Primary test data' + secondary_directional_obj_name = 'secondary-directional-testfile' + secondary_directional_obj_data = 'Secondary directional test data' + secondary_symmetrical_obj_name = 'secondary-symmetrical-testfile' + secondary_symmetrical_obj_data = 'Secondary symmetrical test data' + + logging.info('Verifying multisite directional sync policy') + + # Set default sync policy to "allowed", which allows buckets to sync, + # but the sync is disabled by default in the zone group. Also, set the + # secondary zone sync policy flow type policy to "directional". + zaza_model.set_application_config( + self.primary_rgw_app, + { + "sync-policy-state": "allowed", + } + ) + zaza_model.set_application_config( + self.secondary_rgw_app, + { + "sync-policy-flow-type": "directional", + } + ) + zaza_model.wait_for_unit_idle(self.secondary_rgw_unit) + zaza_model.wait_for_unit_idle(self.primary_rgw_unit) + + # Setup multisite relation. + self.configure_rgw_multisite_relation() + + logging.info('Waiting for Data and Metadata to Synchronize') + # NOTE: We only check the secondary zone, because the sync policy flow + # type is set to "directional" between the primary and secondary zones. + self.wait_for_status(self.secondary_rgw_app, is_primary=False) + + # Create bucket on primary RGW zone. 
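+        # While the zonegroup policy is only "allowed", bucket metadata still
+        # replicates between the zones, but object data stays local until a
+        # bucket-level policy is enabled ('enable-buckets-sync' below).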
+ logging.info('Creating bucket on primary zone') + primary_endpoint = self.get_rgw_endpoint(self.primary_rgw_unit) + self.assertNotEqual(primary_endpoint, None) + + access_key, secret_key = self.get_client_keys() + primary_client = boto3.resource("s3", + verify=False, + endpoint_url=primary_endpoint, + aws_access_key_id=access_key, + aws_secret_access_key=secret_key) + primary_client.Bucket(container_name).create() + + # Enable sync on the bucket. + logging.info('Enabling sync on the bucket from the primary zone') + zaza_model.run_action_on_leader( + self.primary_rgw_app, + 'enable-buckets-sync', + action_params={ + 'buckets': container_name, + }, + raise_on_failure=True, + ) + + # Check that sync cannot be enabled using secondary Juju RGW app. + with self.assertRaises(zaza_model.ActionFailed): + zaza_model.run_action_on_leader( + self.secondary_rgw_app, + 'enable-buckets-sync', + action_params={ + 'buckets': container_name, + }, + raise_on_failure=True, + ) + + logging.info('Waiting for Data and Metadata to Synchronize') + self.wait_for_status(self.secondary_rgw_app, is_primary=False) + + # Perform IO on primary zone bucket. + logging.info('Performing IO on primary zone bucket') + primary_object = primary_client.Object( + container_name, + primary_obj_name + ) + primary_object.put(Body=primary_obj_data) + + # Verify that the object is replicated to the secondary zone. + logging.info('Verifying that the object is replicated to the ' + 'secondary zone') + secondary_endpoint = self.get_rgw_endpoint(self.secondary_rgw_unit) + self.assertNotEqual(secondary_endpoint, None) + + secondary_client = boto3.resource("s3", + verify=False, + endpoint_url=secondary_endpoint, + aws_access_key_id=access_key, + aws_secret_access_key=secret_key) + secondary_data = self.fetch_rgw_object( + secondary_client, + container_name, + primary_obj_name + ) + self.assertEqual(secondary_data, primary_obj_data) + + # Write object to the secondary zone bucket, when the sync policy + # flow type is set to "directional" between the zones. + logging.info('Writing object to the secondary zone bucket, which ' + 'should not be replicated to the primary zone') + secondary_object = secondary_client.Object( + container_name, + secondary_directional_obj_name + ) + secondary_object.put(Body=secondary_directional_obj_data) + + # Verify that the object is not replicated to the primary zone. + logging.info('Verifying that the object is not replicated to the ' + 'primary zone') + with self.assertRaises(botocore.exceptions.ClientError): + self.fetch_rgw_object( + primary_client, + container_name, + secondary_directional_obj_name + ) + + logging.info('Setting sync policy flow to "symmetrical" on the ' + 'secondary RGW zone') + zaza_model.set_application_config( + self.secondary_rgw_app, + { + "sync-policy-flow-type": "symmetrical", + } + ) + zaza_model.wait_for_unit_idle(self.secondary_rgw_unit) + zaza_model.wait_for_unit_idle(self.primary_rgw_unit) + + # Write another object to the secondary zone bucket. + logging.info('Writing another object to the secondary zone bucket.') + secondary_object = secondary_client.Object( + container_name, + secondary_symmetrical_obj_name + ) + secondary_object.put(Body=secondary_symmetrical_obj_data) + + logging.info('Waiting for Data and Metadata to Synchronize') + # NOTE: This time, we check both the primary and secondary zones, + # because the sync policy flow type is set to "symmetrical" between + # the zones. 
+ self.wait_for_status(self.secondary_rgw_app, is_primary=False) + self.wait_for_status(self.primary_rgw_app, is_primary=True) + + # Verify that all objects are replicated to the primary zone. + logging.info('Verifying that all objects are replicated to the ' + 'primary zone (including older objects).') + test_cases = [ + { + 'obj_name': primary_obj_name, + 'obj_data': primary_obj_data, + }, + { + 'obj_name': secondary_directional_obj_name, + 'obj_data': secondary_directional_obj_data, + }, + { + 'obj_name': secondary_symmetrical_obj_name, + 'obj_data': secondary_symmetrical_obj_data, + }, + ] + for tc in test_cases: + logging.info('Verifying that object "{}" is replicated'.format( + tc['obj_name'])) + primary_data = self.fetch_rgw_object( + primary_client, + container_name, + tc['obj_name'] + ) + self.assertEqual(primary_data, tc['obj_data']) + + # Cleanup. + logging.info('Cleaning up buckets after test case') + self.purge_bucket(self.primary_rgw_app, container_name) + self.purge_bucket(self.secondary_rgw_app, container_name) + + logging.info('Waiting for Data and Metadata to Synchronize') + self.wait_for_status(self.secondary_rgw_app, is_primary=False) + self.wait_for_status(self.primary_rgw_app, is_primary=True) + + # Set multisite sync policy state to "enabled" on the primary RGW app. + # Paired with "symmetrical" sync policy flow on the secondary RGW app, + # this enables bidirectional sync between the zones (which is the + # default behaviour without multisite sync policies configured). + logging.info('Setting sync policy state to "enabled".') + zaza_model.set_application_config( + self.primary_rgw_app, + { + "sync-policy-state": "enabled", + } + ) + zaza_model.wait_for_unit_idle(self.primary_rgw_unit) + + def test_100_migration_and_multisite_failover(self): + """Perform multisite migration and verify failover.""" + container_name = 'zaza-container' + obj_data = 'Test data from Zaza' + # Skip multisite tests if not compatible with bundle. + if not self.multisite: + raise unittest.SkipTest('Skipping Migration Test') + + logging.info('Perform Pre-Migration IO') + # 1. Fetch Endpoint Details + primary_endpoint = self.get_rgw_endpoint(self.primary_rgw_unit) + self.assertNotEqual(primary_endpoint, None) + + # 2. Create primary client and add pre-migration object. + access_key, secret_key = self.get_client_keys() + primary_client = boto3.resource("s3", + verify=False, + endpoint_url=primary_endpoint, + aws_access_key_id=access_key, + aws_secret_access_key=secret_key) + primary_client.Bucket(container_name).create() + primary_client.Object( + container_name, + 'prefile' + ).put(Body=obj_data) + + # If Primary/Secondary relation does not exist, add it. + self.configure_rgw_multisite_relation() + + logging.info('Waiting for Data and Metadata to Synchronize') + self.wait_for_status(self.secondary_rgw_app, is_primary=False) + self.wait_for_status(self.primary_rgw_app, is_primary=True) + + logging.info('Performing post migration IO tests.') + # Add another object at primary + primary_client.Object( + container_name, + 'postfile' + ).put(Body=obj_data) + + # 1. Fetch Endpoint Details + secondary_endpoint = self.get_rgw_endpoint(self.secondary_rgw_unit) + self.assertNotEqual(secondary_endpoint, None) + + # 2. Create secondary client and fetch synchronised objects. + secondary_client = boto3.resource("s3", + verify=False, + endpoint_url=secondary_endpoint, + aws_access_key_id=access_key, + aws_secret_access_key=secret_key) + + # 3. 
Verify Data Integrity
+        # fetch_rgw_object has internal retry so waiting for sync beforehand
+        # is not required for post migration object sync.
+        pre_migration_data = self.fetch_rgw_object(
+            secondary_client, container_name, 'prefile'
+        )
+        post_migration_data = self.fetch_rgw_object(
+            secondary_client, container_name, 'postfile'
+        )
+
+        # 4. Verify Synchronisation works and objects are replicated
+        self.assertEqual(pre_migration_data, obj_data)
+        self.assertEqual(post_migration_data, obj_data)
+
+        logging.info('Checking multisite failover/failback')
+        # Failover Scenario, Promote Secondary-Ceph-RadosGW to Primary
+        self.promote_rgw_to_primary(self.secondary_rgw_app)
+
+        # Wait for Sites to be synchronised.
+        self.wait_for_status(self.primary_rgw_app, is_primary=False)
+        self.wait_for_status(self.secondary_rgw_app, is_primary=True)
+
+        # IO Test
+        container = 'failover-container'
+        test_data = 'Test data from Zaza on Secondary'
+        secondary_client.Bucket(container).create()
+        secondary_object = secondary_client.Object(container, 'testfile')
+        secondary_object.put(
+            Body=test_data
+        )
+        secondary_content = secondary_object.get()[
+            'Body'
+        ].read().decode('UTF-8')
+
+        # Wait for Sites to be synchronised.
+        self.wait_for_status(self.primary_rgw_app, is_primary=False)
+        self.wait_for_status(self.secondary_rgw_app, is_primary=True)
+
+        # Recovery scenario, reset ceph-rgw as primary.
+        self.promote_rgw_to_primary(self.primary_rgw_app)
+        self.wait_for_status(self.primary_rgw_app, is_primary=True)
+        self.wait_for_status(self.secondary_rgw_app, is_primary=False)
+
+        # Fetch synchronised copy of testfile from primary site.
+        primary_content = self.fetch_rgw_object(
+            primary_client, container, 'testfile'
+        )
+
+        # Verify Data Integrity.
+        self.assertEqual(secondary_content, primary_content)
+
+        # Scaledown and verify replication has stopped.
+        logging.info('Checking multisite scaledown')
+        zaza_model.remove_relation(
+            self.primary_rgw_app,
+            self.primary_rgw_app + ":primary",
+            self.secondary_rgw_app + ":secondary"
+        )
+
+        # wait for sync stop
+        self.wait_for_status(self.primary_rgw_app, sync_expected=False)
+        self.wait_for_status(self.secondary_rgw_app, sync_expected=False)
+
+        # Refresh client and verify objects are not replicating.
+        primary_client = boto3.resource("s3",
+                                        verify=False,
+                                        endpoint_url=primary_endpoint,
+                                        aws_access_key_id=access_key,
+                                        aws_secret_access_key=secret_key)
+        secondary_client = boto3.resource("s3",
+                                          verify=False,
+                                          endpoint_url=secondary_endpoint,
+                                          aws_access_key_id=access_key,
+                                          aws_secret_access_key=secret_key)
+
+        # IO Test
+        container = 'scaledown-container'
+        test_data = 'Scaledown Test data'
+        secondary_client.Bucket(container).create()
+        secondary_object = secondary_client.Object(container, 'scaledown')
+        secondary_object.put(
+            Body=test_data
+        )
+
+        # Since bucket is not replicated.
+        with self.assertRaises(botocore.exceptions.ClientError):
+            primary_content = self.fetch_rgw_object(
+                primary_client, container, 'scaledown'
+            )
+
+        # Cleanup of scaledown resources and synced resources. 
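+        # Replication is stopped, so each site now owns its buckets
+        # independently; purge them via the secondary site, which still
+        # holds copies of all three.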
+ self.purge_bucket(self.secondary_rgw_app, container) + self.purge_bucket(self.secondary_rgw_app, 'zaza-container') + self.purge_bucket(self.secondary_rgw_app, 'failover-container') + + def test_101_virtual_hosted_bucket(self): + """Test virtual hosted bucket.""" + # skip if quincy or older + current_release = zaza_openstack.get_os_release( + application='ceph-mon') + reef = zaza_openstack.get_os_release('jammy_bobcat') + if current_release < reef: + raise unittest.SkipTest( + 'Virtual hosted bucket not supported in quincy or older') + + primary_rgw_unit = zaza_model.get_unit_from_name(self.primary_rgw_unit) + if primary_rgw_unit.workload_status != "active": + logging.info('Skipping virtual hosted bucket test since ' + 'primary rgw unit is not in active state') + return + + logging.info('Testing virtual hosted bucket') + + # 0. Configure virtual hosted bucket + self.enable_virtual_hosted_bucket() + zaza_model.block_until_wl_status_info_starts_with( + self.primary_rgw_app, + 'os-public-hostname must have a value', + timeout=900 + ) + self.set_os_public_hostname() + zaza_model.block_until_all_units_idle(self.model_name) + container_name = 'zaza-bucket' + obj_data = 'Test content from Zaza' + obj_name = 'testfile' + + # 1. Fetch Primary Endpoint Details + primary_endpoint = self.get_rgw_endpoint(self.primary_rgw_unit) + self.assertNotEqual(primary_endpoint, None) + + # 2. Create RGW Client and perform IO + access_key, secret_key = self.get_client_keys() + primary_client = boto3.resource("s3", + verify=False, + endpoint_url=primary_endpoint, + aws_access_key_id=access_key, + aws_secret_access_key=secret_key) + # We may not have certs for the pub hostname yet, so retry a few times. + for attempt in tenacity.Retrying( + stop=tenacity.stop_after_attempt(10), + wait=tenacity.wait_fixed(4), + ): + with attempt: + primary_client.Bucket(container_name).create() + primary_object_one = primary_client.Object( + container_name, + obj_name + ) + primary_object_one.put(Body=obj_data) + primary_client.Bucket(container_name).Acl().put(ACL='public-read') + primary_client.Object(container_name, obj_name).Acl().put( + ACL='public-read' + ) + + # 3. Test if we can get content via virtual hosted bucket name + public_hostname = zaza_model.get_application_config( + self.primary_rgw_app + )["os-public-hostname"]["value"] + url = f"{primary_endpoint}/{obj_name}" + headers = {'host': f"{container_name}.{public_hostname}"} + f = requests.get(url, headers=headers, verify=False) + self.assertEqual(f.text, obj_data) + + # 4. 
Cleanup and de-configure virtual hosted bucket
+        self.clean_virtual_hosted_bucket()
+        zaza_model.block_until_all_units_idle(self.model_name)
+        self.purge_bucket(self.primary_rgw_app, container_name)
+
+
+class CephProxyTest(unittest.TestCase):
+    """Test ceph via proxy."""
+
+    @classmethod
+    def setUpClass(cls):
+        """Run class setup for running tests."""
+        super(CephProxyTest, cls).setUpClass()
+
+        test_config = lifecycle_utils.get_charm_config(fatal=False)
+        cls.target_deploy_status = test_config.get('target_deploy_status', {})
+
+    def test_ceph_health(self):
+        """Make sure ceph-proxy can communicate with ceph."""
+        logging.info('Wait for idle/ready status...')
+        zaza_model.wait_for_application_states(
+            states=self.target_deploy_status)
+
+        self.assertEqual(
+            zaza_model.run_on_leader("ceph-proxy", "sudo ceph health")["Code"],
+            "0"
+        )
+
+    def test_cinder_ceph_restrict_pool_setup(self):
+        """Make sure the cinder-ceph restricted pool was created successfully."""
+        try:
+            zaza_model.get_application('cinder-ceph')
+        except KeyError:
+            raise unittest.SkipTest("Skipping OpenStack dependent test")
+        logging.info('Wait for idle/ready status...')
+        zaza_model.wait_for_application_states(
+            states=self.target_deploy_status)
+
+        for attempt in tenacity.Retrying(
+            wait=tenacity.wait_exponential(multiplier=2, max=32),
+            reraise=True, stop=tenacity.stop_after_attempt(8),
+        ):
+            with attempt:
+                pools = zaza_ceph.get_ceph_pools('ceph-mon/0')
+                if 'cinder-ceph' not in pools:
+                    msg = ('cinder-ceph pool not found querying ceph-mon/0, '
+                           'got: {}'.format(pools))
+                    raise zaza_exceptions.CephPoolNotFound(msg)
+
+        # Checking for cinder-ceph specific permissions makes
+        # the test more robust when we add additional relations
+        # to ceph for other applications (such as glance and nova).
+        expected_permissions = [
+            "allow rwx pool=cinder-ceph",
+            "allow class-read object_prefix rbd_children",
+        ]
+        cmd = "sudo ceph auth get client.cinder-ceph"
+        result = zaza_model.run_on_unit('ceph-mon/0', cmd)
+        output = result.get('Stdout').strip()
+
+        for expected in expected_permissions:
+            if expected not in output:
+                msg = ('cinder-ceph pool restriction ({}) was not'
+                       ' configured correctly.'
+                       ' Found: {}'.format(expected, output))
+                raise zaza_exceptions.CephPoolNotConfigured(msg)
+
+
+class CephPrometheusTest(unittest.TestCase):
+    """Test the Ceph <-> Prometheus relation."""
+
+    def test_prometheus_metrics(self):
+        """Validate that Prometheus has Ceph metrics."""
+        try:
+            zaza_model.get_application(
+                'prometheus2')
+        except KeyError:
+            raise unittest.SkipTest('Prometheus not present, skipping test')
+        unit = zaza_model.get_unit_from_name(
+            zaza_model.get_lead_unit_name('prometheus2'))
+        prometheus_mon_count = _get_mon_count_from_prometheus(
+            zaza_model.get_unit_public_address(unit))
+        self.assertTrue(0 < int(prometheus_mon_count))
+
+
+class CephPoolConfig(Exception):
+    """Custom Exception for bad Ceph pool config."""
+
+    pass
+
+
+class CheckPoolTypes(unittest.TestCase):
+    """Test the ceph pools created for clients are of the expected type."""
+
+    def test_check_pool_types(self):
+        """Check type of pools created for clients."""
+        app_pools = [
+            ('glance', 'glance'),
+            ('nova-compute', 'nova'),
+            ('cinder-ceph', 'cinder-ceph')]
+        runtime_pool_details = zaza_ceph.get_ceph_pool_details()
+        for app, pool_name in app_pools:
+            try:
+                app_config = zaza_model.get_application_config(app)
+            except KeyError:
+                logging.info(
+                    'Skipping pool check of %s, application %s not present',
+                    pool_name,
+                    app)
+                continue
+            rel_id = zaza_model.get_relation_id(
+                app,
+                'ceph-mon',
+                remote_interface_name='client')
+            if not rel_id:
+                logging.info(
+                    'Skipping pool check of %s, ceph relation not present',
+                    app)
+                continue
+            juju_pool_config = app_config.get('pool-type')
+            if juju_pool_config:
+                expected_pool_type = juju_pool_config['value']
+            else:
+                # If the pool-type option is absent assume the default of
+                # replicated.
+                expected_pool_type = zaza_ceph.REPLICATED_POOL_TYPE
+            for pool_config in runtime_pool_details:
+                if pool_config['pool_name'] == pool_name:
+                    logging.info('Checking {} is {}'.format(
+                        pool_name,
+                        expected_pool_type))
+                    expected_pool_code = -1
+                    if expected_pool_type == zaza_ceph.REPLICATED_POOL_TYPE:
+                        expected_pool_code = zaza_ceph.REPLICATED_POOL_CODE
+                    elif expected_pool_type == zaza_ceph.ERASURE_POOL_TYPE:
+                        expected_pool_code = zaza_ceph.ERASURE_POOL_CODE
+                    self.assertEqual(
+                        pool_config['type'],
+                        expected_pool_code)
+                    break
+            else:
+                raise CephPoolConfig(
+                    "Failed to find config for {}".format(pool_name))
+
+
+# NOTE: We might query before Prometheus has fetched data, so retry.
+@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1,
+                                               min=5, max=10),
+                reraise=True)
+def _get_mon_count_from_prometheus(prometheus_ip):
+    url = ('http://{}:9090/api/v1/query?query='
+           'count(ceph_mon_metadata)'.format(prometheus_ip))
+    client = requests.session()
+    response = client.get(url)
+    logging.debug("Prometheus response: {}".format(response.json()))
+    return response.json()['data']['result'][0]['value'][1]
+
+
+class BlueStoreCompressionCharmOperation(test_utils.BaseCharmTest):
+    """Test charm handling of bluestore compression configuration options."""
+
+    @classmethod
+    def setUpClass(cls):
+        """Perform class one time initialization."""
+        super(BlueStoreCompressionCharmOperation, cls).setUpClass()
+        release_application = 'keystone'
+        try:
+            zaza_model.get_application(release_application)
+        except KeyError:
+            release_application = 'ceph-mon'
+        cls.current_release = zaza_openstack.get_os_release(
+            application=release_application)
+        cls.bionic_rocky = zaza_openstack.get_os_release('bionic_rocky')
+
+    def setUp(self):
+        """Perform common per test initialization steps."""
+        super(BlueStoreCompressionCharmOperation, self).setUp()
+
+        # determine if the tests should be run or not
+        logging.debug('os_release: {} >= {} = {}'
+                      .format(self.current_release,
+                              self.bionic_rocky,
+                              self.current_release >= self.bionic_rocky))
+        self.mimic_or_newer = self.current_release >= self.bionic_rocky
+
+    def _assert_pools_properties(self, pools, pools_detail,
+                                 expected_properties, log_func=logging.info):
+        """Check properties on a set of pools.
+
+        :param pools: List of pool names to check.
+        :type pools: List[str]
+        :param pools_detail: List of dictionaries with pool detail
+        :type pools_detail: List[Dict[str,any]]
+        :param expected_properties: Properties to check and their expected
+                                    values.
+        :type expected_properties: Dict[str,any]
+        :returns: Nothing
+        :raises: AssertionError
+        """
+        for pool in pools:
+            for pd in pools_detail:
+                if pd['pool_name'] == pool:
+                    if 'options' in expected_properties:
+                        for k, v in expected_properties['options'].items():
+                            self.assertEqual(pd['options'][k], v)
+                            log_func("['options']['{}'] == {}".format(k, v))
+                    for k, v in expected_properties.items():
+                        if k == 'options':
+                            continue
+                        self.assertEqual(pd[k], v)
+                        log_func("{} == {}".format(k, v))
+
+    def test_configure_compression(self):
+        """Enable compression and validate properties flush through to pool."""
+        if not self.mimic_or_newer:
+            logging.info('Skipping test, Mimic or newer required.')
+            return
+        if self.application_name == 'ceph-osd':
+            # The ceph-osd charm itself does not request pools, nor do its
+            # BlueStore Compression configuration options affect pool
+            # properties.
+            logging.info('test does not apply to ceph-osd charm.')
+            return
+        elif self.application_name == 'ceph-radosgw':
+            # The Ceph RadosGW creates many lightweight pools to keep track
+            # of metadata; we only compress the pool containing actual data.
+            app_pools = ['.rgw.buckets.data']
+        else:
+            # Retrieve which pools the charm under test has requested,
+            # skipping metadata pools as they are deliberately not compressed.
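+            # (The pool names are read from the broker request the charm
+            # sent to ceph-mon, so this adapts to whatever pools the charm
+            # under test asked for.)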
+            app_pools = [
+                pool
+                for pool in zaza_ceph.get_pools_from_broker_req(
+                    self.application_name, model_name=self.model_name)
+                if 'metadata' not in pool
+            ]
+
+        ceph_pools_detail = zaza_ceph.get_ceph_pool_details(
+            model_name=self.model_name)
+
+        logging.debug('BEFORE: {}'.format(ceph_pools_detail))
+        try:
+            logging.info('Checking Ceph pool compression_mode prior to change')
+            self._assert_pools_properties(
+                app_pools, ceph_pools_detail,
+                {'options': {'compression_mode': 'none'}})
+        except KeyError:
+            logging.info('property does not exist on pool, which is OK.')
+        logging.info('Changing "bluestore-compression-mode" to "force" on {}'
+                     .format(self.application_name))
+        with self.config_change(
+                {'bluestore-compression-mode': 'none'},
+                {'bluestore-compression-mode': 'force'}):
+            logging.info('Checking Ceph pool compression_mode after the '
+                         'change')
+            self._check_pool_compression_mode(app_pools, 'force')
+
+        logging.info('Checking Ceph pool compression_mode after '
+                     'restoring config to previous value')
+        self._check_pool_compression_mode(app_pools, 'none')
+
+    @tenacity.retry(
+        wait=tenacity.wait_exponential(multiplier=1, min=2, max=10),
+        stop=tenacity.stop_after_attempt(10),
+        reraise=True,
+        retry=tenacity.retry_if_exception_type(AssertionError)
+    )
+    def _check_pool_compression_mode(self, app_pools, mode):
+        ceph_pools_detail = zaza_ceph.get_ceph_pool_details(
+            model_name=self.model_name)
+        logging.debug('ceph_pools_details: %s', ceph_pools_detail)
+        logging.debug(juju_utils.get_relation_from_unit(
+            'ceph-mon', self.application_name, None,
+            model_name=self.model_name))
+        self._assert_pools_properties(
+            app_pools, ceph_pools_detail,
+            {'options': {'compression_mode': mode}})
+
+    def test_invalid_compression_configuration(self):
+        """Set invalid configuration and validate charm response."""
+        if not self.mimic_or_newer:
+            logging.info('Skipping test, Mimic or newer required.')
+            return
+        stored_target_deploy_status = self.test_config.get(
+            'target_deploy_status', {})
+        new_target_deploy_status = stored_target_deploy_status.copy()
+        new_target_deploy_status[self.application_name] = {
+            'workload-status': 'blocked',
+            'workload-status-message': 'Invalid configuration',
+        }
+        if 'target_deploy_status' in self.test_config:
+            self.test_config['target_deploy_status'].update(
+                new_target_deploy_status)
+        else:
+            self.test_config['target_deploy_status'] = new_target_deploy_status
+
+        with self.config_change(
+                {'bluestore-compression-mode': 'none'},
+                {'bluestore-compression-mode': 'PEBCAK'}):
+            logging.info('Charm went into blocked state as expected, '
+                         'restoring configuration')
+            self.test_config[
+                'target_deploy_status'] = stored_target_deploy_status
+
+
+class CephAuthTest(unittest.TestCase):
+    """Ceph auth tests (user creation and deletion)."""
+
+    def test_ceph_auth(self):
+        """Test creating and deleting a user."""
+        logging.info('Creating user and exported keyring...')
+        action_obj = zaza_model.run_action_on_leader(
+            'ceph-mon',
+            'get-or-create-user',
+            action_params={'username': 'sandbox',
+                           'mon-caps': 'allow r',
+                           'osd-caps': 'allow r'}
+        )
+        logging.debug('Result of action: {}'.format(action_obj))
+        create_results = json.loads(action_obj.data['results']['message'])
+
+        logging.info('Getting existing user and exported keyring...')
+        action_obj = zaza_model.run_action_on_leader(
+            'ceph-mon',
+            'get-or-create-user',
+            action_params={'username': 'sandbox'}
+        )
+        logging.debug('Result of action: {}'.format(action_obj))
+        get_results = json.loads(action_obj.data['results']['message'])
+
+        self.assertEqual(get_results, create_results)
+
+        logging.info('Deleting existing user...')
+        action_obj = zaza_model.run_action_on_leader(
+            'ceph-mon',
+            'delete-user',
+            action_params={'username': 'sandbox'}
+        )
+        logging.debug('Result of action: {}'.format(action_obj))
+
+        logging.info('Verify user is deleted...')
+        result = zaza_model.run_on_leader(
+            'ceph-mon',
+            'sudo ceph auth get client.sandbox',
+        )
+        logging.debug('ceph auth get: {}'.format(result))
+        self.assertIn("failed to find client.sandbox", result.get('Stderr'))
+
+
+class CephMonActionsTest(test_utils.BaseCharmTest):
+    """Test miscellaneous actions of the ceph-mon charm."""
+
+    @classmethod
+    def setUpClass(cls):
+        """Run class setup for running ceph-mon actions."""
+        super(CephMonActionsTest, cls).setUpClass()
+        # Allow mons to delete pools.
+        zaza_model.run_on_unit(
+            'ceph-mon/0',
+            "ceph tell mon.\\* injectargs '--mon-allow-pool-delete=true'"
+        )
+
+    def _get_osd_weight(self, osd, unit):
+        """Fetch the CRUSH weight of an OSD."""
+        cmd = 'sudo ceph osd crush tree --format=json'
+        result = zaza_model.run_on_unit(unit, cmd)
+        self.assertEqual(int(result.get('Code')), 0)
+
+        tree = json.loads(result.get('Stdout'))
+        for node in tree['nodes']:
+            if node.get('name') == osd:
+                return node['crush_weight']
+
+    def test_reweight_osd(self):
+        """Test the change-osd-weight action."""
+        unit = 'ceph-mon/0'
+        osd = 0
+        osd_str = 'osd.' + str(osd)
+        weight = 700
+        prev_weight = self._get_osd_weight(osd_str, unit)
+        try:
+            action_obj = zaza_model.run_action(
+                unit_name=unit,
+                action_name='change-osd-weight',
+                action_params={'osd': osd, 'weight': weight}
+            )
+            zaza_utils.assertActionRanOK(action_obj)
+            self.assertEqual(weight, self._get_osd_weight(osd_str, unit))
+        finally:
+            # Reset the weight.
+            zaza_model.run_action(
+                unit_name=unit,
+                action_name='change-osd-weight',
+                action_params={'osd': osd, 'weight': prev_weight}
+            )
+
+    def test_copy_pool(self):
+        """Test the copy-pool (and list-pools) action."""
+        unit = 'ceph-mon/0'
+        logging.debug('Creating secondary test pools')
+        cmd = 'sudo ceph osd pool create test2 32'
+        cmd2 = 'sudo ceph osd pool create test3 32'
+        try:
+            result = zaza_model.run_on_unit(unit, cmd)
+            self.assertEqual(int(result.get('Code')), 0)
+            result = zaza_model.run_on_unit(unit, cmd2)
+            self.assertEqual(int(result.get('Code')), 0)
+
+            action_obj = zaza_model.run_action(
+                unit_name=unit,
+                action_name='list-pools',
+                action_params={}
+            )
+            zaza_utils.assertActionRanOK(action_obj)
+            self.assertIn('test2', action_obj.data['results']['message'])
+            self.assertIn('test3', action_obj.data['results']['message'])
+
+            logging.debug('Copying test pool')
+            action_obj = zaza_model.run_action(
+                unit_name=unit,
+                action_name='copy-pool',
+                action_params={'source': 'test2', 'target': 'test3'}
+            )
+            zaza_utils.assertActionRanOK(action_obj)
+        finally:
+            # Clean up our mess.
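+            # Ceph deliberately makes pool deletion awkward: the pool name
+            # must be given twice together with --yes-i-really-really-mean-it,
+            # and mon_allow_pool_delete must be enabled (done in setUpClass).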
+            zaza_model.run_on_unit(
+                unit,
+                ('sudo ceph osd pool delete test2 test2 '
+                 '--yes-i-really-really-mean-it')
+            )
+            zaza_model.run_on_unit(
+                unit,
+                ('sudo ceph osd pool delete test3 test3 '
+                 '--yes-i-really-really-mean-it')
+            )
+
+
+class CephMonJujuPersistent(test_utils.BaseCharmTest):
+    """Check that juju persistent config is working."""
+
+    def test_persistent_config(self):
+        """Check that the persistent config updates when the config changes."""
+        set_default = {
+            'loglevel': 1,
+        }
+        set_alternate = {
+            'loglevel': 2,
+        }
+        unit = 'ceph-mon/0'
+        cmd = (
+            'cat /var/lib/juju/agents'
+            '/unit-ceph-mon-0/charm/.juju-persistent-config'
+        )
+        with self.config_change(
+            default_config=set_default,
+            alternate_config=set_alternate,
+            application_name='ceph-mon',
+        ):
+            result = zaza_model.run_on_unit(
+                unit,
+                cmd,
+            )
+            data = json.loads(result['Stdout'])
+            assert data['loglevel'] == 2
+
+
+class CephMonKeyRotationTests(test_utils.BaseCharmTest):
+    """Tests for the rotate-key action."""
+
+    def setUp(self):
+        """Initialize key rotation test class."""
+        super(CephMonKeyRotationTests, self).setUp()
+        try:
+            # Workaround for ubuntu units that don't play nicely with zaza.
+            zaza_model.get_application('ubuntu')
+            self.app_states = {
+                'ubuntu': {
+                    'workload-status-message': ''
+                }
+            }
+        except KeyError:
+            self.app_states = None
+
+    def _get_all_keys(self, unit, entity_filter):
+        cmd = 'sudo ceph auth ls'
+        result = zaza_model.run_on_unit(unit, cmd)
+        # Don't use json formatting, as it's buggy upstream.
+        data = result['Stdout'].split()
+        ret = set()
+
+        for ix, line in enumerate(data):
+            # Structure:
+            #   $ENTITY
+            #   key:
+            #   <key contents>
+            # That's why we look one position behind for the entity name
+            # and one position ahead for the key contents.
+            if 'key:' in line and entity_filter(data[ix - 1]):
+                ret.add((data[ix - 1], data[ix + 1]))
+        return ret
+
+    def _check_key_rotation(self, entity, unit):
+        def entity_filter(name):
+            return name.startswith(entity)
+
+        old_keys = self._get_all_keys(unit, entity_filter)
+        action_obj = zaza_model.run_action(
+            unit_name=unit,
+            action_name='rotate-key',
+            action_params={'entity': entity}
+        )
+        zaza_utils.assertActionRanOK(action_obj)
+        # NOTE(lmlg): There's a nasty race going on here. Essentially,
+        # since this action involves 2 different applications, what
+        # happens is as follows:
+        #          (1)            (2)            (3)             (4)
+        # ceph-mon rotates key | (idle) | remote-unit rotates key | (idle)
+        # Between (2) and (3), there's a window where all units are
+        # idle, _but_ the key hasn't been rotated in the other unit.
+        # As such, we retry a few times instead of using the
+        # `wait_for_application_states` interface.
+
+        for attempt in tenacity.Retrying(
+            wait=tenacity.wait_exponential(multiplier=2, max=32),
+            reraise=True, stop=tenacity.stop_after_attempt(20),
+            retry=tenacity.retry_if_exception_type(AssertionError)
+        ):
+            with attempt:
+                new_keys = self._get_all_keys(unit, entity_filter)
+                self.assertNotEqual(old_keys, new_keys)
+
+        diff = new_keys - old_keys
+        self.assertEqual(len(diff), 1)
+        first = next(iter(diff))
+        # Check that the entity matches. The 'entity_filter'
+        # callable will return a true-like value if it
+        # matches the type of entity we're after (e.g. 'mgr')
+        self.assertTrue(entity_filter(first[0]))
+
+    def _get_rgw_client(self, unit):
+        ret = self._get_all_keys(unit, lambda x: x.startswith('client.rgw'))
+        if not ret:
+            return None
+        return next(iter(ret))[0]
+
+    def _get_fs_client(self, unit):
+        def _filter_fs(name):
+            return (name.startswith('mds.') and
+                    name not in ('mds.ceph-fs', 'mds.None'))
+
+        ret = self._get_all_keys(unit, _filter_fs)
+        if not ret:
+            return None
+        return next(iter(ret))[0]
+
+    def test_key_rotate(self):
+        """Test that rotating the keys actually changes them."""
+        unit = 'ceph-mon/0'
+        self._check_key_rotation('osd.0', unit)
+
+        try:
+            zaza_model.get_application('ceph-radosgw')
+            rgw_client = self._get_rgw_client(unit)
+            if rgw_client:
+                self._check_key_rotation(rgw_client, unit)
+            else:
+                logging.info('ceph-radosgw units present, but no RGW service')
+        except KeyError:
+            pass
+
+        try:
+            zaza_model.get_application('ceph-fs')
+            fs_svc = self._get_fs_client(unit)
+            if fs_svc is not None:
+                # Only wait for ceph-fs, as this model includes 'ubuntu'
+                # units, and those don't play nice with zaza (they don't
+                # set the workload-status-message correctly).
+                self._check_key_rotation(fs_svc, unit)
+            else:
+                logging.info('ceph-fs units present, but no MDS service')
+        except KeyError:
+            pass
diff --git a/ceph-mon/tests/tests.yaml b/ceph-mon/tests/tests.yaml
new file mode 100644
index 00000000..6925db91
--- /dev/null
+++ b/ceph-mon/tests/tests.yaml
@@ -0,0 +1,20 @@
+charm_name: ceph-mon
+
+gate_bundles:
+  - jammy-caracal
+
+smoke_bundles:
+  - jammy-caracal
+
+dev_bundles:
+  - jammy-caracal
+
+tests:
+  - zaza.charm_tests.lifecycle.refresh.CharmRefreshAll
+  - tests.target.CephLowLevelTest
+  - tests.target.CephTest
+  - tests.target.CephPrometheusTest
+  - tests.target.CephAuthTest
+  - tests.target.CephMonActionsTest
+  - tests.target.CephMonKeyRotationTests
+  - tests.target.CephMonJujuPersistent
diff --git a/ceph-mon/tox.ini b/ceph-mon/tox.ini
new file mode 100644
index 00000000..82202a8f
--- /dev/null
+++ b/ceph-mon/tox.ini
@@ -0,0 +1,154 @@
+# Classic charm (with zaza): ./tox.ini
+# This file is managed centrally by release-tools and should not be modified
+# within individual charm repos. See the 'global' dir contents for available
+# choices of tox.ini for OpenStack Charms:
+# https://github.com/openstack-charmers/release-tools
+#
+# TODO: Distill the func test requirements from the lint/unit test
+# requirements. They are intertwined. Also, Zaza itself should specify
+# all of its own requirements and if it doesn't, fix it there.
+[tox]
+envlist = pep8,py3
+skipsdist = True
+# NOTE: Avoid build/test env pollution by not enabling sitepackages.
+sitepackages = False
+# NOTE: Avoid false positives by not skipping missing interpreters.
+skip_missing_interpreters = False
+
+# NOTE: https://wiki.canonical.com/engineering/OpenStack/InstallLatestToxOnOsci
+minversion = 3.18.0
+
+[testenv]
+setenv = VIRTUAL_ENV={envdir}
+         PYTHONHASHSEED=0
+         TEST_JUJU3=1
+         CHARM_DIR={envdir}
+         CHARMS_ARTIFACT_DIR={toxinidir}/..
+install_command = + pip install {opts} {packages} +commands = stestr run --slowest {posargs} +allowlist_externals = + charmcraft + {toxinidir}/rename.sh +passenv = + HOME + TERM + CS_* + OS_* + TEST_* +deps = -r{toxinidir}/test-requirements.txt + +[testenv:build] +basepython = python3 +deps = -r{toxinidir}/build-requirements.txt +commands = + charmcraft clean + charmcraft -v pack + {toxinidir}/rename.sh + +[testenv:py36] +basepython = python3.6 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py37] +basepython = python3.7 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py38] +basepython = python3.8 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py39] +basepython = python3.9 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py310] +basepython = python3.10 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py311] +basepython = python3.11 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py312] +basepython = python3.12 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py3] +basepython = python3 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:pep8] +basepython = python3 +deps = flake8 + charm-tools +commands = flake8 {posargs} unit_tests tests actions files src + +[testenv:cover] +# Technique based heavily upon +# https://github.com/openstack/nova/blob/master/tox.ini +basepython = python3 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt +setenv = + {[testenv]setenv} + PYTHON=coverage run +commands = + coverage erase + stestr run --slowest {posargs} + coverage combine + coverage html -d cover + coverage xml -o cover/coverage.xml + coverage report + +[coverage:run] +branch = True +concurrency = multiprocessing +parallel = True +source = + . +omit = + .tox/* + */charmhelpers/* + unit_tests/* + +[testenv:venv] +basepython = python3 +commands = {posargs} + +[testenv:func-noop] +basepython = python3 +commands = + functest-run-suite --help + +[testenv:func] +basepython = python3 +commands = + functest-run-suite --keep-model + +[testenv:func-smoke] +basepython = python3 +commands = + functest-run-suite --keep-model --smoke + +[testenv:func-dev] +basepython = python3 +commands = + functest-run-suite --keep-model --dev + +[testenv:func-target] +basepython = python3 +commands = + functest-run-suite --keep-model --bundle {posargs} + +[flake8] +ignore = E402,E226,W503,W504 +exclude = */charmhelpers diff --git a/ceph-mon/unit_tests/__init__.py b/ceph-mon/unit_tests/__init__.py new file mode 100644 index 00000000..91c4fa66 --- /dev/null +++ b/ceph-mon/unit_tests/__init__.py @@ -0,0 +1,37 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys + +from unittest import mock + + +sys.path.append('hooks') +sys.path.append('lib') +sys.path.append('unit_tests') +sys.path.append('actions') +sys.path.append('src') + + +# Patch out lsb_release() and get_platform() as unit tests should be fully +# insulated from the underlying platform. Unit tests assume that the system is +# ubuntu jammy. +mock.patch( + 'charmhelpers.osplatform.get_platform', return_value='ubuntu' +).start() +mock.patch( + 'charmhelpers.core.host.lsb_release', + return_value={ + 'DISTRIB_CODENAME': 'jammy' + }).start() diff --git a/ceph-mon/unit_tests/ceph_crit.json b/ceph-mon/unit_tests/ceph_crit.json new file mode 100644 index 00000000..faa23cef --- /dev/null +++ b/ceph-mon/unit_tests/ceph_crit.json @@ -0,0 +1,226 @@ +{ + "health": { + "health": { + "health_services": [ + { + "mons": [ + { + "name": "juju-2691ab-1-lxd-1", + "kb_total": 155284096, + "kb_used": 1247744, + "kb_avail": 154036352, + "avail_percent": 99, + "last_updated": "2017-05-17 03:31:35.562497", + "store_stats": { + "bytes_total": 1012055342, + "bytes_sst": 0, + "bytes_log": 29673298, + "bytes_misc": 982382044, + "last_updated": "0.000000" + }, + "health": "HEALTH_OK" + }, + { + "name": "juju-2691ab-13-lxd-0", + "kb_total": 153820288, + "kb_used": 1361280, + "kb_avail": 152459008, + "avail_percent": 99, + "last_updated": "2017-05-17 03:31:04.097201", + "store_stats": { + "bytes_total": 1370003168, + "bytes_sst": 0, + "bytes_log": 29813159, + "bytes_misc": 1340190009, + "last_updated": "0.000000" + }, + "health": "HEALTH_OK" + }, + { + "name": "juju-2691ab-2-lxd-1", + "kb_total": 155251072, + "kb_used": 1373440, + "kb_avail": 153877632, + "avail_percent": 99, + "last_updated": "2017-05-17 03:31:20.684777", + "store_stats": { + "bytes_total": 1400974192, + "bytes_sst": 0, + "bytes_log": 1129945, + "bytes_misc": 1399844247, + "last_updated": "0.000000" + }, + "health": "HEALTH_OK" + } + ] + } + ] + }, + "timechecks": { + "epoch": 32, + "round": 24492, + "round_status": "finished", + "mons": [ + { + "name": "juju-2691ab-1-lxd-1", + "skew": 0, + "latency": 0, + "health": "HEALTH_OK" + }, + { + "name": "juju-2691ab-13-lxd-0", + "skew": 0.000919, + "latency": 0.001036, + "health": "HEALTH_OK" + }, + { + "name": "juju-2691ab-2-lxd-1", + "skew": 0, + "latency": 0.001009, + "health": "HEALTH_OK" + } + ] + }, + "summary": [ + { + "severity": "HEALTH_WARN", + "summary": "48 pgs backfill_wait" + }, + { + "severity": "HEALTH_WARN", + "summary": "45 pgs backfilling" + }, + { + "severity": "HEALTH_WARN", + "summary": "1 pgs degraded" + }, + { + "severity": "HEALTH_WARN", + "summary": "1 pgs recovery_wait" + }, + { + "severity": "HEALTH_WARN", + "summary": "22 pgs stuck unclean" + }, + { + "severity": "HEALTH_WARN", + "summary": "recovery 14/46842755 objects degraded (0.000%)" + }, + { + "severity": "HEALTH_WARN", + "summary": "recovery 448540/46842755 objects misplaced (0.958%)" + }, + { + "severity": "HEALTH_CRITICAL", + "summary": "Test critical status message" + } + ], + "overall_status": "HEALTH_CRITICAL", + "detail": [] + }, + "fsid": "ca9451f1-5c4f-4e85-bb14-a08dfc0568f7", + "election_epoch": 32, + "quorum": [ + 0, + 1, + 2 + ], + "quorum_names": [ + "juju-2691ab-1-lxd-1", + "juju-2691ab-13-lxd-0", + "juju-2691ab-2-lxd-1" + ], + "monmap": { + "epoch": 1, + "fsid": "ca9451f1-5c4f-4e85-bb14-a08dfc0568f7", + "modified": "2016-12-03 08:09:21.854671", + "created": "2016-12-03 08:09:21.854671", + "mons": [ + { + "rank": 0, + "name": "juju-2691ab-1-lxd-1", + "addr": "10.182.254.221:6789/0" + }, + { + 
"rank": 1, + "name": "juju-2691ab-13-lxd-0", + "addr": "10.182.254.229:6789/0" + }, + { + "rank": 2, + "name": "juju-2691ab-2-lxd-1", + "addr": "10.182.254.242:6789/0" + } + ] + }, + "osdmap": { + "osdmap": { + "epoch": 141540, + "num_osds": 314, + "num_up_osds": 314, + "num_in_osds": 314, + "full": false, + "nearfull": false, + "num_remapped_pgs": 92 + } + }, + "pgmap": { + "pgs_by_state": [ + { + "state_name": "active+clean", + "count": 9274 + }, + { + "state_name": "active+remapped+wait_backfill", + "count": 48 + }, + { + "state_name": "active+remapped+backfilling", + "count": 45 + }, + { + "state_name": "active+clean+scrubbing+deep", + "count": 9 + }, + { + "state_name": "active+remapped", + "count": 2 + }, + { + "state_name": "active+recovery_wait+degraded", + "count": 1 + }, + { + "state_name": "active+clean+scrubbing", + "count": 1 + } + ], + "version": 13885884, + "num_pgs": 9380, + "data_bytes": 64713222471610, + "bytes_used": 193613093122048, + "bytes_avail": 690058090491904, + "bytes_total": 883671183613952, + "degraded_objects": 14, + "degraded_total": 46842755, + "degraded_ratio": 0, + "misplaced_objects": 448540, + "misplaced_total": 46842755, + "misplaced_ratio": 0.009575, + "recovering_objects_per_sec": 389, + "recovering_bytes_per_sec": 1629711746, + "recovering_keys_per_sec": 0, + "num_objects_recovered": 218, + "num_bytes_recovered": 912252928, + "num_keys_recovered": 0, + "read_bytes_sec": 117041457, + "write_bytes_sec": 293414043, + "read_op_per_sec": 5282, + "write_op_per_sec": 5270 + }, + "fsmap": { + "epoch": 1, + "by_rank": [] + } +} + diff --git a/ceph-mon/unit_tests/ceph_crit_luminous.json b/ceph-mon/unit_tests/ceph_crit_luminous.json new file mode 100644 index 00000000..c81a3f36 --- /dev/null +++ b/ceph-mon/unit_tests/ceph_crit_luminous.json @@ -0,0 +1,196 @@ +{ + "fsid": "a7285ad8-3961-11e8-b715-00163e030140", + "health": { + "checks": { + "OSD_DOWN": { + "severity": "HEALTH_WARN", + "summary": { + "message": "1 osds down" + } + }, + "PG_DEGRADED": { + "severity": "HEALTH_WARN", + "summary": { + "message": "Degraded data redundancy: 31/906 objects degraded (3.422%), 74 pgs unclean, 74 pgs degraded" + } + } + }, + "status": "HEALTH_WARN" + }, + "election_epoch": 28, + "quorum": [ + 0, + 1, + 2 + ], + "quorum_names": [ + "juju-7cfc1d-1-lxd-0", + "juju-7cfc1d-0-lxd-0", + "juju-7cfc1d-12-lxd-0" + ], + "monmap": { + "epoch": 2, + "fsid": "a7285ad8-3961-11e8-b715-00163e030140", + "modified": "2018-04-06 06:37:04.978765", + "created": "2018-04-06 06:35:06.513449", + "features": { + "persistent": [ + "kraken", + "luminous" + ], + "optional": [] + }, + "mons": [ + { + "rank": 0, + "name": "juju-7cfc1d-1-lxd-0", + "addr": "172.18.250.75:6789/0", + "public_addr": "172.18.250.75:6789/0" + }, + { + "rank": 1, + "name": "juju-7cfc1d-0-lxd-0", + "addr": "172.18.250.76:6789/0", + "public_addr": "172.18.250.76:6789/0" + }, + { + "rank": 2, + "name": "juju-7cfc1d-12-lxd-0", + "addr": "172.18.250.84:6789/0", + "public_addr": "172.18.250.84:6789/0" + } + ] + }, + "osdmap": { + "osdmap": { + "epoch": 257, + "num_osds": 33, + "num_up_osds": 32, + "num_in_osds": 33, + "full": false, + "nearfull": false, + "num_remapped_pgs": 0 + } + }, + "pgmap": { + "pgs_by_state": [ + { + "state_name": "active+clean", + "count": 958 + }, + { + "state_name": "active+undersized+degraded", + "count": 74 + } + ], + "num_pgs": 1032, + "num_pools": 20, + "num_objects": 302, + "data_bytes": 580388173, + "bytes_used": 2971890057216, + "bytes_avail": 128989599563776, + "bytes_total": 131961489620992, + 
"degraded_objects": 31, + "degraded_total": 906, + "degraded_ratio": 0.034216 + }, + "fsmap": { + "epoch": 1, + "by_rank": [] + }, + "mgrmap": { + "epoch": 4, + "active_gid": 4131, + "active_name": "juju-7cfc1d-1-lxd-0", + "active_addr": "172.18.250.75:6800/88914", + "available": true, + "standbys": [ + { + "gid": 4134, + "name": "juju-7cfc1d-0-lxd-0", + "available_modules": [ + "balancer", + "dashboard", + "influx", + "localpool", + "prometheus", + "restful", + "selftest", + "status", + "zabbix" + ] + }, + { + "gid": 4299, + "name": "juju-7cfc1d-12-lxd-0", + "available_modules": [ + "balancer", + "dashboard", + "influx", + "localpool", + "prometheus", + "restful", + "selftest", + "status", + "zabbix" + ] + } + ], + "modules": [ + "balancer", + "restful", + "status" + ], + "available_modules": [ + "balancer", + "dashboard", + "influx", + "localpool", + "prometheus", + "restful", + "selftest", + "status", + "zabbix" + ], + "services": {} + }, + "servicemap": { + "epoch": 22, + "modified": "2018-04-14 06:25:03.499825", + "services": { + "rgw": { + "daemons": { + "summary": "", + "radosgw.gateway": { + "start_epoch": 22, + "start_stamp": "2018-04-14 06:25:02.277715", + "gid": 156351, + "addr": "172.18.250.74:0/2962286796", + "metadata": { + "arch": "x86_64", + "ceph_version": "ceph version 12.2.2 (cf0baeeeeba3b47f9427c6c97e2144b094b7e5ba) luminous (stable)", + "cpu": "Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz", + "distro": "ubuntu", + "distro_description": "Ubuntu 16.04.4 LTS", + "distro_version": "16.04", + "frontend_config#0": "civetweb port=60", + "frontend_type#0": "civetweb", + "hostname": "juju-7cfc1d-1-lxd-1", + "kernel_description": "#43~16.04.1-Ubuntu SMP Wed Mar 14 17:48:43 UTC 2018", + "kernel_version": "4.13.0-38-generic", + "mem_swap_kb": "8388604", + "mem_total_kb": "528154640", + "num_handles": "1", + "os": "Linux", + "pid": "225019", + "zone_id": "34009c14-e608-47e6-84c5-bf2cefbe94f8", + "zone_name": "default", + "zonegroup_id": "7771c284-f980-41f0-861b-66c95357cb3d", + "zonegroup_name": "default" + } + } + } + } + } + } +} diff --git a/ceph-mon/unit_tests/ceph_degraded_luminous.json b/ceph-mon/unit_tests/ceph_degraded_luminous.json new file mode 100644 index 00000000..3cf3bdd3 --- /dev/null +++ b/ceph-mon/unit_tests/ceph_degraded_luminous.json @@ -0,0 +1,147 @@ +{ + "fsid": "b03a2900-e297-11e8-a7db-00163ed10659", + "health": { + "checks": { + "OSD_DOWN": { + "severity": "HEALTH_WARN", + "summary": { + "message": "3 osds down" + } + }, + "OSD_HOST_DOWN": { + "severity": "HEALTH_WARN", + "summary": { + "message": "1 host (3 osds) down" + } + }, + "OBJECT_MISPLACED": { + "severity": "HEALTH_WARN", + "summary": { + "message": "9883/43779 objects misplaced (22.575%)" + } + }, + "PG_DEGRADED": { + "severity": "HEALTH_WARN", + "summary": { + "message": "Degraded data redundancy: 14001/43779 objects degraded (31.981%), 32 pgs degraded" + } + }, + "POOL_APP_NOT_ENABLED": { + "severity": "HEALTH_WARN", + "summary": { + "message": "application not enabled on 1 pool(s)" + } + }, + "TOO_FEW_PGS": { + "severity": "HEALTH_WARN", + "summary": { + "message": "too few PGs per OSD (7 < min 30)" + } + } + }, + "status": "HEALTH_WARN" + }, + "election_epoch": 5, + "quorum": [ + 0 + ], + "quorum_names": [ + "juju-460e0f-11" + ], + "monmap": { + "epoch": 1, + "fsid": "b03a2900-e297-11e8-a7db-00163ed10659", + "modified": "2018-11-07 14:17:12.324408", + "created": "2018-11-07 14:17:12.324408", + "features": { + "persistent": [ + "kraken", + "luminous" + ], + "optional": [] + }, + "mons": [ + { + 
"rank": 0, + "name": "juju-460e0f-11", + "addr": "192.168.100.81:6789/0", + "public_addr": "192.168.100.81:6789/0" + } + ] + }, + "osdmap": { + "osdmap": { + "epoch": 72, + "num_osds": 9, + "num_up_osds": 6, + "num_in_osds": 9, + "full": false, + "nearfull": false, + "num_remapped_pgs": 16 + } + }, + "pgmap": { + "pgs_by_state": [ + { + "state_name": "active+undersized+degraded", + "count": 16 + }, + { + "state_name": "active+undersized+degraded+remapped+backfill_wait", + "count": 14 + }, + { + "state_name": "active+undersized+degraded+remapped+backfilling", + "count": 2 + } + ], + "num_pgs": 32, + "num_pools": 1, + "num_objects": 14593, + "data_bytes": 61169729807, + "bytes_used": 14540595200, + "bytes_avail": 14889525248, + "bytes_total": 29430120448, + "degraded_objects": 14001, + "degraded_total": 43779, + "degraded_ratio": 0.319811, + "misplaced_objects": 9883, + "misplaced_total": 43779, + "misplaced_ratio": 0.225748 + }, + "fsmap": { + "epoch": 1, + "by_rank": [] + }, + "mgrmap": { + "epoch": 5, + "active_gid": 14097, + "active_name": "juju-460e0f-11", + "active_addr": "192.168.100.81:6800/204", + "available": true, + "standbys": [], + "modules": [ + "balancer", + "restful", + "status" + ], + "available_modules": [ + "balancer", + "dashboard", + "influx", + "localpool", + "prometheus", + "restful", + "selftest", + "status", + "zabbix" + ], + "services": {} + }, + "servicemap": { + "epoch": 1, + "modified": "0.000000", + "services": {} + } +} + diff --git a/ceph-mon/unit_tests/ceph_error.json b/ceph-mon/unit_tests/ceph_error.json new file mode 100644 index 00000000..eb9a161c --- /dev/null +++ b/ceph-mon/unit_tests/ceph_error.json @@ -0,0 +1,118 @@ +{ + "health": { + "health": { + "health_services": [ + { + "mons": [ + { + "name": "juju-460e0f-12", + "kb_total": 1829760, + "kb_used": 835072, + "kb_avail": 994688, + "avail_percent": 54, + "last_updated": "2018-11-07 18:46:32.308592", + "store_stats": { + "bytes_total": 15678387, + "bytes_sst": 0, + "bytes_log": 420953, + "bytes_misc": 15257434, + "last_updated": "0.000000" + }, + "health": "HEALTH_OK" + } + ] + } + ] + }, + "timechecks": { + "epoch": 3, + "round": 0, + "round_status": "finished" + }, + "summary": [ + { + "severity": "HEALTH_ERR", + "summary": "6 pgs are stuck inactive for more than 300 seconds" + }, + { + "severity": "HEALTH_WARN", + "summary": "7 pgs peering" + }, + { + "severity": "HEALTH_WARN", + "summary": "6 pgs stuck inactive" + }, + { + "severity": "HEALTH_WARN", + "summary": "6 pgs stuck unclean" + } + ], + "overall_status": "HEALTH_ERR", + "detail": [] + }, + "fsid": "68a9ca14-e297-11e8-843c-00163e64b0c0", + "election_epoch": 3, + "quorum": [ + 0 + ], + "quorum_names": [ + "juju-460e0f-12" + ], + "monmap": { + "epoch": 1, + "fsid": "68a9ca14-e297-11e8-843c-00163e64b0c0", + "modified": "2018-11-07 14:17:27.659064", + "created": "2018-11-07 14:17:27.659064", + "mons": [ + { + "rank": 0, + "name": "juju-460e0f-12", + "addr": "192.168.100.26:6789\/0" + } + ] + }, + "osdmap": { + "osdmap": { + "epoch": 28, + "num_osds": 9, + "num_up_osds": 9, + "num_in_osds": 9, + "full": false, + "nearfull": false, + "num_remapped_pgs": 0 + } + }, + "pgmap": { + "pgs_by_state": [ + { + "state_name": "creating", + "count": 113 + }, + { + "state_name": "active+clean", + "count": 64 + }, + { + "state_name": "activating", + "count": 8 + }, + { + "state_name": "peering", + "count": 7 + } + ], + "version": 7831, + "num_pgs": 192, + "data_bytes": 1790967809, + "bytes_used": 9995157504, + "bytes_avail": 9157476352, + "bytes_total": 
19152633856, + "write_bytes_sec": 89844495, + "read_op_per_sec": 0, + "write_op_per_sec": 21 + }, + "fsmap": { + "epoch": 1, + "by_rank": [] + } +} diff --git a/ceph-mon/unit_tests/ceph_many_warnings_luminous.json b/ceph-mon/unit_tests/ceph_many_warnings_luminous.json new file mode 100644 index 00000000..3e5c11e8 --- /dev/null +++ b/ceph-mon/unit_tests/ceph_many_warnings_luminous.json @@ -0,0 +1,147 @@ +{ + "fsid": "b03a2900-e297-11e8-a7db-00163ed10659", + "health": { + "checks": { + "OBJECT_MISPLACED": { + "severity": "HEALTH_WARN", + "summary": { + "message": "1560/12264 objects misplaced (12.720%)" + } + }, + "PG_AVAILABILITY": { + "severity": "HEALTH_WARN", + "summary": { + "message": "Reduced data availability: 27 pgs inactive, 30 pgs peering" + } + }, + "POOL_APP_NOT_ENABLED": { + "severity": "HEALTH_WARN", + "summary": { + "message": "application not enabled on 1 pool(s)" + } + }, + "TOO_FEW_PGS": { + "severity": "HEALTH_WARN", + "summary": { + "message": "too few PGs per OSD (21 < min 30)" + } + } + }, + "status": "HEALTH_WARN" + }, + "election_epoch": 5, + "quorum": [ + 0 + ], + "quorum_names": [ + "juju-460e0f-11" + ], + "monmap": { + "epoch": 1, + "fsid": "b03a2900-e297-11e8-a7db-00163ed10659", + "modified": "2018-11-07 14:17:12.324408", + "created": "2018-11-07 14:17:12.324408", + "features": { + "persistent": [ + "kraken", + "luminous" + ], + "optional": [] + }, + "mons": [ + { + "rank": 0, + "name": "juju-460e0f-11", + "addr": "192.168.100.81:6789/0", + "public_addr": "192.168.100.81:6789/0" + } + ] + }, + "osdmap": { + "osdmap": { + "epoch": 118, + "num_osds": 9, + "num_up_osds": 9, + "num_in_osds": 9, + "full": false, + "nearfull": false, + "num_remapped_pgs": 15 + } + }, + "pgmap": { + "pgs_by_state": [ + { + "state_name": "unknown", + "count": 65 + }, + { + "state_name": "peering", + "count": 31 + }, + { + "state_name": "activating", + "count": 17 + }, + { + "state_name": "activating+remapped", + "count": 15 + } + ], + "num_pgs": 128, + "num_pools": 1, + "num_objects": 4088, + "data_bytes": 17187733578, + "bytes_used": 14360064000, + "bytes_avail": 15023263744, + "bytes_total": 29383327744, + "unknown_pgs_ratio": 0.507812, + "inactive_pgs_ratio": 0.492188, + "misplaced_objects": 1560, + "misplaced_total": 12264, + "misplaced_ratio": 0.127202, + "recovering_objects_per_sec": 14, + "recovering_bytes_per_sec": 60779755, + "recovering_keys_per_sec": 0, + "num_objects_recovered": 113, + "num_bytes_recovered": 471859200, + "num_keys_recovered": 0, + "read_bytes_sec": 0, + "write_bytes_sec": 244132150, + "read_op_per_sec": 0, + "write_op_per_sec": 116 + }, + "fsmap": { + "epoch": 1, + "by_rank": [] + }, + "mgrmap": { + "epoch": 5, + "active_gid": 14097, + "active_name": "juju-460e0f-11", + "active_addr": "192.168.100.81:6800/204", + "available": true, + "standbys": [], + "modules": [ + "balancer", + "restful", + "status" + ], + "available_modules": [ + "balancer", + "dashboard", + "influx", + "localpool", + "prometheus", + "restful", + "selftest", + "status", + "zabbix" + ], + "services": {} + }, + "servicemap": { + "epoch": 1, + "modified": "0.000000", + "services": {} + } +} diff --git a/ceph-mon/unit_tests/ceph_nodeepscrub.json b/ceph-mon/unit_tests/ceph_nodeepscrub.json new file mode 100644 index 00000000..2488fabb --- /dev/null +++ b/ceph-mon/unit_tests/ceph_nodeepscrub.json @@ -0,0 +1,202 @@ +{ + "health": { + "health": { + "health_services": [ + { + "mons": [ + { + "name": "juju-c62a41-21-lxd-0", + "kb_total": 334602320, + "kb_used": 2127960, + "kb_avail": 315454468, + 
"avail_percent": 94, + "last_updated": "2018-11-08 09:47:09.932189", + "store_stats": { + "bytes_total": 34880542, + "bytes_sst": 0, + "bytes_log": 1647123, + "bytes_misc": 33233419, + "last_updated": "0.000000" + }, + "health": "HEALTH_OK" + }, + { + "name": "juju-c62a41-24-lxd-0", + "kb_total": 334602320, + "kb_used": 2128116, + "kb_avail": 315454312, + "avail_percent": 94, + "last_updated": "2018-11-08 09:47:16.418007", + "store_stats": { + "bytes_total": 36811676, + "bytes_sst": 0, + "bytes_log": 3574345, + "bytes_misc": 33237331, + "last_updated": "0.000000" + }, + "health": "HEALTH_OK" + }, + { + "name": "juju-c62a41-25-lxd-0", + "kb_total": 334602320, + "kb_used": 2128860, + "kb_avail": 315453568, + "avail_percent": 94, + "last_updated": "2018-11-08 09:47:21.198816", + "store_stats": { + "bytes_total": 37388424, + "bytes_sst": 0, + "bytes_log": 4151569, + "bytes_misc": 33236855, + "last_updated": "0.000000" + }, + "health": "HEALTH_OK" + } + ] + } + ] + }, + "timechecks": { + "epoch": 14, + "round": 4480, + "round_status": "finished", + "mons": [ + { + "name": "juju-c62a41-21-lxd-0", + "skew": 0.000000, + "latency": 0.000000, + "health": "HEALTH_OK" + }, + { + "name": "juju-c62a41-24-lxd-0", + "skew": 0.000282, + "latency": 0.000989, + "health": "HEALTH_OK" + }, + { + "name": "juju-c62a41-25-lxd-0", + "skew": -0.001223, + "latency": 0.000776, + "health": "HEALTH_OK" + } + ] + }, + "summary": [ + { + "severity": "HEALTH_WARN", + "summary": "19 pgs backfill_wait" + }, + { + "severity": "HEALTH_WARN", + "summary": "4 pgs backfilling" + }, + { + "severity": "HEALTH_WARN", + "summary": "1 pgs peering" + }, + { + "severity": "HEALTH_WARN", + "summary": "24 pgs stuck unclean" + }, + { + "severity": "HEALTH_WARN", + "summary": "recovery 17386\/112794 objects misplaced (15.414%)" + }, + { + "severity": "HEALTH_WARN", + "summary": "pool pool1 has many more objects per pg than average (too few pgs?)" + }, + { + "severity": "HEALTH_WARN", + "summary": "nodeep-scrub flag(s) set" + } + ], + "overall_status": "HEALTH_WARN", + "detail": [] + }, + "fsid": "66af7af5-2f60-4e0e-94dc-49f49bd37284", + "election_epoch": 14, + "quorum": [ + 0, + 1, + 2 + ], + "quorum_names": [ + "juju-c62a41-21-lxd-0", + "juju-c62a41-24-lxd-0", + "juju-c62a41-25-lxd-0" + ], + "monmap": { + "epoch": 2, + "fsid": "66af7af5-2f60-4e0e-94dc-49f49bd37284", + "modified": "2018-10-31 15:37:56.902830", + "created": "2018-10-31 15:37:40.288870", + "mons": [ + { + "rank": 0, + "name": "juju-c62a41-21-lxd-0", + "addr": "100.84.195.4:6789\/0" + }, + { + "rank": 1, + "name": "juju-c62a41-24-lxd-0", + "addr": "100.84.196.4:6789\/0" + }, + { + "rank": 2, + "name": "juju-c62a41-25-lxd-0", + "addr": "100.84.196.5:6789\/0" + } + ] + }, + "osdmap": { + "osdmap": { + "epoch": 316, + "num_osds": 48, + "num_up_osds": 48, + "num_in_osds": 48, + "full": false, + "nearfull": false, + "num_remapped_pgs": 22 + } + }, + "pgmap": { + "pgs_by_state": [ + { + "state_name": "active+clean", + "count": 3448 + }, + { + "state_name": "active+remapped+wait_backfill", + "count": 19 + }, + { + "state_name": "active+remapped+backfilling", + "count": 4 + }, + { + "state_name": "peering", + "count": 1 + } + ], + "version": 141480, + "num_pgs": 3472, + "data_bytes": 157009583781, + "bytes_used": 487185850368, + "bytes_avail": 75282911256576, + "bytes_total": 75770097106944, + "misplaced_objects": 17386, + "misplaced_total": 112794, + "misplaced_ratio": 0.154139, + "recovering_objects_per_sec": 436, + "recovering_bytes_per_sec": 1832614589, + 
"recovering_keys_per_sec": 0, + "num_objects_recovered": 446, + "num_bytes_recovered": 1870659584, + "num_keys_recovered": 0 + }, + "fsmap": { + "epoch": 1, + "by_rank": [] + } +} diff --git a/ceph-mon/unit_tests/ceph_nodeepscrub_luminous.json b/ceph-mon/unit_tests/ceph_nodeepscrub_luminous.json new file mode 100644 index 00000000..3d161fba --- /dev/null +++ b/ceph-mon/unit_tests/ceph_nodeepscrub_luminous.json @@ -0,0 +1,102 @@ +{ + "fsid": "b03a2900-e297-11e8-a7db-00163ed10659", + "health": { + "checks": { + "OSDMAP_FLAGS": { + "severity": "HEALTH_WARN", + "summary": { + "message": "nodeep-scrub flag(s) set" + } + } + }, + "status": "HEALTH_WARN" + }, + "election_epoch": 5, + "quorum": [ + 0 + ], + "quorum_names": [ + "juju-460e0f-11" + ], + "monmap": { + "epoch": 1, + "fsid": "b03a2900-e297-11e8-a7db-00163ed10659", + "modified": "2018-11-07 14:17:12.324408", + "created": "2018-11-07 14:17:12.324408", + "features": { + "persistent": [ + "kraken", + "luminous" + ], + "optional": [] + }, + "mons": [ + { + "rank": 0, + "name": "juju-460e0f-11", + "addr": "192.168.100.81:6789/0", + "public_addr": "192.168.100.81:6789/0" + } + ] + }, + "osdmap": { + "osdmap": { + "epoch": 518, + "num_osds": 9, + "num_up_osds": 9, + "num_in_osds": 9, + "full": false, + "nearfull": false, + "num_remapped_pgs": 0 + } + }, + "pgmap": { + "pgs_by_state": [ + { + "state_name": "active+clean", + "count": 128 + } + ], + "num_pgs": 128, + "num_pools": 1, + "num_objects": 14896, + "data_bytes": 62440603919, + "bytes_used": 14225776640, + "bytes_avail": 9450938368, + "bytes_total": 23676715008 + }, + "fsmap": { + "epoch": 1, + "by_rank": [] + }, + "mgrmap": { + "epoch": 5, + "active_gid": 14097, + "active_name": "juju-460e0f-11", + "active_addr": "192.168.100.81:6800/204", + "available": true, + "standbys": [], + "modules": [ + "balancer", + "restful", + "status" + ], + "available_modules": [ + "balancer", + "dashboard", + "influx", + "localpool", + "prometheus", + "restful", + "selftest", + "status", + "zabbix" + ], + "services": {} + }, + "servicemap": { + "epoch": 1, + "modified": "0.000000", + "services": {} + } +} diff --git a/ceph-mon/unit_tests/ceph_noout.json b/ceph-mon/unit_tests/ceph_noout.json new file mode 100644 index 00000000..3a57eb2b --- /dev/null +++ b/ceph-mon/unit_tests/ceph_noout.json @@ -0,0 +1,206 @@ +{ + "health": { + "health": { + "health_services": [ + { + "mons": [ + { + "name": "juju-c62a41-21-lxd-0", + "kb_total": 334602320, + "kb_used": 2127960, + "kb_avail": 315454468, + "avail_percent": 94, + "last_updated": "2018-11-08 09:47:09.932189", + "store_stats": { + "bytes_total": 34880542, + "bytes_sst": 0, + "bytes_log": 1647123, + "bytes_misc": 33233419, + "last_updated": "0.000000" + }, + "health": "HEALTH_WARN" + }, + { + "name": "juju-c62a41-24-lxd-0", + "kb_total": 334602320, + "kb_used": 2128116, + "kb_avail": 315454312, + "avail_percent": 94, + "last_updated": "2018-11-08 09:47:16.418007", + "store_stats": { + "bytes_total": 36811676, + "bytes_sst": 0, + "bytes_log": 3574345, + "bytes_misc": 33237331, + "last_updated": "0.000000" + }, + "health": "HEALTH_OK" + }, + { + "name": "juju-c62a41-25-lxd-0", + "kb_total": 334602320, + "kb_used": 2128860, + "kb_avail": 315453568, + "avail_percent": 94, + "last_updated": "2018-11-08 09:47:21.198816", + "store_stats": { + "bytes_total": 37388424, + "bytes_sst": 0, + "bytes_log": 4151569, + "bytes_misc": 33236855, + "last_updated": "0.000000" + }, + "health": "HEALTH_OK" + } + ] + } + ] + }, + "timechecks": { + "epoch": 14, + "round": 4480, + 
"round_status": "finished", + "mons": [ + { + "name": "juju-c62a41-21-lxd-0", + "skew": 0.000000, + "latency": 0.000000, + "health": "HEALTH_OK" + }, + { + "name": "juju-c62a41-24-lxd-0", + "skew": 0.000282, + "latency": 0.000989, + "health": "HEALTH_OK" + }, + { + "name": "juju-c62a41-25-lxd-0", + "skew": -0.001223, + "latency": 0.000776, + "health": "HEALTH_OK" + } + ] + }, + "summary": [ + { + "severity": "HEALTH_WARN", + "summary": "noout flag(s) set" + }, + { + "severity": "HEALTH_WARN", + "summary": "19 pgs backfill_wait" + }, + { + "severity": "HEALTH_WARN", + "summary": "4 pgs backfilling" + }, + { + "severity": "HEALTH_WARN", + "summary": "1 pgs peering" + }, + { + "severity": "HEALTH_WARN", + "summary": "24 pgs stuck unclean" + }, + { + "severity": "HEALTH_WARN", + "summary": "recovery 17386\/112794 objects misplaced (15.414%)" + }, + { + "severity": "HEALTH_WARN", + "summary": "pool pool1 has many more objects per pg than average (too few pgs?)" + }, + { + "severity": "HEALTH_WARN", + "summary": "nodeep-scrub flag(s) set" + } + ], + "overall_status": "HEALTH_WARN", + "detail": [] + }, + "fsid": "66af7af5-2f60-4e0e-94dc-49f49bd37284", + "election_epoch": 14, + "quorum": [ + 0, + 1, + 2 + ], + "quorum_names": [ + "juju-c62a41-21-lxd-0", + "juju-c62a41-24-lxd-0", + "juju-c62a41-25-lxd-0" + ], + "monmap": { + "epoch": 2, + "fsid": "66af7af5-2f60-4e0e-94dc-49f49bd37284", + "modified": "2018-10-31 15:37:56.902830", + "created": "2018-10-31 15:37:40.288870", + "mons": [ + { + "rank": 0, + "name": "juju-c62a41-21-lxd-0", + "addr": "100.84.195.4:6789\/0" + }, + { + "rank": 1, + "name": "juju-c62a41-24-lxd-0", + "addr": "100.84.196.4:6789\/0" + }, + { + "rank": 2, + "name": "juju-c62a41-25-lxd-0", + "addr": "100.84.196.5:6789\/0" + } + ] + }, + "osdmap": { + "osdmap": { + "epoch": 316, + "num_osds": 48, + "num_up_osds": 48, + "num_in_osds": 48, + "full": false, + "nearfull": false, + "num_remapped_pgs": 22 + } + }, + "pgmap": { + "pgs_by_state": [ + { + "state_name": "active+clean", + "count": 3448 + }, + { + "state_name": "active+remapped+wait_backfill", + "count": 19 + }, + { + "state_name": "active+remapped+backfilling", + "count": 4 + }, + { + "state_name": "peering", + "count": 1 + } + ], + "version": 141480, + "num_pgs": 3472, + "data_bytes": 157009583781, + "bytes_used": 487185850368, + "bytes_avail": 75282911256576, + "bytes_total": 75770097106944, + "misplaced_objects": 17386, + "misplaced_total": 112794, + "misplaced_ratio": 0.154139, + "recovering_objects_per_sec": 436, + "recovering_bytes_per_sec": 1832614589, + "recovering_keys_per_sec": 0, + "num_objects_recovered": 446, + "num_bytes_recovered": 1870659584, + "num_keys_recovered": 0 + }, + "fsmap": { + "epoch": 1, + "by_rank": [] + } +} diff --git a/ceph-mon/unit_tests/ceph_noout_luminous.json b/ceph-mon/unit_tests/ceph_noout_luminous.json new file mode 100644 index 00000000..4658af8a --- /dev/null +++ b/ceph-mon/unit_tests/ceph_noout_luminous.json @@ -0,0 +1,102 @@ +{ + "fsid": "b03a2900-e297-11e8-a7db-00163ed10659", + "health": { + "checks": { + "OSDMAP_FLAGS": { + "severity": "HEALTH_WARN", + "summary": { + "message": "noout flag(s) set" + } + } + }, + "status": "HEALTH_WARN" + }, + "election_epoch": 5, + "quorum": [ + 0 + ], + "quorum_names": [ + "juju-460e0f-11" + ], + "monmap": { + "epoch": 1, + "fsid": "b03a2900-e297-11e8-a7db-00163ed10659", + "modified": "2018-11-07 14:17:12.324408", + "created": "2018-11-07 14:17:12.324408", + "features": { + "persistent": [ + "kraken", + "luminous" + ], + "optional": [] + }, + 
"mons": [ + { + "rank": 0, + "name": "juju-460e0f-11", + "addr": "192.168.100.81:6789/0", + "public_addr": "192.168.100.81:6789/0" + } + ] + }, + "osdmap": { + "osdmap": { + "epoch": 518, + "num_osds": 9, + "num_up_osds": 9, + "num_in_osds": 9, + "full": false, + "nearfull": false, + "num_remapped_pgs": 0 + } + }, + "pgmap": { + "pgs_by_state": [ + { + "state_name": "active+clean", + "count": 128 + } + ], + "num_pgs": 128, + "num_pools": 1, + "num_objects": 14896, + "data_bytes": 62440603919, + "bytes_used": 14225776640, + "bytes_avail": 9450938368, + "bytes_total": 23676715008 + }, + "fsmap": { + "epoch": 1, + "by_rank": [] + }, + "mgrmap": { + "epoch": 5, + "active_gid": 14097, + "active_name": "juju-460e0f-11", + "active_addr": "192.168.100.81:6800/204", + "available": true, + "standbys": [], + "modules": [ + "balancer", + "restful", + "status" + ], + "available_modules": [ + "balancer", + "dashboard", + "influx", + "localpool", + "prometheus", + "restful", + "selftest", + "status", + "zabbix" + ], + "services": {} + }, + "servicemap": { + "epoch": 1, + "modified": "0.000000", + "services": {} + } +} diff --git a/ceph-mon/unit_tests/ceph_ok.json b/ceph-mon/unit_tests/ceph_ok.json new file mode 100644 index 00000000..2eafbc15 --- /dev/null +++ b/ceph-mon/unit_tests/ceph_ok.json @@ -0,0 +1 @@ +{"health":{"health":{"health_services":[{"mons":[{"name":"somehost-2","kb_total":384443444,"kb_used":254122936,"kb_avail":110768868,"avail_percent":28,"last_updated":"2017-06-28 07:22:57.268852","store_stats":{"bytes_total":563914940,"bytes_sst":0,"bytes_log":1201349,"bytes_misc":562713591,"last_updated":"0.000000"},"health":"HEALTH_OK"},{"name":"somehost-3","kb_total":384443444,"kb_used":181563008,"kb_avail":183328796,"avail_percent":47,"last_updated":"2017-06-28 07:22:09.013733","store_stats":{"bytes_total":584703758,"bytes_sst":0,"bytes_log":17361907,"bytes_misc":567341851,"last_updated":"0.000000"},"health":"HEALTH_OK"},{"name":"somehost-4","kb_total":384443444,"kb_used":278218520,"kb_avail":86673284,"avail_percent":22,"last_updated":"2017-06-28 07:22:31.725105","store_stats":{"bytes_total":598087748,"bytes_sst":0,"bytes_log":26273616,"bytes_misc":571814132,"last_updated":"0.000000"},"health":"HEALTH_OK"}]}]},"timechecks":{"epoch":52,"round":35412,"round_status":"finished","mons":[{"name":"somehost-2","skew":0.000000,"latency":0.000000,"health":"HEALTH_OK"},{"name":"somehost-3","skew":-0.001662,"latency":0.000531,"health":"HEALTH_OK"},{"name":"somehost-4","skew":-0.000034,"latency":0.000425,"health":"HEALTH_OK"}]},"summary":[],"overall_status":"HEALTH_OK","detail":[]},"fsid":"9486fd14-676d-481c-aa16-77b071a315d8","election_epoch":52,"quorum":[0,1,2],"quorum_names":["somehost-2","somehost-3","somehost-4"],"monmap":{"epoch":1,"fsid":"9486fd14-676d-481c-aa16-77b071a315d8","modified":"2016-08-09 06:33:15.685755","created":"2016-08-09 
06:33:15.685755","mons":[{"rank":0,"name":"somehost-2","addr":"10.28.2.21:6789\/0"},{"rank":1,"name":"somehost-3","addr":"10.28.2.22:6789\/0"},{"rank":2,"name":"somehost-4","addr":"10.28.2.23:6789\/0"}]},"osdmap":{"osdmap":{"epoch":11122,"num_osds":42,"num_up_osds":42,"num_in_osds":42,"full":false,"nearfull":false,"num_remapped_pgs":0}},"pgmap":{"pgs_by_state":[{"state_name":"active+clean","count":12350},{"state_name":"active+clean+scrubbing+deep","count":2}],"version":25999715,"num_pgs":12352,"data_bytes":13428555112092,"bytes_used":40180090028032,"bytes_avail":43795596517376,"bytes_total":83975686545408,"read_bytes_sec":92475,"write_bytes_sec":5309194,"read_op_per_sec":367,"write_op_per_sec":506},"fsmap":{"epoch":1,"by_rank":[]}} diff --git a/ceph-mon/unit_tests/ceph_ok_luminous.json b/ceph-mon/unit_tests/ceph_ok_luminous.json new file mode 100644 index 00000000..8a489d48 --- /dev/null +++ b/ceph-mon/unit_tests/ceph_ok_luminous.json @@ -0,0 +1,180 @@ +{ + "fsid": "1111111-11111-1111-1111-111111111111", + "health": { + "checks": {}, + "status": "HEALTH_OK" + }, + "election_epoch": 28, + "quorum": [ + 0, + 1, + 2 + ], + "quorum_names": [ + "juju-badbad-1-lxd-0", + "juju-badbad-0-lxd-0", + "juju-badbad-12-lxd-0" + ], + "monmap": { + "epoch": 2, + "fsid": "1111111-11111-1111-1111-111111111111", + "modified": "2018-04-06 06:37:04.978765", + "created": "2018-04-06 06:35:06.513449", + "features": { + "persistent": [ + "kraken", + "luminous" + ], + "optional": [] + }, + "mons": [ + { + "rank": 0, + "name": "juju-badbad-1-lxd-0", + "addr": "10.11.12.75:6789/0", + "public_addr": "10.11.12.75:6789/0" + }, + { + "rank": 1, + "name": "juju-badbad-0-lxd-0", + "addr": "10.11.12.76:6789/0", + "public_addr": "10.11.12.76:6789/0" + }, + { + "rank": 2, + "name": "juju-badbad-12-lxd-0", + "addr": "10.11.12.84:6789/0", + "public_addr": "10.11.12.84:6789/0" + } + ] + }, + "osdmap": { + "osdmap": { + "epoch": 262, + "num_osds": 33, + "num_up_osds": 32, + "num_in_osds": 32, + "full": false, + "nearfull": false, + "num_remapped_pgs": 0 + } + }, + "pgmap": { + "pgs_by_state": [ + { + "state_name": "active+clean", + "count": 1032 + } + ], + "num_pgs": 1032, + "num_pools": 20, + "num_objects": 561, + "data_bytes": 1584814720, + "bytes_used": 2884842602496, + "bytes_avail": 125077821714432, + "bytes_total": 127962664316928, + "read_bytes_sec": 1513, + "read_op_per_sec": 1, + "write_op_per_sec": 0 + }, + "fsmap": { + "epoch": 1, + "by_rank": [] + }, + "mgrmap": { + "epoch": 4, + "active_gid": 4131, + "active_name": "juju-badbad-1-lxd-0", + "active_addr": "10.11.12.75:6800/88914", + "available": true, + "standbys": [ + { + "gid": 4134, + "name": "juju-badbad-0-lxd-0", + "available_modules": [ + "balancer", + "dashboard", + "influx", + "localpool", + "prometheus", + "restful", + "selftest", + "status", + "zabbix" + ] + }, + { + "gid": 4299, + "name": "juju-badbad-12-lxd-0", + "available_modules": [ + "balancer", + "dashboard", + "influx", + "localpool", + "prometheus", + "restful", + "selftest", + "status", + "zabbix" + ] + } + ], + "modules": [ + "balancer", + "restful", + "status" + ], + "available_modules": [ + "balancer", + "dashboard", + "influx", + "localpool", + "prometheus", + "restful", + "selftest", + "status", + "zabbix" + ], + "services": {} + }, + "servicemap": { + "epoch": 29, + "modified": "2018-04-18 06:25:04.076050", + "services": { + "rgw": { + "daemons": { + "summary": "", + "radosgw.gateway": { + "start_epoch": 29, + "start_stamp": "2018-04-18 06:25:02.612368", + "gid": 231504, + "addr": 
"10.11.12.78:0/2747422053", + "metadata": { + "arch": "x86_64", + "ceph_version": "ceph version 12.2.2 (cf0baeeeeba3b47f9427c6c97e2144b094b7e5ba) luminous (stable)", + "cpu": "Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz", + "distro": "ubuntu", + "distro_description": "Ubuntu 16.04.4 LTS", + "distro_version": "16.04", + "frontend_config#0": "civetweb port=60", + "frontend_type#0": "civetweb", + "hostname": "juju-badbad-0-lxd-1", + "kernel_description": "#43~16.04.1-Ubuntu SMP Wed Mar 14 17:48:43 UTC 2018", + "kernel_version": "4.13.0-38-generic", + "mem_swap_kb": "8388604", + "mem_total_kb": "528154640", + "num_handles": "1", + "os": "Linux", + "pid": "225487", + "zone_id": "11111111-1111-1111-1111-111111111111", + "zone_name": "default", + "zonegroup_id": "11111111-1111-1111-1111-111111111111", + "zonegroup_name": "default" + } + } + } + } + } + } +} + diff --git a/ceph-mon/unit_tests/ceph_params.json b/ceph-mon/unit_tests/ceph_params.json new file mode 100644 index 00000000..4b4f6efb --- /dev/null +++ b/ceph-mon/unit_tests/ceph_params.json @@ -0,0 +1,222 @@ +{ + "health": { + "health": { + "health_services": [ + { + "mons": [ + { + "name": "juju-2691ab-1-lxd-1", + "kb_total": 155284096, + "kb_used": 1247744, + "kb_avail": 154036352, + "avail_percent": 99, + "last_updated": "2017-05-17 03:31:35.562497", + "store_stats": { + "bytes_total": 1012055342, + "bytes_sst": 0, + "bytes_log": 29673298, + "bytes_misc": 982382044, + "last_updated": "0.000000" + }, + "health": "HEALTH_OK" + }, + { + "name": "juju-2691ab-13-lxd-0", + "kb_total": 153820288, + "kb_used": 1361280, + "kb_avail": 152459008, + "avail_percent": 99, + "last_updated": "2017-05-17 03:31:04.097201", + "store_stats": { + "bytes_total": 1370003168, + "bytes_sst": 0, + "bytes_log": 29813159, + "bytes_misc": 1340190009, + "last_updated": "0.000000" + }, + "health": "HEALTH_OK" + }, + { + "name": "juju-2691ab-2-lxd-1", + "kb_total": 155251072, + "kb_used": 1373440, + "kb_avail": 153877632, + "avail_percent": 99, + "last_updated": "2017-05-17 03:31:20.684777", + "store_stats": { + "bytes_total": 1400974192, + "bytes_sst": 0, + "bytes_log": 1129945, + "bytes_misc": 1399844247, + "last_updated": "0.000000" + }, + "health": "HEALTH_OK" + } + ] + } + ] + }, + "timechecks": { + "epoch": 32, + "round": 24492, + "round_status": "finished", + "mons": [ + { + "name": "juju-2691ab-1-lxd-1", + "skew": 0, + "latency": 0, + "health": "HEALTH_OK" + }, + { + "name": "juju-2691ab-13-lxd-0", + "skew": 0.000919, + "latency": 0.001036, + "health": "HEALTH_OK" + }, + { + "name": "juju-2691ab-2-lxd-1", + "skew": 0, + "latency": 0.001009, + "health": "HEALTH_OK" + } + ] + }, + "summary": [ + { + "severity": "HEALTH_WARN", + "summary": "48 pgs backfill_wait" + }, + { + "severity": "HEALTH_WARN", + "summary": "45 pgs backfilling" + }, + { + "severity": "HEALTH_WARN", + "summary": "1 pgs degraded" + }, + { + "severity": "HEALTH_WARN", + "summary": "1 pgs recovery_wait" + }, + { + "severity": "HEALTH_WARN", + "summary": "22 pgs stuck unclean" + }, + { + "severity": "HEALTH_WARN", + "summary": "recovery lots/bignumber objects degraded (15%)" + }, + { + "severity": "HEALTH_WARN", + "summary": "recovery 448540/46842755 objects misplaced (0.958%)" + } + ], + "overall_status": "HEALTH_WARN", + "detail": [] + }, + "fsid": "ca9451f1-5c4f-4e85-bb14-a08dfc0568f7", + "election_epoch": 32, + "quorum": [ + 0, + 1, + 2 + ], + "quorum_names": [ + "juju-2691ab-1-lxd-1", + "juju-2691ab-13-lxd-0", + "juju-2691ab-2-lxd-1" + ], + "monmap": { + "epoch": 1, + "fsid": 
"ca9451f1-5c4f-4e85-bb14-a08dfc0568f7", + "modified": "2016-12-03 08:09:21.854671", + "created": "2016-12-03 08:09:21.854671", + "mons": [ + { + "rank": 0, + "name": "juju-2691ab-1-lxd-1", + "addr": "10.182.254.221:6789/0" + }, + { + "rank": 1, + "name": "juju-2691ab-13-lxd-0", + "addr": "10.182.254.229:6789/0" + }, + { + "rank": 2, + "name": "juju-2691ab-2-lxd-1", + "addr": "10.182.254.242:6789/0" + } + ] + }, + "osdmap": { + "osdmap": { + "epoch": 141540, + "num_osds": 314, + "num_up_osds": 314, + "num_in_osds": 314, + "full": false, + "nearfull": false, + "num_remapped_pgs": 92 + } + }, + "pgmap": { + "pgs_by_state": [ + { + "state_name": "active+clean", + "count": 9274 + }, + { + "state_name": "active+remapped+wait_backfill", + "count": 48 + }, + { + "state_name": "active+remapped+backfilling", + "count": 45 + }, + { + "state_name": "active+clean+scrubbing+deep", + "count": 9 + }, + { + "state_name": "active+remapped", + "count": 2 + }, + { + "state_name": "active+recovery_wait+degraded", + "count": 1 + }, + { + "state_name": "active+clean+scrubbing", + "count": 1 + } + ], + "version": 13885884, + "num_pgs": 9380, + "data_bytes": 64713222471610, + "bytes_used": 193613093122048, + "bytes_avail": 690058090491904, + "bytes_total": 883671183613952, + "degraded_objects": 14, + "degraded_total": 46842755, + "degraded_ratio": 0, + "misplaced_objects": 448540, + "misplaced_total": 46842755, + "misplaced_ratio": 0.15, + "recovering_objects_per_sec": 389, + "recovering_bytes_per_sec": 1629711746, + "recovering_keys_per_sec": 0, + "num_objects_recovered": 218, + "num_bytes_recovered": 912252928, + "num_keys_recovered": 0, + "read_bytes_sec": 117041457, + "write_bytes_sec": 293414043, + "read_op_per_sec": 5282, + "write_op_per_sec": 5270 + }, + "fsmap": { + "epoch": 1, + "by_rank": [] + } +} + diff --git a/ceph-mon/unit_tests/ceph_warn.json b/ceph-mon/unit_tests/ceph_warn.json new file mode 100644 index 00000000..3688dd00 --- /dev/null +++ b/ceph-mon/unit_tests/ceph_warn.json @@ -0,0 +1 @@ +{"health":{"health":{"health_services":[{"mons":[{"name":"juju-2691ab-1-lxd-1","kb_total":155284096,"kb_used":1247744,"kb_avail":154036352,"avail_percent":99,"last_updated":"2017-05-17 03:31:35.562497","store_stats":{"bytes_total":1012055342,"bytes_sst":0,"bytes_log":29673298,"bytes_misc":982382044,"last_updated":"0.000000"},"health":"HEALTH_OK"},{"name":"juju-2691ab-13-lxd-0","kb_total":153820288,"kb_used":1361280,"kb_avail":152459008,"avail_percent":99,"last_updated":"2017-05-17 03:31:04.097201","store_stats":{"bytes_total":1370003168,"bytes_sst":0,"bytes_log":29813159,"bytes_misc":1340190009,"last_updated":"0.000000"},"health":"HEALTH_OK"},{"name":"juju-2691ab-2-lxd-1","kb_total":155251072,"kb_used":1373440,"kb_avail":153877632,"avail_percent":99,"last_updated":"2017-05-17 03:31:20.684777","store_stats":{"bytes_total":1400974192,"bytes_sst":0,"bytes_log":1129945,"bytes_misc":1399844247,"last_updated":"0.000000"},"health":"HEALTH_OK"}]}]},"timechecks":{"epoch":32,"round":24492,"round_status":"finished","mons":[{"name":"juju-2691ab-1-lxd-1","skew":0.000000,"latency":0.000000,"health":"HEALTH_OK"},{"name":"juju-2691ab-13-lxd-0","skew":0.000919,"latency":0.001036,"health":"HEALTH_OK"},{"name":"juju-2691ab-2-lxd-1","skew":0.000000,"latency":0.001009,"health":"HEALTH_OK"}]},"summary":[{"severity":"HEALTH_WARN","summary":"48 pgs backfill_wait"},{"severity":"HEALTH_WARN","summary":"45 pgs backfilling"},{"severity":"HEALTH_WARN","summary":"1 pgs degraded"},{"severity":"HEALTH_WARN","summary":"1 pgs 
recovery_wait"},{"severity":"HEALTH_WARN","summary":"22 pgs stuck unclean"},{"severity":"HEALTH_WARN","summary":"recovery 14\/46842755 objects degraded (0.000%)"},{"severity":"HEALTH_WARN","summary":"recovery 448540\/46842755 objects misplaced (0.958%)"}],"overall_status":"HEALTH_WARN","detail":[]},"fsid":"ca9451f1-5c4f-4e85-bb14-a08dfc0568f7","election_epoch":32,"quorum":[0,1,2],"quorum_names":["juju-2691ab-1-lxd-1","juju-2691ab-13-lxd-0","juju-2691ab-2-lxd-1"],"monmap":{"epoch":1,"fsid":"ca9451f1-5c4f-4e85-bb14-a08dfc0568f7","modified":"2016-12-03 08:09:21.854671","created":"2016-12-03 08:09:21.854671","mons":[{"rank":0,"name":"juju-2691ab-1-lxd-1","addr":"10.182.254.221:6789\/0"},{"rank":1,"name":"juju-2691ab-13-lxd-0","addr":"10.182.254.229:6789\/0"},{"rank":2,"name":"juju-2691ab-2-lxd-1","addr":"10.182.254.242:6789\/0"}]},"osdmap":{"osdmap":{"epoch":141540,"num_osds":314,"num_up_osds":311,"num_in_osds":311,"full":false,"nearfull":false,"num_remapped_pgs":92}},"pgmap":{"pgs_by_state":[{"state_name":"active+clean","count":9274},{"state_name":"active+remapped+wait_backfill","count":48},{"state_name":"active+remapped+backfilling","count":45},{"state_name":"active+clean+scrubbing+deep","count":9},{"state_name":"active+remapped","count":2},{"state_name":"active+recovery_wait+degraded","count":1},{"state_name":"active+clean+scrubbing","count":1}],"version":13885884,"num_pgs":9380,"data_bytes":64713222471610,"bytes_used":193613093122048,"bytes_avail":690058090491904,"bytes_total":883671183613952,"degraded_objects":14,"degraded_total":46842755,"degraded_ratio":0.000000,"misplaced_objects":448540,"misplaced_total":46842755,"misplaced_ratio":0.009575,"recovering_objects_per_sec":389,"recovering_bytes_per_sec":1629711746,"recovering_keys_per_sec":0,"num_objects_recovered":218,"num_bytes_recovered":912252928,"num_keys_recovered":0,"read_bytes_sec":117041457,"write_bytes_sec":293414043,"read_op_per_sec":5282,"write_op_per_sec":5270},"fsmap":{"epoch":1,"by_rank":[]}} diff --git a/ceph-mon/unit_tests/helpers.py b/ceph-mon/unit_tests/helpers.py new file mode 100644 index 00000000..70dc272f --- /dev/null +++ b/ceph-mon/unit_tests/helpers.py @@ -0,0 +1,25 @@ +# Copyright 2020 Canonical Ltd. +# See LICENSE file for licensing details. + +from typing import Callable +from unittest.mock import patch + + +def patch_network_get(private_address="10.0.0.10") -> Callable: + def network_get(*args, **kwargs) -> dict: + """ + Patch for the not-yet-implemented testing backend needed + for `bind_address`. + + This patch decorator can be used for cases such as: + self.model.get_binding(event.relation).network.bind_address + """ + return { + "bind-addresses": [ + { + "addresses": [{"value": private_address}], + } + ], + } + + return patch("ops.testing._TestingModelBackend.network_get", network_get) diff --git a/ceph-mon/unit_tests/manage_test_relations.py b/ceph-mon/unit_tests/manage_test_relations.py new file mode 100644 index 00000000..c9ca72a8 --- /dev/null +++ b/ceph-mon/unit_tests/manage_test_relations.py @@ -0,0 +1,53 @@ +# Copyright 2022 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest.mock as mock +from ops.testing import Harness +with mock.patch('charmhelpers.contrib.hardening.harden.harden') as mock_dec: + mock_dec.side_effect = (lambda *dargs, **dkwargs: lambda f: + lambda *args, **kwargs: f(*args, **kwargs)) + # src.charm imports ceph_hooks, so we need to workaround the inclusion + # of the 'harden' decorator. + from src.charm import CephMonCharm + + +relation_id = int + + +def add_ceph_client_relation(harness: Harness[CephMonCharm]) -> relation_id: + rel_id = harness.add_relation( + 'client', + 'glance') + harness.add_relation_unit( + rel_id, + 'glance/0') + harness.update_relation_data( + rel_id, + 'glance/0', + {'ingress-address': '10.0.0.3'}) + return rel_id + + +def add_ceph_mds_relation(harness: Harness[CephMonCharm]) -> relation_id: + rel_id = harness.add_relation( + 'mds', + 'ceph-fs') + harness.add_relation_unit( + rel_id, + 'ceph-fs/0') + harness.update_relation_data( + rel_id, + 'ceph-fs/0', + {'ingress-address': '10.0.0.3'}) + return rel_id diff --git a/ceph-mon/unit_tests/test_action_change_osd_weight.py b/ceph-mon/unit_tests/test_action_change_osd_weight.py new file mode 100644 index 00000000..49db6796 --- /dev/null +++ b/ceph-mon/unit_tests/test_action_change_osd_weight.py @@ -0,0 +1,41 @@ +# Copyright 2020 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for reweight_osd action.""" + +import unittest.mock as mock +from test_utils import CharmTestCase, MockActionEvent +from ops.testing import Harness + +with mock.patch('charmhelpers.contrib.hardening.harden.harden') as mock_dec: + mock_dec.side_effect = (lambda *dargs, **dkwargs: lambda f: + lambda *args, **kwargs: f(*args, **kwargs)) + # src.charm imports ceph_hooks, so we need to workaround the inclusion + # of the 'harden' decorator. + from src.charm import CephMonCharm + + +class ReweightTestCase(CharmTestCase): + """Run tests for action.""" + + def setUp(self): + self.harness = Harness(CephMonCharm) + + @mock.patch("ops_actions.change_osd_weight.ceph_utils.reweight_osd") + def test_reweight_osd(self, _reweight_osd): + """Test reweight_osd action has correct calls.""" + _reweight_osd.return_value = True + self.harness.begin() + self.harness.charm.on_change_osd_weight_action( + MockActionEvent({'osd': 4, 'weight': 1.2})) + _reweight_osd.assert_has_calls([mock.call("4", "1.2")]) diff --git a/ceph-mon/unit_tests/test_action_delete_user.py b/ceph-mon/unit_tests/test_action_delete_user.py new file mode 100644 index 00000000..74c66201 --- /dev/null +++ b/ceph-mon/unit_tests/test_action_delete_user.py @@ -0,0 +1,39 @@ +# Copyright 2022 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for delete_user action."""
+
+from actions import delete_user
+from test_utils import CharmTestCase
+
+
+class DeleteUserTestCase(CharmTestCase):
+    _stderr = b"""updated"""
+
+    def setUp(self):
+        super(DeleteUserTestCase, self).setUp(
+            delete_user, ["check_output", "action_get", "action_fail",
+                          "action_set", "log"])
+        self.action_get.return_value = "sandbox"  # username=sandbox
+        self.check_output.return_value = self._stderr
+
+    def test_delete_user(self):
+        """Test that deleting a user reports the 'updated' status."""
+        self.user = None
+
+        def _action_set(message):
+            self.user = message["message"]
+        self.action_set.side_effect = _action_set
+        delete_user.main()
+        self.action_get.assert_called_once_with("username")
+        self.assertEqual(self.user, "updated")
diff --git a/ceph-mon/unit_tests/test_action_get_or_create_user.py b/ceph-mon/unit_tests/test_action_get_or_create_user.py
new file mode 100644
index 00000000..03127acb
--- /dev/null
+++ b/ceph-mon/unit_tests/test_action_get_or_create_user.py
@@ -0,0 +1,57 @@
+# Copyright 2022 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+ +"""Tests for get_or_create_user action.""" + +import json + +from actions import get_or_create_user +from test_utils import CharmTestCase + + +class GetOrCreateUserTestCase(CharmTestCase): + _keyring = b""" + [ + { + "entity": "client.sandbox", + "key": "AQCnGXxiOkueGBAAsWX27MV8PNwuyMhPSzSCPg==", + "caps": { + "mon": "allow r", + "osd": "allow r" + } + } + ]""" + + def setUp(self): + super(GetOrCreateUserTestCase, self).setUp( + get_or_create_user, ["check_output", "action_get", "action_fail", + "action_set", "log"]) + self.action_get.return_value = "sandbox" # username=sandbox + self.check_output.return_value = self._keyring + + def test_get_or_create_user(self): + """Test getting resulting keyring.""" + self.user = None + + def _action_set(message): + self.user = json.loads(message["message"]) + self.action_set.side_effect = _action_set + get_or_create_user.main() + self.action_get.assert_called_once_with("username") + self.assertEqual(self.user[0]["entity"], "client.sandbox") + self.assertEqual( + self.user[0]["key"], + "AQCnGXxiOkueGBAAsWX27MV8PNwuyMhPSzSCPg==" + ) + self.assertEqual(self.user[0]["caps"]["mon"], "allow r") + self.assertEqual(self.user[0]["caps"]["osd"], "allow r") diff --git a/ceph-mon/unit_tests/test_action_list_crush_rules.py b/ceph-mon/unit_tests/test_action_list_crush_rules.py new file mode 100644 index 00000000..87f52ecb --- /dev/null +++ b/ceph-mon/unit_tests/test_action_list_crush_rules.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python3 +# +# Copyright 2022 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import json +import yaml + +from actions import list_crush_rules +from test_utils import CharmTestCase + + +class ListCrushRulesTestCase(CharmTestCase): + ceph_osd_crush_rule_dump = b""" + [ + { + "rule_id": 0, + "rule_name": "replicated_rule", + "ruleset": 0, + "type": 1, + "min_size": 1, + "max_size": 10, + "steps": [ + { + "op": "take", + "item": -1, + "item_name": "default" + }, + { + "op": "chooseleaf_firstn", + "num": 0, + "type": "host" + }, + { + "op": "emit" + } + ] + }, + { + "rule_id": 1, + "rule_name": "test-host", + "ruleset": 1, + "type": 1, + "min_size": 1, + "max_size": 10, + "steps": [ + { + "op": "take", + "item": -1, + "item_name": "default" + }, + { + "op": "chooseleaf_firstn", + "num": 0, + "type": "host" + }, + { + "op": "emit" + } + ] + }, + { + "rule_id": 2, + "rule_name": "test-chassis", + "ruleset": 2, + "type": 1, + "min_size": 1, + "max_size": 10, + "steps": [ + { + "op": "take", + "item": -1, + "item_name": "default" + }, + { + "op": "chooseleaf_firstn", + "num": 0, + "type": "chassis" + }, + { + "op": "emit" + } + ] + }, + { + "rule_id": 3, + "rule_name": "test-rack-hdd", + "ruleset": 3, + "type": 1, + "min_size": 1, + "max_size": 10, + "steps": [ + { + "op": "take", + "item": -2, + "item_name": "default~hdd" + }, + { + "op": "chooseleaf_firstn", + "num": 0, + "type": "rack" + }, + { + "op": "emit" + } + ] + } + ] + """ + + def setUp(self): + super(ListCrushRulesTestCase, self).setUp( + list_crush_rules, ["check_output", "function_fail", "function_get", + "function_set"]) + self.function_get.return_value = "json" # format=json + self.check_output.return_value = self.ceph_osd_crush_rule_dump + + def test_getting_list_crush_rules_text_format(self): + """Test getting list of crush rules in text format.""" + self.function_get.return_value = "text" + list_crush_rules.main() + self.function_get.assert_called_once_with("format") + self.function_set.assert_called_once_with( + {"message": "(0, replicated_rule),(1, test-host)," + "(2, test-chassis),(3, test-rack-hdd)"}) + + def test_getting_list_crush_rules_json_format(self): + """Test getting list of crush rules in json format.""" + crush_rules = self.ceph_osd_crush_rule_dump.decode("UTF-8") + crush_rules = json.loads(crush_rules) + self.function_get.return_value = "json" + list_crush_rules.main() + self.function_get.assert_called_once_with("format") + self.function_set.assert_called_once_with( + {"message": json.dumps(crush_rules)}) + + def test_getting_list_crush_rules_yaml_format(self): + """Test getting list of crush rules in yaml format.""" + crush_rules = self.ceph_osd_crush_rule_dump.decode("UTF-8") + crush_rules = json.loads(crush_rules) + self.function_get.return_value = "yaml" + list_crush_rules.main() + self.function_get.assert_called_once_with("format") + self.function_set.assert_called_once_with( + {"message": yaml.dump(crush_rules)}) diff --git a/ceph-mon/unit_tests/test_action_list_inconsistent.py b/ceph-mon/unit_tests/test_action_list_inconsistent.py new file mode 100644 index 00000000..6a3694cf --- /dev/null +++ b/ceph-mon/unit_tests/test_action_list_inconsistent.py @@ -0,0 +1,89 @@ +# Copyright 2021 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for the list_inconsistent_objs action.""" + +from actions import list_inconsistent_objs as action +import unittest.mock as mock +from test_utils import CharmTestCase + + +class ListInconsistentTestCase(CharmTestCase): + """Run tests for the action.""" + + def setUp(self): + """Init mocks for test cases.""" + super(ListInconsistentTestCase, self).setUp( + action, ["get_health_detail", "get_rados_inconsistent"] + ) + + @mock.patch("actions.list_inconsistent_objs.get_rados_inconsistent") + @mock.patch("actions.list_inconsistent_objs.get_health_detail") + def test_inconsistent_empty( + self, _get_health_detail, _get_rados_inconsistent + ): + """Test that the returned object is empty.""" + _get_health_detail.return_value = "nothing to see here" + _get_rados_inconsistent.return_value = """ + {"epoch": 0, "inconsistents": {1: 1}} + """ + ret = action.get_inconsistent_objs() + _get_health_detail.assert_called_once() + _get_rados_inconsistent.assert_not_called() + self.assertEqual(len(ret), 0) + self.assertEqual('', action.text_format(ret)) + + @mock.patch("actions.list_inconsistent_objs.get_rados_inconsistent") + @mock.patch("actions.list_inconsistent_objs.get_health_detail") + def test_inconsistent_entry( + self, _get_health_detail, _get_rados_inconsistent + ): + """Test that expected PG is in the returned value.""" + pg_id = '3.9' + _get_health_detail.return_value = """ + pg 2.1 is active + pg {} is active+inconsistent+clean + """.format(pg_id) + + _get_rados_inconsistent.return_value = """{ + "epoch": 95, + "inconsistents": [ { "errors": [ "size_mismatch" ], + "object": { "locator": "", "name": "testfile", + "nspace": "", "snap": "head" }, + "shards": [ { "data_digest": "0xa3ba020a", + "errors": [ "size_mismatch" ], + "omap_digest": "0xffffffff", + "osd": 0, "size": 21 }, + { "data_digest": "0xa3ba020a", + "errors": [ "size_mismatch" ], + "omap_digest": "0xffffffff", + "osd": 1, "size": 22 }, + { "data_digest": "0xa3ba020a", + "errors": [], + "omap_digest": "0xffffffff", + "osd": 2, "size": 23 } + ]}] + }""" + + ret = action.get_inconsistent_objs() + _get_health_detail.assert_called_once() + _get_rados_inconsistent.assert_called() + self.assertNotEqual(len(ret), 0) + self.assertIn(pg_id, ret) + + js = action.json.loads(_get_rados_inconsistent.return_value) + obj_name = js["inconsistents"][0]["object"]["name"] + + self.assertIn(obj_name, ret[pg_id]) + self.assertEqual(action.text_format(ret), + '{}: {}'.format(pg_id, obj_name)) diff --git a/ceph-mon/unit_tests/test_action_list_pools.py b/ceph-mon/unit_tests/test_action_list_pools.py new file mode 100644 index 00000000..2491085d --- /dev/null +++ b/ceph-mon/unit_tests/test_action_list_pools.py @@ -0,0 +1,119 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import json + +from actions import list_pools +from test_utils import CharmTestCase + + +class ListPoolsTestCase(CharmTestCase): + ceph_osd_dump = b""" + {"epoch": 19, "fsid": "90e7e074-8263-11eb-9c5c-fa163eee3d70", "created": + "2021-03-11 12:16:36.284078", "modified": "2021-03-18 10:41:23.173546", + "flags": "sortbitwise,recovery_deletes,purged_snapdirs", "crush_version": + 7, "full_ratio": 0.95, "backfillfull_ratio": 0.9, "nearfull_ratio": 0.85, + "cluster_snapshot": "", "pool_max": 2, "max_osd": 3, + "require_min_compat_client": "jewel", "min_compat_client": "jewel", + "require_osd_release": "luminous", "pools": [{"pool": 1, "pool_name": + "test", "flags": 1, "flags_names": "hashpspool", "type": 1, "size": 3, + "min_size": 2, "crush_rule": 0, "object_hash": 2, "pg_num": 8, + "pg_placement_num": 8, "crash_replay_interval": 0, "last_change": "16", + "last_force_op_resend": "0", "last_force_op_resend_preluminous": "0", + "auid": 0, "snap_mode": "selfmanaged", "snap_seq": 0, "snap_epoch": 0, + "pool_snaps": [], "removed_snaps": "[]", "quota_max_bytes": 0, + "quota_max_objects": 0, "tiers": [], "tier_of": -1, "read_tier": -1, + "write_tier": -1, "cache_mode": "none", "target_max_bytes": 0, + "target_max_objects": 0, "cache_target_dirty_ratio_micro": 400000, + "cache_target_dirty_high_ratio_micro": 600000, + "cache_target_full_ratio_micro": 800000, "cache_min_flush_age": 0, + "cache_min_evict_age": 0, "erasure_code_profile": "", "hit_set_params": + {"type": "none"}, "hit_set_period": 0, "hit_set_count": 0, + "use_gmt_hitset": true, "min_read_recency_for_promote": 0, + "min_write_recency_for_promote": 0, "hit_set_grade_decay_rate": 0, + "hit_set_search_last_n": 0, "grade_table": [], "stripe_width": 0, + "expected_num_objects": 0, "fast_read": false, "options": {}, + "application_metadata": {"unknown": {}}}, {"pool": 2, "pool_name": + "test2", "flags": 1, "flags_names": "hashpspool", "type": 1, "size": 3, + "min_size": 2, "crush_rule": 0, "object_hash": 2, "pg_num": 8, + "pg_placement_num": 8, "crash_replay_interval": 0, "last_change": "19", + "last_force_op_resend": "0", "last_force_op_resend_preluminous": "0", + "auid": 0, "snap_mode": "selfmanaged", "snap_seq": 0, "snap_epoch": 0, + "pool_snaps": [], "removed_snaps": "[]", "quota_max_bytes": 0, + "quota_max_objects": 0, "tiers": [], "tier_of": -1, "read_tier": -1, + "write_tier": -1, "cache_mode": "none", "target_max_bytes": 0, + "target_max_objects": 0, "cache_target_dirty_ratio_micro": 400000, + "cache_target_dirty_high_ratio_micro": 600000, + "cache_target_full_ratio_micro": 800000, "cache_min_flush_age": 0, + "cache_min_evict_age": 0, "erasure_code_profile": "", "hit_set_params": + {"type": "none"}, "hit_set_period": 0, "hit_set_count": 0, + "use_gmt_hitset": true, "min_read_recency_for_promote": 0, + "min_write_recency_for_promote": 0, "hit_set_grade_decay_rate": 0, + "hit_set_search_last_n": 0, "grade_table": [], "stripe_width": 0, + "expected_num_objects": 0, "fast_read": false, "options": {}, + "application_metadata": {"unknown": {}}}], "osds": [{"osd": 0, "uuid": + "52755316-e15b-430f-82f6-e98f2800f979", "up": 1, "in": 1, 
"weight": 1.0, + "primary_affinity": 1.0, "last_clean_begin": 0, "last_clean_end": 0, + "up_from": 5, "up_thru": 17, "down_at": 0, "lost_at": 0, "public_addr": + "10.5.0.21:6800/19211", "cluster_addr": "10.5.0.21:6801/19211", + "heartbeat_back_addr": "10.5.0.21:6802/19211", "heartbeat_front_addr": + "10.5.0.21:6803/19211", "state": ["exists", "up"]}, {"osd": 1, "uuid": + "ac221f5d-0e99-468a-b3fd-8b3e47dcd8e3", "up": 1, "in": 1, "weight": 1.0, + "primary_affinity": 1.0, "last_clean_begin": 0, "last_clean_end": 0, + "up_from": 9, "up_thru": 17, "down_at": 0, "lost_at": 0, "public_addr": + "10.5.0.5:6800/19128", "cluster_addr": "10.5.0.5:6801/19128", + "heartbeat_back_addr": "10.5.0.5:6802/19128", "heartbeat_front_addr": + "10.5.0.5:6803/19128", "state": ["exists", "up"]}, {"osd": 2, "uuid": + "1e379cd3-0fb2-4645-a574-5096dc8e6f11", "up": 1, "in": 1, "weight": 1.0, + "primary_affinity": 1.0, "last_clean_begin": 0, "last_clean_end": 0, + "up_from": 13, "up_thru": 17, "down_at": 0, "lost_at": 0, "public_addr": + "10.5.0.51:6800/19302", "cluster_addr": "10.5.0.51:6801/19302", + "heartbeat_back_addr": "10.5.0.51:6802/19302", "heartbeat_front_addr": + "10.5.0.51:6803/19302", "state": ["exists", "up"]}], "osd_xinfo": + [{"osd": 0, "down_stamp": "0.000000", "laggy_probability": 0.0, + "laggy_interval": 0, "features": 4611087853746454523, "old_weight": 0}, + {"osd": 1, "down_stamp": "0.000000", "laggy_probability": 0.0, + "laggy_interval": 0, "features": 4611087853746454523, "old_weight": 0}, + {"osd": 2, "down_stamp": "0.000000", "laggy_probability": 0.0, + "laggy_interval": 0, "features": 4611087853746454523, "old_weight": 0}], + "pg_upmap": [], "pg_upmap_items": [], "pg_temp": [], "primary_temp": [], + "blacklist": {}, "erasure_code_profiles": {"default": {"k": "2", "m": "1", + "plugin": "jerasure", "technique": "reed_sol_van"}}}""" + + def setUp(self): + super(ListPoolsTestCase, self).setUp( + list_pools, ["check_output", "function_fail", "function_get", + "function_set"]) + self.function_get.return_value = "json" # format=json + self.check_output.return_value = self.ceph_osd_dump + + def test_getting_list_pools_without_details(self): + """Test getting list of pools without details.""" + self.function_get.return_value = "text" + self.check_output.return_value = b"1 test,2 test2" + list_pools.main() + self.function_get.assert_called_once_with("format") + self.function_set.assert_called_once_with( + {"message": "1 test,2 test2"}) + + def test_getting_list_pools_with_details(self): + """Test getting list of pools with details.""" + self.pools = None + + def _function_set(message): + self.pools = json.loads(message['message']) + self.function_set.side_effect = _function_set + list_pools.main() + self.function_get.assert_called_once_with("format") + self.assertEqual(self.pools[0]["pool"], 1) + self.assertEqual(self.pools[0]["size"], 3) + self.assertEqual(self.pools[0]["min_size"], 2) diff --git a/ceph-mon/unit_tests/test_action_pg_repair.py b/ceph-mon/unit_tests/test_action_pg_repair.py new file mode 100644 index 00000000..258c6103 --- /dev/null +++ b/ceph-mon/unit_tests/test_action_pg_repair.py @@ -0,0 +1,280 @@ +# Copyright 2022 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for the pg_repair action."""
+
+from actions import pg_repair as action
+import unittest.mock as mock
+from test_utils import CharmTestCase
+import json
+
+
+class PlacementGroupRepairTestCase(CharmTestCase):
+    """Run tests for the action."""
+
+    def setUp(self):
+        """Init mocks for test cases."""
+        super(PlacementGroupRepairTestCase, self).setUp(
+            action,
+            [
+                "function_fail",
+                "function_set",
+                "get_rados_inconsistent_objs",
+                "get_rados_inconsistent_pgs",
+            ],
+        )
+
+    @mock.patch("actions.pg_repair.get_rados_inconsistent_pgs")
+    def test_get_inconsistent_pgs(self, _rados_inc_pgs):
+        """Test collection of all inconsistent placement groups."""
+        _rados_inc_pgs.side_effect = (["1.a", "2.b"], ["2.b", "3.c"], [])
+        ceph_pools = ["testPool0", "testPool1", "testPool2"]
+        result = action.get_inconsistent_pgs(ceph_pools)
+        self.assertEqual(result, {"1.a", "2.b", "3.c"})
+
+    @mock.patch("actions.pg_repair.get_rados_inconsistent_objs")
+    def test_safe_case_detection(self, _rados_inc_objs):
+        """Test that the safe case is detected."""
+        _rados_inc_objs.return_value = rados_inc_obj_output_safe()
+        result = action.is_pg_safe_to_repair("")
+        self.assertTrue(result)
+
+    @mock.patch("actions.pg_repair.get_rados_inconsistent_objs")
+    def test_unsafe_case_detection_extra_errors(self, _rados_inc_objs):
+        """Test that the unsafe case of extra errors is detected."""
+        _rados_inc_objs.return_value = rados_inc_obj_output_extra_errors()
+        result = action.is_pg_safe_to_repair("")
+        self.assertFalse(result)
+
+    @mock.patch("actions.pg_repair.get_rados_inconsistent_objs")
+    def test_unsafe_case_detection_multiple_read_errors(self, _rados_inc_objs):
+        """Test that the unsafe case of multiple read errors is detected."""
+        _rados_inc_objs.return_value = (
+            rados_inc_obj_output_multiple_read_errors()
+        )
+        result = action.is_pg_safe_to_repair("")
+        self.assertFalse(result)
+
+    @mock.patch("actions.pg_repair.get_rados_inconsistent_objs")
+    def test_get_safe_pg_repair(self, _rados_inc_objs):
+        """Test that only placement groups safe to repair are returned."""
+        _rados_inc_objs.side_effect = (
+            rados_inc_obj_output_safe(),
+            rados_inc_obj_output_extra_errors(),
+            rados_inc_obj_output_multiple_read_errors(),
+        )
+        inconsistent_pgs = ("3.1f2", "12.ab3", "16.222")
+        result = action.get_safe_pg_repairs(inconsistent_pgs)
+        self.assertEqual(result, {"3.1f2"})
+
+    @mock.patch("actions.pg_repair.list_pools")
+    def test_pg_repair_no_ceph_pools(self, _list_pools):
+        """Test the action reports when no Ceph pools are found."""
+        _list_pools.return_value = []
+        action.pg_repair()
+        msg = "No Ceph pools found."
+        self.function_set.assert_called_once_with(msg)
+
+    @mock.patch("actions.pg_repair.get_inconsistent_pgs")
+    @mock.patch("actions.pg_repair.list_pools")
+    def test_pg_repair_no_inconsistent_pgs(self, _list_pools, _get_inc_pgs):
+        """Test the action reports when no inconsistent PGs are found."""
+        _list_pools.return_value = ["testPool"]
+        _get_inc_pgs.return_value = []
+        action.pg_repair()
+        msg = "No inconsistent placement groups found."
+ self.function_set.assert_called_once_with(msg) + + @mock.patch("actions.pg_repair.check_output") + @mock.patch("actions.pg_repair.get_rados_inconsistent_objs") + @mock.patch("actions.pg_repair.get_rados_inconsistent_pgs") + @mock.patch("actions.pg_repair.list_pools") + def test_pg_repair_safe_case( + self, _list_pools, _rados_inc_pgs, _rados_inc_objs, _check_output + ): + """Test action succeeds with one read error.""" + _list_pools.return_value = ["testPool"] + _rados_inc_pgs.return_value = {"16.abf", "12.bd4"} + _rados_inc_objs.return_value = rados_inc_obj_output_safe() + _check_output.return_value = b"" + action.pg_repair() + self.function_set.assert_called_once_with( + {"message": "placement groups repaired: ['12.bd4', '16.abf']"} + ) + + @mock.patch("actions.pg_repair.get_rados_inconsistent_objs") + @mock.patch("actions.pg_repair.get_rados_inconsistent_pgs") + @mock.patch("actions.pg_repair.list_pools") + def test_pg_repair_extra_errors( + self, _list_pools, _rados_inc_pgs, _rados_inc_objs + ): + """Test action fails with errors other than read errors.""" + _list_pools.return_value = ["testPool"] + _rados_inc_pgs.return_value = {"16.abf", "12.bd4"} + _rados_inc_objs.return_value = rados_inc_obj_output_extra_errors() + action.pg_repair() + self.function_set.assert_called_once() + + @mock.patch("actions.pg_repair.get_rados_inconsistent_objs") + @mock.patch("actions.pg_repair.get_rados_inconsistent_pgs") + @mock.patch("actions.pg_repair.list_pools") + def test_pg_repair_multiple_read_errors( + self, _list_pools, _rados_inc_pgs, _rados_inc_objs + ): + """Test action fails with multiple read errors.""" + _list_pools.return_value = ["testPool"] + _rados_inc_pgs.return_value = {"16.abf", "12.bd4"} + _rados_inc_objs.return_value = ( + rados_inc_obj_output_multiple_read_errors() + ) + action.pg_repair() + self.function_set.assert_called_once() + + +def rados_inc_obj_output_safe(): + return json.loads("""{ + "epoch": 873, + "inconsistents": [ + { + "object": { + "data": "nothing to see here" + }, + "errors": [], + "union_shard_errors": [ + "read_error" + ], + "selected_object_info": { + "data": "nothing to see here" + }, + "shards": [ + { + "osd": 53, + "primary": true, + "errors": [ + "read_error" + ], + "size": 4046848 + }, + { + "osd": 56, + "primary": false, + "errors": [], + "size": 4046848, + "omap_digest": "0xffffffff", + "data_digest": "0xb86056e7" + }, + { + "osd": 128, + "primary": false, + "errors": [], + "size": 4046848, + "omap_digest": "0xffffffff", + "data_digest": "0xb86056e7" + } + ] + } + ] + }""") + + +def rados_inc_obj_output_extra_errors(): + return json.loads("""{ + "epoch": 873, + "inconsistents": [ + { + "object": { + "data": "nothing to see here" + }, + "errors": [], + "union_shard_errors": [ + "read_error" + ], + "selected_object_info": { + "data": "nothing to see here" + }, + "shards": [ + { + "osd": 53, + "primary": true, + "errors": [ + "read_error", + "some_other_error" + ], + "size": 4046848 + }, + { + "osd": 56, + "primary": false, + "errors": [], + "size": 4046848, + "omap_digest": "0xffffffff", + "data_digest": "0xb86056e7" + }, + { + "osd": 128, + "primary": false, + "errors": [], + "size": 4046848, + "omap_digest": "0xffffffff", + "data_digest": "0xb86056e7" + } + ] + } + ] + }""") + + +def rados_inc_obj_output_multiple_read_errors(): + return json.loads("""{ + "epoch": 873, + "inconsistents": [ + { + "object": { + "data": "nothing to see here" + }, + "errors": [], + "union_shard_errors": [ + "read_error" + ], + "selected_object_info": { + "data": "nothing 
to see here" + }, + "shards": [ + { + "osd": 53, + "primary": true, + "errors": [ + "read_error" + ], + "size": 4046848 + }, + { + "osd": 56, + "primary": false, + "errors": [ + "read_error" + ], + "size": 4046848, + "omap_digest": "0xffffffff", + "data_digest": "0xb86056e7" + }, + { + "osd": 128, + "primary": false, + "errors": [], + "size": 4046848, + "omap_digest": "0xffffffff", + "data_digest": "0xb86056e7" + } + ] + } + ] + }""") diff --git a/ceph-mon/unit_tests/test_action_purge_osd.py b/ceph-mon/unit_tests/test_action_purge_osd.py new file mode 100644 index 00000000..2146f9e8 --- /dev/null +++ b/ceph-mon/unit_tests/test_action_purge_osd.py @@ -0,0 +1,74 @@ +# Copyright 2020 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for purge_osd action.""" + +from actions import purge_osd as action +import unittest.mock as mock +from test_utils import CharmTestCase + + +class PurgeTestCase(CharmTestCase): + """Run tests for action.""" + + def setUp(self): + """Init mocks for test cases.""" + super(PurgeTestCase, self).setUp( + action, ["check_call", "function_get", "function_fail", "open"] + ) + + @mock.patch("actions.purge_osd.get_osd_weight") + @mock.patch("actions.purge_osd.cmp_pkgrevno") + @mock.patch("charmhelpers.contrib.storage.linux.ceph.get_osds") + def test_purge_osd(self, _get_osds, _cmp_pkgrevno, _get_osd_weight): + """Test purge_osd action has correct calls.""" + _get_osds.return_value = [0, 1, 2, 3, 4, 5] + _cmp_pkgrevno.return_value = 1 + _get_osd_weight.return_value = 0 + osd = 4 + action.purge_osd(osd) + cmds = [ + mock.call(["ceph", "osd", "out", "osd.4"]), + mock.call( + ["ceph", "osd", "purge", str(osd), "--yes-i-really-mean-it"] + ), + ] + self.check_call.assert_has_calls(cmds) + + @mock.patch("actions.purge_osd.get_osd_weight") + @mock.patch("actions.purge_osd.cmp_pkgrevno") + @mock.patch("charmhelpers.contrib.storage.linux.ceph.get_osds") + def test_purge_invalid_osd( + self, _get_osds, _cmp_pkgrevno, _get_osd_weight + ): + """Test purge_osd action captures bad OSD string.""" + _get_osds.return_value = [0, 1, 2, 3, 4, 5] + _cmp_pkgrevno.return_value = 1 + _get_osd_weight.return_value = 0 + osd = 99 + action.purge_osd(osd) + self.function_fail.assert_called() + + @mock.patch("actions.purge_osd.get_osd_weight") + @mock.patch("actions.purge_osd.cmp_pkgrevno") + @mock.patch("charmhelpers.contrib.storage.linux.ceph.get_osds") + def test_purge_osd_weight_high( + self, _get_osds, _cmp_pkgrevno, _get_osd_weight + ): + """Test purge_osd action fails when OSD has weight >0.""" + _get_osds.return_value = [0, 1, 2, 3, 4, 5] + _cmp_pkgrevno.return_value = 1 + _get_osd_weight.return_value = 2.5 + osd = "4" + action.purge_osd(osd) + self.function_fail.assert_called() diff --git a/ceph-mon/unit_tests/test_actions_mon.py b/ceph-mon/unit_tests/test_actions_mon.py new file mode 100644 index 00000000..54c44fff --- /dev/null +++ b/ceph-mon/unit_tests/test_actions_mon.py @@ -0,0 +1,90 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 
(the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import json +import sys +import unittest.mock as mock + +from test_utils import CharmTestCase + +# python-apt is not installed as part of test-requirements but is imported by +# some charmhelpers modules so create a fake import. +mock_apt = mock.MagicMock() +sys.modules['apt'] = mock_apt +mock_apt.apt_pkg = mock.MagicMock() + +# mocking for rados +mock_rados = mock.MagicMock() +sys.modules['rados'] = mock_rados +mock_rados.connect = mock.MagicMock() + +# mocking for psutil +mock_psutil = mock.MagicMock() +sys.modules['psutil'] = mock_psutil +mock_psutil.disks = mock.MagicMock() + +with mock.patch('charmhelpers.contrib.hardening.harden.harden') as mock_dec: + mock_dec.side_effect = (lambda *dargs, **dkwargs: lambda f: + lambda *args, **kwargs: f(*args, **kwargs)) + # import health actions as actions + import ceph_ops as actions + + +class OpsTestCase(CharmTestCase): + + def setUp(self): + super(OpsTestCase, self).setUp( + actions, ["check_output", + "action_get", + "action_fail", + "open"]) + + @mock.patch('socket.gethostname') + def test_get_quorum_status(self, mock_hostname): + mock_hostname.return_value = 'mockhost' + cmd_out = ( + '{"election_epoch":4,"quorum":[0,1,2],"quorum_names":["juju-18410c' + '-zaza-b7061340ed19-1","juju-18410c-zaza-b7061340ed19-0","juju-184' + '10c-zaza-b7061340ed19-2"],"quorum_leader_name":"juju-18410c-zaza-' + 'b7061340ed19-1","quorum_age":97785,"monmap":{"epoch":1,"fsid":"4f' + '9dd22a-1b71-11ec-a02a-fa163ee765d3","modified":"2021-09-22 06:51:' + '10.975225","created":"2021-09-22 06:51:10.975225","min_mon_releas' + 'e":14,"min_mon_release_name":"nautilus","features":{"persistent":' + '["kraken","luminous","mimic","osdmap-prune","nautilus"],"optional' + '":[]},"mons":[{"rank":0,"name":"juju-18410c-zaza-b7061340ed19-1",' + '"public_addrs":{"addrvec":[{"type":"v2","addr":"10.5.0.122:3300",' + '"nonce":0},{"type":"v1","addr":"10.5.0.122:6789","nonce":0}]},"ad' + 'dr":"10.5.0.122:6789/0","public_addr":"10.5.0.122:6789/0"},{"rank' + '":1,"name":"juju-18410c-zaza-b7061340ed19-0","public_addrs":{"add' + 'rvec":[{"type":"v2","addr":"10.5.2.239:3300","nonce":0},{"type":"' + 'v1","addr":"10.5.2.239:6789","nonce":0}]},"addr":"10.5.2.239:6789' + '/0","public_addr":"10.5.2.239:6789/0"},{"rank":2,"name":"juju-184' + '10c-zaza-b7061340ed19-2","public_addrs":{"addrvec":[{"type":"v2",' + '"addr":"10.5.3.201:3300","nonce":0},{"type":"v1","addr":"10.5.3.2' + '01:6789","nonce":0}]},"addr":"10.5.3.201:6789/0","public_addr":"1' + '0.5.3.201:6789/0"}]}}' + ) + self.check_output.return_value = cmd_out.encode() + + result = actions.get_quorum_status() + self.assertDictEqual(result, { + "election-epoch": 4, + "quorum-age": 97785, + "quorum-names": "juju-18410c-zaza-b7061340ed19-1, " + "juju-18410c-zaza-b7061340ed19-0, " + "juju-18410c-zaza-b7061340ed19-2", + "quorum-leader-name": "juju-18410c-zaza-b7061340ed19-1", + }) + + result = actions.get_quorum_status(format_type="json") + self.assertDictEqual(json.loads(result["message"]), + json.loads(cmd_out)) diff --git 
a/ceph-mon/unit_tests/test_ceph_actions.py b/ceph-mon/unit_tests/test_ceph_actions.py
new file mode 100644
index 00000000..6a4b77db
--- /dev/null
+++ b/ceph-mon/unit_tests/test_ceph_actions.py
@@ -0,0 +1,405 @@
+# Copyright 2019 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest.mock as mock
+from ops.testing import Harness
+import subprocess
+
+import test_utils
+import ops_actions.copy_pool as copy_pool
+import ops_actions.list_entities as list_entities
+import ops_actions.rotate_key as rotate_key
+
+with mock.patch('charmhelpers.contrib.hardening.harden.harden') as mock_dec:
+    mock_dec.side_effect = (lambda *dargs, **dkwargs: lambda f:
+                            lambda *args, **kwargs: f(*args, **kwargs))
+    # src.charm imports ceph_hooks, so we need to workaround the inclusion
+    # of the 'harden' decorator.
+    from src.charm import CephMonCharm
+
+
+class CopyPoolTestCase(test_utils.CharmTestCase):
+
+    def setUp(self):
+        self.harness = Harness(CephMonCharm)
+
+    @mock.patch.object(copy_pool.subprocess, 'check_call')
+    def test_copy_pool(self, mock_check_call):
+        _action_data = {
+            'source': 'source-pool',
+            'target': 'target-pool',
+        }
+        self.harness.begin()
+        self.harness.charm.on_copy_pool_action(
+            test_utils.MockActionEvent(_action_data))
+        mock_check_call.assert_called_with([
+            'rados', 'cppool',
+            'source-pool', 'target-pool',
+        ])
+
+    @mock.patch.object(copy_pool.subprocess, 'check_call')
+    def test_copy_pool_failed(self, mock_check_call):
+        _action_data = {
+            'source': 'source-pool',
+            'target': 'target-pool',
+        }
+        self.harness.begin()
+        mock_check_call.side_effect = subprocess.CalledProcessError(1, 'rados')
+        event = test_utils.MockActionEvent(_action_data)
+        self.harness.charm.on_copy_pool_action(event)
+        mock_check_call.assert_called_with([
+            'rados', 'cppool',
+            'source-pool', 'target-pool',
+        ])
+        event.fail.assert_called_once_with(mock.ANY)
+
+
+class CreateCrushRuleTestCase(test_utils.CharmTestCase):
+    """Run tests for action."""
+
+    def setUp(self):
+        self.harness = Harness(CephMonCharm)
+        self.addCleanup(self.harness.cleanup)
+
+    @mock.patch("ops_actions.create_crush_rule.subprocess.check_call")
+    def test_create_crush_rule(self, mock_check_call):
+        """Test create_crush_rule action has correct calls."""
+        self.harness.begin()
+        self.harness.charm.on_create_crush_rule_action(
+            test_utils.MockActionEvent({
+                'name': 'replicated_nvme',
+                'failure-domain': 'host',
+                'device-class': 'nvme',
+            }))
+        expected = [
+            'ceph', 'osd', 'crush', 'rule',
+            'create-replicated',
+            'replicated_nvme',
+            'default',
+            'host',
+            'nvme',
+        ]
+        mock_check_call.assert_called_once_with(expected)
+
+    @mock.patch("ops_actions.create_crush_rule.subprocess.check_call")
+    def test_create_crush_rule_no_class(self, mock_check_call):
+        """Test create_crush_rule action without a device class."""
+        self.harness.begin()
+        self.harness.charm.on_create_crush_rule_action(
+            test_utils.MockActionEvent({
+                'name': 'replicated_whoknows',
+                'failure-domain': 'disk',
+            }))
+        expected = [
+            'ceph', 'osd', 'crush', 'rule',
+            'create-replicated',
+            'replicated_whoknows',
+            'default',
+            'disk'
+        ]
+        mock_check_call.assert_called_once_with(expected)
+
+    @mock.patch("ops_actions.create_crush_rule.subprocess.check_call")
+    def test_create_crush_rule_failed(self, mock_check_call):
+        """Test create_crush_rule action failure handling."""
+        self.harness.begin()
+        mock_check_call.side_effect = subprocess.CalledProcessError(1, 'test')
+        event = test_utils.MockActionEvent({
+            'name': 'replicated_nvme',
+            'failure-domain': 'host',
+            'device-class': 'nvme',
+        })
+        self.harness.charm.on_create_crush_rule_action(event)
+        expected = [
+            'ceph', 'osd', 'crush', 'rule',
+            'create-replicated',
+            'replicated_nvme',
+            'default',
+            'host',
+            'nvme',
+        ]
+        mock_check_call.assert_called_once_with(expected)
+        event.fail.assert_called_once_with(
+            'rule creation failed due to exception')
+
+
+class CreateErasureProfileTestCase(test_utils.CharmTestCase):
+    """Run tests for action."""
+
+    def setUp(self):
+        self.harness = Harness(CephMonCharm)
+        self.addCleanup(self.harness.cleanup)
+
+    @mock.patch('ops_actions.create_erasure_profile.create_erasure_profile')
+    def test_create_jerasure_profile(self, mock_create_erasure_profile):
+        self.harness.begin()
+        self.harness.charm.on_create_erasure_profile_action(
+            test_utils.MockActionEvent({
+                'name': 'erasure',
+                'plugin': 'jerasure',
+                'failure-domain': 'disk',
+                'k': 6,
+                'm': 3,
+            }))
+        mock_create_erasure_profile.assert_called_once_with(
+            service='admin', erasure_plugin_name='jerasure',
+            profile_name='erasure', data_chunks=None,
+            coding_chunks=None, failure_domain='disk', device_class=None
+        )
+
+    @mock.patch('ops_actions.create_erasure_profile.create_erasure_profile')
+    def test_create_isa_profile(self, mock_create_erasure_profile):
+        self.harness.begin()
+        self.harness.charm.on_create_erasure_profile_action(
+            test_utils.MockActionEvent({
+                'name': 'erasure',
+                'plugin': 'isa',
+                'failure-domain': 'disk',
+                'k': 6,
+                'm': 3,
+            }))
+        mock_create_erasure_profile.assert_called_once_with(
+            service='admin', erasure_plugin_name='isa',
+            profile_name='erasure', data_chunks=None,
+            coding_chunks=None, failure_domain='disk', device_class=None
+        )
+
+    @mock.patch('ops_actions.create_erasure_profile.create_erasure_profile')
+    def test_create_lrc_profile(self, mock_create_erasure_profile):
+        self.harness.begin()
+        self.harness.charm.on_create_erasure_profile_action(
+            test_utils.MockActionEvent({
+                'name': 'erasure',
+                'plugin': 'lrc',
+                'failure-domain': 'disk',
+                'k': 6,
+                'm': 3,
+                'locality-chunks': 2,
+                'crush-locality': 'host',
+            }))
+        mock_create_erasure_profile.assert_called_once_with(
+            service='admin', erasure_plugin_name='lrc',
+            profile_name='erasure', data_chunks=None,
+            coding_chunks=None, locality=2, crush_locality='host',
+            failure_domain='disk', device_class=None
+        )
+
+    @mock.patch('ops_actions.create_erasure_profile.create_erasure_profile')
+    def test_create_shec_profile(self, mock_create_erasure_profile):
+        self.harness.begin()
+        self.harness.charm.on_create_erasure_profile_action(
+            test_utils.MockActionEvent({
+                'name': 'erasure',
+                'plugin': 'shec',
+                'failure-domain': 'disk',
+                'k': 6,
+                'm': 3,
+                'durability-estimator': 2
+            }))
+        mock_create_erasure_profile.assert_called_once_with(
+            service='admin', erasure_plugin_name='shec',
+            profile_name='erasure', data_chunks=None,
+            coding_chunks=None, durability_estimator=2,
+            failure_domain='disk', device_class=None
+        )
+
+    @mock.patch('ops_actions.create_erasure_profile.create_erasure_profile')
+    def test_create_clay_profile(self,
mock_create_erasure_profile): + self.harness.begin() + self.harness.charm.on_create_erasure_profile_action( + test_utils.MockActionEvent({ + 'name': 'erasure', + 'plugin': 'clay', + 'failure-domain': 'disk', + 'k': 6, + 'm': 3, + 'helper-chunks': 2, + 'scalar-mds': 'jerasure' + })) + mock_create_erasure_profile.assert_called_once_with( + service='admin', erasure_plugin_name='clay', + profile_name='erasure', data_chunks=None, + coding_chunks=None, helper_chunks=2, + scalar_mds='jerasure', failure_domain='disk', device_class=None + ) + + +class GetHealthTestCase(test_utils.CharmTestCase): + """Run tests for action.""" + + def setUp(self): + self.harness = Harness(CephMonCharm) + self.harness.begin() + self.addCleanup(self.harness.cleanup) + + @mock.patch('ops_actions.get_health.check_output') + def test_get_health_action(self, mock_check_output): + mock_check_output.return_value = b'yay' + event = test_utils.MockActionEvent({}) + self.harness.charm.on_get_health_action(event) + event.set_results.assert_called_once_with(({'message': 'yay'})) + + @mock.patch('ops_actions.get_health.check_output') + def test_get_health_action_error(self, mock_check_output): + mock_check_output.side_effect = subprocess.CalledProcessError( + 1, 'test') + event = test_utils.MockActionEvent({}) + self.harness.charm.on_get_health_action(event) + event.fail.assert_called_once_with( + 'ceph health failed with message: ' + "Command 'test' returned non-zero exit status 1.") + + +class GetErasureProfile(test_utils.CharmTestCase): + """Run tests for action.""" + + def setUp(self): + self.harness = Harness(CephMonCharm) + self.harness.begin() + self.addCleanup(self.harness.cleanup) + + @mock.patch('ops_actions.get_erasure_profile.ceph') + def test_get_erasure_profile_ok(self, mock_ceph): + mock_ceph.get_erasure_profile.return_value = "foo-erasure-params" + event = test_utils.MockActionEvent({"name": "foo-profile"}) + self.harness.charm.on_get_erasure_profile_action(event) + event.set_results.assert_called_once_with(( + {"message": "foo-erasure-params"} + )) + + @mock.patch('ops_actions.get_erasure_profile.ceph') + def test_get_erasure_profile_notfound(self, mock_ceph): + mock_ceph.get_erasure_profile.return_value = None + event = test_utils.MockActionEvent({"name": "notfound-profile"}) + self.harness.charm.on_get_erasure_profile_action(event) + event.set_results.assert_called_once_with(( + {"message": None} + )) + + +class ListEntities(test_utils.CharmTestCase): + """Run tests for action.""" + + def setUp(self): + self.harness = Harness(CephMonCharm) + self.harness.begin() + self.addCleanup(self.harness.cleanup) + + @mock.patch.object(list_entities.subprocess, 'check_output') + def test_list_entities(self, check_output): + check_output.return_value = b""" +client.admin + key: AQAOwwFmTR3TNxAAIsdYgastd0uKntPtEnoWug== +mgr.0 + key: AQAVwwFm/CmaJhAAdacns6DdFe4xZE1iwj8izg== +""" + event = test_utils.MockActionEvent({}) + self.harness.charm.on_list_entities_action(event) + event.set_results.assert_called_once_with( + {"message": "client.admin\nmgr.0"} + ) + + +# Needs to be outside as the decorator wouldn't find it otherwise. 
+MGR_KEYRING_FILE = """ +[mgr.host-1] + key = old-key +""" + +OSD_DUMP = b""" +{ + "osds": [ + { + "osd": 0, + "public_addr": "10.5.2.40:6801/13869" + }, + { + "osd": 1, + "public_addr": "10.5.0.160:6801/9017" + } + ] +} +""" + + +class RotateKey(test_utils.CharmTestCase): + """Run tests for action.""" + + def setUp(self): + self.harness = Harness(CephMonCharm) + self.harness.begin() + self.addCleanup(self.harness.cleanup) + + def test_invalid_entity(self): + event = test_utils.MockActionEvent({'entity': '???'}) + self.harness.charm.on_rotate_key_action(event) + event.fail.assert_called_once() + + def test_invalid_mgr(self): + event = test_utils.MockActionEvent({'entity': 'mgr-123'}) + self.harness.charm.on_rotate_key_action(event) + event.fail.assert_called_once() + + @mock.patch('builtins.open', new_callable=mock.mock_open, + read_data=MGR_KEYRING_FILE) + @mock.patch.object(rotate_key.systemd, 'service_restart') + @mock.patch.object(rotate_key.subprocess, 'check_output') + @mock.patch.object(rotate_key.os, 'listdir') + def test_rotate_mgr_key(self, listdir, check_output, service_restart, + _open): + listdir.return_value = ['ceph-host-1'] + check_output.return_value = b'[{"pending_key": "new-key"}]' + + event = test_utils.MockActionEvent({'entity': 'mgr.host-1'}) + rotate_key.rotate_key(event) + + event.set_results.assert_called_with({'message': 'success'}) + listdir.assert_called_once_with('/var/lib/ceph/mgr') + check_output.assert_called_once() + service_restart.assert_called_once_with('ceph-mgr@host-1.service') + + calls = any(x for x in _open.mock_calls + if any(p is not None and 'new-key' in p for p in x.args)) + self.assertTrue(calls) + + @mock.patch.object(rotate_key, '_create_key') + @mock.patch.object(rotate_key.subprocess, 'check_output') + def test_rotate_osd_key(self, check_output, create_key): + def _check_output_inner(args): + if args == ['sudo', 'ceph', 'osd', 'dump', '--format=json']: + return OSD_DUMP + elif args[5] == 'ceph-osd/0': + return b'10.5.2.40' + else: + return b'10.5.0.160' + + check_output.side_effect = _check_output_inner + create_key.return_value = 'some-key' + + unit0 = mock.MagicMock() + unit0.name = 'ceph-osd/0' + unit1 = mock.MagicMock() + unit1.name = 'ceph-osd/1' + + relations = mock.MagicMock() + relations.units = [unit0, unit1] + relations.data = {'ceph-mon/0': {}} + + model = mock.MagicMock() + model.relations = {'osd': [relations]} + model.unit = 'ceph-mon/0' + + event = test_utils.MockActionEvent({'entity': 'osd.1'}) + rotate_key.rotate_key(event, model) + self.assertEqual(relations.data['ceph-mon/0'], + {'pending_key': '{"1": "some-key"}'}) diff --git a/ceph-mon/unit_tests/test_ceph_client_interface.py b/ceph-mon/unit_tests/test_ceph_client_interface.py new file mode 100644 index 00000000..41b2b4ec --- /dev/null +++ b/ceph-mon/unit_tests/test_ceph_client_interface.py @@ -0,0 +1,156 @@ +# Copyright 2022 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for reweight_osd action.""" + +# import json +import unittest.mock as mock +from test_utils import CharmTestCase +from ops.testing import Harness +from manage_test_relations import ( + add_ceph_client_relation, + add_ceph_mds_relation, +) + +with mock.patch('charmhelpers.contrib.hardening.harden.harden') as mock_dec: + mock_dec.side_effect = (lambda *dargs, **dkwargs: lambda f: + lambda *args, **kwargs: f(*args, **kwargs)) + # src.charm imports ceph_hooks, so we need to workaround the inclusion + # of the 'harden' decorator. + from src.charm import CephMonCharm + + +class CephClientTestCase(CharmTestCase): + """Run tests for action.""" + + def setUp(self): + self.harness = Harness(CephMonCharm) + self.addCleanup(self.harness.cleanup) + + @mock.patch("src.charm.ceph_client.ceph.get_named_key") + @mock.patch("src.charm.ceph_client.get_rbd_features") + @mock.patch("src.charm.ceph_client.get_public_addr") + @mock.patch.object(CephMonCharm, "ready_for_service") + @mock.patch("src.charm.ceph_client.send_osd_settings") + def test_client_relation( + self, _send_osd_settings, mock_ready_for_service, + mock_get_public_addr, mock_get_rbd_features, mock_get_named_key): + mock_get_public_addr.return_value = '127.0.0.1' + mock_ready_for_service.return_value = True + mock_get_rbd_features.return_value = 42 + mock_get_named_key.return_value = 'test key' + self.harness.begin() + self.harness.set_leader() + rel_id = add_ceph_client_relation(self.harness) + unit_rel_data = self.harness.get_relation_data( + rel_id, + 'ceph-mon/0') + self.assertEqual( + unit_rel_data, + { + 'auth': 'cephx', + 'ceph-public-address': '127.0.0.1', + 'key': 'test key', + 'rbd-features': '42', + }) + + @mock.patch("src.charm.ceph_client.ceph.is_leader") + @mock.patch.object(CephMonCharm, "process_broker_request") + @mock.patch("src.charm.ceph_client.ceph.get_named_key") + @mock.patch("src.charm.ceph_client.get_rbd_features") + @mock.patch("src.charm.ceph_client.get_public_addr") + @mock.patch.object(CephMonCharm, "ready_for_service") + @mock.patch("src.charm.ceph_client.send_osd_settings") + def test_client_relation_broker( + self, _send_osd_settings, mock_ready_for_service, + mock_get_public_addr, mock_get_rbd_features, mock_get_named_key, + mock_process_broker_request, mock_is_leader): + mock_get_public_addr.return_value = '127.0.0.1' + mock_ready_for_service.return_value = True + mock_get_rbd_features.return_value = 42 + mock_get_named_key.return_value = 'test key' + mock_process_broker_request.return_value = 'AOK' + mock_is_leader.return_value = True + self.harness.begin() + self.harness.set_leader() + rel_id = add_ceph_client_relation(self.harness) + self.harness.update_relation_data( + rel_id, + 'glance/0', + {'broker_req': '{"request-id": "req"}'}) + mock_process_broker_request.assert_called_once_with( + 'req', '{"request-id": "req"}' + ) + unit_rel_data = self.harness.get_relation_data( + rel_id, + 'ceph-mon/0') + self.assertEqual( + unit_rel_data, + { + 'auth': 'cephx', + 'ceph-public-address': '127.0.0.1', + 'key': 'test key', + 'rbd-features': '42', + 'broker-rsp-glance-0': 'AOK', + 'broker_rsp': 'AOK' + }) + mock_process_broker_request.reset_mock() + self.harness.update_relation_data( + rel_id, + 'glance/0', + {'broker_req': '{"request-id": "req"}'}) + mock_process_broker_request.assert_not_called() + + @mock.patch("src.charm.ceph_client.ceph.get_named_key") + @mock.patch("src.charm.ceph_client.get_rbd_features") + @mock.patch("src.charm.ceph_client.get_public_addr") + @mock.patch.object(CephMonCharm, 
"ready_for_service") + @mock.patch("src.charm.ceph_client.send_osd_settings") + @mock.patch("src.charm.ceph_mds.leader_get", return_value="testfsid") + @mock.patch("src.charm.ceph_mds.ceph") + def test_notify_clients( + self, _ceph, _leader, _send_osd_settings, mock_ready_for_service, + mock_get_public_addr, mock_get_rbd_features, mock_get_named_key): + mock_get_public_addr.return_value = '127.0.0.1' + mock_ready_for_service.return_value = True + mock_get_rbd_features.return_value = None + mock_get_named_key.return_value = 'test key' + self.harness.begin() + self.harness.set_leader() + rel_id = add_ceph_client_relation(self.harness) + add_ceph_mds_relation(self.harness) + + unit_rel_data = self.harness.get_relation_data( + rel_id, + 'ceph-mon/0') + self.assertEqual( + unit_rel_data, + { + 'auth': 'cephx', + 'ceph-public-address': '127.0.0.1', + 'key': 'test key', + }) + mock_get_rbd_features.return_value = 42 + self.harness.charm.on.notify_clients.emit() + unit_rel_data = self.harness.get_relation_data( + rel_id, + 'ceph-mon/0') + self.assertEqual( + unit_rel_data, + { + 'auth': 'cephx', + 'ceph-public-address': '127.0.0.1', + 'key': 'test key', + 'rbd-features': '42', + }) + self.assertEqual(self.harness.charm.mds._mds_name, "ceph-fs") diff --git a/ceph-mon/unit_tests/test_ceph_hooks.py b/ceph-mon/unit_tests/test_ceph_hooks.py new file mode 100644 index 00000000..5426d00c --- /dev/null +++ b/ceph-mon/unit_tests/test_ceph_hooks.py @@ -0,0 +1,1049 @@ +import copy +import json +import unittest +import sys + +from unittest.mock import patch, MagicMock, DEFAULT, call + +# python-apt is not installed as part of test-requirements but is imported by +# some charmhelpers modules so create a fake import. +mock_apt = MagicMock() +sys.modules['apt'] = mock_apt +mock_apt.apt_pkg = MagicMock() + +import charmhelpers.contrib.storage.linux.ceph as ceph +import test_utils + +with patch('charmhelpers.contrib.hardening.harden.harden') as mock_dec: + mock_dec.side_effect = (lambda *dargs, **dkwargs: lambda f: + lambda *args, **kwargs: f(*args, **kwargs)) + import ceph_hooks + import utils + +TO_PATCH = [ + 'config', + 'is_leader', + 'is_relation_made', + 'leader_get', + 'leader_set', + 'log', + 'mon_relation', + 'relation_ids', + 'related_units', + 'relation_get', + 'relations_of_type', + 'status_set', + 'try_disable_insecure_reclaim', +] + +CHARM_CONFIG = {'config-flags': '', + 'auth-supported': False, + 'fsid': '1234', + 'loglevel': 1, + 'use-syslog': True, + 'osd-journal-size': 1024, + 'use-direct-io': True, + 'osd-format': 'ext4', + 'monitor-hosts': '', + 'monitor-data-available-warning': 30, + 'monitor-data-available-critical': 5, + 'prefer-ipv6': False, + 'default-rbd-features': None, + 'nagios_degraded_thresh': '1', + 'nagios_misplaced_thresh': '10', + 'nagios_recovery_rate': '1', + 'nagios_raise_nodeepscrub': True, + 'nagios_additional_checks': "", + 'nagios_additional_checks_critical': False, + 'nagios_rgw_zones': "", + 'nagios_rgw_additional_checks': "", + 'nagios_check_num_osds': False, + 'disable-pg-max-object-skew': False, + 'rbd-stats-pools': 'foo'} + + +class CephHooksTestCase(test_utils.CharmTestCase): + def setUp(self): + super(CephHooksTestCase, self).setUp(ceph_hooks, TO_PATCH) + self.config.side_effect = self.test_config.get + + @patch.object(ceph_hooks, 'get_rbd_features', return_value=None) + @patch.object(ceph_hooks, 'get_public_addr', lambda *args: "10.0.0.1") + @patch.object(ceph_hooks, 'get_cluster_addr', lambda *args: "10.1.0.1") + @patch.object(ceph_hooks, 'cmp_pkgrevno', lambda 
*args: 1) + @patch.object(ceph_hooks, 'get_mon_hosts', lambda *args: ['10.0.0.1', + '10.0.0.2']) + @patch.object(ceph_hooks, 'get_networks', lambda *args: "") + @patch.object(ceph_hooks, 'leader_get', lambda *args: '1234') + @patch.object(ceph, 'config') + @patch.object(ceph_hooks, 'config') + def test_get_ceph_context(self, mock_config, mock_config2, + _get_rbd_features): + config = copy.deepcopy(CHARM_CONFIG) + mock_config.side_effect = lambda key: config[key] + mock_config2.side_effect = lambda key: config[key] + ctxt = ceph_hooks.get_ceph_context() + expected = {'auth_supported': 'cephx', + 'ceph_cluster_network': '', + 'ceph_public_network': '', + 'cluster_addr': '10.1.0.1', + 'dio': 'true', + 'fsid': '1234', + 'loglevel': 1, + 'mon_hosts': '10.0.0.1 10.0.0.2', + 'mon_data_avail_warn': 30, + 'mon_data_avail_crit': 5, + 'old_auth': False, + 'public_addr': '10.0.0.1', + 'use_syslog': 'true'} + self.assertEqual(ctxt, expected) + + @patch.object(ceph_hooks, 'get_rbd_features', return_value=1) + @patch.object(ceph_hooks, 'get_public_addr', lambda *args: "10.0.0.1") + @patch.object(ceph_hooks, 'get_cluster_addr', lambda *args: "10.1.0.1") + @patch.object(ceph_hooks, 'cmp_pkgrevno', + lambda pkg, ver: -1 if ver == '12.1.0' else 1) + @patch.object(ceph_hooks, 'get_mon_hosts', lambda *args: ['10.0.0.1', + '10.0.0.2']) + @patch.object(ceph_hooks, 'get_networks', lambda *args: "") + @patch.object(ceph_hooks, 'leader_get', lambda *args: '1234') + @patch.object(ceph, 'config') + @patch.object(ceph_hooks, 'config') + def test_get_ceph_context_rbd_features(self, mock_config, mock_config2, + _get_rbd_features): + config = copy.deepcopy(CHARM_CONFIG) + mock_config.side_effect = lambda key: config[key] + mock_config2.side_effect = lambda key: config[key] + ctxt = ceph_hooks.get_ceph_context() + expected = {'auth_supported': 'cephx', + 'ceph_cluster_network': '', + 'ceph_public_network': '', + 'cluster_addr': '10.1.0.1', + 'dio': 'true', + 'fsid': '1234', + 'loglevel': 1, + 'mon_hosts': '10.0.0.1 10.0.0.2', + 'mon_data_avail_warn': 30, + 'mon_data_avail_crit': 5, + 'old_auth': False, + 'public_addr': '10.0.0.1', + 'use_syslog': 'true', + 'rbd_features': 1} + self.assertEqual(ctxt, expected) + + @patch.object(ceph_hooks, 'get_rbd_features', return_value=None) + @patch.object(ceph_hooks, 'get_public_addr', lambda *args: "10.0.0.1") + @patch.object(ceph_hooks, 'get_cluster_addr', lambda *args: "10.1.0.1") + @patch.object(ceph_hooks, 'cmp_pkgrevno', lambda *args: 1) + @patch.object(ceph_hooks, 'get_mon_hosts', lambda *args: ['10.0.0.1', + '10.0.0.2']) + @patch.object(ceph_hooks, 'get_networks', lambda *args: "") + @patch.object(ceph_hooks, 'leader_get', lambda *args: '1234') + @patch.object(ceph, 'config') + @patch.object(ceph_hooks, 'config') + def test_get_ceph_context_w_config_flags(self, mock_config, mock_config2, + _get_rbd_features): + config = copy.deepcopy(CHARM_CONFIG) + config['config-flags'] = '{"mon": {"mon sync max retries": 10}}' + mock_config.side_effect = lambda key: config[key] + mock_config2.side_effect = lambda key: config[key] + ctxt = ceph_hooks.get_ceph_context() + expected = {'auth_supported': 'cephx', + 'ceph_cluster_network': '', + 'ceph_public_network': '', + 'cluster_addr': '10.1.0.1', + 'dio': 'true', + 'fsid': '1234', + 'loglevel': 1, + 'mon_hosts': '10.0.0.1 10.0.0.2', + 'mon_data_avail_warn': 30, + 'mon_data_avail_crit': 5, + 'old_auth': False, + 'mon': {'mon sync max retries': 10}, + 'public_addr': '10.0.0.1', + 'use_syslog': 'true'} + self.assertEqual(ctxt, expected) + + 
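The decorator stacks on these tests mix two `patch` styles: supplying `new` directly (here a lambda) swaps the attribute in without adding a test argument, whereas the bare form injects a fresh `MagicMock`, with injected arguments arriving bottom-up. A self-contained illustration of the difference:

import unittest.mock as mock


class Cfg:
    def addr(self):
        return 'real'

    def ver(self):
        return 0


@mock.patch.object(Cfg, 'addr', lambda self: '10.0.0.1')  # no argument injected
@mock.patch.object(Cfg, 'ver')                            # injected as mock_ver
def check(mock_ver):
    mock_ver.return_value = 17
    assert Cfg().addr() == '10.0.0.1'
    assert Cfg().ver() == 17


check()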
@patch.object(ceph_hooks, 'get_rbd_features', return_value=None) + @patch.object(ceph_hooks, 'get_public_addr', lambda *args: "10.0.0.1") + @patch.object(ceph_hooks, 'get_cluster_addr', lambda *args: "10.1.0.1") + @patch.object(ceph_hooks, 'cmp_pkgrevno', lambda *args: 1) + @patch.object(ceph_hooks, 'get_mon_hosts', lambda *args: ['10.0.0.1', + '10.0.0.2']) + @patch.object(ceph_hooks, 'get_networks', lambda *args: "") + @patch.object(ceph_hooks, 'leader_get', lambda *args: '1234') + @patch.object(ceph, 'config') + @patch.object(ceph_hooks, 'config') + def test_get_ceph_context_w_config_flags_invalid(self, mock_config, + mock_config2, + _get_rbd_features): + config = copy.deepcopy(CHARM_CONFIG) + config['config-flags'] = ('{"mon": {"mon sync max retries": 10},' + '"foo": "bar"}') + mock_config.side_effect = lambda key: config[key] + mock_config2.side_effect = lambda key: config[key] + ctxt = ceph_hooks.get_ceph_context() + expected = {'auth_supported': 'cephx', + 'ceph_cluster_network': '', + 'ceph_public_network': '', + 'cluster_addr': '10.1.0.1', + 'dio': 'true', + 'fsid': '1234', + 'loglevel': 1, + 'mon_hosts': '10.0.0.1 10.0.0.2', + 'mon_data_avail_warn': 30, + 'mon_data_avail_crit': 5, + 'old_auth': False, + 'mon': {'mon sync max retries': 10}, + 'public_addr': '10.0.0.1', + 'use_syslog': 'true'} + self.assertEqual(ctxt, expected) + + @patch.object(ceph_hooks, 'get_rbd_features', return_value=None) + # Provide multiple local addresses, + # we'll check that the right (second) one is used + @patch.object(ceph_hooks, 'get_ipv6_addr', + lambda **kwargs: ["2a01:348:2f4:0:bad:bad:bad:bad", + "2a01:348:2f4:0:685e:5748:ae62:209f"]) + @patch.object(ceph_hooks, 'get_public_addr', + lambda *args: "2a01:348:2f4:0:685e:5748:ae62:209f") + @patch.object(ceph_hooks, 'get_cluster_addr', + lambda *args: "2a01:348:2f4:0:685e:5748:ae62:209f") + @patch.object(ceph_hooks, 'cmp_pkgrevno', lambda *args: 1) + @patch.object(ceph_hooks, 'get_mon_hosts', + lambda *args: ['2a01:348:2f4:0:685e:5748:ae62:209f', + '2a01:348:2f4:0:685e:5748:ae62:20a0']) + @patch.object(ceph_hooks, 'get_networks', lambda *args: "") + @patch.object(ceph_hooks, 'leader_get', lambda *args: '1234') + @patch.object(ceph, 'config') + @patch.object(ceph_hooks, 'config') + def test_get_ceph_context_prefer_ipv6(self, mock_config, mock_config2, + _get_rbd_features): + config = copy.deepcopy(CHARM_CONFIG) + config['prefer-ipv6'] = True + mock_config.side_effect = lambda key: config[key] + mock_config2.side_effect = lambda key: config[key] + ctxt = ceph_hooks.get_ceph_context() + expected = {'auth_supported': 'cephx', + 'ceph_cluster_network': '', + 'ceph_public_network': '', + 'cluster_addr': '2a01:348:2f4:0:685e:5748:ae62:209f', + 'dio': 'true', + 'fsid': '1234', + 'loglevel': 1, + 'mon_hosts': '2a01:348:2f4:0:685e:5748:ae62:209f ' + '2a01:348:2f4:0:685e:5748:ae62:20a0', + 'mon_data_avail_warn': 30, + 'mon_data_avail_crit': 5, + 'old_auth': False, + 'public_addr': '2a01:348:2f4:0:685e:5748:ae62:209f', + 'use_syslog': 'true', + 'ms_bind_ipv4': False, + 'ms_bind_ipv6': True} + self.assertEqual(ctxt, expected) + + @patch.object(ceph_hooks, 'config') + def test_nrpe_dependency_installed(self, mock_config): + config = copy.deepcopy(CHARM_CONFIG) + mock_config.side_effect = lambda key: config[key] + with patch.multiple(ceph_hooks, + apt_install=DEFAULT, + rsync=DEFAULT, + log=DEFAULT, + write_file=DEFAULT, + nrpe=DEFAULT) as mocks: + ceph_hooks.update_nrpe_config() + mocks["apt_install"].assert_called_with( + "lockfile-progs", fatal=True) + + 
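`update_nrpe_config` touches several module-level helpers at once, so the test above patches them in one shot with `patch.multiple` and `DEFAULT`, which hands back a dict of the created mocks. The pattern in isolation, on a throwaway namespace rather than the charm module:

import types
from unittest.mock import DEFAULT, patch

helpers = types.SimpleNamespace(apt_install=lambda *a, **kw: None,
                                rsync=lambda *a, **kw: None)

with patch.multiple(helpers, apt_install=DEFAULT, rsync=DEFAULT) as mocks:
    helpers.apt_install('lockfile-progs', fatal=True)
    mocks['apt_install'].assert_called_with('lockfile-progs', fatal=True)
    mocks['rsync'].assert_not_called()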
@patch.object(ceph_hooks, 'notify_prometheus') + @patch.object(ceph_hooks, 'notify_rbd_mirrors') + @patch.object(ceph_hooks, 'service_pause') + @patch.object(ceph_hooks, 'notify_radosgws') + @patch.object(ceph_hooks, 'ceph') + @patch.object(ceph_hooks, 'config') + def test_upgrade_charm_with_nrpe_relation_installs_dependencies( + self, + mock_config, + mock_ceph, + mock_notify_radosgws, + mock_service_pause, + mock_notify_rbd_mirrors, + mock_notify_prometheus): + config = copy.deepcopy(CHARM_CONFIG) + mock_config.side_effect = lambda key: config[key] + with patch.multiple( + ceph_hooks, + apt_install=DEFAULT, + rsync=DEFAULT, + log=DEFAULT, + write_file=DEFAULT, + nrpe=DEFAULT, + emit_cephconf=DEFAULT, + mon_relation_joined=DEFAULT, + is_relation_made=DEFAULT) as mocks, patch( + "charmhelpers.contrib.hardening.harden.config"): + mocks["is_relation_made"].return_value = True + ceph_hooks.upgrade_charm() + mocks["apt_install"].assert_called_with( + "lockfile-progs", fatal=True) + mock_notify_radosgws.assert_called_once_with( + reprocess_broker_requests=True) + mock_ceph.update_monfs.assert_called_once_with() + mock_notify_prometheus.assert_called_once_with() + mock_service_pause.assert_called_with('ceph-create-keys') + + @patch.object(ceph_hooks, 'rbd_mirror_relation') + @patch.object(ceph_hooks, 'related_units') + @patch.object(ceph_hooks, 'relation_ids') + def test_notify_rbd_mirrors(self, mock_relation_ids, mock_related_units, + mock_rbd_mirror_relation): + mock_relation_ids.return_value = ['arelid'] + mock_related_units.return_value = ['aunit'] + ceph_hooks.notify_rbd_mirrors() + mock_relation_ids.assert_called_once_with('rbd-mirror') + mock_related_units.assert_called_once_with('arelid') + mock_rbd_mirror_relation.assert_called_once_with( + relid='arelid', + unit='aunit', + recurse=False, + reprocess_broker_requests=False) + + @patch.object(ceph_hooks, 'uuid') + @patch.object(ceph_hooks, 'relation_set') + @patch.object(ceph_hooks, 'related_units') + @patch.object(ceph_hooks, 'relation_ids') + def test_notify_mons(self, mock_relation_ids, mock_related_units, + mock_relation_set, mock_uuid): + mock_relation_ids.return_value = ['arelid'] + mock_related_units.return_value = ['aunit'] + mock_uuid.uuid4.return_value = 'FAKE-UUID' + ceph_hooks.notify_mons() + mock_relation_ids.assert_called_once_with('mon') + mock_related_units.assert_called_once_with('arelid') + mock_relation_set.assert_called_once_with(relation_id='arelid', + relation_settings={ + 'nonce': 'FAKE-UUID'}) + + @patch.object(ceph_hooks, 'relation_set') + @patch.object(ceph_hooks, 'ready_for_service') + def test_dashboard_relation(self, ready_for_service, relation_set): + ready_for_service.return_value = True + ceph_hooks.dashboard_relation() + relation_set.assert_called_once_with( + relation_id=None, + relation_settings={'mon-ready': True}) + relation_set.reset_mock() + ceph_hooks.dashboard_relation('rid1') + relation_set.assert_called_once_with( + relation_id='rid1', + relation_settings={'mon-ready': True}) + ready_for_service.return_value = False + relation_set.reset_mock() + ceph_hooks.dashboard_relation() + self.assertFalse(relation_set.called) + + @patch.object(ceph_hooks.hookenv, 'remote_service_name') + @patch.object(ceph_hooks, 'relation_get') + @patch.object(ceph_hooks, 'remote_unit') + def test_get_client_application_name(self, remote_unit, relation_get, + remote_service_name): + relation_get.return_value = { + 'application-name': 'glance'} + remote_unit.return_value = 'glance/0' + self.assertEqual( + 
ceph_hooks.get_client_application_name('rel:1', None), + 'glance') + relation_get.return_value = {} + remote_service_name.return_value = 'glance' + self.assertEqual( + ceph_hooks.get_client_application_name('rel:1', None), + 'glance') + + @patch.object(utils, 'is_leader', lambda: False) + @patch.object(ceph_hooks.ceph, 'mgr_config_set', lambda _key, _value: None) + @patch.object(ceph_hooks.ceph, 'list_pools') + @patch.object(ceph_hooks, 'mgr_enable_module') + @patch.object(ceph_hooks, 'emit_cephconf') + @patch.object(ceph_hooks, 'create_sysctl') + @patch.object(ceph_hooks, 'check_for_upgrade') + @patch.object(ceph_hooks, 'get_mon_hosts') + @patch.object(ceph_hooks, 'bootstrap_source_relation_changed') + @patch.object(ceph_hooks, 'relations_of_type') + def test_config_changed_no_autotune(self, + relations_of_type, + bootstrap_source_rel_changed, + get_mon_hosts, + check_for_upgrade, + create_sysctl, + emit_ceph_conf, + mgr_enable_module, + list_pools): + relations_of_type.return_value = False + self.test_config.set('pg-autotune', 'false') + self.test_config.set('balancer-mode', '') + ceph_hooks.config_changed() + mgr_enable_module.assert_not_called() + + @patch.object(utils, 'is_leader', lambda: False) + @patch.object(ceph_hooks.ceph, 'mgr_config_set', lambda _key, _value: None) + @patch.object(ceph_hooks.ceph, 'monitor_key_set') + @patch.object(ceph_hooks.ceph, 'list_pools') + @patch.object(ceph_hooks, 'mgr_enable_module') + @patch.object(ceph_hooks, 'emit_cephconf') + @patch.object(ceph_hooks, 'create_sysctl') + @patch.object(ceph_hooks, 'check_for_upgrade') + @patch.object(ceph_hooks, 'get_mon_hosts') + @patch.object(ceph_hooks, 'bootstrap_source_relation_changed') + @patch.object(ceph_hooks, 'relations_of_type') + @patch.object(ceph_hooks, 'cmp_pkgrevno') + def test_config_changed_with_autotune(self, + cmp_pkgrevno, + relations_of_type, + bootstrap_source_rel_changed, + get_mon_hosts, + check_for_upgrade, + create_sysctl, + emit_ceph_conf, + mgr_enable_module, + list_pools, + monitor_key_set): + relations_of_type.return_value = False + cmp_pkgrevno.return_value = 1 + self.test_config.set('pg-autotune', 'true') + self.test_config.set('balancer-mode', '') + ceph_hooks.config_changed() + mgr_enable_module.assert_called_once_with('pg_autoscaler') + monitor_key_set.assert_called_once_with('admin', 'autotune', 'true') + + @patch.object(utils, 'is_leader', lambda: False) + @patch.object(ceph_hooks.ceph, 'mgr_config_set', lambda _key, _value: None) + @patch.object(ceph_hooks.ceph, 'list_pools') + @patch.object(ceph_hooks, 'mgr_enable_module') + @patch.object(ceph_hooks, 'emit_cephconf') + @patch.object(ceph_hooks, 'create_sysctl') + @patch.object(ceph_hooks, 'check_for_upgrade') + @patch.object(ceph_hooks, 'get_mon_hosts') + @patch.object(ceph_hooks, 'bootstrap_source_relation_changed') + @patch.object(ceph_hooks, 'relations_of_type') + @patch.object(ceph_hooks, 'cmp_pkgrevno') + def test_config_changed_with_default_autotune(self, + cmp_pkgrevno, + relations_of_type, + bootstrap_source_rel_changed, + get_mon_hosts, + check_for_upgrade, + create_sysctl, + emit_ceph_conf, + mgr_enable_module, + list_pools): + relations_of_type.return_value = False + cmp_pkgrevno.return_value = 1 + self.test_config.set('pg-autotune', 'auto') + self.test_config.set('balancer-mode', '') + ceph_hooks.config_changed() + mgr_enable_module.assert_not_called() + + +class CephMonRelationTestCase(test_utils.CharmTestCase): + + def setUp(self): + super(CephMonRelationTestCase, self).setUp(ceph_hooks, [ + 'config', + 
'is_leader', + 'is_relation_made', + 'leader_get', + 'leader_set', + 'log', + 'relation_ids', + 'related_units', + 'relation_get', + 'relations_of_type', + 'status_set', + 'get_mon_hosts', + 'notify_relations', + 'emit_cephconf', + ]) + self.config.side_effect = self.test_config.get + self.leader_get.side_effect = self.test_leader_settings.get + self.leader_set.side_effect = self.test_leader_settings.set + self.relation_get.side_effect = self.test_relation.get + self.test_config.set('monitor-count', 3) + self.test_leader_settings.set({'monitor-secret': '42'}) + self.get_mon_hosts.return_value = ['foo', 'bar', 'baz'] + + @patch.object(ceph_hooks.ceph, 'is_bootstrapped') + def test_mon_relation_bootstrapped(self, _is_bootstrapped): + _is_bootstrapped.return_value = True + ceph_hooks.mon_relation() + self.notify_relations.assert_called_with() + + @patch.object(ceph_hooks, 'attempt_mon_cluster_bootstrap') + @patch.object(ceph_hooks.ceph, 'is_bootstrapped') + def test_mon_relation_attempt_bootstrap_success(self, _is_bootstrapped, + _attempt_bootstrap): + _is_bootstrapped.return_value = False + _attempt_bootstrap.return_value = True + ceph_hooks.mon_relation() + self.notify_relations.assert_called_with() + + @patch.object(ceph_hooks, 'attempt_mon_cluster_bootstrap') + @patch.object(ceph_hooks.ceph, 'is_bootstrapped') + def test_mon_relation_attempt_bootstrap_failure(self, _is_bootstrapped, + _attempt_bootstrap): + _is_bootstrapped.return_value = False + _attempt_bootstrap.return_value = False + ceph_hooks.mon_relation() + self.notify_relations.assert_not_called() + + @patch.object(ceph_hooks, 'attempt_mon_cluster_bootstrap') + @patch.object(ceph_hooks.ceph, 'is_bootstrapped') + def test_mon_relation_no_enough_mons(self, _is_bootstrapped, + _attempt_bootstrap): + _is_bootstrapped.return_value = False + _attempt_bootstrap.return_value = False + self.get_mon_hosts.return_value = ['foo', 'bar'] + ceph_hooks.mon_relation() + self.notify_relations.assert_not_called() + self.log.assert_called_once_with('Not enough mons (2), punting.') + + @patch.object(ceph_hooks, 'attempt_mon_cluster_bootstrap') + @patch.object(ceph_hooks.ceph, 'is_bootstrapped') + def test_mon_relation_no_secret(self, _is_bootstrapped, + _attempt_bootstrap): + _is_bootstrapped.return_value = False + _attempt_bootstrap.return_value = False + self.get_mon_hosts.return_value = ['foo', 'bar'] + self.test_leader_settings.set({'monitor-secret': None}) + ceph_hooks.mon_relation() + self.notify_relations.assert_not_called() + _attempt_bootstrap.assert_not_called() + self.log.assert_called_once_with( + 'still waiting for leader to setup keys') + + +class RelatedUnitsTestCase(unittest.TestCase): + + _units = { + 'osd:0': ['ceph-osd-a/0', + 'ceph-osd-a/1', + 'ceph-osd-a/2'], + 'osd:23': ['ceph-osd-b/1', + 'ceph-osd-b/2', + 'ceph-osd-b/3'], + } + + def setUp(self): + super(RelatedUnitsTestCase, self).setUp() + + @patch.object(ceph_hooks, 'relation_ids') + @patch.object(ceph_hooks, 'related_units') + def test_related_osd_single_relation(self, + related_units, + relation_ids): + relation_ids.return_value = ['osd:0'] + related_units.side_effect = lambda x: self._units.get(x) + self.assertTrue(ceph_hooks.related_osds()) + self.assertFalse(ceph_hooks.related_osds(6)) + relation_ids.assert_called_with('osd') + related_units.assert_called_with('osd:0') + + @patch.object(ceph_hooks, 'relation_ids') + @patch.object(ceph_hooks, 'related_units') + def test_related_osd_multi_relation(self, + related_units, + relation_ids): + relation_ids.return_value = 
['osd:0', 'osd:23'] + related_units.side_effect = lambda x: self._units.get(x) + self.assertTrue(ceph_hooks.related_osds()) + self.assertTrue(ceph_hooks.related_osds(6)) + self.assertFalse(ceph_hooks.related_osds(9)) + relation_ids.assert_called_with('osd') + related_units.assert_has_calls([ + call('osd:0'), + call('osd:23') + ]) + + @patch.object(ceph_hooks, 'req_already_treated') + @patch.object(ceph_hooks, 'relation_ids') + @patch.object(ceph_hooks, 'notify_mons') + @patch.object(ceph_hooks, 'notify_rbd_mirrors') + @patch.object(ceph_hooks, 'process_requests') + @patch.object(ceph_hooks.ceph, 'is_leader') + @patch.object(ceph_hooks, 'relation_get') + @patch.object(ceph_hooks, 'remote_unit') + def test_handle_broker_request(self, mock_remote_unit, mock_relation_get, + mock_ceph_is_leader, + mock_broker_process_requests, + mock_notify_rbd_mirrors, + mock_notify_mons, + mock_relation_ids, + req_already_treated): + mock_remote_unit.return_value = 'glance/0' + req_already_treated.return_value = False + ceph_hooks.handle_broker_request('rel1', None) + mock_remote_unit.assert_called_once_with() + mock_relation_get.assert_called_once_with(rid='rel1', unit='glance/0') + mock_relation_get.reset_mock() + mock_relation_get.return_value = { + 'broker_req': '{"request-id": "FAKE-REQUEST"}' + } + mock_broker_process_requests.return_value = 'AOK' + self.assertEqual( + ceph_hooks.handle_broker_request('rel1', 'glance/0'), + {'broker-rsp-glance-0': 'AOK'}) + mock_notify_rbd_mirrors.assert_called_with() + mock_notify_mons.assert_called_with() + mock_relation_get.assert_called_once_with(rid='rel1', unit='glance/0') + self.assertEqual( + ceph_hooks.handle_broker_request('rel1', 'glance/0', + add_legacy_response=True), + {'broker_rsp': 'AOK', 'broker-rsp-glance-0': 'AOK'}) + mock_notify_rbd_mirrors.reset_mock() + mock_notify_mons.reset_mock() + ceph_hooks.handle_broker_request('rel1', None, recurse=False) + self.assertFalse(mock_notify_rbd_mirrors.called) + mock_notify_mons.assert_called_once_with() + + @patch.object(ceph_hooks, 'local_unit') + @patch.object(ceph_hooks, 'relation_get') + @patch.object(ceph_hooks.ceph, 'is_leader') + @patch.object(ceph_hooks, 'process_requests') + def test_multi_broker_req_ignored_on_rel(self, process_requests, + is_leader, + relation_get, + local_unit): + is_leader.return_value = True + relation_get.side_effect = [{'broker_req': {'request-id': '1'}}, + {'broker-rsp-glance-0': + {"request-id": "1"}}] + local_unit.return_value = "mon/0" + ceph_hooks.handle_broker_request(relid='rel1', + unit='glance/0', + recurse=False) + process_requests.assert_not_called() + + @patch.object(ceph_hooks, 'relation_ids') + @patch.object(ceph_hooks, 'local_unit') + @patch.object(ceph_hooks, 'relation_get') + @patch.object(ceph_hooks.ceph, 'is_leader') + @patch.object(ceph_hooks, 'process_requests') + def test_multi_broker_req_handled_on_rel(self, process_requests, + is_leader, + relation_get, + local_unit, + _relation_ids): + is_leader.return_value = True + relation_get.side_effect = [{'broker_req': {'request-id': '2'}}, + {'broker-rsp-glance-0': + {"request-id": "1"}}] + local_unit.return_value = "mon/0" + ceph_hooks.handle_broker_request(relid='rel1', + unit='glance/0', + recurse=False) + process_requests.assert_called_once_with({'request-id': '2'}) + + @patch.object(ceph_hooks, 'relation_ids') + @patch.object(ceph_hooks, 'local_unit') + @patch.object(ceph_hooks, 'relation_get') + @patch.object(ceph_hooks.ceph, 'is_leader') + @patch.object(ceph_hooks, 'process_requests') + def 
test_multi_broker_req_handled_on_rel_errored(self, process_requests, + is_leader, + relation_get, + local_unit, + _relation_ids): + is_leader.return_value = True + relation_get.side_effect = [ + { + 'broker_req': {'request-id': '2'}}, + { + 'broker-rsp-glance-0': { + 'exit-code': 1, + 'stderr': 'Unexpected error'}}] + + local_unit.return_value = "mon/0" + ceph_hooks.handle_broker_request(relid='rel1', + unit='glance/0', + recurse=False) + process_requests.assert_called_once_with({'request-id': '2'}) + + +class BootstrapSourceTestCase(test_utils.CharmTestCase): + + def setUp(self): + super(BootstrapSourceTestCase, self).setUp(ceph_hooks, TO_PATCH) + self.config.side_effect = self.test_config.get + self.leader_get.side_effect = self.test_leader_settings.get + self.leader_set.side_effect = self.test_leader_settings.set + self.relation_get.side_effect = self.test_relation.get + self.test_config.set('no-bootstrap', True) + self.is_leader.return_value = True + self.relation_ids.return_value = ['bootstrap-source:0'] + self.related_units.return_value = ['ceph/0', 'ceph/1', 'ceph/2'] + + def test_bootstrap_source_no_bootstrap(self): + """Ensure the config option of no-bootstrap is set to continue""" + self.test_config.set('no-bootstrap', False) + ceph_hooks.bootstrap_source_relation_changed() + self.status_set.assert_called_once_with('blocked', + 'Cannot join the ' + 'bootstrap-source relation ' + 'when no-bootstrap is False') + + def test_bootstrap_source_not_leader(self): + """Ensure the processing is deferred to the leader""" + self.is_leader.return_value = False + ceph_hooks.bootstrap_source_relation_changed() + self.assertEqual(self.leader_set.call_count, 0) + + def test_bootstrap_source_relation_data_not_ready(self): + """Ensures no bootstrapping done if relation data not present""" + ceph_hooks.bootstrap_source_relation_changed() + expected_calls = [] + relid = 'bootstrap-source:0' + for unit in ('ceph/0', 'ceph/1', 'ceph/2'): + expected_calls.append(call('monitor-secret', unit, relid)) + expected_calls.append(call('fsid', unit, relid)) + self.relation_get.assert_has_calls(expected_calls) + self.assertEqual(self.leader_set.call_count, 0) + self.assertEqual(self.mon_relation.call_count, 0) + + def test_bootstrap_source_good_path(self): + """Tests the good path where all is setup and relations established""" + self.test_relation.set({'monitor-secret': 'abcd', + 'fsid': '1234'}) + ceph_hooks.bootstrap_source_relation_changed() + self.leader_set.assert_called_with({'fsid': '1234', + 'monitor-secret': 'abcd'}) + self.mon_relation.assert_called_once_with() + + def test_bootstrap_source_different_fsid_secret(self): + """Tests where the bootstrap relation has a different fsid""" + self.test_relation.set({'monitor-secret': 'abcd', + 'fsid': '1234'}) + self.test_leader_settings.set({'monitor-secret': 'mysecret', + 'fsid': '7890'}) + self.assertRaises(AssertionError, + ceph_hooks.bootstrap_source_relation_changed) + + @patch.object(utils, 'is_leader', lambda: False) + @patch.object(ceph_hooks.ceph, 'is_bootstrapped') + @patch.object(ceph_hooks, 'emit_cephconf') + @patch.object(ceph_hooks, 'leader_get') + @patch.object(ceph_hooks, 'is_leader') + @patch.object(ceph_hooks, 'relations_of_type') + @patch.object(ceph_hooks, 'get_mon_hosts') + @patch.object(ceph_hooks, 'check_for_upgrade') + @patch.object(ceph_hooks, 'config') + def test_config_changed(self, + _config, + _check_for_upgrade, + _get_mon_hosts, + _relations_of_type, + _is_leader, + _leader_get, + _emit_cephconf, + _is_bootstrapped): + config = 
copy.deepcopy(CHARM_CONFIG)
+        _config.side_effect = \
+            lambda key=None: config.get(key, None) if key else config
+        _relations_of_type.return_value = False
+        _is_leader.return_value = False
+        _leader_get.side_effect = ['fsid', 'monsec']
+        _is_bootstrapped.return_value = True
+        ceph_hooks.config_changed()
+        _check_for_upgrade.assert_called_once_with()
+        _get_mon_hosts.assert_called_once_with()
+        _leader_get.assert_has_calls([
+            call('fsid'),
+            call('monitor-secret'),
+        ])
+        _emit_cephconf.assert_called_once_with()
+        _is_bootstrapped.assert_called_once_with()
+
+    @patch.object(utils, 'is_leader', lambda: True)
+    @patch.object(utils, 'config', lambda _: 'pool1')
+    @patch.object(utils.ceph_utils, 'mgr_config_set')
+    @patch.object(ceph_hooks.ceph, 'is_bootstrapped')
+    @patch.object(ceph_hooks, 'emit_cephconf')
+    @patch.object(ceph_hooks, 'leader_get')
+    @patch.object(ceph_hooks, 'is_leader')
+    @patch.object(ceph_hooks, 'relations_of_type')
+    @patch.object(ceph_hooks, 'get_mon_hosts')
+    @patch.object(ceph_hooks, 'check_for_upgrade')
+    @patch.object(ceph_hooks, 'config')
+    def test_config_changed_leader(
+            self,
+            _config,
+            _check_for_upgrade,
+            _get_mon_hosts,
+            _relations_of_type,
+            _is_leader,
+            _leader_get,
+            _emit_cephconf,
+            _is_bootstrapped,
+            _mgr_config_set
+    ):
+        config = copy.deepcopy(CHARM_CONFIG)
+        _config.side_effect = \
+            lambda key=None: config.get(key, None) if key else config
+        _relations_of_type.return_value = False
+        _is_leader.return_value = True
+        _leader_get.side_effect = ['fsid', 'monsec', 'fsid', 'monsec']
+        _is_bootstrapped.return_value = True
+        ceph_hooks.config_changed()
+        _check_for_upgrade.assert_called_once_with()
+        _get_mon_hosts.assert_called_once_with()
+        _leader_get.assert_has_calls([
+            call('fsid'),
+            call('monitor-secret'),
+        ])
+        _emit_cephconf.assert_called_once_with()
+        _is_bootstrapped.assert_has_calls([call(), call()])
+        _mgr_config_set.assert_called_once_with(
+            'mgr/prometheus/rbd_stats_pools', 'pool1'
+        )
+
+    @patch.object(utils, 'is_leader', lambda: False)
+    @patch.object(ceph_hooks, 'emit_cephconf')
+    @patch.object(ceph_hooks, 'create_sysctl')
+    @patch.object(ceph_hooks, 'check_for_upgrade')
+    @patch.object(ceph_hooks, 'get_mon_hosts')
+    @patch.object(ceph_hooks, 'bootstrap_source_relation_changed')
+    def test_config_changed_no_bootstrap_changed(self,
+                                                 bootstrap_source_rel_changed,
+                                                 get_mon_hosts,
+                                                 check_for_upgrade,
+                                                 create_sysctl,
+                                                 emit_ceph_conf):
+        """Tests that changing no-bootstrap invokes the bs relation changed"""
+        self.relations_of_type.return_value = []
+        self.is_relation_made.return_value = True
+        self.test_config.set_changed('no-bootstrap', True)
+        self.test_config.set('balancer-mode', '')
+        ceph_hooks.config_changed()
+        bootstrap_source_rel_changed.assert_called_once()
+
+    @patch.object(ceph_hooks, 'get_public_addr')
+    def test_get_mon_hosts(self, get_public_addr):
+        """Tests that bootstrap-source relations are used"""
+        unit_addrs = {
+            'mon:0': {
+                'ceph-mon/0': '172.16.0.2',
+                'ceph-mon/1': '172.16.0.3',
+            },
+            'bootstrap-source:1': {
+                'ceph/0': '172.16.10.2',
+                'ceph/1': '172.16.10.3',
+                'ceph/2': '172.16.10.4',
+            }
+        }
+
+        def rel_ids_side_effect(relname):
+            for key in unit_addrs.keys():
+                if key.split(':')[0] == relname:
+                    return [key]
+            return None
+
+        def rel_get_side_effect(attr, unit, relid):
+            return unit_addrs[relid][unit]
+
+        def rel_units_side_effect(relid):
+            if relid in unit_addrs:
+                return unit_addrs[relid].keys()
+            return []
+
+        self.relation_ids.side_effect = rel_ids_side_effect
+        self.related_units.side_effect
= rel_units_side_effect + get_public_addr.return_value = '172.16.0.4' + self.relation_get.side_effect = rel_get_side_effect + hosts = ceph_hooks.get_mon_hosts() + self.assertEqual(hosts, [ + '172.16.0.2', '172.16.0.3', '172.16.0.4', + '172.16.10.2', '172.16.10.3', '172.16.10.4', + ]) + + +class RGWRelationTestCase(test_utils.CharmTestCase): + + TO_PATCH = [ + 'relation_get', + 'get_public_addr', + 'ready_for_service', + 'remote_unit', + 'apt_install', + 'filter_installed_packages', + 'leader_get', + 'ceph', + 'process_requests', + 'log', + 'relation_set', + 'config', + ] + + test_key = 'OTQ1MDdiODYtMmZhZi00M2IwLTkzYTgtZWI0MGRhNzdmNzBlCg==' + test_fsid = '96ca5e7d-a9e3-4af1-be2b-85621eb6a8e8' + + def setUp(self): + super(RGWRelationTestCase, self).setUp(ceph_hooks, self.TO_PATCH) + self.relation_get.side_effect = self.test_relation.get + self.config.side_effect = self.test_config.get + self.test_config.set('auth-supported', 'cephx') + self.filter_installed_packages.side_effect = lambda pkgs: pkgs + self.ready_for_service.return_value = True + self.leader_get.return_value = self.test_fsid + self.ceph.is_leader.return_value = True + self.ceph.get_radosgw_key.return_value = self.test_key + self.get_public_addr.return_value = '10.10.10.2' + + def test_legacy_radosgw_key(self): + self.test_relation.set({ + 'key_name': None + }) + ceph_hooks.radosgw_relation('radosgw:1', 'ceph-radosgw/0') + self.relation_set.assert_called_once_with( + relation_id='radosgw:1', + relation_settings={ + 'fsid': self.test_fsid, + 'auth': self.test_config.get('auth-supported'), + 'ceph-public-address': '10.10.10.2', + 'radosgw_key': self.test_key, + } + ) + self.ceph.get_radosgw_key.assert_called_once_with() + + def test_per_unit_radosgw_key(self): + self.test_relation.set({ + 'key_name': 'testhostname' + }) + ceph_hooks.radosgw_relation('radosgw:1', 'ceph-radosgw/0') + self.relation_set.assert_called_once_with( + relation_id='radosgw:1', + relation_settings={ + 'fsid': self.test_fsid, + 'auth': self.test_config.get('auth-supported'), + 'ceph-public-address': '10.10.10.2', + 'testhostname_key': self.test_key, + } + ) + self.ceph.get_radosgw_key.assert_called_once_with(name='testhostname') + + +class RBDMirrorRelationTestCase(test_utils.CharmTestCase): + + TO_PATCH = [ + 'related_units', + 'relation_ids', + 'relation_get', + 'get_cluster_addr', + 'get_public_addr', + 'ready_for_service', + 'remote_unit', + 'apt_install', + 'filter_installed_packages', + 'leader_get', + 'ceph', + 'process_requests', + 'log', + 'relation_set', + 'config', + 'handle_broker_request', + ] + + test_key = 'OTQ1MDdiODYtMmZhZi00M2IwLTkzYTgtZWI0MGRhNzdmNzBlCg==' + + class FakeCephBrokerRq(object): + + def __init__(self, raw_request_data=None): + if raw_request_data: + self.__dict__ = { + k.replace('-', '_'): v + for k, v in raw_request_data.items()} + + def setUp(self): + super(RBDMirrorRelationTestCase, self).setUp(ceph_hooks, self.TO_PATCH) + self.relation_get.side_effect = self.test_relation.get + self.config.side_effect = self.test_config.get + self.test_config.set('auth-supported', 'cephx') + self.filter_installed_packages.side_effect = lambda pkgs: pkgs + self.ready_for_service.return_value = True + self.ceph.is_leader.return_value = True + self.ceph.get_rbd_mirror_key.return_value = self.test_key + self.get_cluster_addr.return_value = '192.0.2.10' + self.get_public_addr.return_value = '198.51.100.10' + self.ceph.list_pools_detail.return_value = {'pool': {}} + + @patch.object(ceph_hooks, 'retrieve_client_broker_requests') + def 
test_rbd_mirror_relation(self, + _retrieve_client_broker_requests): + self.handle_broker_request.return_value = {} + base_relation_settings = { + 'auth': self.test_config.get('auth-supported'), + 'ceph-public-address': '198.51.100.10', + 'ceph-cluster-address': '192.0.2.10', + 'pools': json.dumps({'pool': {}}), + 'broker_requests': '["fakejsonstr0", "fakejsonstr1"]', + } + _retrieve_client_broker_requests.return_value = [ + self.FakeCephBrokerRq(raw_request_data={ + 'request': 'fakejsonstr0'}), + self.FakeCephBrokerRq(raw_request_data={ + 'request': 'fakejsonstr1'}), + ] + ceph_hooks.rbd_mirror_relation('rbd-mirror:51', 'ceph-rbd-mirror/0') + self.handle_broker_request.assert_called_with( + 'rbd-mirror:51', 'ceph-rbd-mirror/0', recurse=True, force=False) + self.relation_set.assert_called_with( + relation_id='rbd-mirror:51', + relation_settings=base_relation_settings) + self.test_relation.set( + {'unique_id': None}) + ceph_hooks.rbd_mirror_relation('rbd-mirror:52', 'ceph-rbd-mirror/0', + recurse=False) + self.relation_set.assert_called_with( + relation_id='rbd-mirror:52', + relation_settings=base_relation_settings) + self.test_relation.set( + {'unique_id': json.dumps('otherSideIsReactiveEndpoint')}) + ceph_hooks.rbd_mirror_relation('rbd-mirror:53', 'ceph-rbd-mirror/0') + self.ceph.get_rbd_mirror_key.assert_called_once_with( + 'rbd-mirror.otherSideIsReactiveEndpoint') + key_relation_settings = base_relation_settings.copy() + key_relation_settings.update( + {'otherSideIsReactiveEndpoint_key': self.test_key}) + self.relation_set.assert_called_with( + relation_id='rbd-mirror:53', + relation_settings=key_relation_settings) + self.test_relation.set({'unique_id': 'somehostname'}) + ceph_hooks.rbd_mirror_relation('rbd-mirror:42', 'ceph-rbd-mirror/0') + self.ceph.get_rbd_mirror_key.assert_called_with( + 'rbd-mirror.somehostname') + key_relation_settings = base_relation_settings.copy() + key_relation_settings.update({ + 'otherSideIsReactiveEndpoint_key': self.test_key, + 'somehostname_key': self.test_key + }) + self.relation_set.assert_called_with( + relation_id='rbd-mirror:42', + relation_settings=key_relation_settings) + + @patch.object(ceph_hooks, 'CephBrokerRq') + def test_retrieve_client_broker_requests(self, _CephBrokerRq): + self.maxDiff = None + self.relation_ids.side_effect = lambda endpoint: { + 'client': ['ceph-client:0'], + 'mds': ['ceph-client:1'], + 'radosgw': ['ceph-client:2'], + }.get(endpoint) + self.related_units.return_value = ['unit/0', 'unit/1', 'unit/3'] + self.relation_get.side_effect = lambda **kwargs: { + 'ceph-client:0': {'broker_req': {'request-id': 'fakeid0'}}, + 'ceph-client:1': {'broker_req': {'request-id': 'fakeid1'}}, + 'ceph-client:2': {}, + }.get(kwargs['rid'], {}) + + _CephBrokerRq.side_effect = self.FakeCephBrokerRq + + for req in ceph_hooks.retrieve_client_broker_requests(): + self.assertIn(req.request_id, ('fakeid0', 'fakeid1')) diff --git a/ceph-mon/unit_tests/test_ceph_mds_relation.py b/ceph-mon/unit_tests/test_ceph_mds_relation.py new file mode 100644 index 00000000..dbcdc9cd --- /dev/null +++ b/ceph-mon/unit_tests/test_ceph_mds_relation.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 +# Copyright 2022 Canonical Ltd. +# See LICENSE file for licensing details. 
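The client broker tests above and the mds test that follows pin down the same reply convention: each requesting unit gets its response under `broker-rsp-<unit>` with the `/` flattened to `-`, plus a legacy `broker_rsp` duplicate for older clients. The key derivation the assertions imply (the helper name here is illustrative, not taken from the charm):

def broker_rsp_key(unit_name):
    """Per-unit broker response key, e.g. 'glance/0' -> 'broker-rsp-glance-0'."""
    return 'broker-rsp-' + unit_name.replace('/', '-')


assert broker_rsp_key('glance/0') == 'broker-rsp-glance-0'
assert broker_rsp_key('ceph-fs/0') == 'broker-rsp-ceph-fs-0'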
+from unittest import mock +from unittest.mock import patch +import unittest +from ops.testing import Harness + +import ceph_mds +import charm +from manage_test_relations import ( + add_ceph_mds_relation, +) + + +@patch("charm.hooks") +class TestCephShared(unittest.TestCase): + def setUp(self): + super().setUp() + self.harness = Harness(charm.CephMonCharm) + self.addCleanup(self.harness.cleanup) + + def test_init(self, _hooks): + self.harness.begin() + mds = ceph_mds.CephMdsProvides(self.harness.charm) + self.assertTrue(mds.this_unit) + + @mock.patch("src.charm.ceph_client.ceph.is_leader") + @mock.patch("src.charm.ceph_mds.leader_get", return_value="test-fsid") + @mock.patch("src.charm.ceph_mds.ceph") + @mock.patch.object(charm.CephMonCharm, "process_broker_request") + @mock.patch("src.charm.ceph_client.ceph.get_named_key") + @mock.patch("src.charm.ceph_client.get_rbd_features") + @mock.patch("src.charm.ceph_client.get_public_addr") + @mock.patch.object(charm.CephMonCharm, "ready_for_service") + @mock.patch("src.charm.ceph_client.send_osd_settings") + def test_client_relation_broker( + self, + _send_osd_settings, + mock_ready_for_service, + mock_get_public_addr, + mock_get_rbd_features, + mock_get_named_key, + mock_process_broker_request, + mock_ceph_utils, + mock_leader_get, + mock_is_leader, + _hooks, + ): + mock_get_public_addr.return_value = "127.0.0.1" + mock_ready_for_service.return_value = True + mock_get_rbd_features.return_value = 42 + mock_get_named_key.return_value = "test key" + mock_process_broker_request.return_value = "AOK" + mock_ceph_utils.get_mds_key.return_value = "test-mds-key" + mock_is_leader.return_value = True + self.harness.begin() + self.harness.set_leader() + mds = ceph_mds.CephMdsProvides(self.harness.charm) + rel_id = add_ceph_mds_relation(self.harness) + self.harness.update_relation_data( + rel_id, "ceph-fs/0", {"broker_req": '{"request-id": "req"}'} + ) + self.assertEqual(mds._mds_name, "ceph-fs") + mock_leader_get.assert_called_with("fsid") + unit_rel_data = self.harness.get_relation_data(rel_id, "ceph-mon/0") + self.assertEqual( + unit_rel_data, + { + "auth": "cephx", + "ceph-public-address": "127.0.0.1", + "key": "test key", + "rbd-features": "42", + "broker-rsp-ceph-fs-0": "AOK", + "broker_rsp": "AOK", + 'ceph-fs_mds_key': 'test-mds-key', + 'fsid': 'test-fsid', + + }, + ) + mock_process_broker_request.reset_mock() diff --git a/ceph-mon/unit_tests/test_ceph_metrics.py b/ceph-mon/unit_tests/test_ceph_metrics.py new file mode 100644 index 00000000..d684c322 --- /dev/null +++ b/ceph-mon/unit_tests/test_ceph_metrics.py @@ -0,0 +1,197 @@ +#!/usr/bin/env python3 +# Copyright 2022 Canonical Ltd. +# See LICENSE file for licensing details. + +import json +import pathlib +import tempfile +import textwrap + +from unittest.mock import patch +import unittest + +from ops.testing import Harness + +import ceph_metrics # noqa: avoid circ. 
import +import charm + + +class CephMetricsTestBase(unittest.TestCase): + @classmethod + def setUpClass(cls): + """Run once before tests begin.""" + cls.tempdir = tempfile.TemporaryDirectory() + cls.tmp = pathlib.Path(cls.tempdir.name) + cls.rules_dir = cls.tmp / "rules" + cls.rules_dir.mkdir() + cls.rules = textwrap.dedent( + """ + groups: + - name: "testgroup" + rules: [] + """ + ) + rules_file = cls.rules_dir / "alert-rules.yaml" + with rules_file.open("w") as f: + f.write(cls.rules) + + @classmethod + def tearDownClass(cls): + cls.tempdir.cleanup() + + +class TestCephMetrics(CephMetricsTestBase): + def setUp(self): + super().setUp() + self.harness = Harness(charm.CephMonCharm) + self.addCleanup(self.harness.cleanup) + self.harness.begin() + self.harness.set_leader(True) + self.harness.charm.metrics_endpoint._alert_rules_path = self.rules_dir + self.harness.add_network('10.0.0.10') + + def test_init(self): + self.assertEqual( + self.harness.charm.metrics_endpoint._relation_name, + "metrics-endpoint", + ) + + @patch("ceph_metrics.mgr_config_set_rbd_stats_pools", lambda: None) + @patch("ceph_metrics.ceph_utils.is_bootstrapped", return_value=True) + @patch("ceph_metrics.ceph_utils.is_mgr_module_enabled", return_value=False) + @patch("ceph_metrics.ceph_utils.mgr_enable_module") + @patch("ceph_metrics.ceph_utils.mgr_disable_module") + def test_add_remove_rel( + self, + mgr_disable_module, + mgr_enable_module, + _is_mgr_module_enable, + _is_bootstrapped, + ): + rel_id = self.harness.add_relation("metrics-endpoint", "prometheus") + self.harness.add_relation_unit(rel_id, "prometheus/0") + + unit_rel_data = self.harness.get_relation_data( + rel_id, self.harness.model.unit + ) + self.assertEqual( + unit_rel_data["prometheus_scrape_unit_address"], "10.0.0.10" + ) + + # Trigger relation change event as a side effect + self.harness.update_relation_data( + rel_id, "prometheus/0", {"foo": "bar"} + ) + + mgr_enable_module.assert_called_once() + + app_rel_data = self.harness.get_relation_data( + rel_id, self.harness.model.app + ) + jobs = app_rel_data["scrape_jobs"] + self.assertEqual( + jobs, + ( + '[{"metrics_path": "/metrics", ' + '"static_configs": [{"targets": ["*:9283"]}]}]' + ), + ) + + self.harness.remove_relation(rel_id) + mgr_disable_module.assert_called_once() + + def get_alert_rules(self, rel_id): + app_rel_data = self.harness.get_relation_data( + rel_id, self.harness.model.app + ) + return json.loads(app_rel_data["alert_rules"]) + + @patch("ceph_metrics.ceph_utils.is_bootstrapped", return_value=True) + @patch("ceph_metrics.CephMetricsEndpointProvider._set_alert_rules") + def test_update_alert_rules_empty( + self, + set_alert_rules, + _is_bootstrapped, + ): + """Test: no alert rules created with empty alert rules file.""" + rel_id = self.harness.add_relation("metrics-endpoint", "prometheus") + self.harness.add_relation_unit(rel_id, "prometheus/0") + self.harness.add_resource("alert-rules", "") + self.harness.charm.metrics_endpoint.update_alert_rules() + set_alert_rules.assert_called_with({}) + + @patch("ceph_metrics.ceph_utils.is_bootstrapped", return_value=True) + def test_update_alert_rules_invalid(self, _is_bootstrapped): + rel_id = self.harness.add_relation("metrics-endpoint", "prometheus") + self.harness.add_relation_unit(rel_id, "prometheus/0") + self.harness.add_resource("alert-rules", "not-a-rule") + self.harness.charm.metrics_endpoint.update_alert_rules() + self.assertTrue( + self.harness.charm.metrics_endpoint.have_alert_rule_errors() + ) + + 
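The `scrape_jobs` string asserted in `test_add_remove_rel` above is compact JSON; decoded, the expected structure is a single job scraping `/metrics` on port 9283, the ceph-mgr Prometheus module's default port:

import json

jobs = json.loads('[{"metrics_path": "/metrics", '
                  '"static_configs": [{"targets": ["*:9283"]}]}]')
assert jobs[0]['metrics_path'] == '/metrics'
assert jobs[0]['static_configs'][0]['targets'] == ['*:9283']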
@patch("ceph_metrics.ceph_utils.is_bootstrapped", return_value=True) + def test_update_alert_rules(self, _is_bootstrapped): + rel_id = self.harness.add_relation("metrics-endpoint", "prometheus") + self.harness.add_relation_unit(rel_id, "prometheus/0") + self.harness.add_resource("alert-rules", self.rules) + self.harness.charm.metrics_endpoint.update_alert_rules() + alert_rules = self.get_alert_rules(rel_id) + self.assertTrue(alert_rules.get("groups")) + + +class TestCephCOSAgentProvider(CephMetricsTestBase): + def setUp(self): + super().setUp() + self.harness = Harness(charm.CephMonCharm) + self.addCleanup(self.harness.cleanup) + self.harness.begin() + self.harness.set_leader(True) + self.harness.charm.cos_agent._metrics_rules = self.rules_dir + + def test_init(self): + self.assertEqual( + self.harness.charm.cos_agent._relation_name, + "cos-agent", + ) + + @patch("ceph_metrics.mgr_config_set_rbd_stats_pools", lambda: None) + @patch("ceph_metrics.ceph_utils.is_bootstrapped", return_value=True) + @patch("ceph_metrics.ceph_utils.is_mgr_module_enabled", return_value=False) + @patch("ceph_metrics.ceph_utils.mgr_enable_module") + @patch("ceph_metrics.ceph_utils.mgr_disable_module") + def test_add_remove_rel( + self, + mgr_disable_module, + mgr_enable_module, + _is_mgr_module_enable, + _is_bootstrapped, + ): + rel_id = self.harness.add_relation("cos-agent", "grafana-agent") + self.harness.add_relation_unit(rel_id, "grafana-agent/0") + + unit_rel_data = self.harness.get_relation_data( + rel_id, self.harness.model.unit + ) + data = json.loads(unit_rel_data["config"]) + self.assertTrue("metrics_scrape_jobs" in data) + self.assertEqual( + data["metrics_scrape_jobs"][0]["metrics_path"], "/metrics" + ) + self.assertTrue("metrics_alert_rules" in data) + self.assertTrue("groups" in data["metrics_alert_rules"]) + mgr_enable_module.assert_called_once() + + self.harness.remove_relation(rel_id) + mgr_disable_module.assert_called_once() + + @patch("socket.getfqdn", return_value="node1.ceph.example.com") + def test_custom_scrape_configs(self, _mock_getfqdn): + configs = self.harness.charm.cos_agent._custom_scrape_configs() + self.assertEqual( + configs[0]["static_configs"][0]["targets"], ["localhost:9283"] + ) + self.assertEqual( + configs[0]["metric_relabel_configs"][0]["replacement"], + "ceph_cluster", + ) diff --git a/ceph-mon/unit_tests/test_ceph_networking.py b/ceph-mon/unit_tests/test_ceph_networking.py new file mode 100644 index 00000000..9d16573c --- /dev/null +++ b/ceph-mon/unit_tests/test_ceph_networking.py @@ -0,0 +1,65 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import charmhelpers.core.hookenv as hookenv +import test_utils +import utils as ceph_utils + +TO_PATCH_SPACES = [ + 'network_get_primary_address', + 'log', + 'get_host_ip', + 'config', + 'get_network_addrs', + 'cached', +] + + +class CephNetworkSpaceTestCase(test_utils.CharmTestCase): + def setUp(self): + super(CephNetworkSpaceTestCase, self).setUp(ceph_utils, + TO_PATCH_SPACES) + self.config.side_effect = self.test_config.get + + def tearDown(self): + # Reset @cached cache + hookenv.cache = {} + + def test_no_network_space_support(self): + self.get_host_ip.return_value = '192.168.2.1' + self.network_get_primary_address.side_effect = NotImplementedError + self.assertEqual(ceph_utils.get_cluster_addr(), + '192.168.2.1') + self.assertEqual(ceph_utils.get_public_addr(), + '192.168.2.1') + + def test_public_network_space(self): + self.network_get_primary_address.return_value = '10.20.40.2' + self.assertEqual(ceph_utils.get_public_addr(), + '10.20.40.2') + self.network_get_primary_address.assert_called_with('public') + self.config.assert_called_with('ceph-public-network') + + def test_cluster_network_space(self): + self.network_get_primary_address.return_value = '10.20.50.2' + self.assertEqual(ceph_utils.get_cluster_addr(), + '10.20.50.2') + self.network_get_primary_address.assert_called_with('cluster') + self.config.assert_called_with('ceph-cluster-network') + + def test_config_options_in_use(self): + self.get_network_addrs.return_value = ['192.122.20.2'] + self.test_config.set('ceph-cluster-network', '192.122.20.0/24') + self.assertEqual(ceph_utils.get_cluster_addr(), + '192.122.20.2') diff --git a/ceph-mon/unit_tests/test_ceph_ops.py b/ceph-mon/unit_tests/test_ceph_ops.py new file mode 100644 index 00000000..25e095e4 --- /dev/null +++ b/ceph-mon/unit_tests/test_ceph_ops.py @@ -0,0 +1,236 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
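Taken together, the three networking tests above fix a resolution order for the monitor's addresses: an explicitly configured CIDR (`ceph-public-network`/`ceph-cluster-network`) wins, otherwise the Juju network binding (`public`/`cluster`) is queried, and the plain host IP is the last resort where bindings raise `NotImplementedError`. A hedged sketch of that logic, with all collaborators passed in; the real `utils` code may differ in detail:

def resolve_addr(binding, config_key,
                 config, get_network_addrs,
                 network_get_primary_address, get_host_ip):
    cidr = config(config_key)                # e.g. 'ceph-cluster-network'
    if cidr:
        # first local address falling inside the configured CIDR
        return get_network_addrs(cidr)[0]
    try:
        return network_get_primary_address(binding)   # Juju network space
    except NotImplementedError:
        return get_host_ip()                 # pre-network-spaces fallback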
+ +import json +import unittest + +from unittest.mock import ( + patch, +) + +from charms_ceph import broker + + +class TestCephOps(unittest.TestCase): + + @patch.object(broker, 'create_erasure_profile') + @patch.object(broker, 'log', lambda *args, **kwargs: None) + def test_create_erasure_profile(self, mock_create_erasure): + req = json.dumps({'api-version': 1, + 'ops': [{ + 'op': 'create-erasure-profile', + 'name': 'foo', + 'erasure-type': 'jerasure', + 'failure-domain': 'rack', + 'k': 3, + 'm': 2, + }]}) + rc = broker.process_requests(req) + mock_create_erasure.assert_called_with( + service='admin', + erasure_plugin_name='jerasure', + profile_name='foo', + failure_domain='rack', + data_chunks=3, coding_chunks=2, + locality=None, + durability_estimator=None, + helper_chunks=None, + scalar_mds=None, + crush_locality=None, + device_class=None, + erasure_plugin_technique=None) + self.assertEqual(json.loads(rc), {'exit-code': 0}) + + @patch.object(broker, 'delete_pool') + @patch.object(broker, 'log', lambda *args, **kwargs: None) + def test_process_requests_delete_pool(self, + mock_delete_pool): + reqs = json.dumps({'api-version': 1, + 'ops': [{ + 'op': 'delete-pool', + 'name': 'foo', + }]}) + mock_delete_pool.return_value = {'exit-code': 0} + rc = broker.process_requests(reqs) + mock_delete_pool.assert_called_with(service='admin', name='foo') + self.assertEqual(json.loads(rc), {'exit-code': 0}) + + @patch('charmhelpers.contrib.storage.linux.ceph.cmp_pkgrevno') + @patch.object(broker, 'pool_exists') + @patch.object(broker.ReplicatedPool, 'create') + @patch.object(broker, 'log', lambda *args, **kwargs: None) + def test_process_requests_create_replicated_pool(self, + mock_replicated_pool, + mock_pool_exists, + mock_cmp_pkgrevno): + mock_pool_exists.return_value = False + mock_cmp_pkgrevno.return_value = 1 + reqs = json.dumps({'api-version': 1, + 'ops': [{ + 'op': 'create-pool', + 'name': 'foo', + 'replicas': 3 + }]}) + rc = broker.process_requests(reqs) + mock_pool_exists.assert_called_with(service='admin', name='foo') + mock_replicated_pool.assert_called_with() + self.assertEqual(json.loads(rc), {'exit-code': 0}) + + @patch('charmhelpers.contrib.storage.linux.ceph.cmp_pkgrevno') + @patch.object(broker, 'pool_exists') + @patch.object(broker.ErasurePool, 'create') + @patch.object(broker, 'erasure_profile_exists') + @patch.object(broker, 'log', lambda *args, **kwargs: None) + def test_process_requests_create_erasure_pool(self, mock_profile_exists, + mock_erasure_pool, + mock_pool_exists, + mock_cmp_pkgrevno): + mock_pool_exists.return_value = False + mock_cmp_pkgrevno.return_value = 1 + reqs = json.dumps({'api-version': 1, + 'ops': [{ + 'op': 'create-pool', + 'pool-type': 'erasure', + 'name': 'foo', + 'erasure-profile': 'default' + }]}) + rc = broker.process_requests(reqs) + mock_profile_exists.assert_called_with(service='admin', name='default') + mock_pool_exists.assert_called_with(service='admin', name='foo') + mock_erasure_pool.assert_called_with() + self.assertEqual(json.loads(rc), {'exit-code': 0}) + + @patch('charmhelpers.contrib.storage.linux.ceph.cmp_pkgrevno') + @patch.object(broker, 'pool_exists') + @patch.object(broker.BasePool, 'add_cache_tier') + @patch.object(broker, 'log', lambda *args, **kwargs: None) + def test_process_requests_create_cache_tier(self, mock_pool, + mock_pool_exists, + mock_cmp_pkgrevno): + mock_pool_exists.return_value = True + mock_cmp_pkgrevno.return_value = 1 + reqs = json.dumps({'api-version': 1, + 'ops': [{ + 'op': 'create-cache-tier', + 'cold-pool': 'foo', 
+ 'hot-pool': 'foo-ssd', + 'mode': 'writeback', + 'erasure-profile': 'default' + }]}) + rc = broker.process_requests(reqs) + self.assertEqual(json.loads(rc), {'exit-code': 0}) + + mock_pool_exists.assert_any_call(service='admin', name='foo') + mock_pool_exists.assert_any_call(service='admin', name='foo-ssd') + + mock_pool.assert_called_with(cache_pool='foo-ssd', mode='writeback') + + @patch('charmhelpers.contrib.storage.linux.ceph.cmp_pkgrevno') + @patch.object(broker, 'pool_exists') + @patch.object(broker.BasePool, 'remove_cache_tier') + @patch.object(broker, 'log', lambda *args, **kwargs: None) + def test_process_requests_remove_cache_tier(self, mock_pool, + mock_pool_exists, + mock_cmp_pkgrevno): + mock_pool_exists.return_value = True + mock_cmp_pkgrevno.return_value = 1 + reqs = json.dumps({'api-version': 1, + 'ops': [{ + 'op': 'remove-cache-tier', + 'hot-pool': 'foo-ssd', + }]}) + rc = broker.process_requests(reqs) + self.assertEqual(json.loads(rc), {'exit-code': 0}) + mock_pool_exists.assert_any_call(service='admin', name='foo-ssd') + + mock_pool.assert_called_with(cache_pool='foo-ssd') + + @patch.object(broker, 'snapshot_pool') + @patch.object(broker, 'log', lambda *args, **kwargs: None) + def test_snapshot_pool(self, mock_snapshot_pool): + reqs = json.dumps({'api-version': 1, + 'ops': [{ + 'op': 'snapshot-pool', + 'name': 'foo', + 'snapshot-name': 'foo-snap1', + }]}) + mock_snapshot_pool.return_value = {'exit-code': 0} + rc = broker.process_requests(reqs) + self.assertEqual(json.loads(rc), {'exit-code': 0}) + + mock_snapshot_pool.assert_called_with(service='admin', + pool_name='foo', + snapshot_name='foo-snap1') + + @patch.object(broker, 'rename_pool') + @patch.object(broker, 'log', lambda *args, **kwargs: None) + def test_rename_pool(self, mock_rename_pool): + reqs = json.dumps({'api-version': 1, + 'ops': [{ + 'op': 'rename-pool', + 'name': 'foo', + 'new-name': 'foo2', + }]}) + mock_rename_pool.return_value = {'exit-code': 0} + rc = broker.process_requests(reqs) + mock_rename_pool.assert_called_with(service='admin', + old_name='foo', + new_name='foo2') + self.assertEqual(json.loads(rc), {'exit-code': 0}) + + @patch.object(broker, 'remove_pool_snapshot') + @patch.object(broker, 'log', lambda *args, **kwargs: None) + def test_remove_pool_snapshot(self, mock_snapshot_pool): + reqs = json.dumps({'api-version': 1, + 'ops': [{ + 'op': 'remove-pool-snapshot', + 'name': 'foo', + 'snapshot-name': 'foo-snap1', + }]}) + mock_snapshot_pool.return_value = {'exit-code': 0} + rc = broker.process_requests(reqs) + mock_snapshot_pool.assert_called_with(service='admin', + pool_name='foo', + snapshot_name='foo-snap1') + self.assertEqual(json.loads(rc), {'exit-code': 0}) + + @patch.object(broker, 'pool_set') + @patch.object(broker, 'log', lambda *args, **kwargs: None) + def test_set_pool_value(self, mock_set_pool): + reqs = json.dumps({'api-version': 1, + 'ops': [{ + 'op': 'set-pool-value', + 'name': 'foo', + 'key': 'size', + 'value': 3, + }]}) + mock_set_pool.return_value = {'exit-code': 0} + rc = broker.process_requests(reqs) + mock_set_pool.assert_called_with(service='admin', + pool_name='foo', + key='size', + value=3) + self.assertEqual(json.loads(rc), {'exit-code': 0}) + + @patch.object(broker, 'log', lambda *args, **kwargs: None) + def test_set_invalid_pool_value(self): + reqs = json.dumps({'api-version': 1, + 'ops': [{ + 'op': 'set-pool-value', + 'name': 'foo', + 'key': 'size', + 'value': 'abc', + }]}) + rc = broker.process_requests(reqs) + self.assertEqual(json.loads(rc)['exit-code'], 1) diff 
--git a/ceph-mon/unit_tests/test_ceph_shared.py b/ceph-mon/unit_tests/test_ceph_shared.py new file mode 100644 index 00000000..9279d1b7 --- /dev/null +++ b/ceph-mon/unit_tests/test_ceph_shared.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 +# Copyright 2022 Canonical Ltd. +# See LICENSE file for licensing details. + +from unittest.mock import patch +import unittest + +from ops.testing import Harness + +import ceph_shared +import charm + + +@patch("charm.hooks") +class TestCephShared(unittest.TestCase): + def setUp(self): + super().setUp() + self.harness = Harness(charm.CephMonCharm) + self.addCleanup(self.harness.cleanup) + + def test_init(self, _hooks): + self.harness.begin() + ceph_info = ceph_shared.CephMonInfo(self.harness.charm) + self.assertTrue(ceph_info.relations) + + def test_get_peer_mons(self, _hooks): + self.harness.begin() + self.harness.set_leader(True) + ceph_info = ceph_shared.CephMonInfo(self.harness.charm) + self.harness.add_relation_unit( + self.harness.add_relation("mon", "ceph-mon"), "ceph-mon/0" + ) + peer_mons = ceph_info.get_peer_mons() + self.assertEqual(len(peer_mons), 1) + peer = list(peer_mons.keys())[0] + self.assertEqual(peer.name, "ceph-mon/0") + + def test_not_sufficient_osds(self, _hooks): + self.harness.begin() + ceph_info = ceph_shared.CephMonInfo(self.harness.charm) + rel_id = self.harness.add_relation("osd", "ceph-osd") + self.harness.add_relation_unit(rel_id, "ceph-osd/0") + have_enough = ceph_info.sufficient_osds(minimum_osds=77) + self.assertFalse(have_enough) + + def test_sufficient_osds(self, _hooks): + self.harness.begin() + ceph_info = ceph_shared.CephMonInfo(self.harness.charm) + rel_id = self.harness.add_relation("osd", "ceph-osd") + self.harness.add_relation_unit(rel_id, "ceph-osd/0") + self.harness.update_relation_data( + rel_id, "ceph-osd/0", {"bootstrapped-osds": "77"} + ) + have_enough = ceph_info.sufficient_osds(minimum_osds=77) + self.assertTrue(have_enough) diff --git a/ceph-mon/unit_tests/test_ceph_status.py b/ceph-mon/unit_tests/test_ceph_status.py new file mode 100644 index 00000000..f5e7960e --- /dev/null +++ b/ceph-mon/unit_tests/test_ceph_status.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python3 +# Copyright 2022 Canonical Ltd. +# See LICENSE file for licensing details. 
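+ +# These tests drive ceph_status.StatusAssessor through an ops testing +# Harness. Each check_* method returns an ops.model status object +# (ActiveStatus, WaitingStatus or BlockedStatus) describing one aspect of +# cluster health, so the assertions below check the type of the returned +# status rather than globally visible unit state.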
+ +from unittest.mock import patch +import unittest + +from ops import model +from ops.testing import Harness + +import ceph_status +import charm + +from charmhelpers.contrib.storage.linux import ceph as ch_ceph + + +@patch("charm.hooks") +class TestCephStatus(unittest.TestCase): + def setUp(self): + super().setUp() + self.harness = Harness(charm.CephMonCharm) + self.addCleanup(self.harness.cleanup) + + def test_init(self, _hooks): + self.harness.begin() + status = ceph_status.StatusAssessor(self.harness.charm) + self.assertTrue(status.charm.custom_status_checks) + + def test_check_insecure_cmr(self, _hooks): + self.harness.begin() + status = ceph_status.StatusAssessor(self.harness.charm) + result = status.check_insecure_cmr() + self.assertIsInstance(result, model.ActiveStatus) + self.harness.add_relation_unit( + self.harness.add_relation("client", "remote"), "remote-foo/0" + ) + result = status.check_insecure_cmr() + self.assertIsInstance(result, model.BlockedStatus) + + def test_check_moncount(self, _hooks): + self.harness.begin() + status = ceph_status.StatusAssessor(self.harness.charm) + result = status.check_moncount() + self.assertIsInstance(result, model.BlockedStatus) + rel_id = self.harness.add_relation("mon", "ceph-mon") + for n in (0, 1, 2): + self.harness.add_relation_unit(rel_id, "ceph-mon/{}".format(n)) + result = status.check_moncount() + self.assertIsInstance(result, model.ActiveStatus) + + def test_check_ready_mons(self, _hooks): + self.harness.begin() + status = ceph_status.StatusAssessor(self.harness.charm) + result = status.check_ready_mons() + self.assertIsInstance(result, model.WaitingStatus) + rel_id = self.harness.add_relation("mon", "ceph-mon") + for n in (0, 1, 2): + self.harness.add_relation_unit(rel_id, "ceph-mon/{}".format(n)) + self.harness.update_relation_data( + rel_id, "ceph-mon/{}".format(n), {"ceph-public-address": "foo"} + ) + result = status.check_ready_mons() + self.assertIsInstance(result, model.ActiveStatus) + + @patch("ceph_status.ch_ceph.get_osd_settings") + def test_check_get_osd_settings(self, get_osd_settings, _hooks): + self.harness.begin() + status = ceph_status.StatusAssessor(self.harness.charm) + result = status.check_get_osd_settings() + self.assertIsInstance(result, model.ActiveStatus) + get_osd_settings.side_effect = ch_ceph.OSDSettingConflict( + "testexception" + ) + result = status.check_get_osd_settings() + self.assertIsInstance(result, model.BlockedStatus) + + def test_check_alert_rule_errors(self, _hooks): + self.harness.begin() + status = ceph_status.StatusAssessor(self.harness.charm) + with patch.object( + self.harness.charm, + "metrics_endpoint", + create=True, + ) as metrics_endpoint: + metrics_endpoint.have_alert_rule_errors.return_value = True + result = status.check_alert_rule_errors() + self.assertIsInstance(result, model.BlockedStatus) + + metrics_endpoint.have_alert_rule_errors.return_value = False + result = status.check_alert_rule_errors() + self.assertIsInstance(result, model.ActiveStatus) + + @patch("ceph_status.ceph_utils") + def test_check_expected_osd_count(self, ceph_utils, _hooks): + self.harness.begin() + status = ceph_status.StatusAssessor(self.harness.charm) + + # not bootstrapped + ceph_utils.is_bootstrapped.return_value = False + ceph_utils.is_quorum.return_value = False + result = status.check_expected_osd_count() + self.assertIsInstance(result, model.BlockedStatus) + self.assertEqual(result.message, "Unit not clustered (no quorum)") + + # bootstrapped, no osd rel + ceph_utils.is_bootstrapped.return_value = 
True + ceph_utils.is_quorum.return_value = True + result = status.check_expected_osd_count() + self.assertIsInstance(result, model.BlockedStatus) + self.assertEqual(result.message, "Missing relation: OSD") + + # bootstrapped, enough osds + rel_id = self.harness.add_relation("osd", "ceph-osd") + for n in (0, 1, 2): + self.harness.add_relation_unit(rel_id, "ceph-osd/{}".format(n)) + self.harness.update_relation_data( + rel_id, "ceph-osd/{}".format(n), {"bootstrapped-osds": "1"} + ) + result = status.check_expected_osd_count() + self.assertIsInstance(result, model.ActiveStatus) diff --git a/ceph-mon/unit_tests/test_ceph_utils.py b/ceph-mon/unit_tests/test_ceph_utils.py new file mode 100644 index 00000000..ff05b3b1 --- /dev/null +++ b/ceph-mon/unit_tests/test_ceph_utils.py @@ -0,0 +1,403 @@ +# Copyright 2019 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import unittest.mock as mock + +import test_utils + +import utils + + +class CephUtilsTestCase(test_utils.CharmTestCase): + + def setUp(self): + super().setUp() + + @mock.patch.object(utils, 'related_units') + @mock.patch.object(utils, 'relation_ids') + def test_has_rbd_mirrors(self, _relation_ids, _related_units): + # NOTE(fnordahl): This optimization will not be useful until we get a + # resolution on LP: #1818245 + # _goal_state.return_value = {'relations': {'rbd-mirror': None}} + # self.assertTrue(utils.has_rbd_mirrors()) + # _goal_state.assert_called_once_with() + # _goal_state.side_effect = NotImplementedError + _relation_ids.return_value = ['arelid'] + _related_units.return_value = ['aunit/0'] + self.assertTrue(utils.has_rbd_mirrors()) + _relation_ids.assert_called_once_with('rbd-mirror') + _related_units.assert_called_once_with('arelid') + + @mock.patch.object(utils.ceph, 'enabled_manager_modules') + def test_mgr_module_enabled(self, _enabled_modules): + _enabled_modules.return_value = [] + self.assertFalse(utils.is_mgr_module_enabled('test-module')) + + @mock.patch.object(utils.ceph, 'enabled_manager_modules') + def test_mgr_module__is_enabled(self, _enabled_modules): + _enabled_modules.return_value = ['test-module'] + self.assertTrue(utils.is_mgr_module_enabled('test-module')) + + @mock.patch.object(utils.ceph, 'enabled_manager_modules') + @mock.patch.object(utils.subprocess, 'check_call') + def test_mgr_disable_module(self, _call, _enabled_modules): + _enabled_modules.return_value = ['test-module'] + utils.mgr_disable_module('test-module') + _call.assert_called_once_with( + ['ceph', 'mgr', 'module', 'disable', 'test-module']) + + @mock.patch.object(utils.ceph, 'enabled_manager_modules') + @mock.patch.object(utils.subprocess, 'check_call') + def test_mgr_enable_module(self, _call, _enabled_modules): + _enabled_modules.return_value = [] + utils.mgr_enable_module('test-module') + _call.assert_called_once_with( + ['ceph', 'mgr', 'module', 'enable', 'test-module']) + + @mock.patch.object(utils.ceph, 'enabled_manager_modules') + @mock.patch.object(utils.subprocess, 'check_call') + def 
test_mgr_enable_module_again(self, _call, _enabled_modules): + _enabled_modules.return_value = ['test-module'] + utils.mgr_enable_module('test-module') + _call.assert_not_called() + + @mock.patch.object(utils.subprocess, 'check_output') + def test_get_default_rbd_features(self, _check_output): + _check_output.return_value = json.dumps( + {'a': 'b', + 'rbd_default_features': '61', + 'c': 'd'}) + self.assertEqual( + utils.get_default_rbd_features(), + 61) + _check_output.assert_called_once_with( + ['ceph-conf', '-c', '/dev/null', '-D', '--format', 'json'], + universal_newlines=True) + + def test_add_mirror_rbd_features(self): + DEFAULT_FEATURES = 61 + RBD_FEATURE_EXCLUSIVE_LOCK = 4 + RBD_FEATURE_JOURNALING = 64 + COMBINED_FEATURES = (DEFAULT_FEATURES | RBD_FEATURE_EXCLUSIVE_LOCK | + RBD_FEATURE_JOURNALING) + self.assertEqual(utils.add_rbd_mirror_features(DEFAULT_FEATURES), + COMBINED_FEATURES) + + @mock.patch.object(utils, 'get_default_rbd_features') + @mock.patch.object(utils, 'has_rbd_mirrors') + @mock.patch.object(utils, 'config') + def test_get_rbd_features(self, _config, _has_rbd_mirrors, + _get_default_rbd_features): + _config.side_effect = \ + lambda key: {'default-rbd-features': 42}.get(key, None) + self.assertEqual(utils.get_rbd_features(), 42) + _has_rbd_mirrors.return_value = True + _get_default_rbd_features.return_value = 61 + _config.side_effect = lambda key: {}.get(key, None) + self.assertEqual(utils.get_rbd_features(), 125) + _has_rbd_mirrors.return_value = False + self.assertEqual(utils.get_rbd_features(), None) + + @mock.patch.object(utils, '_is_required_osd_release') + @mock.patch.object(utils, '_all_ceph_versions_same') + @mock.patch.object(utils, '_set_require_osd_release') + @mock.patch.object(utils, 'log') + def test_execute_post_osd_upgrade_steps_executes( + self, log, _set_require_osd_release, + _all_ceph_versions_same, _is_required_osd_release): + release = 'luminous' + + _all_ceph_versions_same.return_value = True + _is_required_osd_release.return_value = False + + utils.execute_post_osd_upgrade_steps(release) + + _set_require_osd_release.assert_called_once_with(release) + + @mock.patch.object(utils, '_is_required_osd_release') + @mock.patch.object(utils, '_all_ceph_versions_same') + @mock.patch.object(utils, '_set_require_osd_release') + @mock.patch.object(utils, 'log') + def test_execute_post_osd_upgrade_steps_no_exec_already_set( + self, log, _set_require_osd_release, + _all_ceph_versions_same, _is_required_osd_release): + release = 'jewel' + + _all_ceph_versions_same.return_value = True + _is_required_osd_release.return_value = True + + utils.execute_post_osd_upgrade_steps(release) + + _set_require_osd_release.assert_not_called() + + @mock.patch.object(utils, '_is_required_osd_release') + @mock.patch.object(utils, '_all_ceph_versions_same') + @mock.patch.object(utils, '_set_require_osd_release') + @mock.patch.object(utils, 'log') + def test_execute_post_osd_upgrade_steps_handle_upgrade_error( + self, log, _set_require_osd_release, + _all_ceph_versions_same, _is_required_osd_release): + release = 'luminous' + + _all_ceph_versions_same.side_effect = utils.OsdPostUpgradeError() + + utils.execute_post_osd_upgrade_steps(release) + + log.assert_called_with(message=mock.ANY, level='ERROR') + + @mock.patch.object(utils.subprocess, 'check_output') + @mock.patch.object(utils.json, 'loads') + @mock.patch.object(utils, 'log') + def test_all_ceph_versions_same_one_overall_one_osd_true( + self, log, json_loads, subprocess_check_output): + mock_versions_dict = dict( + 
osd=dict(version_1=1), + overall=dict(version_1=2) + ) + json_loads.return_value = mock_versions_dict + + return_bool = utils._all_ceph_versions_same() + + self.assertTrue( + return_bool, + msg='all_ceph_versions_same returned False but should be True') + log.assert_called_once() + + @mock.patch.object(utils.subprocess, 'check_output') + @mock.patch.object(utils.json, 'loads') + @mock.patch.object(utils, 'log') + def test_all_ceph_versions_same_two_overall_returns_false( + self, log, json_loads, subprocess_check_output): + mock_versions_dict = dict( + osd=dict(version_1=1), + overall=dict(version_1=1, version_2=2) + ) + json_loads.return_value = mock_versions_dict + + return_bool = utils._all_ceph_versions_same() + + self.assertFalse( + return_bool, + msg='all_ceph_versions_same returned True but should be False') + self.assertEqual(log.call_count, 2) + + @mock.patch.object(utils.subprocess, 'check_output') + @mock.patch.object(utils.json, 'loads') + @mock.patch.object(utils, 'log') + def test_all_ceph_versions_same_one_overall_no_osd_returns_false( + self, log, json_loads, subprocess_check_output): + mock_versions_dict = dict( + osd=dict(), + overall=dict(version_1=1) + ) + json_loads.return_value = mock_versions_dict + + return_bool = utils._all_ceph_versions_same() + + self.assertFalse( + return_bool, + msg='all_ceph_versions_same returned True but should be False') + self.assertEqual(log.call_count, 2) + + @mock.patch.object(utils.subprocess, 'check_output') + @mock.patch.object(utils, 'log') + def test_all_ceph_versions_same_cmd_not_found( + self, log, subprocess_check_output): + call_exception = utils.subprocess.CalledProcessError( + 22, mock.MagicMock() + ) + subprocess_check_output.side_effect = call_exception + + return_bool = utils._all_ceph_versions_same() + + self.assertFalse(return_bool) + + @mock.patch.object(utils.subprocess, 'check_output') + @mock.patch.object(utils, 'log') + def test_all_ceph_versions_same_raise_error_on_unknown_rc( + self, log, subprocess_check_output): + call_exception = utils.subprocess.CalledProcessError( + 0, mock.MagicMock() + ) + subprocess_check_output.side_effect = call_exception + + with self.assertRaises(utils.OsdPostUpgradeError): + utils._all_ceph_versions_same() + + @mock.patch.object(utils.subprocess, 'check_call') + @mock.patch.object(utils, 'log') + def test_set_require_osd_release_success(self, log, check_call): + release = 'luminous' + utils._set_require_osd_release(release) + expected_call = mock.call( + ['ceph', 'osd', 'require-osd-release', release, + '--yes-i-really-mean-it'] + ) + check_call.assert_has_calls([expected_call]) + + @mock.patch.object(utils.subprocess, 'check_call') + @mock.patch.object(utils, 'log') + def test_set_require_osd_release_raise_call_error(self, log, check_call): + release = 'luminous' + check_call.side_effect = utils.subprocess.CalledProcessError( + 0, mock.MagicMock() + ) + expected_call = mock.call([ + 'ceph', 'osd', 'require-osd-release', release, + '--yes-i-really-mean-it' + ]) + + with self.assertRaises(utils.OsdPostUpgradeError): + utils._set_require_osd_release(release) + + check_call.assert_has_calls([expected_call]) + log.assert_called_once() + + @mock.patch.object(utils, 'relation_ids') + @mock.patch.object(utils, 'related_units') + @mock.patch.object(utils, 'relation_get') + def test_get_ceph_osd_releases_one_release( + self, relation_get, related_units, relation_ids): + r_ids = ['a', 'b', 'c'] + r_units = ['1'] + ceph_release = 'mimic' + + relation_ids.return_value = r_ids + 
related_units.return_value = r_units + relation_get.return_value = ceph_release + + releases = utils.get_ceph_osd_releases() + + self.assertEqual(len(releases), 1) + self.assertEqual(releases[0], ceph_release) + + @mock.patch.object(utils, 'relation_ids') + @mock.patch.object(utils, 'related_units') + @mock.patch.object(utils, 'relation_get') + def test_get_ceph_osd_releases_two_releases( + self, relation_get, related_units, relation_ids): + r_ids = ['a', 'b'] + r_units = ['1'] + ceph_release_1 = 'luminous' + ceph_release_2 = 'mimic' + + relation_ids.return_value = r_ids + related_units.return_value = r_units + relation_get.side_effect = [ceph_release_1, ceph_release_2] + + releases = utils.get_ceph_osd_releases() + + self.assertEqual(len(releases), 2) + self.assertEqual(sorted(releases), [ceph_release_1, ceph_release_2]) + + @mock.patch.object(utils.subprocess, 'check_output') + @mock.patch.object(utils.json, 'loads') + def test_is_required_osd_release_not_set_return_false( + self, loads, check_output): + release = 'luminous' + previous_release = 'jewel' + osd_dump_dict = dict(require_osd_release=previous_release) + + loads.return_value = osd_dump_dict + + return_bool = utils._is_required_osd_release(release) + + self.assertFalse(return_bool) + + @mock.patch.object(utils.subprocess, 'check_output') + @mock.patch.object(utils.json, 'loads') + def test_is_required_osd_release_is_set_return_true( + self, loads, check_output): + release = 'luminous' + osd_dump_dict = dict(require_osd_release=release) + + loads.return_value = osd_dump_dict + + return_bool = utils._is_required_osd_release(release) + + self.assertTrue(return_bool) + + @mock.patch.object(utils.subprocess, 'check_output') + @mock.patch.object(utils.json, 'loads') + def test_is_required_osd_release_subprocess_error(self, loads, + check_output): + release = 'luminous' + + call_exception = utils.subprocess.CalledProcessError( + 0, mock.MagicMock() + ) + check_output.side_effect = call_exception + + with self.assertRaises(utils.OsdPostUpgradeError): + utils._is_required_osd_release(release) + + @mock.patch.object(utils.subprocess, 'check_output') + @mock.patch.object(utils.json, 'loads') + def test_is_required_osd_release_json_loads_error(self, loads, + check_output): + release = 'luminous' + + call_exception = utils.json.JSONDecodeError( + '', mock.MagicMock(), 0 + ) + loads.side_effect = call_exception + + with self.assertRaises(utils.OsdPostUpgradeError): + utils._is_required_osd_release(release) + + @mock.patch.object(utils.subprocess, 'check_call') + @mock.patch.object(utils, 'is_mgr_module_enabled') + @mock.patch.object(utils, 'cmp_pkgrevno') + def test_balancer_mode(self, + cmp_pkgrevno, + is_mgr_module_enabled, + check_call): + cmp_pkgrevno.return_value = 0 + is_mgr_module_enabled.return_value = True + utils.set_balancer_mode('upmap') + check_call.assert_called_with(['ceph', 'balancer', 'mode', + 'upmap'], shell=True) + + @mock.patch.object(utils.subprocess, 'check_call') + @mock.patch.object(utils, 'cmp_pkgrevno') + def test_balancer_mode_before_luminous(self, + cmp_pkgrevno, + check_call): + cmp_pkgrevno.return_value = -1 + utils.set_balancer_mode('upmap') + check_call.assert_not_called() + + @mock.patch.object(utils.subprocess, 'check_call') + @mock.patch.object(utils, 'is_mgr_module_enabled') + @mock.patch.object(utils, 'cmp_pkgrevno') + def test_balancer_mode_no_balancer(self, + cmp_pkgrevno, + is_mgr_module_enabled, + check_call): + cmp_pkgrevno.return_value = 0 + is_mgr_module_enabled.return_value = False + 
utils.set_balancer_mode('upmap') + check_call.assert_not_called() + + @mock.patch.object(utils.subprocess, 'check_call') + @mock.patch.object(utils, 'is_leader') + def test_disable_insecure_reclaim(self, + is_leader, + check_call): + is_leader.return_value = True + utils.try_disable_insecure_reclaim() + check_call.assert_called_once_with([ + 'ceph', '--id', 'admin', + 'config', 'set', 'mon', + 'auth_allow_insecure_global_id_reclaim', 'false']) diff --git a/ceph-mon/unit_tests/test_charm.py b/ceph-mon/unit_tests/test_charm.py new file mode 100644 index 00000000..edd009a4 --- /dev/null +++ b/ceph-mon/unit_tests/test_charm.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 + +# Copyright 2022 Canonical Ltd. +# See LICENSE file for licensing details. + +from unittest.mock import patch +import unittest + +from ops.testing import Harness + +import ceph_metrics # noqa: avoid circ. import +import charm + + +class TestCephCharm(unittest.TestCase): + def setUp(self): + super().setUp() + self.harness = Harness(charm.CephMonCharm) + self.harness.begin() + self.addCleanup(self.harness.cleanup) + + def test_init(self): + self.assertTrue(self.harness.charm.framework) + self.assertTrue(self.harness.charm.metrics_endpoint) + self.assertTrue(self.harness.charm.ceph_status) + + @patch.object(charm.ceph_client.CephClientProvides, 'notify_all') + @patch("charm.hooks") + def test_on_config_changed(self, hooks, _notify_all): + self.harness.update_config({"permit-insecure-cmr": None}) + hooks.config_changed.assert_called() + + @patch.object(charm.ceph_client.CephClientProvides, 'notify_all') + @patch("charm.ops_openstack.core.apt_install") + @patch("charm.ops_openstack.core.apt_update") + @patch("charm.ops_openstack.core.add_source") + @patch("charm.ops_openstack.core.OSBaseCharm.update_status") + @patch("charm.hooks") + @patch("charm.systemd") + @patch("charm.apt") + def test_on_install( + self, + _apt, + _systemd, + _hooks, + _update_status, + _add_source, + apt_update, + apt_install, + _notify_all + ): + self.harness.update_config({"permit-insecure-cmr": None}) + self.harness.charm.on.install.emit() + apt_install.assert_called_with( + [ + "ceph", + "gdisk", + "radosgw", + "lvm2", + "parted", + "smartmontools", + ], + fatal=True, + ) + apt_update.assert_called() + + @patch("charm.hooks") + def test_on_pre_commit(self, hooks): + self.harness.charm.on.framework.on.pre_commit.emit() + hooks.hookenv._run_atexit.assert_called() diff --git a/ceph-mon/unit_tests/test_check_ceph_osd_count.py b/ceph-mon/unit_tests/test_check_ceph_osd_count.py new file mode 100644 index 00000000..22aa382c --- /dev/null +++ b/ceph-mon/unit_tests/test_check_ceph_osd_count.py @@ -0,0 +1,216 @@ +# Copyright 2021 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
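+ +# These tests cover the Nagios check_ceph_osd_count script together with +# the hook helper that maintains its input: the current OSD layout (mocked +# via get_osd_tree) is compared against an expected tree persisted as JSON, +# e.g. {"host1": [0, 1]} meaning host1 is expected to carry OSDs 0 and 1. +# mock_open stands in for the on-disk report file throughout.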
+ +import os +import sys +import unittest + +from unittest.mock import patch, mock_open +from src.ceph_hooks import update_host_osd_count_report + +os.sys.path.insert(1, os.path.join(sys.path[0], 'lib')) +os.sys.path.insert(1, os.path.join(sys.path[0], 'files/nagios')) + +import check_ceph_osd_count + +from charms_ceph.utils import CrushLocation + + +class CheckCephOsdCountTestCase(unittest.TestCase): + + @patch("check_ceph_osd_count.get_osd_tree") + def test_check_equal_ceph_osd_trees(self, mock_get_osd_tree): + """Check that if current and expected osd trees match, EXIT_OK is returned.""" + + current_osd_tree = {"host1": [0]} + mock_get_osd_tree.return_value = current_osd_tree + expected_osd_tree = """{"host1": [0]}""" + with patch( + "check_ceph_osd_count.open", + mock_open(read_data=expected_osd_tree), + ) as file: + (exit_code, _) = check_ceph_osd_count.check_ceph_osd_count(file) + self.assertEqual(exit_code, check_ceph_osd_count.EXIT_OK) + + # change osd order + current_osd_tree = {"host1": [0, 1]} + mock_get_osd_tree.return_value = current_osd_tree + expected_osd_tree = """{"host1": [1, 0]}""" + with patch( + "check_ceph_osd_count.open", + mock_open(read_data=expected_osd_tree), + ) as file: + (exit_code, _) = check_ceph_osd_count.check_ceph_osd_count(file) + self.assertEqual(exit_code, check_ceph_osd_count.EXIT_OK) + + @patch("check_ceph_osd_count.get_osd_tree") + def test_check_missing_expected_osd(self, mock_get_osd_tree): + """Check that missing expected osd returns appropriate exit code.""" + current_osd_tree = {"host1": [0]} + mock_get_osd_tree.return_value = current_osd_tree + expected_osd_tree = """{"host1": [0, 1]}""" + with patch( + "check_ceph_osd_count.open", + mock_open(read_data=expected_osd_tree), + ) as file: + + (exit_code, _) = check_ceph_osd_count.check_ceph_osd_count(file) + self.assertEqual(exit_code, check_ceph_osd_count.EXIT_CRIT) + + @patch("check_ceph_osd_count.get_osd_tree") + def test_check_missing_expected_host(self, + mock_get_osd_tree): + """Check that missing expected host returns appropriate exit code.""" + current_osd_tree = {"host1": [0]} + mock_get_osd_tree.return_value = current_osd_tree + expected_osd_tree = """{"host1": [0], "host2": [1]}""" + with patch( + "check_ceph_osd_count.open", + mock_open(read_data=expected_osd_tree), + ) as file: + + (exit_code, _) = check_ceph_osd_count.check_ceph_osd_count(file) + self.assertEqual(exit_code, check_ceph_osd_count.EXIT_CRIT) + + @patch("check_ceph_osd_count.get_osd_tree") + def test_check_change_osd_ids(self, mock_get_osd_tree): + """Check that a change in osd ids (of same length) is OK.""" + current_osd_tree = {"host1": [1], "host2": [3]} + mock_get_osd_tree.return_value = current_osd_tree + expected_osd_tree = """{"host1": [0], "host2": [1]}""" + with patch( + "check_ceph_osd_count.open", + mock_open(read_data=expected_osd_tree), + ) as file: + (exit_code, _) = check_ceph_osd_count.check_ceph_osd_count(file) + self.assertEqual(exit_code, check_ceph_osd_count.EXIT_OK) + + @patch("check_ceph_osd_count.get_osd_tree") + def test_osd_tree_current_gt_expected(self, mock_get_osd_tree): + """Check that growing osd list is added to expected.""" + current_osd_tree = {"host1": [0, 1], "host2": [2]} + mock_get_osd_tree.return_value = current_osd_tree + expected_osd_tree = """{"host1": [0]}""" + with patch( + "check_ceph_osd_count.open", + mock_open(read_data=expected_osd_tree), + ) as file: + (exit_code, _) = check_ceph_osd_count.check_ceph_osd_count(file) + self.assertEqual(exit_code, check_ceph_osd_count.EXIT_OK) +
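+ # The tests below exercise update_host_osd_count_report, the hook helper + # that refreshes the expected-tree JSON consumed by the check above. Note + # the asymmetry they encode: new hosts and OSDs are merged into the + # report, but a shrinking OSD list or a vanished host never removes + # entries, so the check can still flag OSDs that have disappeared. +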
+ @patch("json.dumps") + @patch("src.ceph_hooks.write_file") + @patch("src.ceph_hooks.pathlib") + @patch("charms_ceph.utils.get_osd_tree") + def test_update_report_fresh_tree(self, + mock_get_osd_tree, + mock_pathlib, + mock_write_file, + mock_json_dumps): + """Check that an empty expected tree triggers an update to expected.""" + new_osd_tree = [CrushLocation(0, "osd.0", osd="osd.0", host="host1"), + CrushLocation(1, "osd.1", osd="osd.1", host="host1")] + new_osd_dict = {"host1": [0, 1]} + mock_get_osd_tree.return_value = new_osd_tree + + with patch( + "src.ceph_hooks.open", + mock_open(read_data="{}"), + ): + update_host_osd_count_report() + mock_json_dumps.assert_called_with(new_osd_dict) + + @patch("json.dumps") + @patch("src.ceph_hooks.write_file") + @patch("src.ceph_hooks.pathlib") + @patch("charms_ceph.utils.get_osd_tree") + def test_update_report_new_host(self, + mock_get_osd_tree, + mock_pathlib, + mock_write_file, + mock_json_dumps): + """Check that adding new host adds new host to expected tree.""" + new_osd_tree = [CrushLocation(0, "osd.0", osd="osd.0", host="host1"), + CrushLocation(1, "osd.1", osd="osd.1", host="host1"), + CrushLocation(2, "osd.2", osd="osd.2", host="host2")] + mock_get_osd_tree.return_value = new_osd_tree + with patch( + "src.ceph_hooks.open", + mock_open(read_data="""{"host1": [0, 1]}"""), + ): + update_host_osd_count_report() + mock_json_dumps.assert_called_with( + {"host1": [0, 1], "host2": [2]}) + + @patch("json.dumps") + @patch("src.ceph_hooks.write_file") + @patch("src.ceph_hooks.pathlib") + @patch("charms_ceph.utils.get_osd_tree") + def test_update_report_missing_host(self, + mock_get_osd_tree, + mock_pathlib, + mock_write_file, + mock_json_dumps): + """Check that missing host is not removed from expected tree.""" + new_osd_tree = [CrushLocation(0, "osd.0", osd="osd.0", host="host1"), + CrushLocation(2, "osd.2", osd="osd.2", host="host1")] + mock_get_osd_tree.return_value = new_osd_tree + with patch( + "src.ceph_hooks.open", + mock_open(read_data="""{"host1": [0], "host2": [1]}"""), + ): + update_host_osd_count_report() + mock_json_dumps.assert_called_with( + {"host1": [0, 2], "host2": [1]}) + + @patch("json.dumps") + @patch("src.ceph_hooks.write_file") + @patch("src.ceph_hooks.pathlib") + @patch("charms_ceph.utils.get_osd_tree") + def test_update_report_fewer_osds(self, + mock_get_osd_tree, + mock_pathlib, + mock_write_file, + mock_json_dumps): + """Check that report isn't updated when osd list shrinks.""" + new_osd_tree = [CrushLocation(0, "osd.0", osd="osd.0", host="host1")] + mock_get_osd_tree.return_value = new_osd_tree + with patch( + "src.ceph_hooks.open", + mock_open(read_data="""{"host1": [0, 1]}"""), + ): + update_host_osd_count_report() + mock_json_dumps.assert_called_with( + {"host1": [0, 1]}) + + @patch("json.dumps") + @patch("src.ceph_hooks.write_file") + @patch("src.ceph_hooks.pathlib") + @patch("charms_ceph.utils.get_osd_tree") + def test_update_report_diff_osd_ids(self, + mock_get_osd_tree, + mock_pathlib, + mock_write_file, + mock_json_dumps): + """Check that new osd id list (of same length) becomes new expected.""" + new_osd_tree = [CrushLocation(2, "osd.2", osd="osd.2", host="host1"), + CrushLocation(3, "osd.3", osd="osd.3", host="host1")] + mock_get_osd_tree.return_value = new_osd_tree + with patch( + "src.ceph_hooks.open", + mock_open(read_data="""{"host1": [0, 1]}"""), + ): + update_host_osd_count_report() + mock_json_dumps.assert_called_with( + {"host1": [2, 3]}) diff --git a/ceph-mon/unit_tests/test_check_ceph_status.py 
b/ceph-mon/unit_tests/test_check_ceph_status.py new file mode 100644 index 00000000..5342ce55 --- /dev/null +++ b/ceph-mon/unit_tests/test_check_ceph_status.py @@ -0,0 +1,366 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import os +import sys + +from unittest.mock import patch + +# import the module we want to test +os.sys.path.insert(1, os.path.join(sys.path[0], 'files/nagios')) +import check_ceph_status + + +@patch('subprocess.check_output') +class NagiosTestCase(unittest.TestCase): + + def test_get_ceph_version(self, mock_subprocess): + mock_subprocess.return_value = 'ceph version 10.2.9 ' \ + '(2ee413f77150c0f375ff6f10edd6c8f9c7d060d0)'.encode('UTF-8') + ceph_version = check_ceph_status.get_ceph_version() + self.assertEqual(ceph_version, [10, 2, 9]) + + # All OK, pre-luminous + @patch('check_ceph_status.get_ceph_version') + def test_health_ok(self, mock_ceph_version, mock_subprocess): + mock_ceph_version.return_value = [10, 2, 9] + with open('unit_tests/ceph_ok.json') as f: + tree = f.read() + mock_subprocess.return_value = tree.encode('UTF-8') + args = check_ceph_status.parse_args(['--degraded_thresh', '1']) + check_output = check_ceph_status.check_ceph_status(args) + self.assertRegex(check_output, r"^All OK$") + + # Warning, pre-luminous + @patch('check_ceph_status.get_ceph_version') + def test_health_warn(self, mock_ceph_version, mock_subprocess): + mock_ceph_version.return_value = [10, 2, 9] + with open('unit_tests/ceph_warn.json') as f: + tree = f.read() + mock_subprocess.return_value = tree.encode('UTF-8') + args = check_ceph_status.parse_args("") + self.assertRaises(check_ceph_status.WarnError, + lambda: check_ceph_status.check_ceph_status(args)) + + # Error, pre-luminous, health_critical status + @patch('check_ceph_status.get_ceph_version') + def test_health_err(self, mock_ceph_version, mock_subprocess): + mock_ceph_version.return_value = [10, 2, 9] + with open('unit_tests/ceph_crit.json') as f: + tree = f.read() + mock_subprocess.return_value = tree.encode('UTF-8') + args = check_ceph_status.parse_args("") + self.assertRaises(check_ceph_status.CriticalError, + lambda: check_ceph_status.check_ceph_status(args)) + + # Error, pre-luminous, overall HEALTH_ERR + @patch('check_ceph_status.get_ceph_version') + def test_health_crit(self, mock_ceph_version, mock_subprocess): + mock_ceph_version.return_value = [10, 2, 9] + with open('unit_tests/ceph_error.json') as f: + tree = f.read() + mock_subprocess.return_value = tree.encode('UTF-8') + args = check_ceph_status.parse_args("") + self.assertRaises(check_ceph_status.CriticalError, + lambda: check_ceph_status.check_ceph_status(args)) + + # Error, pre-luminous, because misplaced ratio is too big + @patch('check_ceph_status.get_ceph_version') + def test_health_crit_misplaced(self, mock_ceph_version, mock_subprocess): + mock_ceph_version.return_value = [10, 2, 9] + with open('unit_tests/ceph_params.json') as f: + tree = f.read() + mock_subprocess.return_value = tree.encode('UTF-8') + 
args = check_ceph_status.parse_args(['--misplaced_thresh', '0.1']) + self.assertRaises(check_ceph_status.CriticalError, + lambda: check_ceph_status.check_ceph_status(args)) + + # Error, pre-luminous, because recovery rate is too low + @patch('check_ceph_status.get_ceph_version') + def test_health_crit_recovery(self, mock_ceph_version, mock_subprocess): + mock_ceph_version.return_value = [10, 2, 9] + with open('unit_tests/ceph_params.json') as f: + tree = f.read() + mock_subprocess.return_value = tree.encode('UTF-8') + args = check_ceph_status.parse_args(['--recovery_rate', '400']) + self.assertRaises(check_ceph_status.CriticalError, + lambda: check_ceph_status.check_ceph_status(args)) + + # Warning, pre-luminous, deepscrub + @patch('check_ceph_status.get_ceph_version') + def test_health_warn_deepscrub(self, mock_ceph_version, mock_subprocess): + mock_ceph_version.return_value = [10, 2, 9] + with open('unit_tests/ceph_nodeepscrub.json') as f: + tree = f.read() + mock_subprocess.return_value = tree.encode('UTF-8') + args = check_ceph_status.parse_args("") + self.assertRaises(check_ceph_status.WarnError, + lambda: check_ceph_status.check_ceph_status(args)) + + # Error, pre-luminous, deepscrub + @patch('check_ceph_status.get_ceph_version') + def test_health_crit_deepscrub(self, mock_ceph_version, mock_subprocess): + mock_ceph_version.return_value = [10, 2, 9] + with open('unit_tests/ceph_nodeepscrub.json') as f: + tree = f.read() + mock_subprocess.return_value = tree.encode('UTF-8') + args = check_ceph_status.parse_args(['--raise_nodeepscrub']) + self.assertRaises(check_ceph_status.CriticalError, + lambda: check_ceph_status.check_ceph_status(args)) + + # Error, pre-luminous, noout + @patch('check_ceph_status.get_ceph_version') + def test_health_crit_noout(self, mock_ceph_version, mock_subprocess): + mock_ceph_version.return_value = [10, 2, 9] + with open('unit_tests/ceph_noout.json') as f: + tree = f.read() + mock_subprocess.return_value = tree.encode('UTF-8') + args = check_ceph_status.parse_args("") + self.assertRaises(check_ceph_status.CriticalError, + lambda: check_ceph_status.check_ceph_status(args)) + + # All OK, luminous + @patch('check_ceph_status.get_ceph_version') + def test_health_ok_luminous(self, mock_ceph_version, mock_subprocess): + mock_ceph_version.return_value = [12, 2, 0] + with open('unit_tests/ceph_ok_luminous.json') as f: + tree = f.read() + mock_subprocess.return_value = tree.encode('UTF-8') + args = check_ceph_status.parse_args(['--degraded_thresh', '1']) + check_output = check_ceph_status.check_ceph_status(args) + self.assertRegex(check_output, r"^All OK$") + + # Warning, luminous + @patch('check_ceph_status.get_ceph_version') + def test_health_warn_luminous(self, mock_ceph_version, mock_subprocess): + mock_ceph_version.return_value = [12, 2, 0] + with open('unit_tests/ceph_many_warnings_luminous.json') as f: + tree = f.read() + mock_subprocess.return_value = tree.encode('UTF-8') + args = check_ceph_status.parse_args("") + self.assertRaises(check_ceph_status.WarnError, + lambda: check_ceph_status.check_ceph_status(args)) + +# Error, luminous, because of overall status + + # Error, luminous, because misplaced ratio is too big + @patch('check_ceph_status.get_ceph_version') + def test_health_critical_misplaced_luminous(self, + mock_ceph_version, + mock_subprocess): + mock_ceph_version.return_value = [12, 2, 0] + with open('unit_tests/ceph_many_warnings_luminous.json') as f: + tree = f.read() + mock_subprocess.return_value = tree.encode('UTF-8') + args = 
check_ceph_status.parse_args(['--misplaced_thresh', '0.1']) + self.assertRaises(check_ceph_status.CriticalError, + lambda: check_ceph_status.check_ceph_status(args)) + + # Error, luminous, because degraded ratio is too big + @patch('check_ceph_status.get_ceph_version') + def test_health_critical_degraded_luminous(self, + mock_ceph_version, + mock_subprocess): + mock_ceph_version.return_value = [12, 2, 0] + with open('unit_tests/ceph_degraded_luminous.json') as f: + tree = f.read() + mock_subprocess.return_value = tree.encode('UTF-8') + args = check_ceph_status.parse_args(['--degraded_thresh', '0.1']) + self.assertRaises(check_ceph_status.CriticalError, + lambda: check_ceph_status.check_ceph_status(args)) + + # Error, luminous, because recovery rate is too low + @patch('check_ceph_status.get_ceph_version') + def test_health_critical_recovery_luminous(self, + mock_ceph_version, + mock_subprocess): + mock_ceph_version.return_value = [12, 2, 0] + with open('unit_tests/ceph_many_warnings_luminous.json') as f: + tree = f.read() + mock_subprocess.return_value = tree.encode('UTF-8') + args = check_ceph_status.parse_args(['--recovery_rate', '20']) + self.assertRaises(check_ceph_status.CriticalError, + lambda: check_ceph_status.check_ceph_status(args)) + + # Warning, luminous, deepscrub + @patch('check_ceph_status.get_ceph_version') + def test_health_warn_deepscrub_luminous(self, + mock_ceph_version, + mock_subprocess): + mock_ceph_version.return_value = [12, 2, 0] + with open('unit_tests/ceph_nodeepscrub_luminous.json') as f: + tree = f.read() + mock_subprocess.return_value = tree.encode('UTF-8') + args = check_ceph_status.parse_args("") + self.assertRaises(check_ceph_status.WarnError, + lambda: check_ceph_status.check_ceph_status(args)) + + # Error, luminous, deepscrub + @patch('check_ceph_status.get_ceph_version') + def test_health_crit_deepscrub_luminous(self, + mock_ceph_version, + mock_subprocess): + mock_ceph_version.return_value = [12, 2, 0] + with open('unit_tests/ceph_nodeepscrub_luminous.json') as f: + tree = f.read() + mock_subprocess.return_value = tree.encode('UTF-8') + args = check_ceph_status.parse_args(['--raise_nodeepscrub']) + self.assertRaises(check_ceph_status.CriticalError, + lambda: check_ceph_status.check_ceph_status(args)) + + # Error, luminous, noout + @patch('check_ceph_status.get_ceph_version') + def test_health_crit_noout_luminous(self, + mock_ceph_version, + mock_subprocess): + mock_ceph_version.return_value = [12, 2, 0] + with open('unit_tests/ceph_noout_luminous.json') as f: + tree = f.read() + mock_subprocess.return_value = tree.encode('UTF-8') + args = check_ceph_status.parse_args("") + self.assertRaises(check_ceph_status.CriticalError, + lambda: check_ceph_status.check_ceph_status(args)) + + # Additional Ok, luminous, deepscrub + @patch('check_ceph_status.get_ceph_version') + def test_additional_ok_deepscrub_luminous(self, + mock_ceph_version, + mock_subprocess): + mock_ceph_version.return_value = [12, 2, 0] + with open('unit_tests/ceph_nodeepscrub_luminous.json') as f: + tree = f.read() + mock_subprocess.return_value = tree.encode('UTF-8') + args = check_ceph_status.parse_args(['--additional_check', 'osd out']) + check_output = check_ceph_status.check_ceph_status(args) + self.assertRegex(check_output, r"^All OK$") + + # Additional warning, luminous, deepscrub + @patch('check_ceph_status.get_ceph_version') + def test_additional_warn_deepscrub_luminous(self, + mock_ceph_version, + mock_subprocess): + mock_ceph_version.return_value = [12, 2, 0] + with 
open('unit_tests/ceph_nodeepscrub_luminous.json') as f: + tree = f.read() + mock_subprocess.return_value = tree.encode('UTF-8') + args = check_ceph_status.parse_args(['--additional_check', 'deep']) + self.assertRaises(check_ceph_status.WarnError, + lambda: check_ceph_status.check_ceph_status(args)) + + # Additional error, luminous, deepscrub + @patch('check_ceph_status.get_ceph_version') + def test_additional_error_deepscrub_luminous(self, + mock_ceph_version, + mock_subprocess): + mock_ceph_version.return_value = [12, 2, 0] + with open('unit_tests/ceph_nodeepscrub_luminous.json') as f: + tree = f.read() + mock_subprocess.return_value = tree.encode('UTF-8') + args = check_ceph_status.parse_args(['--additional_check', 'deep', + '--additional_check_critical']) + self.assertRaises(check_ceph_status.CriticalError, + lambda: check_ceph_status.check_ceph_status(args)) + + # Additional Ok, pre-luminous, deepscrub + @patch('check_ceph_status.get_ceph_version') + def test_additional_ok_deepscrub_pre_luminous(self, + mock_ceph_version, + mock_subprocess): + mock_ceph_version.return_value = [10, 2, 9] + with open('unit_tests/ceph_nodeepscrub.json') as f: + tree = f.read() + mock_subprocess.return_value = tree.encode('UTF-8') + args = check_ceph_status.parse_args(['--additional_check', 'osd out']) + check_output = check_ceph_status.check_ceph_status(args) + self.assertRegex(check_output, r"^All OK$") + + # Additional warning, pre-luminous, deepscrub + @patch('check_ceph_status.get_ceph_version') + def test_additional_warn_deepscrub_pre_luminous(self, + mock_ceph_version, + mock_subprocess): + mock_ceph_version.return_value = [10, 2, 9] + with open('unit_tests/ceph_nodeepscrub.json') as f: + tree = f.read() + mock_subprocess.return_value = tree.encode('UTF-8') + args = check_ceph_status.parse_args(['--additional_check', 'deep']) + self.assertRaises(check_ceph_status.WarnError, + lambda: check_ceph_status.check_ceph_status(args)) + + # Additional error, pre-luminous, deepscrub + @patch('check_ceph_status.get_ceph_version') + def test_additional_error_deepscrub_pre_luminous(self, + mock_ceph_version, + mock_subprocess): + mock_ceph_version.return_value = [10, 2, 9] + with open('unit_tests/ceph_nodeepscrub.json') as f: + tree = f.read() + mock_subprocess.return_value = tree.encode('UTF-8') + args = check_ceph_status.parse_args(['--additional_check', 'deep', + '--additional_check_critical']) + self.assertRaises(check_ceph_status.CriticalError, + lambda: check_ceph_status.check_ceph_status(args)) + + # Num OSD OK, pre-luminous + @patch('check_ceph_status.get_ceph_version') + def test_num_osds_ok_pre_luminous(self, + mock_ceph_version, + mock_subprocess): + mock_ceph_version.return_value = [10, 2, 9] + with open('unit_tests/ceph_ok.json') as f: + tree = f.read() + mock_subprocess.return_value = tree.encode('UTF-8') + args = check_ceph_status.parse_args(['--check_num_osds']) + check_output = check_ceph_status.check_ceph_status(args) + self.assertRegex(check_output, r"^OK") + + # Num OSD error, pre-luminous + @patch('check_ceph_status.get_ceph_version') + def test_num_osds_error_pre_luminous(self, + mock_ceph_version, + mock_subprocess): + mock_ceph_version.return_value = [10, 2, 9] + with open('unit_tests/ceph_warn.json') as f: + tree = f.read() + mock_subprocess.return_value = tree.encode('UTF-8') + args = check_ceph_status.parse_args(['--check_num_osds']) + self.assertRaises(check_ceph_status.CriticalError, + lambda: check_ceph_status.check_ceph_status(args)) + + # Num OSD OK, luminous + 
@patch('check_ceph_status.get_ceph_version') + def test_num_osds_ok_luminous(self, + mock_ceph_version, + mock_subprocess): + mock_ceph_version.return_value = [12, 2, 0] + with open('unit_tests/ceph_many_warnings_luminous.json') as f: + tree = f.read() + mock_subprocess.return_value = tree.encode('UTF-8') + args = check_ceph_status.parse_args(['--check_num_osds']) + check_output = check_ceph_status.check_ceph_status(args) + self.assertRegex(check_output, r"^OK") + + # Num OSD error, luminous + @patch('check_ceph_status.get_ceph_version') + def test_num_osds_error_luminous(self, + mock_ceph_version, + mock_subprocess): + mock_ceph_version.return_value = [12, 2, 0] + with open('unit_tests/ceph_degraded_luminous.json') as f: + tree = f.read() + mock_subprocess.return_value = tree.encode('UTF-8') + args = check_ceph_status.parse_args(['--check_num_osds']) + self.assertRaises(check_ceph_status.CriticalError, + lambda: check_ceph_status.check_ceph_status(args)) diff --git a/ceph-mon/unit_tests/test_upgrade.py b/ceph-mon/unit_tests/test_upgrade.py new file mode 100644 index 00000000..9c3c6335 --- /dev/null +++ b/ceph-mon/unit_tests/test_upgrade.py @@ -0,0 +1,127 @@ +from unittest.mock import patch +from ceph_hooks import check_for_upgrade +from test_utils import CharmTestCase +from charms_ceph.utils import resolve_ceph_version as resolve_ceph_version_orig + + +__author__ = 'Chris Holcombe ' + + +def config_side_effect(*args): + if args[0] == 'source': + return 'cloud:trusty-kilo' + elif args[0] == 'key': + return 'key' + elif args[0] == 'release-version': + return 'cloud:trusty-kilo' + + +class UpgradeRollingTestCase(CharmTestCase): + + @patch('ceph_hooks.ceph.is_bootstrapped') + @patch('ceph_hooks.hookenv') + @patch('ceph_hooks.ceph.roll_monitor_cluster') + def test_check_for_upgrade(self, roll_monitor_cluster, hookenv, + is_bootstrapped): + is_bootstrapped.return_value = True + self.test_config.set_previous('source', 'cloud:trusty-juno') + self.test_config.set('source', 'cloud:trusty-kilo') + hookenv.config.side_effect = self.test_config + check_for_upgrade() + + roll_monitor_cluster.assert_called_with( + new_version='hammer', + upgrade_key='admin') + + @patch('ceph_hooks.ceph.is_bootstrapped') + @patch('ceph_hooks.hookenv') + @patch('ceph_hooks.ceph.roll_monitor_cluster') + def test_check_for_upgrade_not_bootstrapped(self, roll_monitor_cluster, + hookenv, is_bootstrapped): + is_bootstrapped.return_value = False + self.test_config.set_previous('source', 'cloud:trusty-juno') + self.test_config.set('source', 'cloud:trusty-kilo') + hookenv.config.side_effect = self.test_config + check_for_upgrade() + + roll_monitor_cluster.assert_not_called() + + @patch('ceph_hooks.add_source') + @patch('ceph_hooks.ceph.is_bootstrapped') + @patch('ceph_hooks.hookenv') + @patch('ceph_hooks.ceph.roll_monitor_cluster') + def test_check_for_upgrade_from_pike_to_queens(self, roll_monitor_cluster, + hookenv, is_bootstrapped, + add_source): + is_bootstrapped.return_value = True + hookenv.config.side_effect = self.test_config + self.test_config.set('key', 'some-key') + self.test_config.set_previous('source', 'cloud:xenial-pike') + self.test_config.set('source', 'cloud:xenial-queens') + check_for_upgrade() + roll_monitor_cluster.assert_not_called() + add_source.assert_called_with('cloud:xenial-queens', 'some-key') + + @patch('ceph_hooks.add_source') + @patch('ceph_hooks.ceph.is_bootstrapped') + @patch('ceph_hooks.hookenv') + @patch('ceph_hooks.ceph.roll_monitor_cluster') + def 
test_check_for_upgrade_from_rocky_to_stein(self, roll_monitor_cluster, + hookenv, is_bootstrapped, + add_source): + is_bootstrapped.return_value = True + hookenv.config.side_effect = self.test_config + self.test_config.set('key', 'some-key') + self.test_config.set_previous('source', 'cloud:bionic-rocky') + self.test_config.set('source', 'cloud:bionic-stein') + check_for_upgrade() + roll_monitor_cluster.assert_not_called() + add_source.assert_called_with('cloud:bionic-stein', 'some-key') + + @patch('ceph_hooks.ceph.resolve_ceph_version') + @patch('ceph_hooks.subprocess.check_output') + @patch('ceph_hooks.add_source') + @patch('ceph_hooks.ceph.is_bootstrapped') + @patch('ceph_hooks.hookenv') + @patch('ceph_hooks.ceph.roll_monitor_cluster') + def test_check_for_upgrade_no_current_version(self, roll_monitor_cluster, + hookenv, is_bootstrapped, + add_source, check_output, + resolve_ceph_version): + _resolve_first = True + + def _resolve_version(arg): + nonlocal _resolve_first + if _resolve_first: + _resolve_first = False + return None + return resolve_ceph_version_orig(arg) + + resolve_ceph_version.side_effect = _resolve_version + check_output.return_value = b""" +ceph version 16.2.13 (123) pacific (stable)""" + is_bootstrapped.return_value = True + hookenv.config.side_effect = self.test_config + self.test_config.set('source', 'cloud:focal-yoga') + check_for_upgrade() + roll_monitor_cluster.assert_called() + add_source.assert_not_called() + + @patch('ceph_hooks.ceph.resolve_ceph_version') + @patch('ceph_hooks.subprocess.check_output') + @patch('ceph_hooks.add_source') + @patch('ceph_hooks.ceph.is_bootstrapped') + @patch('ceph_hooks.hookenv') + @patch('ceph_hooks.ceph.roll_monitor_cluster') + def test_check_for_upgrade_no_versions(self, roll_monitor_cluster, + hookenv, is_bootstrapped, + add_source, check_output, + resolve_ceph_version): + resolve_ceph_version.return_value = None + check_output.return_value = b""" +ceph version 17.2.5 (456) quincy (stable)""" + is_bootstrapped.return_value = True + hookenv.config.side_effect = self.test_config + check_for_upgrade() + roll_monitor_cluster.assert_not_called() + add_source.assert_not_called() diff --git a/ceph-mon/unit_tests/test_utils.py b/ceph-mon/unit_tests/test_utils.py new file mode 100644 index 00000000..538aec0d --- /dev/null +++ b/ceph-mon/unit_tests/test_utils.py @@ -0,0 +1,167 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import unittest +import os +import yaml + +from unittest.mock import patch, MagicMock + + +def load_config(): + ''' + Walk backwards from __file__ looking for config.yaml, load and return the + 'options' section. + ''' + config = None + f = __file__ + while config is None: + d = os.path.dirname(f) + if os.path.isfile(os.path.join(d, 'config.yaml')): + config = os.path.join(d, 'config.yaml') + break + f = d + + if not config: + logging.error('Could not find config.yaml in any parent directory ' + 'of {}. 
'.format(f)) + raise Exception + + return yaml.safe_load(open(config).read())['options'] + + +def get_default_config(): + ''' + Load default charm config from config.yaml and return it as a dict. + If no default is set in config.yaml, its value is None. + ''' + default_config = {} + config = load_config() + for k, v in config.items(): + if 'default' in v: + default_config[k] = v['default'] + else: + default_config[k] = None + return default_config + + +class CharmTestCase(unittest.TestCase): + + def setUp(self, obj=None, patches=None): + super(CharmTestCase, self).setUp() + self.patches = patches or [] + self.obj = obj or [] + self.test_config = TestConfig() + self.test_relation = TestRelation() + self.test_leader_settings = TestLeaderSettings() + self.patch_all() + + def patch(self, method): + _m = patch.object(self.obj, method) + mock = _m.start() + self.addCleanup(_m.stop) + return mock + + def patch_all(self): + for method in self.patches: + setattr(self, method, self.patch(method)) + + +class TestConfig(object): + + def __init__(self): + self.config = get_default_config() + self.config_changed = {} + self.config_changed.setdefault(False) + self._previous = get_default_config() + + def __call__(self, key=None): + if key: + return self[key] + else: + return self + + def get(self, attr=None): + if not attr: + # Return a copy of self to allow emulation closer to what + # hookenv.config() returns (not a dict). + return self + try: + return self.config[attr] + except KeyError: + return None + + def get_all(self): + return self.config + + def set(self, attr, value): + if attr not in self.config: + raise KeyError + self.config[attr] = value + + def __getitem__(self, item): + return self.config[item] + + def changed(self, attr): + return self.config_changed[attr] + + def set_changed(self, attr, changed=True): + self.config_changed[attr] = changed + + def set_previous(self, key, value): + self._previous[key] = value + + def previous(self, key): + return self._previous[key] + + +class TestRelation(object): + + def __init__(self, relation_data={}): + self.relation_data = relation_data + + def set(self, relation_data): + self.relation_data = relation_data + + def get(self, attr=None, unit=None, rid=None): + if attr is None: + return self.relation_data + elif attr in self.relation_data: + return self.relation_data[attr] + return None + + +class TestLeaderSettings(object): + + def __init__(self, settings={}): + self.settings = settings + + def set(self, settings): + self.settings = settings + + def get(self, attr=None): + if attr is None: + return self.settings + elif attr in self.settings: + return self.settings[attr] + return None + + +class MockActionEvent: + + def __init__(self, params={}): + self.params = params + self.fail = MagicMock() + self.set_results = MagicMock() diff --git a/ceph-nfs/.flake8 b/ceph-nfs/.flake8 new file mode 100644 index 00000000..293e63b1 --- /dev/null +++ b/ceph-nfs/.flake8 @@ -0,0 +1,11 @@ +[flake8] +max-line-length = 99 +select: E,W,F,C,N +exclude: + venv + .git + build + dist + *.egg_info + # Excluded because it is imported almost verbatim from Manila + src/manager.py \ No newline at end of file diff --git a/ceph-nfs/.gitignore b/ceph-nfs/.gitignore new file mode 100644 index 00000000..8beafeb3 --- /dev/null +++ b/ceph-nfs/.gitignore @@ -0,0 +1,9 @@ +.tox +**/*.swp +__pycache__ +.stestr/ +lib/* +!lib/README.txt +*.charm +.vscode/settings.json +build diff --git a/ceph-nfs/.gitreview b/ceph-nfs/.gitreview new file mode 100644 index 00000000..10ffb19c --- /dev/null +++ 
b/ceph-nfs/.gitreview @@ -0,0 +1,6 @@ +[gerrit] +host=review.opendev.org +port=29418 +project=openstack/charm-ceph-nfs.git + +defaultbranch=stable/squid-jammy diff --git a/ceph-nfs/.jujuignore b/ceph-nfs/.jujuignore new file mode 100644 index 00000000..6ccd559e --- /dev/null +++ b/ceph-nfs/.jujuignore @@ -0,0 +1,3 @@ +/venv +*.py[cod] +*.charm diff --git a/ceph-nfs/.stestr.conf b/ceph-nfs/.stestr.conf new file mode 100644 index 00000000..5fcccaca --- /dev/null +++ b/ceph-nfs/.stestr.conf @@ -0,0 +1,3 @@ +[DEFAULT] +test_path=./unit_tests +top_dir=./ diff --git a/ceph-nfs/.zuul.yaml b/ceph-nfs/.zuul.yaml new file mode 100644 index 00000000..1ffc530a --- /dev/null +++ b/ceph-nfs/.zuul.yaml @@ -0,0 +1,4 @@ +- project: + templates: + - openstack-python3-charm-yoga-jobs + - openstack-cover-jobs \ No newline at end of file diff --git a/ceph-nfs/CONTRIBUTING.md b/ceph-nfs/CONTRIBUTING.md new file mode 100644 index 00000000..61ef5c87 --- /dev/null +++ b/ceph-nfs/CONTRIBUTING.md @@ -0,0 +1,34 @@ +# ceph-nfs + +## Developing + +Create and activate a virtualenv with the development requirements: + + virtualenv -p python3 venv + source venv/bin/activate + pip install -r requirements-dev.txt + +## Code overview + +TEMPLATE-TODO: +One of the most important things a consumer of your charm (or library) +needs to know is what set of functionality it provides. Which categories +does it fit into? Which events do you listen to? Which libraries do you +consume? Which ones do you export and how are they used? + +## Intended use case + +TEMPLATE-TODO: +Why were these decisions made? What's the scope of your charm? + +## Roadmap + +If this Charm doesn't fulfill all of the initial functionality you were +hoping for or planning on, please add a Roadmap or TODO here + +## Testing + +The Python operator framework includes a very nice harness for testing +operator behaviour without full deployment. Just `run_tests`: + + ./run_tests diff --git a/ceph-nfs/LICENSE b/ceph-nfs/LICENSE new file mode 100644 index 00000000..d6456956 --- /dev/null +++ b/ceph-nfs/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/ceph-nfs/README.md b/ceph-nfs/README.md
new file mode 100644
index 00000000..547ad129
--- /dev/null
+++ b/ceph-nfs/README.md
@@ -0,0 +1,59 @@
+# ceph-nfs
+
+## Description
+
+CephNFS is a charm designed to enable management of NFS shares backed
+by CephFS. It supports Ceph Pacific and above.
+
+## Usage
+
+CephNFS provides an additional service when deployed with Ceph and CephFS.
+It should be related to CephMon:
+
+    juju add-relation ceph-nfs:ceph-client ceph-mon:client
+
+Once all relations have settled, it is possible to create a new export:
+
+    juju run-action --wait ceph-nfs/0 create-share name=test-share size=10 allowed-ips=10.0.0.0/24
+
+The above command creates an NFS share that is 10GB in size and is
+accessible from any machine in the 10.0.0.0-10.0.0.255 address range.
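+
+Clients in the allowed network can then mount the share with a standard
+NFS v4.1 mount. A minimal example (the placeholders stand for the export
+path returned by the `create-share` action and the address of a ceph-nfs
+unit or its VIP):
+
+    sudo mount -t nfs -o nfsvers=4.1 <ceph-nfs-address>:<export-path> /mnt
+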
+To grant access to a new network address, the `grant-access` action should
+be used:
+
+    juju run-action --wait ceph-nfs/0 grant-access name=test-share client=192.168.0.10
+
+This command grants the named share access for a specific
+address: `192.168.0.10`.
+
+It is possible to delete the created share with:
+
+    juju run-action --wait ceph-nfs/0 delete-share name=test-share
+
+## High Availability
+
+To gain high availability for NFS shares, it is necessary to scale ceph-nfs and relate it to the hacluster charm:
+
+    juju add-unit ceph-nfs -n 2
+    juju config ceph-nfs vip=10.5.0.100
+    juju deploy hacluster
+    juju add-relation ceph-nfs hacluster
+
+Once everything settles, your shares will be accessible over the configured vip (`10.5.0.100` in this example), and client connections will move between units on failover.
+
+## Relations
+
+Ceph-NFS consumes the ceph-client relation from the ceph-mon charm.
+
+# Bugs
+
+Please report bugs on [Launchpad][lp-bugs-charm-ceph-fs].
+
+For general charm questions refer to the OpenStack [Charm Guide][cg].
+
+Note that starting with the squid track of the ceph-nfs charm, deployment of Ceph Pacific and older clusters is not supported anymore.
+
+
+
+[lp-bugs-charm-ceph-fs]: https://bugs.launchpad.net/charm-ceph-fs/+filebug
+[cg]: https://docs.openstack.org/charm-guide
diff --git a/ceph-nfs/actions.yaml b/ceph-nfs/actions.yaml
new file mode 100644
index 00000000..ccb3e744
--- /dev/null
+++ b/ceph-nfs/actions.yaml
@@ -0,0 +1,80 @@
+# Copyright 2022 Canonical
+# See LICENSE file for licensing details.
+
+create-share:
+  description: Create a new CephFS-backed NFS export
+  params:
+    allowed-ips:
+      description: |
+        Comma-separated list of IP addresses to grant Read/Write access to.
+        The default allows read/write access to any address that can access
+        this application.
+      type: string
+      default: "0.0.0.0/0"
+    size:
+      description: |
+        Size in gigabytes of the share. When unset, the share will not be
+        restricted in size.
+      type: integer
+      default:
+    name:
+      description: |
+        Name of the share that will be exported.
+      type: string
+      default:
+grant-access:
+  description: |
+    Grant the specified client access to a share.
+  params:
+    name:
+      description: Name of the share
+      type: string
+      default:
+    client:
+      description: IP address or network to change access for
+      type: string
+      default:
+resize-share:
+  description: |
+    Resize a specified share.
+  params:
+    name:
+      description: Name of the share
+      type: string
+      default:
+    size:
+      description: What size (GB) should the share be
+      type: integer
+      default:
+
+revoke-access:
+  description: |
+    Revoke the specified client's access to a share.
+  params:
+    name:
+      description: Name of the share
+      type: string
+      default:
+    client:
+      description: IP address or network to change access for
+      type: string
+      default:
+
+delete-share:
+  description: |
+    Delete a CephFS-backed NFS export. Note that this does not delete
+    the backing CephFS share.
+  params:
+    name:
+      description: |
+        Name of the share that will be deleted. If this share doesn't
+        exist then this action will have no effect.
+      type: string
+      default:
+    purge:
+      type: boolean
+      default: False
+      description: Delete the backing CephFS share as well.
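+# An example invocation (hypothetical unit name; `purge` defaults to false,
+# so the backing CephFS share is kept unless explicitly requested):
+#
+#   juju run-action --wait ceph-nfs/0 delete-share name=test-share purge=true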
+list-shares:
+  description: List all shares that this application is managing
+# TODO: Update, delete share
\ No newline at end of file
diff --git a/ceph-nfs/build-requirements.txt b/ceph-nfs/build-requirements.txt
new file mode 100644
index 00000000..0fbd084b
--- /dev/null
+++ b/ceph-nfs/build-requirements.txt
@@ -0,0 +1,7 @@
+# NOTES(lourot):
+# * We don't install charmcraft via pip anymore because it anyway spins up a
+#   container and scps the system's charmcraft snap inside it. So the
+#   charmcraft snap is necessary on the system anyway.
+# * `tox -e build` successfully validated with charmcraft 1.2.1
+
+cffi==1.14.6; python_version < '3.6'  # cffi 1.15.0 drops support for py35.
\ No newline at end of file
diff --git a/ceph-nfs/charmcraft.yaml b/ceph-nfs/charmcraft.yaml
new file mode 100644
index 00000000..fe0f4b66
--- /dev/null
+++ b/ceph-nfs/charmcraft.yaml
@@ -0,0 +1,32 @@
+type: charm
+
+parts:
+  charm:
+    after:
+      - update-certificates
+    build-packages:
+      - git
+
+  update-certificates:
+    plugin: nil
+    # See https://github.com/canonical/charmcraft/issues/658
+    override-build: |
+      apt update
+      apt install -y ca-certificates
+      update-ca-certificates
+
+base: ubuntu@22.04
+build-base: ubuntu@24.04
+platforms:
+  amd64:
+    build-on: amd64
+    build-for: amd64
+  arm64:
+    build-on: arm64
+    build-for: arm64
+  s390x:
+    build-on: s390x
+    build-for: s390x
+  ppc64el:
+    build-on: ppc64el
+    build-for: ppc64el
diff --git a/ceph-nfs/config.yaml b/ceph-nfs/config.yaml
new file mode 100644
index 00000000..5dfe1c9b
--- /dev/null
+++ b/ceph-nfs/config.yaml
@@ -0,0 +1,68 @@
+# Copyright 2021 OpenStack Charmers
+# See LICENSE file for licensing details.
+#
+# TEMPLATE-TODO: change this example to suit your needs.
+# If you don't need a config, you can remove the file entirely.
+# It ties in to the example _on_config_changed handler in src/charm.py
+#
+# Learn more about config at: https://juju.is/docs/sdk/config
+
+options:
+  source:
+    type: string
+    default: caracal
+    description: |
+      Optional configuration to support use of additional sources such as:
+      - ppa:myteam/ppa
+      - cloud:trusty-proposed/kilo
+      - http://my.archive.com/ubuntu main
+      The last option should be used in conjunction with the key configuration
+      option.
+      Note that a minimum ceph version of 0.48.2 is required for use with this
+      charm which is NOT provided by the packages in the main Ubuntu archive
+      for precise but is provided in the Ubuntu cloud archive.
+  key:
+    type: string
+    default:
+    description: |
+      Key ID to import to the apt keyring to support use with arbitrary source
+      configuration from outside of Launchpad archives or PPAs.
+  ceph-osd-replication-count:
+    type: int
+    default: 3
+    description: |
+      This value dictates the number of replicas ceph must make of any
+      object it stores within the images rbd pool. Of course, this only
+      applies if using Ceph as a backend store. Note that once the images
+      rbd pool has been created, changing this value will not have any
+      effect (although it can be changed in ceph by manually configuring
+      your ceph cluster).
+  ceph-pool-weight:
+    type: int
+    default: 5
+    description: |
+      Defines a relative weighting of the pool as a percentage of the total
+      amount of data in the Ceph cluster. This effectively weights the number
+      of placement groups for the pool created to be appropriately portioned
+      to the amount of data expected.
For example, if the compute images + for the OpenStack compute instances are expected to take up 20% of the + overall configuration then this value would be specified as 20. Note - + it is important to choose an appropriate value for the pool weight as + this directly affects the number of placement groups which will be + created for the pool. The number of placement groups for a pool can + only be increased, never decreased - so it is important to identify the + percent of data that will likely reside in the pool. + rbd-pool-name: + default: + type: string + description: | + Optionally specify an existing pool that Ganesha should store recovery + data into. Defaults to the application's name. + vip: + type: string + default: + description: | + Virtual IP(s) to use to front API services in HA configuration. + . + If multiple networks are being used, a VIP should be provided for each + network, separated by spaces. diff --git a/ceph-nfs/metadata.yaml b/ceph-nfs/metadata.yaml new file mode 100644 index 00000000..a2527159 --- /dev/null +++ b/ceph-nfs/metadata.yaml @@ -0,0 +1,26 @@ +name: ceph-nfs +summary: Gateway for provisioning NFS shares backed by ceph. +maintainer: OpenStack Charmers +description: | + The NFS gateway is provided by NFS-Ganesha and provides NFS shares + that are backed by CephFS. +docs: https://discourse.charmhub.io/t/ceph-nfs-docs-index/11224 +tags: + - storage + - misc +series: + - focal + - jammy +subordinate: false +min-juju-version: 2.7.6 +extra-bindings: + public: +requires: + ceph-client: + interface: ceph-client + ha: + interface: hacluster + scope: container +peers: + cluster: + interface: ceph-nfs-peer diff --git a/ceph-nfs/osci.yaml b/ceph-nfs/osci.yaml new file mode 100644 index 00000000..04f68b6c --- /dev/null +++ b/ceph-nfs/osci.yaml @@ -0,0 +1,45 @@ +- project: + templates: + - charm-unit-jobs-py38 + - charm-unit-jobs-py310 + check: + jobs: + - focal-quincy + - jammy-quincy + - jammy-reef + - jammy-squid + vars: + needs_charm_build: true + charm_build_name: ceph-nfs + build_type: charmcraft + charmcraft_channel: 2.x/stable +- job: + name: focal-quincy + parent: func-target + dependencies: + - charm-build + - osci-lint + - tox-py38 + vars: + tox_extra_args: -- focal-quincy +- job: + name: jammy-quincy + parent: func-target + dependencies: + - focal-quincy + vars: + tox_extra_args: -- jammy-quincy +- job: + name: jammy-reef + parent: func-target + dependencies: + - focal-quincy + vars: + tox_extra_args: -- jammy-reef +- job: + name: jammy-squid + parent: func-target + dependencies: + - focal-quincy + vars: + tox_extra_args: -- jammy-squid diff --git a/ceph-nfs/rename.sh b/ceph-nfs/rename.sh new file mode 100755 index 00000000..283a01bf --- /dev/null +++ b/ceph-nfs/rename.sh @@ -0,0 +1,13 @@ +#!/bin/bash +charm=$(grep "charm_build_name" osci.yaml | awk '{print $2}') +echo "renaming ${charm}_*.charm to ${charm}.charm" +echo -n "pwd: " +pwd +ls -al +echo "Removing bad downloaded charm maybe?" +if [[ -e "${charm}.charm" ]]; +then + rm "${charm}.charm" +fi +echo "Renaming charm here." 
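+# charmcraft names its artifact ${charm}_<base-arch>.charm; rename it so
+# that later steps can rely on finding a plain ${charm}.charm.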
+mv ${charm}_*.charm ${charm}.charm
\ No newline at end of file
diff --git a/ceph-nfs/requirements-dev.txt b/ceph-nfs/requirements-dev.txt
new file mode 100644
index 00000000..4f2a3f5b
--- /dev/null
+++ b/ceph-nfs/requirements-dev.txt
@@ -0,0 +1,3 @@
+-r requirements.txt
+coverage
+flake8
diff --git a/ceph-nfs/requirements.txt b/ceph-nfs/requirements.txt
new file mode 100644
index 00000000..1a8dbfad
--- /dev/null
+++ b/ceph-nfs/requirements.txt
@@ -0,0 +1,6 @@
+# requirements
+ops <= 1.6.0
+git+https://github.com/juju/charm-helpers.git#egg=charmhelpers
+git+https://opendev.org/openstack/charm-ops-interface-ceph-client#egg=interface_ceph_client
+git+https://opendev.org/openstack/charm-ops-openstack#egg=ops_openstack
+git+https://opendev.org/openstack/charm-interface-hacluster#egg=interface_hacluster
diff --git a/ceph-nfs/src/charm.py b/ceph-nfs/src/charm.py
new file mode 100755
index 00000000..10061250
--- /dev/null
+++ b/ceph-nfs/src/charm.py
@@ -0,0 +1,556 @@
+#!/usr/bin/env python3
+# Copyright 2021 OpenStack Charmers
+# See LICENSE file for licensing details.
+#
+# Learn more at: https://juju.is/docs/sdk
+
+"""Charm the service.
+
+Refer to the following post for a quick-start guide that will help you
+develop a new charm using the Operator Framework:
+
+    https://discourse.charmhub.io/t/4208
+"""
+
+import ipaddress
+import logging
+import os
+from pathlib import Path
+import socket
+import subprocess
+import tempfile
+
+from ops.framework import StoredState
+from ops.main import main
+# from ops.model import ActiveStatus
+
+import charmhelpers.core.host as ch_host
+import charmhelpers.core.templating as ch_templating
+import interface_ceph_client.ceph_client as ceph_client
+import interface_ceph_nfs_peer
+
+import interface_hacluster.ops_ha_interface as ops_ha_interface
+
+# TODO: Add the below class functionality to action / relations
+from ganesha import GaneshaNFS
+
+import ops_openstack.adapters
+import ops_openstack.core
+import ops_openstack.plugins.classes
+
+logger = logging.getLogger(__name__)
+
+
+class CephClientAdapter(ops_openstack.adapters.OpenStackOperRelationAdapter):
+    """Adapter for ceph client interface."""
+
+    @property
+    def mon_hosts(self):
+        """Sorted list of ceph mon addresses.
+
+        :returns: Ceph MON addresses.
+        :rtype: str
+        """
+        hosts = self.relation.get_relation_data()['mon_hosts']
+        return ' '.join(sorted(hosts))
+
+    @property
+    def auth_supported(self):
+        """Authentication type.
+
+        :returns: Authentication type
+        :rtype: str
+        """
+        return self.relation.get_relation_data()['auth']
+
+    @property
+    def key(self):
+        """Key client should use when communicating with Ceph cluster.
+
+        :returns: Key
+        :rtype: str
+        """
+        return self.relation.get_relation_data()['key']
+
+
+class CephNFSContext(object):
+    """Adapter for ceph NFS config."""
+
+    name = 'ceph_nfs'
+
+    def __init__(self, charm_instance):
+        self.charm_instance = charm_instance
+
+    @property
+    def pool_name(self):
+        """The name of the default rbd data pool to be used for shares.
+
+        :returns: Data pool name.
+        :rtype: str
+        """
+        return self.charm_instance.config_get(
+            'rbd-pool-name', self.charm_instance.app.name
+        )
+
+    @property
+    def client_name(self):
+        return self.charm_instance.app.name
+
+    @property
+    def hostname(self):
+        return socket.gethostname()
+
+
+class OpenStackContextAdapters(
+        ops_openstack.adapters.OpenStackRelationAdapters):
+    """
+    Augmentation of OpenStackRelationAdapters that also includes contexts.
+
+    Proposed for upstreaming
+    https://review.opendev.org/c/openstack/charm-ops-openstack/+/897238
+    """
+
+    relation_adapters = {}
+
+    def __init__(self, relations, charm_instance,
+                 options_instance=None, contexts=None):
+        """
+        :param relations: List of instances of relation classes
+        :param options_instance: Instance of Configuration class to use
+        :param charm_instance: optional charm_instance that is captured as a
+            weakref for use on the adapter.
+        :param contexts: Optional list of contexts
+        """
+        super().__init__(
+            relations, charm_instance,
+            options_instance=options_instance
+        )
+        if contexts is None:
+            contexts = ()
+        self._contexts = set()
+        for context in contexts:
+            self.add_context(context)
+
+    def __iter__(self):
+        """
+        Iterate over the relations and contexts presented to the charm.
+        """
+        for ref in self._relations.union(self._contexts):
+            yield ref, getattr(self, ref)
+
+    def add_context(self, context):
+        """Add the context to this adapters instance.
+
+        :param context: context instance to add
+        """
+        setattr(self, context.name, context)
+        self._contexts.add(context.name)
+
+
+class CephNFSAdapters(OpenStackContextAdapters):
+    """Collection of relation adapters."""
+
+    relation_adapters = {
+        'ceph-client': CephClientAdapter,
+    }
+
+
+class CephNFSCharm(
+        ops_openstack.plugins.classes.BaseCephClientCharm):
+    """Ceph NFS Base Charm."""
+
+    PACKAGES = ['nfs-ganesha-ceph', 'nfs-ganesha-rados-grace', 'ceph-common']
+
+    CEPH_CAPABILITIES = [
+        "mgr", "allow rw",
+        "mds", "allow *",
+        "osd", "allow rw",
+        "mon", "allow r, "
+        "allow command \"auth del\", "
+        "allow command \"auth caps\", "
+        "allow command \"auth get\", "
+        "allow command \"auth get-or-create\""]
+
+    REQUIRED_RELATIONS = ['ceph-client']
+
+    CEPH_CONFIG_PATH = Path('/etc/ceph')
+    GANESHA_CONFIG_PATH = Path('/etc/ganesha')
+
+    CEPH_GANESHA_CONFIG_PATH = CEPH_CONFIG_PATH / 'ganesha'
+    CEPH_CONF = CEPH_CONFIG_PATH / 'ceph.conf'
+    GANESHA_KEYRING = CEPH_GANESHA_CONFIG_PATH / 'ceph.keyring'
+    GANESHA_CONF = GANESHA_CONFIG_PATH / 'ganesha.conf'
+
+    SERVICES = ['nfs-ganesha']
+
+    LB_SERVICE_NAME = "nfs-ganesha"
+    NFS_PORT = 2049
+
+    RESTART_MAP = {
+        str(GANESHA_CONF): SERVICES,
+        str(CEPH_CONF): SERVICES,
+        str(GANESHA_KEYRING): SERVICES}
+
+    release = 'default'
+
+    def __init__(self, framework):
+        super().__init__(framework)
+        # super().register_status_check(self.custom_status_check)
+        logging.info("Using %s class", self.release)
+        self._stored.set_default(
+            is_started=False,
+            is_cluster_setup=False
+        )
+        self.ceph_client = ceph_client.CephClientRequires(
+            self,
+            'ceph-client')
+        self.peers = interface_ceph_nfs_peer.CephNFSPeers(
+            self,
+            'cluster')
+        self.ha = ops_ha_interface.HAServiceRequires(self, 'ha')
+
+        self.adapters = CephNFSAdapters(
+            (self.ceph_client, self.peers),
+            contexts=(CephNFSContext(self),),
+            charm_instance=self)
+        self.framework.observe(
+            self.ceph_client.on.broker_available,
+            self.request_ceph_pool)
+        self.framework.observe(
+            self.ceph_client.on.pools_available,
+            self.render_config)
+        self.framework.observe(
+            self.on.config_changed,
+            self.request_ceph_pool)
+        self.framework.observe(
+            self.on.upgrade_charm,
+            self.render_config)
+        self.framework.observe(
+            self.ceph_client.on.pools_available,
+            self.setup_ganesha)
+        self.framework.observe(
+            self.peers.on.pool_initialised,
+            self.on_pool_initialised)
+        self.framework.observe(
+            self.peers.on.departing,
+            self.on_departing)
+        self.framework.observe(
+            self.peers.on.reload_nonce,
+            self.on_reload_nonce)
+        self.framework.observe(
+            self.ha.on.ha_ready,
+            self._configure_hacluster)
+        # Actions
+        self.framework.observe(
+            self.on.create_share_action,
+            self.create_share_action)
+        self.framework.observe(
+            self.on.list_shares_action,
+            self.list_shares_action)
+        self.framework.observe(
+            self.on.resize_share_action,
+            self.resize_share_action)
+        self.framework.observe(
+            self.on.delete_share_action,
+            self.delete_share_action
+        )
+        self.framework.observe(
+            self.on.grant_access_action,
+            self.grant_access_action
+        )
+        self.framework.observe(
+            self.on.revoke_access_action,
+            self.revoke_access_action
+        )
+
+    def _get_bind_ip(self) -> str:
+        """Return the IP to bind the NFS service to"""
+        binding = self.model.get_binding('public')
+        return str(binding.network.ingress_address)
+
+    def config_get(self, key, default=None):
+        """Retrieve config option.
+
+        :returns: Value of the corresponding config option or None.
+        :rtype: Any
+        """
+        return self.model.config.get(key, default)
+
+    @property
+    def pool_name(self):
+        """The name of the default rbd data pool to be used for shares.
+
+        :returns: Data pool name.
+        :rtype: str
+        """
+        return self.config_get('rbd-pool-name', self.app.name)
+
+    @property
+    def client_name(self):
+        return self.app.name
+
+    @property
+    def ganesha_client(self):
+        return GaneshaNFS(self.client_name, self.pool_name)
+
+    def request_ceph_pool(self, event):
+        """Request pools from Ceph cluster."""
+        if not self.ceph_client.broker_available:
+            logging.info("Cannot request ceph setup at this time")
+            return
+        try:
+            bcomp_kwargs = self.get_bluestore_compression()
+        except ValueError as e:
+            # The end user has most likely provided an invalid value for
+            # a configuration option. Just log the traceback here, the
+            # end user will be notified by assess_status() called at
+            # the end of the hook execution.
+            logging.warning('Caught ValueError, invalid value provided for '
+                            'configuration?: "{}"'.format(str(e)))
+            return
+        weight = self.config_get('ceph-pool-weight')
+        replicas = self.config_get('ceph-osd-replication-count')
+
+        logging.info("Requesting replicated pool")
+        self.ceph_client.create_replicated_pool(
+            name=self.pool_name,
+            app_name='ganesha',
+            replicas=replicas,
+            weight=weight,
+            **bcomp_kwargs)
+        logging.info("Requesting permissions")
+        self.ceph_client.request_ceph_permissions(
+            self.client_name,
+            self.CEPH_CAPABILITIES)
+
+    def refresh_request(self, event):
+        """Re-request Ceph pools and render config."""
+        self.render_config(event)
+        self.request_ceph_pool(event)
+
+    def render_config(self, event):
+        """Render config and restart services if config files change."""
+        if not self.ceph_client.pools_available:
+            logging.info("Deferring setup")
+            event.defer()
+            return
+
+        self.CEPH_GANESHA_CONFIG_PATH.mkdir(
+            exist_ok=True,
+            mode=0o750)
+
+        def daemon_reload_and_restart(service_name):
+            logging.debug("restarting {} after config change"
+                          .format(service_name))
+            subprocess.check_call(['systemctl', 'daemon-reload'])
+            subprocess.check_call(['systemctl', 'restart', service_name])
+
+        rfuncs = {}
+
+        @ch_host.restart_on_change(self.RESTART_MAP, restart_functions=rfuncs)
+        def _render_configs():
+            for config_file in self.RESTART_MAP.keys():
+                ch_templating.render(
+                    os.path.basename(config_file),
+                    config_file,
+                    self.adapters)
+        logging.info("Rendering config")
+        _render_configs()
+        logging.info("Setting started state")
+        self._stored.is_started = True
+        self.update_status()
+        logging.info("on_pools_available: status updated")
+
+    def on_departing(self, event):
+        logging.debug("Removing this unit from Ganesha cluster")
+        subprocess.check_call([
+            'ganesha-rados-grace', '--userid', self.client_name,
+            '--cephconf', self.CEPH_CONF, '--pool', self.pool_name,
+            'remove', socket.gethostname()])
+        self._stored.is_cluster_setup = False
+
+    def setup_ganesha(self, event):
+        if not self._stored.is_cluster_setup:
+            subprocess.check_call([
+                'ganesha-rados-grace', '--userid', self.client_name,
+                '--cephconf', self.CEPH_CONF, '--pool', self.pool_name,
+                'add', socket.gethostname()])
+            self._stored.is_cluster_setup = True
+        if not self.model.unit.is_leader():
+            return
+        cmd = [
+            'rados', '-p', self.pool_name,
+            '-c', self.CEPH_CONF,
+            '--id', self.client_name,
+            'put', 'ganesha-export-index', '/dev/null'
+        ]
+        if not self.peers.pool_initialised:
+            try:
+                logging.debug("Creating ganesha-export-index in Ceph")
+                subprocess.check_call(cmd)
+                counter = tempfile.NamedTemporaryFile('w+')
+                counter.write('1000')
+                counter.seek(0)
+                logging.debug("Creating ganesha-export-counter in Ceph")
+                cmd = [
+                    'rados', '-p', self.pool_name,
+                    '-c', self.CEPH_CONF,
+                    '--id', self.client_name,
+                    'put', 'ganesha-export-counter', counter.name
+                ]
+                subprocess.check_call(cmd)
+                self.peers.initialised_pool()
+            except subprocess.CalledProcessError:
+                logging.error("Failed to setup ganesha index object")
+                event.defer()
+
+    def _configure_hacluster(self, _):
+        vip_config = self.config.get('vip')
+        if not vip_config:
+            logging.warning("Cannot setup vips, vip config missing")
+            return
+        for vip in vip_config.split():
+            self.ha.add_vip('vip', vip)
+        self.ha.add_systemd_service('ganesha-systemd', 'nfs-ganesha')
+        self.ha.add_colocation(
+            self.model.app.name, 'ALWAYS', ['ganesha-vip', 'ganesha-systemd'])
+        self.ha.bind_resources()
+
+    def on_pool_initialised(self, event):
+        try:
+            logging.debug("Restarting Ganesha after pool initialisation")
pool initialisation") + subprocess.check_call(['systemctl', 'restart', 'nfs-ganesha']) + except subprocess.CalledProcessError: + logging.error("Failed torestart nfs-ganesha") + event.defer() + + def on_reload_nonce(self, _event): + logging.info("Reloading Ganesha after nonce triggered reload") + subprocess.call(['killall', '-HUP', 'ganesha.nfsd']) + + def _get_binding_subnet_map(self): + bindings = {} + for binding_name in self.meta.extra_bindings.keys(): + network = self.model.get_binding(binding_name).network + bindings[binding_name] = [i.subnet for i in network.interfaces] + return bindings + + @property + def vips(self): + return self.config.get('vip').split() + + def _get_space_vip_mapping(self): + bindings = {} + for binding_name, subnets in self._get_binding_subnet_map().items(): + bindings[binding_name] = [ + vip + for subnet in subnets + for vip in self.vips + if ipaddress.ip_address(vip) in subnet] + return bindings + + def access_address(self) -> str: + """Return the IP to advertise Ganesha on""" + binding = self.model.get_binding('public') + ingress_address = str(binding.network.ingress_address) + # Try to get the VIP for the public binding, fall back to ingress on it + return self._get_space_vip_mapping().get( + 'public', [ingress_address])[0] + + def create_share_action(self, event): + if not self.model.unit.is_leader(): + event.fail("Share creation needs to be run " + "from the application leader") + return + share_size = event.params.get('size') + name = event.params.get('name') + allowed_ips = event.params.get('allowed-ips') + allowed_ips = [ip.strip() for ip in allowed_ips.split(',')] + export_path = self.ganesha_client.create_share( + size=share_size, name=name, access_ips=allowed_ips) + if not export_path: + event.fail("Failed to create share, check the " + "log for more details") + return + self.peers.trigger_reload() + event.set_results({ + "message": "Share created", + "path": export_path, + "ip": self.access_address()}) + + def list_shares_action(self, event): + exports = self.ganesha_client.list_shares() + event.set_results({ + "exports": [ + { + "id": export.export_id, "name": export.name + } for export in exports + ] + }) + + def delete_share_action(self, event): + if not self.model.unit.is_leader(): + event.fail("Share creation needs to be run " + "from the application leader") + return + name = event.params.get('name') + purge = event.params.get('purge') + self.ganesha_client.delete_share(name, purge=purge) + self.peers.trigger_reload() + event.set_results({ + "message": "Share deleted", + }) + + def grant_access_action(self, event): + if not self.model.unit.is_leader(): + event.fail("Share creation needs to be run " + "from the application leader") + return + name = event.params.get('name') + address = event.params.get('client') + res = self.ganesha_client.grant_access(name, address) + if res is not None: + event.fail(res) + return + self.peers.trigger_reload() + event.set_results({ + "message": "Acess granted", + }) + + def revoke_access_action(self, event): + if not self.model.unit.is_leader(): + event.fail("Share creation needs to be run " + "from the application leader") + return + name = event.params.get('name') + address = event.params.get('client') + res = self.ganesha_client.revoke_access(name, address) + if res is not None: + event.fail(res) + return + self.peers.trigger_reload() + event.set_results({ + "message": "Access revoked", + }) + + def resize_share_action(self, event): + name = event.params.get('name') + size = event.params.get('size') + if 
+        self.ganesha_client.resize_share(name=name, size=size)
+        event.set_results({
+            "message": f"{name} is now {size}GB",
+        })
+
+
+@ops_openstack.core.charm_class
+class CephNFSCharmPacific(CephNFSCharm):
+    """Ceph NFS Charm for Pacific."""
+
+    _stored = StoredState()
+    release = 'octopus'
+
+
+if __name__ == '__main__':
+    main(ops_openstack.core.get_charm_class_for_release())
diff --git a/ceph-nfs/src/ganesha.py b/ceph-nfs/src/ganesha.py
new file mode 100644
index 00000000..0cd71d03
--- /dev/null
+++ b/ceph-nfs/src/ganesha.py
@@ -0,0 +1,436 @@
+#!/usr/bin/env python3
+# Copyright 2021 OpenStack Charmers
+# See LICENSE file for licensing details.
+
+import json
+import logging
+import manager
+import subprocess
+from typing import Dict, List, Optional
+import tempfile
+import uuid
+
+logger = logging.getLogger(__name__)
+
+
+# TODO: Add ACL with kerberos
+
+
+class Export(object):
+    """Object that encodes and decodes Ganesha export blocks"""
+
+    def __init__(self, export_options: Optional[Dict] = None):
+        if export_options is None:
+            export_options = {}
+        if isinstance(export_options, Export):
+            raise RuntimeError('export_options must be a dictionary')
+        self.export_options = export_options
+        if not isinstance(self.export_options['EXPORT']['CLIENT'], list):
+            self.export_options['EXPORT']['CLIENT'] = [
+                self.export_options['EXPORT']['CLIENT']
+            ]
+
+    @staticmethod
+    def from_export(export: str) -> 'Export':
+        return Export(export_options=manager.parseconf(export))
+
+    def to_export(self) -> str:
+        return manager.mkconf(self.export_options)
+
+    @property
+    def name(self):
+        if self.path:
+            return self.path.split('/')[-2]
+
+    @property
+    def export(self):
+        return self.export_options['EXPORT']
+
+    @property
+    def clients(self) -> List[Dict[str, str]]:
+        return self.export_options['EXPORT']['CLIENT']
+
+    @property
+    def clients_by_mode(self):
+        clients_by_mode = {'r': [], 'rw': []}
+        for client in self.clients:
+            if client['Access_Type'].lower() == 'r':
+                clients_by_mode['r'] += [
+                    s.strip() for s in client['Clients'].split(',')
+                ]
+            elif client['Access_Type'].lower() == 'rw':
+                clients_by_mode['rw'] += [
+                    s.strip() for s in client['Clients'].split(',')
+                ]
+            else:
+                raise RuntimeError("Invalid access type")
+        return clients_by_mode
+
+    @property
+    def export_id(self) -> int:
+        return int(self.export_options['EXPORT']['Export_Id'])
+
+    @property
+    def path(self) -> str:
+        return self.export_options['EXPORT']['Path']
+
+    def add_client(self, client: str):
+        mode = "rw"
+        clients_by_mode = self.clients_by_mode
+        logging.info(f"About to add {client} to {clients_by_mode}")
+        if client not in clients_by_mode[mode.lower()]:
+            clients_by_mode[mode.lower()].append(client)
+        logging.info(f"new clients_by_mode: to {clients_by_mode}")
+        self.export_options['EXPORT']['CLIENT'] = []
+        for (mode, clients) in clients_by_mode.items():
+            if clients:
+                logging.info(f"Adding {clients} to self.export_options")
+                self.export_options['EXPORT']['CLIENT'].append(
+                    {'Access_Type': mode, 'Clients': ', '.join(clients)})
+
+    def remove_client(self, client: str):
+        clients_by_mode = self.clients_by_mode
+        for (mode, clients) in clients_by_mode.items():
+            clients_by_mode[mode] = [
+                old_client for old_client in clients if old_client != client
+            ]
+        self.export_options['EXPORT']['CLIENT'] = []
+        for (mode, clients) in clients_by_mode.items():
+            if clients:
+                self.export_options['EXPORT']['CLIENT'].append(
+                    {'Access_Type': mode, 'Clients': ', '.join(clients)})
+
+
+class GaneshaNFS(object):
+
+    export_index = "ganesha-export-index"
+    export_counter = "ganesha-export-counter"
+
+    def __init__(self, client_name, ceph_pool):
+        self.client_name = client_name
+        self.ceph_pool = ceph_pool
+
+    def create_share(self, name: str = None, size: int = None,
+                     access_ips: List[str] = None) -> str:
+        """Create a CephFS Share and export it via Ganesha
+
+        :param name: String name of the share to create
+        :param size: Int size in gigabytes of the share to create
+
+        :returns: Path to the export
+        """
+        if name is None:
+            name = str(uuid.uuid4())
+        else:
+            existing_shares = [
+                share for share in self.list_shares() if share.name == name
+            ]
+            if existing_shares:
+                return existing_shares[0].path
+        size_in_bytes = None
+        if size is not None:
+            size_in_bytes = size * 1024 * 1024 * 1024
+        if access_ips is None:
+            access_ips = ['0.0.0.0']
+        # Ganesha deals with networks just fine, except when the network is
+        # 0.0.0.0/0, then it has to be 0.0.0.0 which works as expected :-/
+        if '0.0.0.0/0' in access_ips:
+            access_ips[access_ips.index('0.0.0.0/0')] = '0.0.0.0'
+
+        access_id = 'ganesha-{}'.format(name)
+        path = self._create_cephfs_share(name, size_in_bytes)
+        if not path:
+            return
+        self.export_path = path
+        export_id = self._get_next_export_id()
+        export = Export(
+            {
+                'EXPORT': {
+                    'Export_Id': export_id,
+                    'Path': self.export_path,
+                    'FSAL': {
+                        'Name': 'Ceph',
+                        'User_Id': access_id,
+                        'Secret_Access_Key': self._ceph_auth_key(access_id)
+                    },
+                    'Pseudo': self.export_path,
+                    'Squash': 'None',
+                    'CLIENT': [
+                        {
+                            'Access_Type': 'RW',
+                            'Clients': ', '.join(access_ips),
+                        }
+                    ]
+                }
+            }
+        )
+        export_template = export.to_export()
+        logging.debug("Export template::\n{}".format(export_template))
+        tmp_file = self._tmpfile(export_template)
+        self._rados_put('ganesha-export-{}'.format(export_id), tmp_file.name)
+        self._ganesha_add_export(self.export_path, tmp_file.name)
+        self._add_share_to_index(export_id)
+        return self.export_path
+
+    def list_shares(self) -> List[Export]:
+        share_urls = [
+            url.replace('%url rados://{}/'.format(self.ceph_pool), '')
+            for url
+            in self._rados_get('ganesha-export-index').splitlines()]
+        exports_raw = [
+            self._rados_get(url)
+            for url in share_urls
+            if url.strip()
+        ]
+        exports = []
+        for export_raw in exports_raw:
+            try:
+                exports.append(Export.from_export(export_raw))
+            except RuntimeError:
+                logging.warning("Encountered an independently created export")
+        return exports
+
+    def resize_share(self, name: str, size: int):
+        size_in_bytes = size * 1024 * 1024 * 1024
+        self._ceph_subvolume_command('resize', 'ceph-fs', name,
+                                     str(size_in_bytes), '--no_shrink')
+
+    def delete_share(self, name: str, purge=False):
+        share = [share for share in self.list_shares() if share.name == name]
+        if share:
+            share = share[0]
+        else:
+            return
+        logging.info("About to remove export {} ({})"
+                     .format(share.name, share.export_id))
+        self._ganesha_remove_export(share.export_id)
+        logging.debug("Removing export from index")
+        self._remove_share_from_index(share.export_id)
+        logging.debug("Removing export file from RADOS")
+        self._rados_rm('ganesha-export-{}'.format(share.export_id))
+        if purge:
+            self._delete_cephfs_share(name)
+
+    def grant_access(self, name: str, client: str) -> Optional[str]:
+        share = self.get_share(name)
+        if share is None:
+            return 'Share does not exist'
+        share.add_client(client)
+        export_template = share.to_export()
+        logging.debug("Export template::\n{}".format(export_template))
+        tmp_file = self._tmpfile(export_template)
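+        # Persist the updated export block back to its RADOS object, then
+        # ask the running Ganesha daemon (via D-Bus, see
+        # _ganesha_update_export) to re-read it so no restart is needed.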
+        self._rados_put('ganesha-export-{}'.format(share.export_id),
+                        tmp_file.name)
+        self._ganesha_update_export(share.export_id, tmp_file.name)
+
+    def revoke_access(self, name: str, client: str):
+        share = self.get_share(name)
+        if share is None:
+            return 'Share does not exist'
+        share.remove_client(client)
+        export_template = share.to_export()
+        logging.debug("Export template::\n{}".format(export_template))
+        tmp_file = self._tmpfile(export_template)
+        self._rados_put('ganesha-export-{}'.format(share.export_id),
+                        tmp_file.name)
+        self._ganesha_update_export(share.export_id, tmp_file.name)
+
+    def get_share(self, name: str) -> Optional[Export]:
+        share = [share for share in self.list_shares() if share.name == name]
+        if share:
+            return share[0]
+
+    def update_share(self, id):
+        pass
+
+    def _ganesha_add_export(self, export_path: str, tmp_path: str):
+        """Add a configured NFS export to Ganesha"""
+        self._dbus_send(
+            'ExportMgr', 'AddExport',
+            'string:{}'.format(tmp_path),
+            'string:EXPORT(Path={})'.format(export_path))
+
+    def _ganesha_remove_export(self, share_id: int):
+        """Remove a configured NFS export from Ganesha"""
+        self._dbus_send(
+            'ExportMgr',
+            'RemoveExport',
+            "uint16:{}".format(share_id))
+
+    def _ganesha_update_export(self, share_id: int, tmp_path: str):
+        """Update a configured NFS export in Ganesha"""
+        self._dbus_send(
+            'ExportMgr', 'UpdateExport',
+            'string:{}'.format(tmp_path),
+            'string:EXPORT(Export_Id={})'.format(share_id))
+
+    def _dbus_send(self, section: str, action: str, *args):
+        """Send a command to Ganesha via Dbus"""
+        cmd = [
+            'dbus-send', '--print-reply', '--system',
+            '--dest=org.ganesha.nfsd',
+            '/org/ganesha/nfsd/{}'.format(section),
+            'org.ganesha.nfsd.exportmgr.{}'.format(action)] + [*args]
+        logging.debug("About to call: {}".format(cmd))
+        return subprocess.check_output(cmd)
+
+    def _delete_cephfs_share(self, name: str):
+        """Delete a CephFS share.
+
+        :param name: String name of the share to delete
+        """
+        self._ceph_subvolume_command(
+            'deauthorize', 'ceph-fs', name,
+            'ganesha-{name}'.format(name=name))
+        self._ceph_subvolume_command('rm', 'ceph-fs', name)
+
+    def _create_cephfs_share(self, name: str, size_in_bytes: int = None):
+        """Create and authorise a CephFS share.
+
+        :param name: String name of the share to create
+        :param size_in_bytes: Integer size in bytes of the share to create
+
+        :returns: export path
+        :rtype: union[str, bool]
+        """
+        try:
+            if size_in_bytes is not None:
+                self._ceph_subvolume_command('create', 'ceph-fs',
+                                             name, str(size_in_bytes))
+            else:
+                self._ceph_subvolume_command('create', 'ceph-fs', name)
+        except subprocess.CalledProcessError:
+            logging.error("failed to create subvolume")
+            return False
+
+        try:
+            self._ceph_subvolume_command(
+                'authorize', 'ceph-fs', name,
+                'ganesha-{name}'.format(name=name))
+        except subprocess.CalledProcessError:
+            logging.error("failed to authorize subvolume")
+            return False
+
+        try:
+            output = self._ceph_subvolume_command('getpath', 'ceph-fs', name)
+            return output.decode('utf-8').strip()
+        except subprocess.CalledProcessError:
+            logging.error("failed to get path")
+            return False
+
+    def _ceph_subvolume_command(self, *cmd: str) -> bytes:
+        """Run a ceph fs subvolume command"""
+        return self._ceph_fs_command('subvolume', *cmd)
+
+    def _ceph_fs_command(self, *cmd: str) -> bytes:
+        """Run a ceph fs command"""
+        return self._ceph_command('fs', *cmd)
+
+    def _ceph_auth_key(self, access_id: str) -> str:
+        """Retrieve the CephX key associated with this id
+
+        :returns: The access key
+        :rtype: str
+        """
+        output = self._ceph_command(
+            'auth', 'get', 'client.{}'.format(access_id), '--format=json')
+        return json.loads(output.decode('UTF-8'))[0]['key']
+
+    def _ceph_command(self, *cmd: str) -> bytes:
+        """Run a ceph command"""
+        cmd = [
+            "ceph", "--id", self.client_name,
+            "--conf=/etc/ceph/ceph.conf"
+        ] + [*cmd]
+        return subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
+
+    def _get_next_export_id(self) -> int:
+        """Retrieve the next available export ID, and update the rados key
+
+        :returns: The export ID
+        :rtype: int
+        """
+        next_id = int(self._rados_get(self.export_counter))
+        file = self._tmpfile(next_id + 1)
+        self._rados_put(self.export_counter, file.name)
+        return next_id
+
+    def _tmpfile(self, value: str) -> tempfile._TemporaryFileWrapper:
+        file = tempfile.NamedTemporaryFile(mode='w+')
+        file.write(str(value))
+        file.seek(0)
+        return file
+
+    def _rados_get(self, name: str) -> str:
+        """Retrieve the content of the RADOS object with a given name
+
+        :param name: Name of the RADOS object to retrieve
+
+        :returns: Contents of the RADOS object
+        :rtype: str
+        """
+        cmd = [
+            'rados', '-p', self.ceph_pool, '--id', self.client_name,
+            'get', name, '/dev/stdout'
+        ]
+        logging.debug("About to call: {}".format(cmd))
+        output = subprocess.check_output(cmd)
+        return output.decode('utf-8')
+
+    def _rados_put(self, name: str, source: str):
+        """Store the contents of the source file in a named RADOS object.
+
+        :param name: Name of the RADOS object to write
+        :param source: Path to a file to upload to RADOS.
+
+        :returns: None
+        """
+        cmd = [
+            'rados', '-p', self.ceph_pool, '--id', self.client_name,
+            'put', name, source
+        ]
+        logging.debug("About to call: {}".format(cmd))
+        subprocess.check_call(cmd)
+
+    def _rados_rm(self, name: str):
+        """Remove a named RADOS object.
+
+        :param name: Name of the RADOS object to remove
+
+        :returns: None
+        """
+        cmd = [
+            'rados', '-p', self.ceph_pool, '--id', self.client_name,
+            'rm', name
+        ]
+        logging.debug("About to call: {}".format(cmd))
+        subprocess.check_call(cmd)
+
+    def _add_share_to_index(self, export_id: int):
+        """Add an export RADOS object's URL to the RADOS URL index."""
+        index_data = self._rados_get(self.export_index)
+        url = '%url rados://{}/ganesha-export-{}'.format(
+            self.ceph_pool, export_id
+        )
+        rados_urls = index_data.split('\n')
+        if url not in rados_urls:
+            rados_urls.append(url)
+            tmpfile = self._tmpfile('\n'.join(rados_urls))
+            self._rados_put(self.export_index, tmpfile.name)
+
+    def _remove_share_from_index(self, export_id: int):
+        """Remove an export RADOS object's URL from the RADOS URL index."""
+        index_data = self._rados_get(self.export_index)
+        if not index_data:
+            return
+
+        unwanted_url = "%url rados://{0}/{1}".format(
+            self.ceph_pool,
+            'ganesha-export-{}'.format(export_id))
+        logging.debug("Looking for '{}' in index".format(unwanted_url))
+        rados_urls = index_data.split('\n')
+        logging.debug("Index URLs: {}".format(rados_urls))
+        index = [url.strip() for url in rados_urls if url != unwanted_url]
+        logging.debug("Index URLs without unwanted: {}".format(index))
+        tmpfile = self._tmpfile('\n'.join(index))
+        self._rados_put(self.export_index, tmpfile.name)
diff --git a/ceph-nfs/src/interface_ceph_nfs_peer.py b/ceph-nfs/src/interface_ceph_nfs_peer.py
new file mode 100644
index 00000000..f00d54aa
--- /dev/null
+++ b/ceph-nfs/src/interface_ceph_nfs_peer.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+
+# import json
+import logging
+import os
+# import socket
+import uuid
+
+from ops.framework import (
+    StoredState,
+    EventBase,
+    ObjectEvents,
+    EventSource,
+    Object)
+
+
+class PoolInitialisedEvent(EventBase):
+    pass
+
+
+class ReloadNonceEvent(EventBase):
+    pass
+
+
+class DepartedEvent(EventBase):
+    pass
+
+
+class CephNFSPeerEvents(ObjectEvents):
+    pool_initialised = EventSource(PoolInitialisedEvent)
+    reload_nonce = EventSource(ReloadNonceEvent)
+    departing = EventSource(DepartedEvent)
+
+
+class CephNFSPeers(Object):
+
+    on = CephNFSPeerEvents()
+    _stored = StoredState()
+
+    def __init__(self, charm, relation_name):
+        super().__init__(charm, relation_name)
+        self.relation_name = relation_name
+        self.this_unit = self.framework.model.unit
+        self._stored.set_default(
+            pool_initialised=False,
+            reload_nonce=None)
+        self.framework.observe(
+            charm.on[relation_name].relation_changed,
+            self.on_changed)
+        self.framework.observe(
+            charm.on[relation_name].relation_departed,
+            self.on_departed)
+
+    def on_changed(self, event):
+        logging.info("CephNFSPeers on_changed")
+        logging.debug('pool_initialised: {}'.format(self.pool_initialised))
+        if self.pool_initialised == 'True' and \
+                not self._stored.pool_initialised:
+            logging.info("emitting pool initialised")
+            self.on.pool_initialised.emit()
+            self._stored.pool_initialised = True
+        logging.debug('reload_nonce: {}'.format(self.reload_nonce))
+        if self._stored.reload_nonce != self.reload_nonce:
+            logging.info("emitting reload nonce")
+            self.on.reload_nonce.emit()
+            self._stored.reload_nonce = self.reload_nonce
+
+    def on_departed(self, event):
+        logging.warning("CephNFSPeers on_departed")
+        if self.this_unit.name == os.getenv('JUJU_DEPARTING_UNIT'):
+            self.on.departing.emit()
+
+    def initialised_pool(self):
+        logging.info("Setting pool initialised")
+        self.peer_rel.data[self.peer_rel.app]['pool_initialised'] = 'True'
+        self.on.pool_initialised.emit()
+
+    def trigger_reload(self):
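+        # Publish a fresh UUID in the peer relation data. Peers notice the
+        # changed nonce in relation-changed and emit reload_nonce, which
+        # makes each unit send SIGHUP to its local Ganesha daemon
+        # (see on_reload_nonce in charm.py).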
+        self.peer_rel.data[
+            self.peer_rel.app
+        ]['reload_nonce'] = str(uuid.uuid4())
+        self.on.reload_nonce.emit()
+
+    @property
+    def pool_initialised(self):
+        return self.peer_rel.data[self.peer_rel.app].get('pool_initialised')
+
+    @property
+    def reload_nonce(self):
+        return self.peer_rel.data[self.peer_rel.app].get('reload_nonce')
+
+    @property
+    def peer_rel(self):
+        return self.framework.model.get_relation(self.relation_name)
diff --git a/ceph-nfs/src/manager.py b/ceph-nfs/src/manager.py
new file mode 100644
index 00000000..c21578b9
--- /dev/null
+++ b/ceph-nfs/src/manager.py
@@ -0,0 +1,200 @@
+# Copyright (c) 2014 Red Hat, Inc.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+#
+# The contents of this file were copied, almost straight, from
+# https://github.com/openstack/manila/blob/a3aaea91494665a25bdccebf69d9e85e8475983d/manila/share/drivers/ganesha/manager.py#L205
+#
+# The key differences are the lack of other Ganesha control code
+# and the removal of oslo's JSON helpers.
+
+
+import io
+import json
+import re
+import sys
+
+
+IWIDTH = 4
+
+
+def _conf2json(conf):
+    """Convert Ganesha config to JSON."""
+
+    # tokenize config string
+    token_list = [io.StringIO()]
+    state = {
+        'in_quote': False,
+        'in_comment': False,
+        'escape': False,
+    }
+
+    cbk = []
+    for char in conf:
+        if state['in_quote']:
+            if not state['escape']:
+                if char == '"':
+                    state['in_quote'] = False
+                    cbk.append(lambda: token_list.append(io.StringIO()))
+                elif char == '\\':
+                    cbk.append(lambda: state.update({'escape': True}))
+        else:
+            if char == "#":
+                state['in_comment'] = True
+            if state['in_comment']:
+                if char == "\n":
+                    state['in_comment'] = False
+            else:
+                if char == '"':
+                    token_list.append(io.StringIO())
+                    state['in_quote'] = True
+        state['escape'] = False
+        if not state['in_comment']:
+            token_list[-1].write(char)
+        while cbk:
+            cbk.pop(0)()
+
+    if state['in_quote']:
+        raise RuntimeError("Unterminated quoted string")
+
+    # jsonify tokens
+    js_token_list = ["{"]
+    for tok in token_list:
+        tok = tok.getvalue()
+
+        if tok[0] == '"':
+            js_token_list.append(tok)
+            continue
+
+        for pat, s in [
+                # add omitted "=" signs to block openings
+                (r'([^=\s])\s*{', '\\1={'),
+                # delete trailing semicolons in blocks
+                (r';\s*}', '}'),
+                # add omitted semicolons after blocks
+                (r'}\s*([^}\s])', '};\\1'),
+                # separate syntactically significant characters
+                (r'([;{}=])', ' \\1 ')]:
+            tok = re.sub(pat, s, tok)
+
+        # map tokens to JSON equivalents
+        for word in tok.split():
+            if word == "=":
+                word = ":"
+            elif word == ";":
+                word = ','
+            elif word in ['{', '}'] or \
+                    re.search(r'\A-?[1-9]\d*(\.\d+)?\Z', word):
+                pass
+            else:
+                word = json.dumps(word)
+            js_token_list.append(word)
+    js_token_list.append("}")
+
+    # group quoted strings
+    token_grp_list = []
+    for tok in js_token_list:
+        if tok[0] == '"':
+            if not (token_grp_list and isinstance(token_grp_list[-1], list)):
+                token_grp_list.append([])
+            token_grp_list[-1].append(tok)
+        else:
+            token_grp_list.append(tok)
+
+    # process quoted string groups by joining them
joining them + js_token_list2 = [] + for x in token_grp_list: + if isinstance(x, list): + x = ''.join(['"'] + [tok[1:-1] for tok in x] + ['"']) + js_token_list2.append(x) + + return ''.join(js_token_list2) + + +def _dump_to_conf(confdict, out=sys.stdout, indent=0): + """Output confdict in Ganesha config format.""" + if isinstance(confdict, dict): + for k, v in confdict.items(): + if v is None: + continue + if isinstance(v, dict): + out.write(' ' * (indent * IWIDTH) + k + ' ') + out.write("{\n") + _dump_to_conf(v, out, indent + 1) + out.write(' ' * (indent * IWIDTH) + '}') + elif isinstance(v, list): + for item in v: + out.write(' ' * (indent * IWIDTH) + k + ' ') + out.write("{\n") + _dump_to_conf(item, out, indent + 1) + out.write(' ' * (indent * IWIDTH) + '}\n') + # The 'CLIENTS' Ganesha string option is an exception in that it's + # string value can't be enclosed within quotes as can be done for + # other string options in a valid Ganesha conf file. + elif k.upper() == 'CLIENTS': + out.write(' ' * (indent * IWIDTH) + k + ' = ' + v + ';') + else: + out.write(' ' * (indent * IWIDTH) + k + ' ') + out.write('= ') + _dump_to_conf(v, out, indent) + out.write(';') + out.write('\n') + else: + dj = json.dumps(confdict) + out.write(dj) + + +def parseconf(conf): + """Parse Ganesha config. + Both native format and JSON are supported. + Convert config to a (nested) dictionary. + """ + def list_to_dict(src_list): + # Convert a list of key-value pairs stored as tuples to a dict. + # For tuples with identical keys, preserve all the values in a + # list. e.g., argument [('k', 'v1'), ('k', 'v2')] to function + # returns {'k': ['v1', 'v2']}. + dst_dict = {} + for i in src_list: + if isinstance(i, tuple): + k, v = i + if isinstance(v, list): + v = list_to_dict(v) + if k in dst_dict: + dst_dict[k] = [dst_dict[k]] + dst_dict[k].append(v) + else: + dst_dict[k] = v + return dst_dict + + try: + # allow config to be specified in JSON -- + # for sake of people who might feel Ganesha config foreign. + d = json.loads(conf) + except ValueError: + # Customize JSON decoder to convert Ganesha config to a list + # of key-value pairs stored as tuples. This allows multiple + # occurrences of a config block to be later converted to a + # dict key-value pair, with block name being the key and a + # list of block contents being the value. + li = json.loads(_conf2json(conf), object_pairs_hook=lambda x: x) + d = list_to_dict(li) + return d + + +def mkconf(confdict): + """Create Ganesha config string from confdict.""" + s = io.StringIO() + _dump_to_conf(confdict, s) + return s.getvalue() diff --git a/ceph-nfs/templates/ceph.conf b/ceph-nfs/templates/ceph.conf new file mode 100644 index 00000000..d64755f4 --- /dev/null +++ b/ceph-nfs/templates/ceph.conf @@ -0,0 +1,15 @@ +############################################################################### +# [ WARNING ] +# configuration file maintained by Juju +# local changes will be overwritten. 
+############################################################################### +[global] +auth supported = {{ ceph_client.auth_supported }} +mon host = {{ ceph_client.mon_hosts }} +keyring = /etc/ceph/ganesha/$cluster.keyring + +[client.{{ ceph_nfs.client_name }}] +client mount uid = 0 +client mount gid = 0 +log file = /var/log/ceph/ceph-client.{{ ceph_nfs.client_name }}.log + diff --git a/ceph-nfs/templates/ceph.keyring b/ceph-nfs/templates/ceph.keyring new file mode 100644 index 00000000..2bfa3209 --- /dev/null +++ b/ceph-nfs/templates/ceph.keyring @@ -0,0 +1,3 @@ +[client.{{ ceph_nfs.client_name }}] + key = {{ ceph_client.key }} + diff --git a/ceph-nfs/templates/ganesha.conf b/ceph-nfs/templates/ganesha.conf new file mode 100644 index 00000000..6e55f3f0 --- /dev/null +++ b/ceph-nfs/templates/ganesha.conf @@ -0,0 +1,94 @@ +# The following is copied from the Ganesha source examples: +# https://github.com/nfs-ganesha/nfs-ganesha/blob/576e3bafccb6da5c7ea18d7099013f7494ce8d2c/src/config_samples/ceph.conf +# +# It is possible to use FSAL_CEPH to provide an NFS gateway to CephFS. The +# following sample config should be useful as a starting point for +# configuration. This basic configuration is suitable for a standalone NFS +# server, or an active/passive configuration managed by some sort of clustering +# software (e.g. pacemaker, docker, etc.). +# +# Note too that it is also possible to put a config file in RADOS, and give +# ganesha a rados URL from which to fetch it. For instance, if the config +# file is stored in a RADOS pool called "nfs-ganesha", in a namespace called +# "ganesha-namespace" with an object name of "ganesha-config": +# +# %url rados://nfs-ganesha/ganesha-namespace/ganesha-config +# +# If we only export cephfs (or RGW), store the configs and recovery data in +# RADOS, and mandate NFSv4.1+ for access, we can avoid any sort of local +# storage, and ganesha can run as an unprivileged user (even inside a +# locked-down container). +# + +NFS_CORE_PARAM +{ + # Ganesha can lift the NFS grace period early if NLM is disabled. + Enable_NLM = false; + + # rquotad doesn't add any value here. CephFS doesn't support per-uid + # quotas anyway. + Enable_RQUOTA = false; + + # In this configuration, we're just exporting NFSv4. In practice, it's + # best to use NFSv4.1+ to get the benefit of sessions. + Protocols = 4; +} + +NFSv4 +{ + # Modern versions of libcephfs have delegation support, though they + # are not currently recommended in clustered configurations. They are + # disabled by default but can be reenabled for singleton or + # active/passive configurations. + # Delegations = false; + + # One can use any recovery backend with this configuration, but being + # able to store it in RADOS is a nice feature that makes it easy to + # migrate the daemon to another host. + # + # For a single-node or active/passive configuration, rados_ng driver + # is preferred. For active/active clustered configurations, the + # rados_cluster backend can be used instead. See the + # ganesha-rados-grace manpage for more information. + RecoveryBackend = rados_cluster; + + # NFSv4.0 clients do not send a RECLAIM_COMPLETE, so we end up having + # to wait out the entire grace period if there are any. Avoid them. + Minor_Versions = 1,2; +} + +# The libcephfs client will aggressively cache information while it +# can, so there is little benefit to ganesha actively caching the same +# objects. Doing so can also hurt cache coherency. Here, we disable +# as much attribute and directory caching as we can. 
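+# (In the MDCACHE block below, Dir_Chunk = 0 effectively turns off Ganesha's
+# own dirent caching, leaving directory state to the libcephfs client as
+# described above.)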
+MDCACHE {
+    # Size the dirent cache down as small as possible.
+    Dir_Chunk = 0;
+}
+
+# To read exports from RADOS objects
+RADOS_URLS {
+    ceph_conf = "/etc/ceph/ceph.conf";
+    userid = "{{ ceph_nfs.client_name }}";
+}
+
+%url rados://{{ ceph_nfs.pool_name }}/ganesha-export-index
+
+# To store client recovery data in the same RADOS pool
+RADOS_KV {
+    ceph_conf = "/etc/ceph/ceph.conf";
+    userid = "{{ ceph_nfs.client_name }}";
+    pool = "{{ ceph_nfs.pool_name }}";
+    nodeid = "{{ ceph_nfs.hostname }}";
+}
+
+# Config block for FSAL_CEPH
+CEPH
+{
+    # Path to a ceph.conf file for this ceph cluster.
+    # Ceph_Conf = /etc/ceph/ceph.conf;
+
+    # User file-creation mask. These bits will be masked off from the unix
+    # permissions on newly-created inodes.
+    # umask = 0;
+}
diff --git a/ceph-nfs/test-requirements.txt b/ceph-nfs/test-requirements.txt
new file mode 100644
index 00000000..4e84afc2
--- /dev/null
+++ b/ceph-nfs/test-requirements.txt
@@ -0,0 +1,15 @@
+# This file is managed centrally. If you find the need to modify this as a
+# one-off, please don't. Instead, consult #openstack-charms and ask about
+# requirements management in charms via bot-control. Thank you.
+charm-tools>=2.4.4
+coverage>=3.6
+mock>=1.2
+flake8>=2.2.4
+stestr>=2.2.0
+requests>=2.18.4
+psutil
+# oslo.i18n dropped py35 support
+oslo.i18n<4.0.0
+git+https://github.com/openstack-charmers/zaza.git#egg=zaza
+git+https://github.com/openstack-charmers/zaza-openstack-tests.git#egg=zaza.openstack
+pytz # workaround for 14.04 pip/tox
diff --git a/ceph-nfs/tests/__init__.py b/ceph-nfs/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/ceph-nfs/tests/bundles/jammy-caracal.yaml b/ceph-nfs/tests/bundles/jammy-caracal.yaml
new file mode 100644
index 00000000..a0b9692d
--- /dev/null
+++ b/ceph-nfs/tests/bundles/jammy-caracal.yaml
@@ -0,0 +1,73 @@
+options:
+  source: &source cloud:jammy-caracal
+series: &series jammy
+
+machines:
+  '0':
+    constraints: cores=2 mem=6G root-disk=40G virt-type=virtual-machine
+  '1':
+    constraints: cores=2 mem=6G root-disk=40G virt-type=virtual-machine
+  '2':
+    constraints: cores=2 mem=6G root-disk=40G virt-type=virtual-machine
+  '3':
+  '4':
+  '5':
+
+local_overlay_enabled: False
+
+applications:
+  ceph-nfs:
+    charm: ch:ceph-nfs
+    channel: latest/edge
+    num_units: 2
+    options:
+      source: *source
+    to:
+      - '3'
+      - '4'
+  ceph-osd:
+    charm: ch:ceph-osd
+    channel: latest/edge
+    num_units: 3
+    storage:
+      osd-devices: 'loop,10G'
+    options:
+      source: *source
+    to:
+      - '0'
+      - '1'
+      - '2'
+  ceph-mon:
+    charm: ch:ceph-mon
+    channel: latest/edge
+    num_units: 3
+    options:
+      monitor-count: '3'
+      source: *source
+    to:
+      - '3'
+      - '4'
+      - '5'
+  ceph-fs:
+    charm: ch:ceph-fs
+    channel: latest/edge
+    num_units: 1
+    options:
+      source: *source
+    to:
+      - '2'
+  hacluster:
+    charm: ch:hacluster
+    channel: 2.4/edge
+    options:
+      cluster_count: 2
+
+relations:
+  - - 'ceph-mon:client'
+    - 'ceph-nfs:ceph-client'
+  - - 'ceph-osd:mon'
+    - 'ceph-mon:osd'
+  - - 'ceph-fs'
+    - 'ceph-mon'
+  - - 'ceph-nfs:ha'
+    - 'hacluster:ha'
diff --git a/ceph-nfs/tests/nfs_ganesha.py b/ceph-nfs/tests/nfs_ganesha.py
new file mode 100644
index 00000000..e1e9ebf1
--- /dev/null
+++ b/ceph-nfs/tests/nfs_ganesha.py
@@ -0,0 +1,201 @@
+# Copyright 2021 Canonical Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Encapsulate ``Ceph NFS`` testing."""
+
+import logging
+import subprocess
+import tenacity
+from typing import Dict
+import unittest
+import yaml
+import zaza
+import zaza.model as model
+import zaza.utilities.generic
+import zaza.utilities.installers
+from tenacity import stop_after_attempt, wait_exponential, retry_if_result
+
+
+class NfsGaneshaTest(unittest.TestCase):
+    mount_dir = '/mnt/test'
+    share_protocol = 'nfs'
+
+    def setUp(self):
+        super(NfsGaneshaTest, self).setUp()
+        self.created_share = None
+        self.mounts_share = False
+        ip1 = zaza.model.get_unit_public_address(
+            zaza.model.get_unit_from_name('ceph-nfs/0')
+        )
+        ip2 = zaza.model.get_unit_public_address(
+            zaza.model.get_unit_from_name('ceph-nfs/1')
+        )
+        zaza.model.set_application_config(
+            'ceph-nfs',
+            {'vip': ' '.join([str(ip1), str(ip2)])})
+
+    def tearDown(self):
+        if self.mounts_share:
+            try:
+                zaza.utilities.generic.run_via_ssh(
+                    unit_name='ceph-osd/0',
+                    cmd='sudo umount /mnt/test && sudo rmdir /mnt/test')
+                zaza.utilities.generic.run_via_ssh(
+                    unit_name='ceph-osd/1',
+                    cmd='sudo umount /mnt/test && sudo rmdir /mnt/test')
+            except subprocess.CalledProcessError:
+                logging.warning("Failed to cleanup mounts")
+        if self.created_share:
+            zaza.model.run_action_on_leader(
+                'ceph-nfs',
+                'delete-share',
+                action_params={
+                    'name': self.created_share,
+                    'purge': True
+                })
+
+    def _create_share(self, name: str, size: int = 10,
+                      access_ip: str = '0.0.0.0') -> Dict[str, str]:
+        logging.info(f"create share {name}, access_ip {access_ip}")
+        action = zaza.model.run_action_on_leader(
+            'ceph-nfs',
+            'create-share',
+            action_params={
+                'name': name,
+                'size': size,
+                'allowed-ips': access_ip,
+            })
+        self.assertEqual(action.status, 'completed')
+        self.created_share = name
+        results = action.results
+        logging.info("create-share action: {}".format(results))
+        return results
+
+    def _grant_access(self, share_name: str, access_ip: str):
+        action = zaza.model.run_action_on_leader(
+            'ceph-nfs',
+            'grant-access',
+            action_params={
+                'name': share_name,
+                'client': access_ip,
+            })
+        self.assertEqual(action.status, 'completed')
+
+    def _mount_share(self, unit_name: str, share_ip: str,
+                     export_path: str, perform_retry: bool = True):
+        self._install_dependencies(unit_name)
+        cmd = (
+            'sudo mkdir -p {0} && '
+            'sudo mount -t {1} -o nfsvers=4.1,proto=tcp {2}:{3} {0}'.format(
+                self.mount_dir,
+                self.share_protocol,
+                share_ip,
+                export_path))
+        if perform_retry:
+            @tenacity.retry(
+                stop=stop_after_attempt(5),
+                wait=wait_exponential(multiplier=3, min=2, max=10),
+                retry=retry_if_result(lambda res: res.get('Code') != '0')
+            )
+            def _do_mount():
+                logging.info(f"Mounting CephFS on {unit_name}: {cmd}")
+                res = model.run_on_unit(unit_name, cmd)
+                logging.info(f"Mount result: {res}")
+                return res
+
+            _do_mount()
+        else:
+            model.run_on_unit(unit_name, cmd)
+
+        self.mounts_share = True
+
+    def _install_dependencies(self, unit: str):
+        logging.debug("About to install nfs-common on {}".format(unit))
+        zaza.utilities.generic.run_via_ssh(
+            unit_name=unit,
+            cmd='sudo apt-get install -yq nfs-common')
+
+    @tenacity.retry(
+        stop=tenacity.stop_after_attempt(5),
+        wait=tenacity.wait_exponential(multiplier=3, min=2, max=10))
+    def _write_testing_file_on_instance(self, instance_name: str):
+        zaza.utilities.generic.run_via_ssh(
+            unit_name=instance_name,
+            cmd='echo -n "test" | sudo tee {}/test'.format(self.mount_dir))
+
+    @tenacity.retry(
+        stop=tenacity.stop_after_attempt(5),
+        wait=tenacity.wait_exponential(multiplier=3, min=2, max=10))
+    def _verify_testing_file_on_instance(self, instance_name: str):
+        run_with_juju_ssh = zaza.utilities.installers.make_juju_ssh_fn(
+            instance_name, sudo=True
+        )
+        output = run_with_juju_ssh(
+            'sudo cat {}/test'.format(self.mount_dir))
+        logging.info("Verification output: {}".format(output))
+        self.assertEqual('test', output.strip())
+
+    def _get_ipaddr(self, unit):
+        """Run an ssh cmd on the unit to list its IP addresses."""
+        cmd = ('''ip -o addr show | \
+            awk '$2 != "lo" && ($3 == "inet" || $3 == "inet6")'''
+               '''{ sub("/.*","",$4); print $4 }'
+               ''')
+        res = model.run_on_unit(unit, cmd)
+        return res['Stdout'].strip().splitlines()
+
+    def test_create_share(self):
+        logging.info("Creating a share")
+        # TODO: enable ACL testing
+        osd_0_ip = ','.join(self._get_ipaddr('ceph-osd/0'))
+        osd_1_ip = ','.join(self._get_ipaddr('ceph-osd/1'))
+        share = self._create_share('test_ganesha_share', access_ip=osd_0_ip)
+        sharelist = zaza.model.run_action_on_leader(
+            'ceph-nfs',
+            'list-shares',
+            action_params={})
+        logging.info("sharelist: {}".format(sharelist.results))
+
+        export_path = share['path']
+        ip = share['ip']
+        logging.info("Mounting {} on ceph-osd units".format(export_path))
+        self._mount_share('ceph-osd/0', ip, export_path)
+        logging.info("writing to the share on ceph-osd/0")
+        self._write_testing_file_on_instance('ceph-osd/0')
+        # TODO: enable ACL testing
+        try:
+            self._mount_share(
+                'ceph-osd/1', ip, export_path, perform_retry=False
+            )
+            self.fail('Mounting should not have succeeded')
+        except:  # noqa: E722
+            pass
+        self._grant_access('test_ganesha_share', access_ip=osd_1_ip)
+
+        self._mount_share('ceph-osd/1', ip, export_path)
+        logging.info("reading from the share on ceph-osd/1")
+        self._verify_testing_file_on_instance('ceph-osd/1')
+
+    def test_list_shares(self):
+        self._create_share('test_ganesha_list_share')
+        action = zaza.model.run_action_on_leader(
+            'ceph-nfs',
+            'list-shares',
+            action_params={})
+        self.assertEqual(action.status, 'completed')
+        results = action.results
+        logging.debug("Action results: {}".format(results))
+        logging.debug("exports: {}".format(results['exports']))
+        exports = yaml.safe_load(results['exports'])
+        self.assertIn('test_ganesha_list_share',
+                      [export['name'] for export in exports])
diff --git a/ceph-nfs/tests/tests.yaml b/ceph-nfs/tests/tests.yaml
new file mode 100644
index 00000000..f2ae0073
--- /dev/null
+++ b/ceph-nfs/tests/tests.yaml
@@ -0,0 +1,16 @@
+charm_name: ceph-nfs
+gate_bundles:
+  - jammy-caracal
+smoke_bundles:
+  - jammy-caracal
+dev_bundles:
+  - jammy-caracal
+
+configure: []
+tests:
+  - zaza.charm_tests.lifecycle.refresh.CharmRefreshAll
+  - tests.nfs_ganesha.NfsGaneshaTest
+target_deploy_status:
+  ubuntu:
+    workload-status: active
+    workload-status-message-prefix: ''
diff --git a/ceph-nfs/tox.ini b/ceph-nfs/tox.ini
new file mode 100644
index 00000000..80452bfa
--- /dev/null
+++ b/ceph-nfs/tox.ini
@@ -0,0 +1,154 @@
+# Classic charm (with zaza): ./tox.ini
+# This file is managed centrally by release-tools and should not be modified
+# within individual charm repos. 
See the 'global' dir contents for available +# choices of tox.ini for OpenStack Charms: +# https://github.com/openstack-charmers/release-tools +# +# TODO: Distill the func test requirements from the lint/unit test +# requirements. They are intertwined. Also, Zaza itself should specify +# all of its own requirements and if it doesn't, fix it there. +[tox] +envlist = pep8,py3 +skipsdist = True +# NOTE: Avoid build/test env pollution by not enabling sitepackages. +sitepackages = False +# NOTE: Avoid false positives by not skipping missing interpreters. +skip_missing_interpreters = False + +# NOTE: https://wiki.canonical.com/engineering/OpenStack/InstallLatestToxOnOsci +minversion = 3.18.0 + +[testenv] +setenv = VIRTUAL_ENV={envdir} + PYTHONHASHSEED=0 + TEST_JUJU3=1 + CHARM_DIR={envdir} + CHARMS_ARTIFACT_DIR={toxinidir}/.. +install_command = + pip install {opts} {packages} +commands = stestr run --slowest {posargs} +allowlist_externals = + charmcraft + {toxinidir}/rename.sh +passenv = + HOME + TERM + CS_* + OS_* + TEST_* +deps = -r{toxinidir}/test-requirements.txt + +[testenv:build] +basepython = python3 +deps = -r{toxinidir}/build-requirements.txt +commands = + charmcraft clean + charmcraft -v pack + {toxinidir}/rename.sh + +[testenv:py36] +basepython = python3.6 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py37] +basepython = python3.7 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py38] +basepython = python3.8 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py39] +basepython = python3.9 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py310] +basepython = python3.10 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py311] +basepython = python3.11 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py312] +basepython = python3.12 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py3] +basepython = python3 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:pep8] +basepython = python3 +deps = flake8 + charm-tools +commands = flake8 {posargs} unit_tests tests src + +[testenv:cover] +# Technique based heavily upon +# https://github.com/openstack/nova/blob/master/tox.ini +basepython = python3 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt +setenv = + {[testenv]setenv} + PYTHON=coverage run +commands = + coverage erase + stestr run --slowest {posargs} + coverage combine + coverage html -d cover + coverage xml -o cover/coverage.xml + coverage report + +[coverage:run] +branch = True +concurrency = multiprocessing +parallel = True +source = + . 
+omit =
+    .tox/*
+    */charmhelpers/*
+    unit_tests/*
+
+[testenv:venv]
+basepython = python3
+commands = {posargs}
+
+[testenv:func-noop]
+basepython = python3
+commands =
+    functest-run-suite --help
+
+[testenv:func]
+basepython = python3
+commands =
+    functest-run-suite --keep-model
+
+[testenv:func-smoke]
+basepython = python3
+commands =
+    functest-run-suite --keep-model --smoke
+
+[testenv:func-dev]
+basepython = python3
+commands =
+    functest-run-suite --keep-model --dev
+
+[testenv:func-target]
+basepython = python3
+commands =
+    functest-run-suite --keep-model --bundle {posargs}
+
+[flake8]
+ignore = E402,E226,W503,W504
+exclude = */charmhelpers
diff --git a/ceph-nfs/unit_tests/__init__.py b/ceph-nfs/unit_tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/ceph-nfs/unit_tests/test_ceph_nfs_charm.py b/ceph-nfs/unit_tests/test_ceph_nfs_charm.py
new file mode 100644
index 00000000..6b92a603
--- /dev/null
+++ b/ceph-nfs/unit_tests/test_ceph_nfs_charm.py
@@ -0,0 +1,67 @@
+# Copyright 2021 OpenStack Charmers
+# See LICENSE file for licensing details.
+#
+# Learn more about testing at: https://juju.is/docs/sdk/testing
+
+
+import unittest
+import sys
+
+sys.path.append('lib')  # noqa
+sys.path.append('src')  # noqa
+
+from unittest.mock import patch, Mock
+
+from charm import CephNFSCharm
+# from ops.model import ActiveStatus
+from ops.testing import Harness
+
+with patch('charmhelpers.core.host_factory.ubuntu.cmp_pkgrevno',
+           Mock(return_value=1)):
+    import charm
+
+
+class CharmTestCase(unittest.TestCase):
+
+    def setUp(self, obj, patches):
+        super().setUp()
+        self.patches = patches
+        self.obj = obj
+        self.patch_all()
+
+    def patch(self, method):
+        _m = patch.object(self.obj, method)
+        mock = _m.start()
+        self.addCleanup(_m.stop)
+        return mock
+
+    def patch_all(self):
+        for method in self.patches:
+            setattr(self, method, self.patch(method))
+
+
+class _CephNFSCharm(CephNFSCharm):
+
+    @staticmethod
+    def get_bluestore_compression():
+        return {}
+
+
+class TestCephNFSCharmBase(CharmTestCase):
+
+    PATCHES = [
+        'ch_templating',
+        'os',
+        'subprocess',
+    ]
+
+    def setUp(self):
+        super().setUp(charm, self.PATCHES)
+        self.harness = Harness(
+            _CephNFSCharm,
+        )
+        self.addCleanup(self.harness.cleanup)
+
+    def test_init(self):
+        self.harness.begin()
+        self.assertFalse(self.harness.charm._stored.is_started)
diff --git a/ceph-nfs/unit_tests/test_ganesha.py b/ceph-nfs/unit_tests/test_ganesha.py
new file mode 100644
index 00000000..be8d0fee
--- /dev/null
+++ b/ceph-nfs/unit_tests/test_ganesha.py
@@ -0,0 +1,120 @@
+import unittest
+import unittest.mock
+
+import ganesha
+
+
+EXAMPLE_EXPORT = """## This export is managed by the CephNFS charm ##
+EXPORT {
+    # Each EXPORT must have a unique Export_Id.
+    Export_Id = 1000;
+
+    # The directory in the exported file system this export
+    # is rooted on. 
+ Path = '/volumes/_nogroup/test_ganesha_share/e12a49ef-1b2b-40b3-ba6c'; + + # FSAL, Ganesha's module component + FSAL { + # FSAL name + Name = "Ceph"; + User_Id = "ganesha-test_ganesha_share"; + Secret_Access_Key = "AQCT9+9h4cwJOxAAue2fFvvGTWziUiR9koCHEw=="; + } + + # Path of export in the NFSv4 pseudo filesystem + Pseudo = '/volumes/_nogroup/test_ganesha_share/e12a49ef-1b2b-40b3-ba6c'; + + SecType = "sys"; + CLIENT { + Access_Type = "rw"; + Clients = 0.0.0.0; + } + # User id squashing, one of None, Root, All + Squash = "None"; +} +""" + + +class ExportTest(unittest.TestCase): + + def test_parser(self): + export = ganesha.Export.from_export(EXAMPLE_EXPORT) + self.assertEqual(export.export_id, 1000) + self.assertEqual(export.clients, + [{'Access_Type': 'rw', 'Clients': '0.0.0.0'}]) + self.assertEqual(export.name, 'test_ganesha_share') + + def test_add_client(self): + export = ganesha.Export.from_export(EXAMPLE_EXPORT) + export.add_client('10.0.0.0/8') + self.assertEqual( + export.clients, + [{'Access_Type': 'rw', 'Clients': '0.0.0.0, 10.0.0.0/8'}]) + # adding again shouldn't duplicate export + export.add_client('10.0.0.0/8') + self.assertEqual( + export.clients, + [{'Access_Type': 'rw', 'Clients': '0.0.0.0, 10.0.0.0/8'}]) + + export.add_client('192.168.0.0/16') + self.assertEqual( + export.clients, + [{ + 'Access_Type': 'rw', + 'Clients': '0.0.0.0, 10.0.0.0/8, 192.168.0.0/16' + }]) + + def test_remove_client(self): + export = ganesha.Export.from_export(EXAMPLE_EXPORT) + export.add_client('10.0.0.0/8') + export.add_client('192.168.0.0/16') + self.assertEqual( + export.clients, + [{ + 'Access_Type': 'rw', + 'Clients': '0.0.0.0, 10.0.0.0/8, 192.168.0.0/16' + }]) + export.remove_client('0.0.0.0') + self.assertEqual( + export.clients, + [ + {'Access_Type': 'rw', 'Clients': '10.0.0.0/8, 192.168.0.0/16'}, + ]) + + +class TestGaneshaNFS(unittest.TestCase): + + @unittest.mock.patch.object(ganesha.GaneshaNFS, '_ceph_subvolume_command') + @unittest.mock.patch.object(ganesha.GaneshaNFS, '_ganesha_add_export') + @unittest.mock.patch.object(ganesha.GaneshaNFS, '_get_next_export_id') + @unittest.mock.patch.object(ganesha.GaneshaNFS, 'list_shares') + @unittest.mock.patch.object(ganesha.GaneshaNFS, '_ceph_auth_key') + @unittest.mock.patch.object(ganesha.GaneshaNFS, '_rados_get') + @unittest.mock.patch.object(ganesha.GaneshaNFS, '_rados_put') + @unittest.mock.patch.object(ganesha.Export, 'to_export') + def test_create_share(self, mock_export, + mock_rados_put, + mock_rados_get, + mock_auth_key, + mock_list_shares, + mock_export_id, + mock_add_export, + mock_subvolume_command): + mock_subvolume_command.return_value = b'mock-volume' + mock_list_shares.return_value = [] + mock_export_id.return_value = 1 + mock_auth_key.return_value = b'mock-auth-key' + + inst = ganesha.GaneshaNFS('ceph-client', 'mypool') + inst.create_share('test-create-share', size=3, access_ips=None) + + mock_subvolume_command.assert_any_call('create', 'ceph-fs', + 'test-create-share', + str(3 * 1024 * 1024 * 1024)) + + @unittest.mock.patch.object(ganesha.GaneshaNFS, '_ceph_subvolume_command') + def test_resize_share(self, mock_subvolume_command): + inst = ganesha.GaneshaNFS('ceph-client', 'mypool') + inst.resize_share('test-resize-share', 5) + mock_subvolume_command.assert_any_call('resize', 'ceph-fs', + 'test-resize-share', + str(5 * 1024 * 1024 * 1024), + '--no_shrink') diff --git a/ceph-osd/.gitignore b/ceph-osd/.gitignore new file mode 100644 index 00000000..b7937771 --- /dev/null +++ b/ceph-osd/.gitignore @@ -0,0 +1,14 @@ +.coverage 
+.project
+.tox
+.testrepository
+.stestr
+bin
+*.sw[nop]
+*.charm
+*.pyc
+.unit-state.db
+.idea
+func-results.json
+*__pycache__
+.settings
diff --git a/ceph-osd/.gitreview b/ceph-osd/.gitreview
new file mode 100644
index 00000000..0e144d01
--- /dev/null
+++ b/ceph-osd/.gitreview
@@ -0,0 +1,6 @@
+[gerrit]
+host=review.opendev.org
+port=29418
+project=openstack/charm-ceph-osd.git
+
+defaultbranch=stable/squid-jammy
diff --git a/ceph-osd/.project b/ceph-osd/.project
new file mode 100644
index 00000000..c5e385b7
--- /dev/null
+++ b/ceph-osd/.project
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+    <name>ceph-osd</name>
+    <comment></comment>
+    <projects>
+    </projects>
+    <buildSpec>
+        <buildCommand>
+            <name>org.python.pydev.PyDevBuilder</name>
+            <arguments>
+            </arguments>
+        </buildCommand>
+    </buildSpec>
+    <natures>
+        <nature>org.python.pydev.pythonNature</nature>
+    </natures>
+</projectDescription>
diff --git a/ceph-osd/.pydevproject b/ceph-osd/.pydevproject
new file mode 100644
index 00000000..5ed03c7e
--- /dev/null
+++ b/ceph-osd/.pydevproject
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<pydev_project>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
+<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
+<path>/${PROJECT_DIR_NAME}/lib</path>
+<path>/${PROJECT_DIR_NAME}/unit_tests</path>
+<path>/${PROJECT_DIR_NAME}/tests</path>
+<path>/${PROJECT_DIR_NAME}/hooks</path>
+<path>/${PROJECT_DIR_NAME}/actions</path>
+</pydev_pathproperty>
+</pydev_project>
diff --git a/ceph-osd/.stestr.conf b/ceph-osd/.stestr.conf
new file mode 100644
index 00000000..5fcccaca
--- /dev/null
+++ b/ceph-osd/.stestr.conf
@@ -0,0 +1,3 @@
+[DEFAULT]
+test_path=./unit_tests
+top_dir=./
diff --git a/ceph-osd/.zuul.yaml b/ceph-osd/.zuul.yaml
new file mode 100644
index 00000000..fd20909e
--- /dev/null
+++ b/ceph-osd/.zuul.yaml
@@ -0,0 +1,4 @@
+- project:
+    templates:
+      - openstack-python3-charm-jobs
+      - openstack-cover-jobs
diff --git a/ceph-osd/LICENSE b/ceph-osd/LICENSE
new file mode 100644
index 00000000..d6456956
--- /dev/null
+++ b/ceph-osd/LICENSE
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/ceph-osd/Makefile b/ceph-osd/Makefile new file mode 100644 index 00000000..4e7d3f76 --- /dev/null +++ b/ceph-osd/Makefile @@ -0,0 +1,34 @@ +#!/usr/bin/make +PYTHON := /usr/bin/env python3 + +lint: + @tox -e pep8 + +test: + @echo Starting unit tests... + @tox -e py27 + +functional_test: + @echo Starting Zaza functional tests... 
+ @tox -e func + +bin/charm_helpers_sync.py: + @mkdir -p bin + @curl -o bin/charm_helpers_sync.py https://raw.githubusercontent.com/juju/charm-helpers/master/tools/charm_helpers_sync/charm_helpers_sync.py + + +bin/git_sync.py: + @mkdir -p bin + @wget -O bin/git_sync.py https://raw.githubusercontent.com/CanonicalLtd/git-sync/master/git_sync.py + +ch-sync: bin/charm_helpers_sync.py + $(PYTHON) bin/charm_helpers_sync.py -c charm-helpers-hooks.yaml + +ceph-sync: bin/git_sync.py + $(PYTHON) bin/git_sync.py -d lib -s https://github.com/openstack/charms.ceph.git + +sync: ch-sync + +publish: lint + bzr push lp:charms/ceph-osd + bzr push lp:charms/trusty/ceph-osd diff --git a/ceph-osd/README.md b/ceph-osd/README.md new file mode 100644 index 00000000..f7704ecb --- /dev/null +++ b/ceph-osd/README.md @@ -0,0 +1,484 @@ +# Overview + +[Ceph][ceph-upstream] is a unified, distributed storage system designed for +excellent performance, reliability, and scalability. + +The ceph-osd charm deploys the Ceph object storage daemon (OSD) and manages its +volumes. It is used in conjunction with the [ceph-mon][ceph-mon-charm] charm. +Together, these charms can scale out the amount of storage available in a Ceph +cluster. + +# Usage + +## Configuration + +This section covers common and/or important configuration options. See file +`config.yaml` for the full list of options, along with their descriptions and +default values. A YAML file (e.g. `ceph-osd.yaml`) is often used to store +configuration options. See the [Juju documentation][juju-docs-config-apps] for +details on configuring applications. + +#### `bluestore` + +The `bluestore` option specifies whether the +[BlueStore][upstream-ceph-bluestore] storage backend is used for all OSD +devices. The feature is enabled by default (value 'True'). If set to 'True', +this option overrides the `osd-format` option as BlueStore does not use a +traditional filesystem. + +> **Important**: This option has no effect unless Ceph Luminous (or greater) is + in use. + +#### `customize-failure-domain` + +The `customize-failure-domain` option determines how a Ceph CRUSH map is +configured. + +A value of 'false' (the default) will lead to a map that will replicate data +across hosts (implemented as [Ceph bucket type][upstream-ceph-buckets] 'host'). +With a value of 'true' all MAAS-defined zones will be used to generate a map +that will replicate data across Ceph availability zones (implemented as bucket +type 'rack'). + +This option is also supported by the ceph-mon charm. Its value must be the same +for both charms. + +#### `osd-devices` + +The `osd-devices` option lists what block devices can be used for OSDs across +the cluster. Devices that are listed in this option, but do not exist, will +be ignored. + +See section 'Storage devices' for an elaboration on this fundamental topic. + +#### `osd-format` + +The `osd-format` option specifies what filesystem to use for all OSD devices +('xfs' or 'ext4'). The default value is 'xfs'. This option only applies when +Ceph Luminous (or greater) is in use and option `bluestore` is set to 'False'. + +#### `source` + +The `source` option states the software sources. A common value is an OpenStack +UCA release (e.g. 'cloud:xenial-queens' or 'cloud:bionic-ussuri'). See [Ceph +and the UCA][cloud-archive-ceph]. The underlying host's existing apt sources +will be used if this option is not specified (this behaviour can be explicitly +chosen by using the value of 'distro'). 
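+
+As a quick illustration, the configuration options covered above can be
+collected into a single `ceph-osd.yaml` file and passed at deploy time. This
+is a minimal sketch only; the device paths and UCA release shown are
+illustrative, not recommendations:
+
+```yaml
+    ceph-osd:
+      osd-devices: /dev/vdb /dev/vdc /dev/vdd
+      source: cloud:bionic-ussuri
+```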
+ +### Storage devices + +A storage device is destined as an OSD (Object Storage Device). There can be +multiple OSDs per storage node (ceph-osd unit). + +The list of all possible storage devices for the cluster is defined by the +`osd-devices` option. The below examples can be used in the `ceph-osd.yaml` +configuration file. + +Block devices (regular), + + ceph-osd: + osd-devices: /dev/vdb /dev/vdc /dev/vdd + +Each regular block device must be an absolute path to a device node. + +Block devices (Juju storage), + + ceph-osd: + storage: + osd-devices: cinder,20G + +See the [Juju documentation][juju-docs-storage] for guidance on implementing +Juju storage. + +Directory-backed OSDs, + + ceph-osd: + storage: + osd-devices: /var/tmp/osd-1 + +> **Note**: OSD directories can no longer be created starting with Ceph + Nautilus. Existing OSD directories will continue to function after an upgrade + to Nautilus. + +The list defined by option `osd-devices` may affect newly added ceph-osd units +as well as existing units (the option may be modified after units have been +added). The charm will attempt to activate as Ceph storage any listed device +that is visible by the unit's underlying machine. To prevent the activation of +volumes on existing units the `blacklist-add-disk` action may be used. + +The configuration option is modified in the usual way. For instance, to have it +consist solely of devices '/dev/sdb' and '/dev/sdc': + + juju config ceph-osd osd-devices='/dev/sdb /dev/sdc' + +The charm will go into a blocked state (visible in `juju status` output) if it +detects pre-existing data on a device. In this case the operator can either +instruct the charm to ignore the disk (action `blacklist-add-disk`) or to have +it purge all data on the disk (action `zap-disk`). + +> **Important**: The recommended minimum number of OSDs in the cluster is three + and this is what the ceph-mon charm expects (the cluster will not form with a + lesser number). See option `expected-osd-count` in the ceph-mon charm to + overcome this but beware that going below three is not a supported + configuration. + +## Deployment + +A cloud with three MON nodes is a typical design whereas three OSDs are +considered the minimum. For example, to deploy a Ceph cluster consisting of +three OSDs (one per ceph-osd unit) and three MONs: + + juju deploy -n 3 --config ceph-osd.yaml ceph-osd + juju deploy -n 3 --to lxd:0,lxd:1,lxd:2 ceph-mon + juju integrate ceph-osd:mon ceph-mon:osd + +Here, a containerised MON is running alongside each storage node. We've assumed +that the machines spawned in the first command are assigned IDs of 0, 1, and 2. + +> **Note**: Refer to the [Install OpenStack][cdg-install-openstack] page in the + OpenStack Charms Deployment Guide for instructions on installing the ceph-osd + application for use with OpenStack. + +For each ceph-osd unit, the ceph-osd charm will scan for all the devices +configured via the `osd-devices` option and attempt to assign to it all of the +ones it finds. The cluster's initial pool of available storage is the "sum" of +all these assigned devices. + +## Network spaces + +This charm supports the use of Juju [network spaces][juju-docs-spaces] (Juju +`v.2.0`). This feature optionally allows specific types of the application's +network traffic to be bound to subnets that the underlying hardware is +connected to. + +> **Note**: Spaces must be configured in the backing cloud prior to deployment. 
+
+The ceph-osd charm exposes the following Ceph traffic types (bindings):
+
+* 'public' (front-side)
+* 'cluster' (back-side)
+
+For example, providing that spaces 'data-space' and 'cluster-space' exist, the
+deploy command above could look like this:
+
+    juju deploy --config ceph-osd.yaml -n 3 ceph-osd \
+      --bind "public=data-space cluster=cluster-space"
+
+Alternatively, configuration can be provided as part of a bundle:
+
+```yaml
+    ceph-osd:
+      charm: cs:ceph-osd
+      num_units: 1
+      bindings:
+        public: data-space
+        cluster: cluster-space
+```
+
+Refer to the [Ceph Network Reference][ceph-docs-network-ref] to learn about the
+implications of segregating Ceph network traffic.
+
+> **Note**: Existing ceph-osd units configured with the `ceph-public-network`
+  or `ceph-cluster-network` options will continue to honour them. Furthermore,
+  these options override any space bindings, if set.
+
+## AppArmor profiles
+
+Although AppArmor is not enabled for Ceph by default, an AppArmor profile can
+be generated by the charm by assigning a value of 'complain', 'enforce', or
+'disable' (the default) to option `aa-profile-mode`.
+
+> **Caution**: Enabling an AppArmor profile is disruptive to a running Ceph
+  cluster as all ceph-osd processes must be restarted.
+
+The new profile has a narrow supported use case, and it should always be
+verified in pre-production against the specific configurations and topologies
+intended for production.
+
+The profiles generated by the charm should **not** be used in the following
+scenarios:
+
+* On any version of Ubuntu older than 16.04
+* On any version of Ceph older than Luminous
+* When OSD journal devices are in use
+* When Ceph BlueStore is enabled
+
+## Block device encryption
+
+The ceph-osd charm supports encryption for OSD volumes that are backed by block
+devices. To use Ceph's native key management framework, available since Ceph
+Jewel, set option `osd-encrypt` for the ceph-osd charm:
+
+```yaml
+    ceph-osd:
+      osd-encrypt: True
+```
+
+Here, dm-crypt keys are stored in the MON sub-cluster.
+
+Alternatively, since Ceph Luminous, encryption keys can be stored in Vault,
+which is deployed and initialised via the [vault][vault-charm] charm. Set
+options `osd-encrypt` and `osd-encrypt-keymanager` for the ceph-osd charm:
+
+```yaml
+    ceph-osd:
+      osd-encrypt: True
+      osd-encrypt-keymanager: vault
+```
+
+> **Important**: Post deployment configuration will only affect block devices
+  associated with **new** ceph-osd units.
+
+## Actions
+
+This section covers Juju [actions][juju-docs-actions] supported by the charm.
+Actions allow specific operations to be performed on a per-unit basis. To
+display action descriptions run `juju actions --schema ceph-osd`. If the charm
+is not deployed then see file `actions.yaml`.
+
+* `add-disk`
+* `blacklist-add-disk`
+* `blacklist-remove-disk`
+* `get-availability-zone`
+* `list-disks`
+* `osd-in`
+* `osd-out`
+* `security-checklist`
+* `start`
+* `stop`
+* `zap-disk`
+
+## Working with OSDs
+
+### Set OSDs to 'out'
+
+Use the `osd-out` action to set OSD volumes on a unit to 'out'.
+
+> **Warning**: This action has the potential of impacting your cluster
+  significantly. The [Ceph documentation][ceph-docs-removing-osds] on this
+  topic is considered essential reading.
+
+Unless the cluster itself is set to 'noout' this action will cause Ceph to
+rebalance data by migrating PGs out of the affected OSDs and onto OSDs
+available on other units. The impact is twofold:
+
+1. The available space on the remaining OSDs is reduced. 
Not only is there less + space for future workloads but there is a danger of exceeding the cluster's + storage capacity. +1. The traffic and CPU load on the cluster is increased. + +> **Note**: It has been reported that setting OSDs to 'out' may cause some PGs + to get stuck in the 'active+remapped' state. This is an upstream issue. + +The [ceph-mon][ceph-mon-charm] charm has an action called `set-noout` that sets +'noout' for the cluster. + +It may be perfectly fine to have data rebalanced. The decisive factor is +whether the OSDs are being paused temporarily (e.g. the underlying machine is +scheduled for maintenance) or whether they are being removed from the cluster +completely (e.g. the storage hardware is reaching EOL). + +Examples: + + # Set OSDs '0' and '1' to 'out' on unit `ceph-osd/4` + juju run ceph-osd/4 osd-out osds=osd.0,osd.1 + + # Set all OSDs to 'out' on unit `ceph-osd/2` + juju run ceph-osd/2 osd-out osds=all + +### Set OSDs to 'in' + +Use the `osd-in` action to set OSD volumes on a unit to 'in'. + +The `osd-in` action is reciprocal to the `osd-out` action. The OSDs are set to +'in'. It is typically used when the `osd-out` action was used in conjunction +with the cluster 'noout' flag. + +Examples: + + # Set OSDs '0' and '1' to 'in' on unit `ceph-osd/4` + juju run ceph-osd/4 osd-in osds=osd.0,osd.1 + + # Set all OSDs to 'in' on unit `ceph-osd/2` + juju run ceph-osd/2 osd-in osds=all + +### Stop and start OSDs + +Use the `stop` and `start` actions to stop and start OSD daemons on a unit. + +> **Important**: These actions are not available on the 'trusty' series due to + the reliance on `systemd`. + +Examples: + + # Stop services 'ceph-osd@0' and 'ceph-osd@1' on unit `ceph-osd/4` + juju run ceph-osd/4 stop osds=0,1 + + # Start all ceph-osd services on unit `ceph-osd/2` + juju run ceph-osd/2 start osds=all + +> **Note**: Stopping an OSD daemon will put the associated unit into a blocked + state. + +## Working with disks + +### List disks + +Use the `list-disks` action to list disks known to a unit. + +The action lists the unit's block devices by categorising them in three ways: + +* `disks`: visible (known by udev), unused (not mounted), and not designated as + an OSD journal (via the `osd-journal` configuration option) + +* `blacklist`: like `disks` but blacklisted (see action `blacklist-add-disk`) + +* `non-pristine`: like `disks` but not eligible for use due to the presence of + existing data + +Example: + + # List disks on unit `ceph-osd/4` + juju run ceph-osd/4 list-disks + +### Add a disk + +Use the `add-disk` action to add a disk to a unit. + +A ceph-osd unit is automatically assigned OSD volumes based on the current +value of the `osd-devices` application option. The `add-disk` action allows the +operator to manually add OSD volumes (for disks that are not listed by +`osd-devices`) to an existing unit. + +**Parameters** + + + +* `osd-devices` (required) + A space-separated list of devices to format and initialise as OSD volumes. + + + +* `bucket` + The name of a Ceph bucket to add these devices to. + +Example: + + # Add disk /dev/vde on unit `ceph-osd/4` + juju run ceph-osd/4 add-disk osd-devices=/dev/vde + +### Blacklist a disk + +Use the `blacklist-add-disk` action to add a disk to a unit's blacklist. + +The action allows the operator to add disks (that are visible to the unit's +underlying machine) to the unit's blacklist. A blacklisted device will not be +initialised as an OSD volume when the value of the `osd-devices` application +option changes. 
This action does not prevent a device from being activated via +the `add-disk` action. + +Use the `list-disks` action to list the unit's blacklist entries. + +> **Important**: This action and blacklist do not have any effect on current + OSD volumes. + +**Parameters** + + + +* `osd-devices` (required) + A space-separated list of devices to add to a unit's blacklist. + +Example: + + # Blacklist disks /dev/vda and /dev/vdf on unit `ceph-osd/0` + juju run ceph-osd/0 \ + blacklist-add-disk osd-devices='/dev/vda /dev/vdf' + +### Un-blacklist a disk + +Use the `blacklist-remove-disk` action to remove a disk from a unit's +blacklist. + +**Parameters** + + + +* `osd-devices` (required) + A space-separated list of devices to remove from a unit's blacklist. + +Each device should have an existing entry in the unit's blacklist. Use the +`list-disks` action to list the unit's blacklist entries. + +Example: + + # Un-blacklist disk /dev/vdb on unit `ceph-osd/1` + juju run ceph-osd/1 \ + blacklist-remove-disk osd-devices=/dev/vdb + +### Zap a disk + +Use the `zap-disk` action to purge a disk of all data. + +In order to prevent unintentional data loss, the charm will not use a disk that +contains data. To forcibly make a disk available, the `zap-disk` action can be +used. Due to the destructive nature of this action the `i-really-mean-it` +option must be passed. This action is normally followed by the `add-disk` +action. + +**Parameters** + + + +* `devices` (required) + A space-separated list of devices to be recycled. + + + +* `i-really-mean-it` (required) + A boolean option for confirming the action. + +Example: + + # Zap disk /dev/vdc on unit `ceph-osd/3` + juju run ceph-osd/3 \ + zap-disk i-really-mean-it=true devices=/dev/vdc + +> **Note**: The `zap-disk` action cannot be run on a mounted device, an active + BlueStore device, or an encrypted device. There are also issues with + LVM-backed volumes (see [LP #1858519][lp-bug-1858519]). + +# Documentation + +The OpenStack Charms project maintains two documentation guides: + +* [OpenStack Charm Guide][cg]: for project information, including development + and support notes +* [OpenStack Charms Deployment Guide][cdg]: for charm usage information + +See also the [Charmed Ceph documentation][charmed-ceph-docs]. + +# Bugs + +Please report bugs on [Launchpad][lp-bugs-charm-ceph-osd]. 
+
+
+[ceph-upstream]: https://ceph.io
+[cg]: https://docs.openstack.org/charm-guide
+[cdg]: https://docs.openstack.org/project-deploy-guide/charm-deployment-guide
+[ceph-mon-charm]: https://jaas.ai/ceph-mon
+[vault-charm]: https://jaas.ai/vault
+[charmed-ceph-docs]: https://ubuntu.com/ceph/docs
+[juju-docs-storage]: https://jaas.ai/docs/storage
+[juju-docs-actions]: https://jaas.ai/docs/actions
+[juju-docs-spaces]: https://jaas.ai/docs/spaces
+[juju-docs-config-apps]: https://juju.is/docs/configuring-applications
+[ceph-docs-removing-osds]: https://docs.ceph.com/docs/master/rados/operations/add-or-rm-osds/
+[ceph-docs-network-ref]: http://docs.ceph.com/docs/master/rados/configuration/network-config-ref
+[lp-bugs-charm-ceph-osd]: https://bugs.launchpad.net/charm-ceph-osd/+filebug
+[cdg-install-openstack]: https://docs.openstack.org/project-deploy-guide/charm-deployment-guide/latest/install-openstack.html
+[upstream-ceph-buckets]: https://docs.ceph.com/docs/master/rados/operations/crush-map/#types-and-buckets
+[upstream-ceph-bluestore]: https://docs.ceph.com/en/latest/rados/configuration/storage-devices/#bluestore
+[cloud-archive-ceph]: https://wiki.ubuntu.com/OpenStack/CloudArchive#Ceph_and_the_UCA
+[lp-bug-1858519]: https://bugs.launchpad.net/charm-ceph-osd/+bug/1858519
diff --git a/ceph-osd/TODO b/ceph-osd/TODO
new file mode 100644
index 00000000..782a7a43
--- /dev/null
+++ b/ceph-osd/TODO
@@ -0,0 +1,4 @@
+Ceph OSD Charm
+==============
+
+ * Nothing TODO!
diff --git a/ceph-osd/actions.yaml b/ceph-osd/actions.yaml
new file mode 100644
index 00000000..3fe3c29d
--- /dev/null
+++ b/ceph-osd/actions.yaml
@@ -0,0 +1,207 @@
+# NOTE(fnordahl): Output of `juju list-action` is at time of this writing
+#                 formatted in such a way that we should keep description
+#                 as terse as possible and refer to documentation elsewhere.
+#
+#                 Verify with `juju list-action` before proposing/committing
+#                 changes.
+osd-out:
+  description: |
+    \
+    USE WITH CAUTION - Mark unit OSDs as 'out'.
+    Documentation: https://jaas.ai/ceph-osd/
+  params:
+    osds:
+      description: A comma-separated list of OSD IDs to set to 'out' (or keyword 'all')
+  required:
+    - osds
+osd-in:
+  description: |
+    \
+    Set the local osd units in the charm to 'in'.
+    Documentation: https://jaas.ai/ceph-osd/
+  params:
+    osds:
+      description: A comma-separated list of OSD IDs to set to 'in' (or keyword 'all')
+  required:
+    - osds
+list-disks:
+  description: |
+    \
+    List disks.
+    Documentation: https://jaas.ai/ceph-osd/
+add-disk:
+  description: |
+    \
+    Add disk(s) to Ceph.
+    Documentation: https://jaas.ai/ceph-osd/
+  params:
+    osd-devices:
+      type: string
+      description: The devices to format and set up as osd volumes.
+    bucket:
+      type: string
+      description: The name of the bucket in Ceph to add these devices into
+    osd-ids:
+      type: string
+      description: |
+        The OSD ids to recycle. If specified, the number of elements in this
+        list must be the same as the number of 'osd-devices'.
+    cache-devices:
+      type: string
+      description: |
+        A list of devices to act as caching devices for 'bcache', using the
+        'osd-devices' as backing. If the number of elements in this list is
+        less than the number of 'osd-devices', then the caching ones will be
+        distributed in a round-robin fashion.
+    partition-size:
+      type: integer
+      description: |
+        The size of the partitions to create for the caching devices. If left
+        unspecified, then the full size of the devices will be split evenly
+        across partitions. 
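+    # Illustrative example (hypothetical values): with
+    # osd-devices="/dev/sdb /dev/sdc /dev/sdd" and cache-devices="/dev/nvme0n1",
+    # the single NVMe device is partitioned and its partitions are assigned to
+    # the three backing devices in round-robin order.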
+    use-crimson:
+      type: boolean
+      description: |
+        Whether to use the Crimson implementation for the new OSD. Note that
+        this is an experimental feature, and the charm doesn't provide any
+        lifecycle support for OSDs that run on Crimson.
+    i-really-mean-it:
+      type: boolean
+      description: |
+        Must be set when 'use-crimson' is True.
+    bluestore-skip:
+      type: string
+      description: |
+        A comma-separated list of BlueStore features to omit. This can
+        include the WAL and DB devices (for example, "wal,db").
+  required:
+    - osd-devices
+blacklist-add-disk:
+  description: |
+    \
+    Add disk(s) to blacklist.
+    Documentation: https://jaas.ai/ceph-osd/
+  params:
+    osd-devices:
+      type: string
+      description: |
+        A space-separated list of devices to add to the blacklist.
+        .
+        Each element should be an absolute path to a device node or
+        filesystem directory (the latter is supported for ceph >= 0.56.6).
+        .
+        Example: '/dev/vdb /var/tmp/test-osd'
+  required:
+    - osd-devices
+blacklist-remove-disk:
+  description: |
+    \
+    Remove disk(s) from blacklist.
+    Documentation: https://jaas.ai/ceph-osd/
+  params:
+    osd-devices:
+      type: string
+      description: |
+        A space-separated list of devices to remove from the blacklist.
+        .
+        Each element should be an existing entry in the unit's blacklist.
+        Use the list-disks action to list current blacklist entries.
+        .
+        Example: '/dev/vdb /var/tmp/test-osd'
+  required:
+    - osd-devices
+zap-disk:
+  description: |
+    \
+    USE WITH CAUTION - Purge disk of all data and signatures for use by Ceph.
+    Documentation: https://jaas.ai/ceph-osd/
+  params:
+    devices:
+      type: string
+      description: |
+        A space-separated list of devices to remove the partition table from.
+    i-really-mean-it:
+      type: boolean
+      description: |
+        This must be toggled to enable actually performing this action.
+  required:
+    - devices
+    - i-really-mean-it
+start:
+  description: |
+    \
+    Start OSD by ID.
+    Documentation: https://jaas.ai/ceph-osd/
+  params:
+    osds:
+      description: A comma-separated list of OSD IDs to start (or keyword 'all')
+  required:
+    - osds
+stop:
+  description: |
+    \
+    Stop OSD by ID.
+    Documentation: https://jaas.ai/ceph-osd/
+  params:
+    osds:
+      description: A comma-separated list of OSD IDs to stop (or keyword 'all')
+  required:
+    - osds
+security-checklist:
+  description: Validate the running configuration against the OpenStack security guides checklist.
+update-apparmor-and-restart-osds:
+  description: |
+    Invoke the pending update of AppArmor profiles, followed by a restart of
+    the OSD services. Make sure to run this action on each unit separately,
+    at different times, to avoid a simultaneous restart of all OSDs.
+get-availability-zone:
+  description: |
+    Obtain information about the availability zone, including the CRUSH
+    structure (specifically 'rack' and 'row').
+  params:
+    format:
+      type: string
+      default: text
+      enum:
+        - text
+        - json
+      description: Specify output format (text|json).
+    show-all:
+      type: boolean
+      description: Option to view information for all units. Default is 'false'.
+remove-disk:
+  description: |
+    Remove disks from Ceph, producing a report afterwards that indicates to
+    the user how to replace them in the closest way possible.
+  params:
+    osd-devices:
+      type: string
+      description: A space-separated list of devices to remove.
+    osd-ids:
+      type: string
+      description: |
+        A space-separated list of OSD IDs to remove. This parameter is
+        mutually exclusive with the parameter 'osd-devices'.
+ purge: + type: boolean + description: | + Whether to fully purge the OSD or let the id be available for reuse. + default: false + timeout: + type: integer + description: | + The time in minutes to wait for the OSD to be safe to remove. + default: 5 + force: + type: boolean + description: | + Whether to forcefully remove the OSD even if it's determined to be + unsafe to destroy it. + default: false + format: + type: string + enum: + - text + - json + default: text + description: The output format returned for the command. diff --git a/ceph-osd/actions/__init__.py b/ceph-osd/actions/__init__.py new file mode 100644 index 00000000..8d6182b7 --- /dev/null +++ b/ceph-osd/actions/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +sys.path.append('hooks') +sys.path.append('lib') diff --git a/ceph-osd/actions/add-disk b/ceph-osd/actions/add-disk new file mode 120000 index 00000000..4379d79b --- /dev/null +++ b/ceph-osd/actions/add-disk @@ -0,0 +1 @@ +add_disk.py \ No newline at end of file diff --git a/ceph-osd/actions/add_disk.py b/ceph-osd/actions/add_disk.py new file mode 100755 index 00000000..184c796e --- /dev/null +++ b/ceph-osd/actions/add_disk.py @@ -0,0 +1,265 @@ +#!/usr/bin/env python3 +# +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import os +import psutil +import shutil +import subprocess +import sys + +sys.path.append('lib') +sys.path.append('hooks') + +import charmhelpers.contrib.storage.linux.ceph as ch_ceph +import charmhelpers.core.hookenv as hookenv +from charmhelpers.core.hookenv import function_fail +from charmhelpers.fetch import apt_install + +from charmhelpers.core.unitdata import kv +from utils import (PartitionIter, device_size, DeviceError) + +import ceph_hooks +import charms_ceph.utils + + +CRIMSON_PACKAGES = ['crimson-osd', 'libc-ares2', 'libcrypto++-dev', + 'libyaml-cpp-dev'] +CRIMSON_SYSTEMD_FILE = '/lib/systemd/system/crimson-osd@.service' + + +def get_osd_from_device(device): + """Given a device, return the OSD ID that it maps to.""" + output = subprocess.check_output(['ceph-volume', 'lvm', 'list', + '--format=json']) + devmap = json.loads(output.decode('utf8')) + for osd_id, data in devmap.items(): + for elem in data: + if device in elem.get('devices', []): + return osd_id + + +def start_crimson_osd(osd_id, device): + """An OSD was started with the classic daemon, but Crimson was + requested. 
As such, stop the current one and launch the correct daemon.""" + + if osd_id is None: + osd_id = get_osd_from_device(device) + + charms_ceph.utils.stop_osd(osd_id) + charms_ceph.utils.disable_osd(osd_id) + unit_name = ( + '/run/systemd/system/ceph-osd.target.wants/ceph-osd@{}.service' + .format(osd_id)) + + if os.path.exists(unit_name): + os.remove(unit_name) + + if not os.path.exists(CRIMSON_SYSTEMD_FILE): + apt_install(CRIMSON_PACKAGES, fatal=True) + shutil.copy('files/systemd/crimson-osd@.service', CRIMSON_SYSTEMD_FILE) + subprocess.check_call(['systemctl', 'daemon-reload']) + + subprocess.check_call(['systemctl', 'enable', + 'crimson-osd@{}'.format(osd_id)]) + subprocess.check_call(['systemctl', 'start', + 'crimson-osd@{}'.format(osd_id)]) + + +def add_device(request, device_path, bucket=None, osd_id=None, + part_iter=None, use_crimson=False, bluestore_skip=None): + """Add a new device to be used by the OSD unit. + + :param request: A broker request to notify monitors of changes. + :type request: CephBrokerRq + + :param device_path: The absolute path to the device to be added. + :type device_path: str + + :param bucket: The bucket name in ceph to add the device into, or None. + :type bucket: Option[str, None] + + :param osd_id: The OSD Id to use, or None. + :type osd_id: Option[str, None] + + :param part_iter: The partition iterator that will create partitions on + demand, to service bcache creation, or None, if no + partitions need to be created. + :type part_iter: Option[PartitionIter, None] + + :param use_crimson: Whether to use Crimson for the OSD (Experimental). + :type use_crimson: bool + + :param bluestore_skip: Which Bluestore features to avoid. + :type bluestore_skip: Option[str, None] + """ + if part_iter is not None: + effective_dev = part_iter.create_bcache(device_path) + if not effective_dev: + raise DeviceError( + 'Failed to create bcache for device {}'.format(device_path)) + else: + effective_dev = device_path + + if osd_id is not None and osd_id.startswith('osd.'): + osd_id = osd_id[4:] + + if bluestore_skip: + bluestore_skip = bluestore_skip.split(',') + + charms_ceph.utils.osdize(effective_dev, hookenv.config('osd-format'), + ceph_hooks.get_journal_devices(), + hookenv.config('ignore-device-errors'), + hookenv.config('osd-encrypt'), + hookenv.config('osd-encrypt-keymanager'), + osd_id, bluestore_skip) + + if use_crimson: + start_crimson_osd(osd_id, effective_dev) + + # Make it fast! + if hookenv.config('autotune'): + charms_ceph.utils.tune_dev(device_path) + mounts = filter(lambda disk: device_path + in disk.device, psutil.disk_partitions()) + for osd in mounts: + osd_id = osd.mountpoint.split('/')[-1].split('-')[-1] + request.ops.append({ + 'op': 'move-osd-to-bucket', + 'osd': "osd.{}".format(osd_id), + 'bucket': bucket}) + + # Ensure mon's count of osds is accurate + db = kv() + bootstrapped_osds = len(db.get('osd-devices', [])) + for r_id in hookenv.relation_ids('mon'): + hookenv.relation_set( + relation_id=r_id, + relation_settings={ + 'bootstrapped-osds': bootstrapped_osds, + } + ) + + if part_iter is not None: + # Update the alias map so we can refer to an OSD via the original + # device instead of the newly created cache name. 
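+        # For instance (illustrative paths): after a bcache device is
+        # created for '/dev/vdb', the map may contain
+        # {'/dev/vdb': '/dev/bcache0'}, which lets later actions such as
+        # remove-disk accept the original device path.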
+ aliases = db.get('osd-aliases', {}) + aliases[device_path] = effective_dev + db.set('osd-aliases', aliases) + db.flush() + + return request + + +def get_devices(key): + """Get a list of the devices passed for this action, for a key.""" + devices = [] + for path in (hookenv.action_get(key) or '').split(): + path = path.strip() + if os.path.isabs(path): + devices.append(path) + + return devices + + +def cache_storage(): + """Return a list of Juju storage for caches.""" + cache_ids = hookenv.storage_list('cache-devices') + return [hookenv.storage_get('location', cid) for cid in cache_ids] + + +def validate_osd_id(osd_id): + """Test that an OSD id is actually valid.""" + if isinstance(osd_id, str): + if osd_id.startswith('osd.'): + osd_id = osd_id[4:] + try: + return int(osd_id) >= 0 + except ValueError: + return False + elif isinstance(osd_id, int): + return osd_id >= 0 + return False + + +def validate_partition_size(psize, devices, caches): + """Test that the cache devices have enough room.""" + sizes = [device_size(cache) for cache in caches] + n_caches = len(caches) + for idx in range(len(devices)): + cache_idx = idx % n_caches + prev = sizes[cache_idx] - psize + if prev < 0: + function_fail('''Cache device {} does not have enough + room to provide {} {}GB partitions'''.format( + caches[cache_idx], (idx + 1) // n_caches, psize)) + sys.exit(1) + sizes[cache_idx] = prev + + +if __name__ == "__main__": + crimson = hookenv.action_get('use-crimson') + if crimson and not hookenv.action_get('i-really-mean-it'): + function_fail('Need to pass i-really-mean-it for Crimson OSDs') + sys.exit(1) + + request = ch_ceph.CephBrokerRq() + devices = get_devices('osd-devices') + caches = get_devices('cache-devices') or cache_storage() + if caches: + psize = hookenv.action_get('partition-size') + if psize: + validate_partition_size(psize, devices, caches) + + part_iter = PartitionIter(caches, psize, devices) + else: + part_iter = None + + osd_ids = hookenv.action_get('osd-ids') + if osd_ids: + # Validate number and format for OSD ids. 
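+        # (IDs may be passed as '3' or 'osd.3'; validate_osd_id() below
+        # accepts both forms, and add_device() strips the 'osd.' prefix
+        # before the device is osdized.)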
+        osd_ids = osd_ids.split()
+        if len(osd_ids) != len(devices):
+            function_fail('The number of osd-ids and osd-devices must match')
+            sys.exit(1)
+        for osd_id in osd_ids:
+            if not validate_osd_id(osd_id):
+                function_fail('Invalid OSD ID passed: {}'.format(osd_id))
+                sys.exit(1)
+    else:
+        osd_ids = [None] * len(devices)
+
+    bluestore_skip = hookenv.action_get('bluestore-skip')
+
+    errors = []
+    for dev, osd_id in zip(devices, osd_ids):
+        try:
+            request = add_device(request=request,
+                                 device_path=dev,
+                                 bucket=hookenv.action_get("bucket"),
+                                 osd_id=osd_id, part_iter=part_iter,
+                                 use_crimson=crimson,
+                                 bluestore_skip=bluestore_skip)
+        except Exception:
+            errors.append(dev)
+
+    ch_ceph.send_request_if_needed(request, relation='mon')
+    if errors:
+        if part_iter is not None:
+            for error in errors:
+                part_iter.cleanup(error)
+
+        function_fail('Failed to add devices: {}'.format(','.join(errors)))
+        sys.exit(1)
diff --git a/ceph-osd/actions/blacklist-add-disk b/ceph-osd/actions/blacklist-add-disk
new file mode 120000
index 00000000..d3da513e
--- /dev/null
+++ b/ceph-osd/actions/blacklist-add-disk
@@ -0,0 +1 @@
+blacklist.py
\ No newline at end of file
diff --git a/ceph-osd/actions/blacklist-remove-disk b/ceph-osd/actions/blacklist-remove-disk
new file mode 120000
index 00000000..d3da513e
--- /dev/null
+++ b/ceph-osd/actions/blacklist-remove-disk
@@ -0,0 +1 @@
+blacklist.py
\ No newline at end of file
diff --git a/ceph-osd/actions/blacklist.py b/ceph-osd/actions/blacklist.py
new file mode 100755
index 00000000..9f7f39a3
--- /dev/null
+++ b/ceph-osd/actions/blacklist.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python3
+#
+# Copyright 2017 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+
+sys.path.append('hooks')
+
+import charmhelpers.core.hookenv as hookenv
+import charmhelpers.core.unitdata as unitdata
+
+BLACKLIST_KEY = 'osd-blacklist'
+
+
+class Error(Exception):
+    def __init__(self, message):
+        self.message = message
+
+    def __str__(self):
+        return repr(self.message)
+
+
+def get_devices():
+    """Parse 'osd-devices' action parameter, returns list."""
+    devices = []
+    for path in hookenv.action_get('osd-devices').split(' '):
+        path = path.strip()
+        if not os.path.isabs(path):
+            raise Error('{}: Not absolute path.'.format(path))
+        devices.append(path)
+    return devices
+
+
+def blacklist_add():
+    """
+    Add devices given in 'osd-devices' action parameter to
+    unit-local devices blacklist.
+    """
+    db = unitdata.kv()
+    blacklist = db.get(BLACKLIST_KEY, [])
+    for device in get_devices():
+        if not os.path.exists(device):
+            raise Error('{}: No such file or directory.'.format(device))
+        if device not in blacklist:
+            blacklist.append(device)
+    db.set(BLACKLIST_KEY, blacklist)
+    db.flush()
+
+
+def blacklist_remove():
+    """
+    Remove devices given in 'osd-devices' action parameter from
+    unit-local devices blacklist.
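+
+    Raises Error if a given device has no entry in the unit's blacklist.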
+ """ + db = unitdata.kv() + blacklist = db.get(BLACKLIST_KEY, []) + for device in get_devices(): + try: + blacklist.remove(device) + except ValueError: + raise Error('{}: Device not in blacklist.'.format(device)) + db.set(BLACKLIST_KEY, blacklist) + db.flush() + + +# A dictionary of all the defined actions to callables +ACTIONS = { + "blacklist-add-disk": blacklist_add, + "blacklist-remove-disk": blacklist_remove, +} + + +def main(args): + """Main program""" + action_name = os.path.basename(args[0]) + try: + action = ACTIONS[action_name] + except KeyError: + return "Action {} undefined".format(action_name) + else: + try: + action() + except Exception as e: + hookenv.action_fail("Action {} failed: {}" + "".format(action_name, str(e))) + + +if __name__ == "__main__": + sys.exit(main(sys.argv)) diff --git a/ceph-osd/actions/get-availability-zone b/ceph-osd/actions/get-availability-zone new file mode 120000 index 00000000..47227f6f --- /dev/null +++ b/ceph-osd/actions/get-availability-zone @@ -0,0 +1 @@ +get_availability_zone.py \ No newline at end of file diff --git a/ceph-osd/actions/get_availability_zone.py b/ceph-osd/actions/get_availability_zone.py new file mode 100755 index 00000000..bcbeade6 --- /dev/null +++ b/ceph-osd/actions/get_availability_zone.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python3 +# +# Copyright 2021 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import json +import sys + +from tabulate import tabulate + +sys.path.append("hooks") +sys.path.append("lib") + +from charms_ceph.utils import get_osd_tree +from charmhelpers.core import hookenv +from utils import get_unit_hostname + + +CRUSH_MAP_HIERARCHY = [ + "root", # 10 + "region", # 9 + "datacenter", # 8 + "room", # 7 + "pod", # 6 + "pdu", # 5 + "row", # 4 + "rack", # 3 + "chassis", # 2 + "host", # 1 + "osd", # 0 +] + + +def _get_human_readable(availability_zones): + """Get human readable table format. + + :param availability_zones: information about the availability zone + :type availability_zones: Dict[str, Dict[str, str]] + :returns: formatted data as table + :rtype: str + """ + data = availability_zones.get( + "all-units", {get_unit_hostname(): availability_zones["unit"]} + ) + data = [[unit, *crush_map.values()] for unit, crush_map in data.items()] + return tabulate( + data, tablefmt="grid", headers=["unit", *CRUSH_MAP_HIERARCHY] + ) + + +def _get_crush_map(crush_location): + """Get Crush Map hierarchy from CrushLocation. + + :param crush_location: CrushLocation from function get_osd_tree + :type crush_location: charms_ceph.utils.CrushLocation + :returns: dictionary contains the Crush Map hierarchy, where + the keys are according to the defined types of the + Ceph Map Hierarchy + :rtype: Dict[str, str] + """ + return { + crush_map_type: getattr(crush_location, crush_map_type) + for crush_map_type in CRUSH_MAP_HIERARCHY + if getattr(crush_location, crush_map_type, None) + } + + +def get_availability_zones(show_all=False): + """Get information about the availability zones. 
+
+    Returns a dictionary with the AZ information for the current unit under
+    "unit" and, if the action was executed with the parameter show-all,
+    the information for all units under "all-units".
+
+    :param show_all: define whether the result should contain AZ information
+                     for all units
+    :type show_all: bool
+    :returns: {"unit": <current unit AZ>, "all-units": {<hostname>: <AZ>}}
+    :rtype: Dict[str, Dict[str, str]]
+    """
+    results = {"unit": {}, "all-units": {}}
+    osd_tree = get_osd_tree(service="osd-upgrade")
+
+    this_unit_host = get_unit_hostname()
+    for crush_location in osd_tree:
+        crush_map = _get_crush_map(crush_location)
+        if this_unit_host == crush_location.name:
+            results["unit"] = crush_map
+
+        results["all-units"][crush_location.name] = crush_map
+
+    if not show_all:
+        results.pop("all-units")
+
+    return results
+
+
+def format_availability_zones(availability_zones, human_readable=True):
+    """Format availability zones to action output format."""
+    if human_readable:
+        return _get_human_readable(availability_zones)
+
+    return json.dumps(availability_zones)
+
+
+def main():
+    try:
+        show_all = hookenv.action_get("show-all")
+        human_readable = hookenv.action_get("format") == "text"
+        availability_zones = get_availability_zones(show_all)
+        if not availability_zones["unit"]:
+            hookenv.log(
+                "Availability zone information for current unit not found.",
+                hookenv.DEBUG
+            )
+
+        formatted_azs = format_availability_zones(availability_zones,
+                                                  human_readable)
+        hookenv.action_set({"availability-zone": formatted_azs})
+    except Exception as error:
+        hookenv.action_fail("Action failed: {}".format(str(error)))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ceph-osd/actions/list-disks b/ceph-osd/actions/list-disks
new file mode 120000
index 00000000..ebe3b65f
--- /dev/null
+++ b/ceph-osd/actions/list-disks
@@ -0,0 +1 @@
+list_disks.py
\ No newline at end of file
diff --git a/ceph-osd/actions/list_disks.py b/ceph-osd/actions/list_disks.py
new file mode 100755
index 00000000..861b975c
--- /dev/null
+++ b/ceph-osd/actions/list_disks.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+#
+# Copyright 2016 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+List disks
+
+The 'disks' key is populated with block devices that are known by udev,
+are not mounted and are not mentioned in the 'osd-journal' configuration
+option.
+
+The 'blacklist' key is populated with osd-devices in the blacklist stored
+in the local kv store of this specific unit.
+
+The 'non-pristine' key is populated with block devices that are known by
+udev, are not mounted, are not mentioned in the 'osd-journal' configuration
+option and are currently not eligible for use because of the presence of
+foreign data.
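+
+Illustrative output (device names are examples only)::
+
+    disks: ['/dev/vdb']
+    blacklist: ['/var/tmp/test-osd']
+    non-pristine: ['/dev/vdc']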
+""" + +import sys +import os + +sys.path.append('hooks/') +sys.path.append('lib/') + +import charmhelpers.core.hookenv as hookenv + +import charms_ceph.utils +import utils + + +def list_disk(): + non_pristine = [] + osd_journal = [] + for journal in utils.get_journal_devices(): + osd_journal.append(os.path.realpath(journal)) + + for dev in list(set(charms_ceph.utils.unmounted_disks()) - + set(osd_journal)): + if (not charms_ceph.utils.is_active_bluestore_device(dev) and + not charms_ceph.utils.is_pristine_disk(dev)): + non_pristine.append(dev) + + hookenv.action_set({ + 'disks': list(set(charms_ceph.utils.unmounted_disks()) - + set(osd_journal)), + 'blacklist': utils.get_blacklist(), + 'non-pristine': non_pristine, + }) + + +if __name__ == '__main__': + list_disk() diff --git a/ceph-osd/actions/osd-in b/ceph-osd/actions/osd-in new file mode 120000 index 00000000..1cc47e9f --- /dev/null +++ b/ceph-osd/actions/osd-in @@ -0,0 +1 @@ +osd_in_out.py \ No newline at end of file diff --git a/ceph-osd/actions/osd-out b/ceph-osd/actions/osd-out new file mode 120000 index 00000000..1cc47e9f --- /dev/null +++ b/ceph-osd/actions/osd-out @@ -0,0 +1 @@ +osd_in_out.py \ No newline at end of file diff --git a/ceph-osd/actions/osd_in_out.py b/ceph-osd/actions/osd_in_out.py new file mode 100755 index 00000000..8b579a98 --- /dev/null +++ b/ceph-osd/actions/osd_in_out.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 +# +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# osd_out/osd_in actions file. + +import os +import sys + +from subprocess import check_output, STDOUT + +sys.path.append('lib') +sys.path.append('hooks') + +from charmhelpers.core.hookenv import ( + function_fail, + function_set, + log, + ERROR, +) + +from charms_ceph.utils import get_local_osd_ids +from ceph_hooks import assess_status +from utils import parse_osds_arguments, ALL + +IN = "in" +OUT = "out" + + +def check_osd_id(osds): + """Check ceph OSDs existence. + + :param osds: list of osds IDs + :type osds: set + :returns: list of osds IDs present on the local machine and + list of failed osds IDs + :rtype: Tuple[set, set] + :raises OSError: if the unit can't get the local osd ids + """ + all_local_osd = get_local_osd_ids() + if ALL in osds: + return set(all_local_osd), set() + + failed_osds = osds.difference(all_local_osd) + if failed_osds: + log("Ceph OSDs not present: {}".format(", ".join(failed_osds)), + level=ERROR) + + return osds, failed_osds + + +def ceph_osd_upgrade(action, osd_id): + """Execute ceph osd-upgrade command. 
+
+    :param action: action type IN/OUT
+    :type action: str
+    :param osd_id: osd ID
+    :type osd_id: str
+    :returns: output message
+    :rtype: str
+    :raises subprocess.CalledProcessError: if the ceph command fails
+    """
+    cmd = ["ceph", "--id", "osd-upgrade", "osd", action, osd_id]
+    output = check_output(cmd, stderr=STDOUT).decode("utf-8")
+
+    log("ceph-osd {osd_id} was updated by the action osd-{action} with "
+        "output: {output}".format(osd_id=osd_id, action=action, output=output))
+
+    return output
+
+
+def osd_in_out(action):
+    """Pause/Resume the ceph OSD units on the local machine only.
+
+    :param action: Either IN or OUT (see global constants)
+    :type action: string
+    :raises RuntimeError: if an unsupported action is used
+    :raises subprocess.CalledProcessError: if the ceph command fails
+    :raises OSError: if the unit can't get the local osd ids
+    """
+    if action not in (IN, OUT):
+        raise RuntimeError("Unknown action \"{}\"".format(action))
+
+    osds = parse_osds_arguments()
+    osds, failed_osds = check_osd_id(osds)
+
+    if failed_osds:
+        function_fail("invalid ceph OSD device id: "
+                      "{}".format(",".join(failed_osds)))
+        return
+
+    outputs = []
+    for osd_id in osds:
+        output = ceph_osd_upgrade(action, str(osd_id))
+        outputs.append(output)
+
+    function_set({
+        "message": "osd-{action} action was successfully executed for ceph "
+                   "OSD devices [{osds}]".format(action=action,
+                                                 osds=",".join(osds)),
+        "outputs": os.linesep.join(outputs)
+    })
+
+    assess_status()
+
+
+def osd_in():
+    """Shortcut to execute 'osd_in' action"""
+    osd_in_out(IN)
+
+
+def osd_out():
+    """Shortcut to execute 'osd_out' action"""
+    osd_in_out(OUT)
+
+
+# A dictionary of all the defined actions to callables (which take
+# parsed arguments).
+ACTIONS = {"osd-out": osd_out, "osd-in": osd_in}
+
+
+def main(args):
+    action_name = os.path.basename(args[0])
+    try:
+        action = ACTIONS[action_name]
+    except KeyError:
+        s = "Action {} undefined".format(action_name)
+        function_fail(s)
+        return s
+    else:
+        try:
+            action()
+        except Exception as e:
+            function_fail("Action {} failed: {}".format(action_name, str(e)))
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))
diff --git a/ceph-osd/actions/remove-disk b/ceph-osd/actions/remove-disk
new file mode 120000
index 00000000..29934df0
--- /dev/null
+++ b/ceph-osd/actions/remove-disk
@@ -0,0 +1 @@
+./remove_disk.py
\ No newline at end of file
diff --git a/ceph-osd/actions/remove_disk.py b/ceph-osd/actions/remove_disk.py
new file mode 100755
index 00000000..ec9e546f
--- /dev/null
+++ b/ceph-osd/actions/remove_disk.py
@@ -0,0 +1,375 @@
+#!/usr/bin/env python3
+#
+# Copyright 2021 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
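+
+# This action (see main() below) reweights each target OSD to 0, waits
+# until Ceph reports it safe to stop and destroy, stops and disables the
+# ceph-osd service, destroys (or purges) the OSD in the cluster, cleans up
+# any bcache/LVM state, and finally reports how the disks can be re-added
+# via the 'add-disk' action.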
+
+import datetime
+import errno
+import json
+from math import ceil
+import os
+import subprocess
+import sys
+import time
+
+sys.path.append('lib')
+sys.path.append('hooks')
+
+import charmhelpers.core.hookenv as hookenv
+from charmhelpers.core.hookenv import function_fail
+
+import charms_ceph.utils
+from charmhelpers.core.unitdata import kv
+from utils import (get_bcache_names, bcache_remove, device_size,
+                   get_parent_device, remove_lvm, wipefs_safely)
+
+
+def normalize_osd_id(osd_id):
+    """Make sure an OSD id has the form 'osd.<id>'.
+
+    :param osd_id: The OSD id, either a string or the integer ID.
+    :type osd_id: Option[int, str]
+
+    :returns: A string of the form 'osd.<id>'.
+    :rtype: str
+    """
+    if not isinstance(osd_id, str) or not osd_id.startswith('osd.'):
+        osd_id = 'osd.' + str(osd_id)
+    return osd_id
+
+
+def get_device_map():
+    """Get a list of (osd.id, device-path) pairs for every device that
+    is being used by a local OSD.
+
+    :returns: A list of OSD ids and devices.
+    :rtype: list[dict['id', 'path']]
+    """
+    ret = []
+    vlist = subprocess.check_output(['ceph-volume', 'lvm', 'list',
+                                     '--format=json'])
+    for osd_id, data in json.loads(vlist.decode('utf8')).items():
+        osd_id = normalize_osd_id(osd_id)
+        for elem in data:
+            for device in elem['devices']:
+                ret.append({'id': osd_id, 'path': device})
+    return ret
+
+
+def map_device_to_id(dev_map, device):
+    """Get the OSD id for a device or bcache name.
+
+    :param dev_map: A map with the same form as that returned by
+                    the function 'get_device_map'.
+    :type dev_map: list[dict['id', 'path']]
+
+    :param device: The path to the device.
+    :type device: str
+
+    :returns: The OSD id in use by the device, if any.
+    :rtype: Option[None, str]
+    """
+    for elem in dev_map:
+        if device == elem['path']:
+            return elem['id']
+
+
+def map_id_to_device(dev_map, osd_id):
+    """Get the device path for an OSD id.
+
+    :param dev_map: A map with the same form as that returned by
+                    the function 'get_device_map'.
+    :type dev_map: list[dict['id', 'path']]
+
+    :param osd_id: The OSD id to check against.
+    :type osd_id: str
+
+    :returns: The device path being used by the OSD id, if any.
+    :rtype: Option[None, str]
+    """
+    for elem in dev_map:
+        if elem['id'] == osd_id:
+            return elem['path']
+
+
+def safe_to_destroy(osd_id, timeout=300):
+    """Test whether an OSD id is safe to destroy per the Ceph cluster."""
+    ret = subprocess.call([
+        'ceph', '--id', 'osd-removal',
+        'osd', 'safe-to-destroy', osd_id
+    ], timeout=timeout)
+    return ret == 0
+
+
+def safe_to_stop(osd_id, timeout=300):
+    """Test whether an OSD is safe to stop."""
+    ret = subprocess.call([
+        'ceph', '--id', 'osd-removal',
+        'osd', 'ok-to-stop', osd_id
+    ], timeout=timeout)
+    return ret == 0
+
+
+def reweight_osd(osd_id, timeout=300):
+    """Set the weight of the OSD id to zero."""
+    subprocess.check_call([
+        'ceph', '--id', 'osd-removal',
+        'osd', 'crush', 'reweight', osd_id, '0'
+    ], timeout=timeout)
+
+
+def destroy(osd_id, purge=False, timeout=600):
+    """Destroy or purge an OSD id."""
+    for _ in range(10):
+        # We might get here before the OSD is marked as down. As such,
+        # retry if the error code is EBUSY.
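+        # (Retries are capped at 10 attempts, 0.1s apart; any return code
+        # other than EBUSY is re-raised immediately.)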
+        try:
+            subprocess.check_call([
+                'ceph', '--id', 'osd-removal', 'osd',
+                'purge' if purge else 'destroy',
+                osd_id, '--yes-i-really-mean-it'
+            ], timeout=timeout)
+            return
+        except subprocess.CalledProcessError as e:
+            if e.returncode != errno.EBUSY:
+                raise
+            time.sleep(0.1)
+
+
+class RemoveException(Exception):
+    """Exception type used to notify of errors for this action."""
+    pass
+
+
+class ActionOSD:
+
+    """Class used to encapsulate all the needed information to
+    perform OSD removal."""
+
+    def __init__(self, dev_map, dev=None, osd_id=None, aliases={}):
+        """Construct an action-OSD.
+
+        :param dev_map: A map with the same form as that returned by
+                        the function 'get_device_map'.
+        :type dev_map: list[dict['id', 'path']]
+
+        :param dev: The device being used by an OSD.
+        :type dev: Option[None, str]
+
+        :param osd_id: The OSD id.
+        :type osd_id: Option[None, int, str]
+        """
+        if dev is not None:
+            if osd_id is not None:
+                raise RemoveException(
+                    'osd-ids and osd-devices are mutually exclusive')
+            elif dev in aliases:
+                self.alias = dev
+                self.device = aliases.get(dev)
+            else:
+                self.device, self.alias = dev, None
+
+            self.osd_id = map_device_to_id(dev_map, self.device)
+            self.bcache_backing, self.bcache_caching = \
+                get_bcache_names(self.device)
+            if self.osd_id is None:
+                raise RemoveException('Device {} is not being used'
+                                      .format(self.device))
+        else:
+            self.alias = None
+            self.osd_id = normalize_osd_id(osd_id)
+            self.device = map_id_to_device(dev_map, self.osd_id)
+            if self.device is None:
+                raise RemoveException('Invalid osd ID: {}'.format(self.osd_id))
+
+            self.bcache_backing, self.bcache_caching = \
+                get_bcache_names(self.device)
+
+        self.report = {}   # maps device -> actions.
+
+    @property
+    def osd_device(self):
+        return self.bcache_backing or self.device
+
+    def remove(self, purge, timeout, force):
+        """Remove the OSD from the cluster.
+
+        :param purge: Whether to purge or just destroy the OSD.
+        :type purge: bool
+
+        :param timeout: The number of minutes to wait until the OSD
+                        is safe to destroy.
+        :type timeout: int
+
+        :param force: Whether to proceed with OSD removal, even when
+                      it's not safe to do so.
+        :type force: bool
+        """
+        # Set the CRUSH weight to 0.
+        hookenv.log('Reweighting OSD', hookenv.DEBUG)
+        reweight_osd(self.osd_id)
+
+        # Ensure that the OSD is safe to stop and destroy.
+        end = (datetime.datetime.now() +
+               datetime.timedelta(seconds=timeout * 60))
+        safe_stop, safe_destroy = False, False
+
+        while True:
+            if not safe_stop and safe_to_stop(self.osd_id):
+                safe_stop = True
+            if not safe_destroy and safe_to_destroy(self.osd_id):
+                safe_destroy = True
+
+            if safe_stop and safe_destroy:
+                break
+
+            curr = datetime.datetime.now()
+            if curr >= end:
+                if force:
+                    hookenv.log(
+                        'OSD not safe to destroy, but "force" was specified',
+                        hookenv.DEBUG)
+                    break
+
+                raise RemoveException(
+                    'timed out waiting for an OSD to be safe to destroy')
+            time.sleep(min(1, (end - curr).total_seconds()))
+
+        # Stop the OSD service.
+        hookenv.log('Stopping the OSD service', hookenv.DEBUG)
+        charms_ceph.utils.stop_osd(self.osd_id[4:])
+        charms_ceph.utils.disable_osd(self.osd_id[4:])
+        unit_filename = \
+            '/run/systemd/system/ceph-osd.target.wants/ceph-osd@{}.service' \
+            .format(self.osd_id[4:])
+        if os.path.exists(unit_filename):
+            os.remove(unit_filename)
+
+        subprocess.check_call(['systemctl', 'daemon-reload'])
+
+        # Remove the OSD from the cluster.
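+        # ('destroy' leaves the OSD id reusable by a replacement disk;
+        # 'purge', selected via the action's 'purge' parameter, removes
+        # the id entirely.)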
+        hookenv.log('Destroying the OSD', hookenv.DEBUG)
+        destroy(self.osd_id, purge)
+        report = self.report.setdefault(self.osd_device,
+                                        {'osd-ids': self.osd_id})
+
+        if self.bcache_backing:
+            # Remove anything related to bcache.
+            size = int(ceil(device_size(self.bcache_caching)))
+            caching = get_parent_device(self.bcache_caching)
+            report.update({'cache-devices': caching, 'partition-size': size})
+            bcache_remove(self.device, self.bcache_backing,
+                          self.bcache_caching)
+        else:
+            remove_lvm(self.device)
+            wipefs_safely(self.device)
+
+
+def make_same_length(l1, l2):
+    """Make sure two lists have the same length, padding with Nones."""
+    ln = max(len(l1), len(l2))
+    l1.extend([None] * (ln - len(l1)))
+    l2.extend([None] * (ln - len(l2)))
+
+
+def write_report(report, ftype):
+    """Generate a report on how to re-establish the removed disks as
+    part of the cluster again, then set the 'message' attribute to
+    either a JSON object or a textual representation.
+
+    :param report: The initial, raw report from the 'ActionOSD' objects.
+    :type report: dict
+
+    :param ftype: Either 'text' or 'json'; specifies the type of report
+    :type ftype: Enum['text', 'json']
+    """
+    if ftype == 'text':
+        msg = '{} disks have been removed\n'.format(len(report))
+        msg += 'To replace them, run:\n'
+        for device, action_args in report.items():
+            args = json.dumps(action_args, separators=(' ', '='))
+            args = args.replace('{', '').replace('}', '').replace('"', '')
+            msg += 'juju run {} add-disk {} {}'.format(
+                hookenv.local_unit(), 'osd-devices=' + device, args)
+    else:
+        msg = json.dumps(report)
+
+    hookenv.action_set({'message': msg})
+
+
+def get_list(key):
+    """Retrieve the action arguments based on the key as a list."""
+    ret = hookenv.action_get(key)
+    return ret.split() if ret else []
+
+
+def advertise_osd_count(count):
+    """Let the ceph-mon know of the updated OSD number."""
+    for relid in hookenv.relation_ids('mon'):
+        hookenv.relation_set(
+            relation_id=relid,
+            relation_settings={'bootstrapped-osds': count}
+        )
+
+
+def main():
+    osd_ids = get_list('osd-ids')
+    osd_devs = get_list('osd-devices')
+    purge = hookenv.action_get('purge')
+    force = hookenv.action_get('force')
+    timeout = hookenv.action_get('timeout')
+
+    if timeout <= 0:
+        function_fail('timeout must be > 0')
+        sys.exit(1)
+    elif not osd_ids and not osd_devs:
+        function_fail('One of osd-ids or osd-devices must be provided')
+        sys.exit(1)
+
+    make_same_length(osd_ids, osd_devs)
+    errors = []
+    report = {}
+    dev_map = get_device_map()
+    charm_devices = kv().get('osd-devices', [])
+    aliases = kv().get('osd-aliases', {})
+
+    for dev, osd_id in zip(osd_devs, osd_ids):
+        try:
+            action_osd = ActionOSD(dev_map, dev=dev, osd_id=osd_id,
+                                   aliases=aliases)
+            if action_osd.device not in charm_devices:
+                errors.append('Device {} not being used by Ceph'
+                              .format(action_osd.device))
+                continue
+            action_osd.remove(purge, timeout, force)
+            charm_devices.remove(action_osd.device)
+            if action_osd.alias:
+                aliases.pop(action_osd.alias)
+            report.update(action_osd.report)
+        except RemoveException as e:
+            errors.append(str(e))
+
+    kv().set('osd-devices', charm_devices)
+    kv().set('osd-aliases', aliases)
+    kv().flush()
+    advertise_osd_count(len(charm_devices))
+    write_report(report, hookenv.action_get('format'))
+
+    if errors:
+        function_fail('Failed to remove devices: {}'.format(','.join(errors)))
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/ceph-osd/actions/security-checklist b/ceph-osd/actions/security-checklist
new file mode 120000
index
00000000..47464970 --- /dev/null +++ b/ceph-osd/actions/security-checklist @@ -0,0 +1 @@ +security_checklist.py \ No newline at end of file diff --git a/ceph-osd/actions/security_checklist.py b/ceph-osd/actions/security_checklist.py new file mode 100755 index 00000000..2013c772 --- /dev/null +++ b/ceph-osd/actions/security_checklist.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +# +# Copyright 2019 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys + +sys.path.append('hooks') + +import charmhelpers.contrib.openstack.audits as audits +from charmhelpers.contrib.openstack.audits import ( + openstack_security_guide, +) + + +# Via the openstack_security_guide above, we are running the following +# security assertions automatically: +# +# - validate-file-ownership +# - validate-file-permissions + + +def main(): + config = { + 'audit_type': audits.AuditType.OpenStackSecurityGuide, + 'files': openstack_security_guide.FILE_ASSERTIONS['ceph-osd'], + 'excludes': [ + 'validate-uses-keystone', + 'validate-uses-tls-for-glance', + 'validate-uses-tls-for-keystone', + ], + } + return audits.action_parse_results(audits.run(config)) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/ceph-osd/actions/service.py b/ceph-osd/actions/service.py new file mode 100755 index 00000000..b00f0cf5 --- /dev/null +++ b/ceph-osd/actions/service.py @@ -0,0 +1,165 @@ +#!/usr/bin/env python3 +# +# Copyright 2020 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import os +import sys +import shutil +import subprocess + + +sys.path.append('lib') +sys.path.append('hooks') + +from charmhelpers.core.hookenv import ( + function_fail, + log, +) + +from ceph_hooks import assess_status, update_apparmor +from utils import parse_osds_arguments, ALL + +START = 'start' +STOP = 'stop' + + +def systemctl_execute(action, services): + """ + Execute `systemctl` action on specified services. + + Action can be either 'start' or 'stop' (defined by global constants + START, STOP). Parameter `services` is list of service names on which the + action will be executed. If the parameter `services` contains constant + ALL, the action will be executed on all ceph-osd services. 
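+
+    For example, systemctl_execute(START, ['ceph-osd@0.service']) runs
+    `systemctl start ceph-osd@0.service`, while passing [ALL] targets
+    the ceph-osd.target unit instead.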
+
+    :param action: Action to be executed (start or stop)
+    :type action: str
+    :param services: List of services to be targeted by the action
+    :type services: list[str]
+    :return: None
+    """
+    if ALL in services:
+        cmd = ['systemctl', action, 'ceph-osd.target']
+    else:
+        cmd = ['systemctl', action] + services
+    subprocess.check_call(cmd, timeout=300)
+
+
+def osd_ids_to_service_names(osd_ids):
+    """
+    Transform a set of OSD IDs into the list of respective service names.
+
+    Example:
+    >>> osd_ids_to_service_names({0,1})
+    ['ceph-osd@0.service', 'ceph-osd@1.service']
+
+    :param osd_ids: Set of service IDs to be converted
+    :type osd_ids: set[str | int]
+    :return: List of service names
+    :rtype: list[str]
+    """
+    service_list = []
+    for id_ in osd_ids:
+        if id_ == ALL:
+            service_list.append(ALL)
+        else:
+            service_list.append("ceph-osd@{}.service".format(id_))
+    return service_list
+
+
+def check_service_is_present(service_list):
+    """
+    Checks that every service from the `service_list` parameter exists
+    on the system. Raises RuntimeError if any service is missing.
+
+    :param service_list: List of systemd services
+    :type service_list: list[str]
+    :raises RuntimeError: if any service is missing
+    """
+    if ALL in service_list:
+        return
+
+    service_list_cmd = ['systemctl', 'list-units', '--full',
+                        '--all', '--no-pager', '-t', 'service']
+    present_services = subprocess.run(service_list_cmd,
+                                      stdout=subprocess.PIPE,
+                                      timeout=30).stdout.decode('utf-8')
+
+    missing_services = []
+    for service_name in service_list:
+        if service_name not in present_services:
+            missing_services.append(service_name)
+
+    if missing_services:
+        raise RuntimeError('Some services are not present on this '
+                           'unit: {}'.format(missing_services))
+
+
+def execute_action(action):
+    """Core implementation of the 'start'/'stop' actions
+
+    :param action: Either START or STOP (see global constants)
+    :return: None
+    """
+    if action not in (START, STOP):
+        raise RuntimeError('Unknown action "{}"'.format(action))
+
+    osds = parse_osds_arguments()
+    services = osd_ids_to_service_names(osds)
+
+    check_service_is_present(services)
+
+    systemctl_execute(action, services)
+
+    assess_status()
+
+
+def stop():
+    """Shortcut to execute 'stop' action"""
+    execute_action(STOP)
+
+
+def start():
+    """Shortcut to execute 'start' action"""
+    execute_action(START)
+
+
+ACTIONS = {'stop': stop,
+           'start': start,
+           'update-apparmor-and-restart-osds': update_apparmor,
+           }
+
+
+def main(args):
+    action_name = os.path.basename(args.pop(0))
+    try:
+        action = ACTIONS[action_name]
+    except KeyError:
+        s = "Action {} undefined".format(action_name)
+        function_fail(s)
+        return
+    else:
+        try:
+            log("Running action '{}'.".format(action_name))
+            if shutil.which('systemctl') is None:
+                raise RuntimeError("This action requires systemd")
+            action()
+        except Exception as e:
+            function_fail("Action '{}' failed: {}".format(action_name, str(e)))
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))
diff --git a/ceph-osd/actions/start b/ceph-osd/actions/start
new file mode 120000
index 00000000..12afe70c
--- /dev/null
+++ b/ceph-osd/actions/start
@@ -0,0 +1 @@
+service.py
\ No newline at end of file
diff --git a/ceph-osd/actions/stop b/ceph-osd/actions/stop
new file mode 120000
index 00000000..12afe70c
--- /dev/null
+++ b/ceph-osd/actions/stop
@@ -0,0 +1 @@
+service.py
\ No newline at end of file
diff --git a/ceph-osd/actions/update-apparmor-and-restart-osds b/ceph-osd/actions/update-apparmor-and-restart-osds
new file mode 120000
index 00000000..12afe70c
---
/dev/null +++ b/ceph-osd/actions/update-apparmor-and-restart-osds @@ -0,0 +1 @@ +service.py \ No newline at end of file diff --git a/ceph-osd/actions/zap-disk b/ceph-osd/actions/zap-disk new file mode 120000 index 00000000..0814a432 --- /dev/null +++ b/ceph-osd/actions/zap-disk @@ -0,0 +1 @@ +zap_disk.py \ No newline at end of file diff --git a/ceph-osd/actions/zap_disk.py b/ceph-osd/actions/zap_disk.py new file mode 100755 index 00000000..ec5ca1f2 --- /dev/null +++ b/ceph-osd/actions/zap_disk.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python3 +# +# Copyright 2018 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys + +sys.path.append('lib') +sys.path.append('hooks') + +import charmhelpers.core.hookenv as hookenv +from charmhelpers.contrib.storage.linux.utils import ( + is_block_device, + is_device_mounted, + zap_disk, +) +from charmhelpers.core.unitdata import kv +from charms_ceph.utils import is_active_bluestore_device +from charms_ceph.utils import is_mapped_luks_device +from charmhelpers.contrib.storage.linux.lvm import is_lvm_physical_volume +from charmhelpers.core.hookenv import log + + +class ZapDiskError(Exception): + pass + + +def get_devices(): + """Parse 'devices' action parameter, returns list.""" + devices, errors = [], [] + + for path in hookenv.action_get('devices').split(' '): + path = path.strip() + if not os.path.isabs(path): + errors.append('{}: Not absolute path.'.format(path)) + elif not os.path.exists(path): + errors.append('{}: Device does not exist.'.format(path)) + else: + devices.append(path) + + if errors: + raise ZapDiskError(", ".join(errors)) + + return devices + + +def zap(): + if not hookenv.action_get('i-really-mean-it'): + hookenv.action_fail('i-really-mean-it is a required parameter') + return + + failed_devices = [] + not_block_devices = [] + lvm_devices = [] + try: + devices = get_devices() + except ZapDiskError as error: + hookenv.action_fail("Failed due to: {}".format(error)) + return + + for device in devices: + if is_lvm_physical_volume(device): + lvm_devices.append(device) + if not is_block_device(device): + not_block_devices.append(device) + if (is_device_mounted(device) or + is_active_bluestore_device(device) or + is_mapped_luks_device(device)): + failed_devices.append(device) + + if lvm_devices or failed_devices or not_block_devices: + message = "" + if lvm_devices: + log('Cannot zap a device used by lvm') + message = "{} devices are lvm devices: {}".format( + len(lvm_devices), + ", ".join(lvm_devices)) + if failed_devices: + message += "{} devices are mounted: {}".format( + len(failed_devices), + ", ".join(failed_devices)) + if not_block_devices: + if len(message): + message += "\n\n" + message += "{} devices are not block devices: {}".format( + len(not_block_devices), + ", ".join(not_block_devices)) + hookenv.action_fail(message) + return + db = kv() + used_devices = db.get('osd-devices', []) + for device in devices: + zap_disk(device) + if device in used_devices: + used_devices.remove(device) + db.set('osd-devices', 
used_devices) + db.flush() + hookenv.action_set({ + 'message': "{} disk(s) have been zapped, to use them as OSDs, run: \n" + "juju run {} add-disk osd-devices=\"{}\"".format( + len(devices), + hookenv.local_unit(), + " ".join(devices)) + }) + + +if __name__ == "__main__": + zap() diff --git a/ceph-osd/build-requirements.txt b/ceph-osd/build-requirements.txt new file mode 100644 index 00000000..b6d2452f --- /dev/null +++ b/ceph-osd/build-requirements.txt @@ -0,0 +1,7 @@ +# NOTES(lourot): +# * We don't install charmcraft via pip anymore because it anyway spins up a +# container and scp the system's charmcraft snap inside it. So the charmcraft +# snap is necessary on the system anyway. +# * `tox -e build` successfully validated with charmcraft 1.2.1 + +cffi==1.14.6; python_version < '3.6' # cffi 1.15.0 drops support for py35. diff --git a/ceph-osd/charm-helpers-hooks.yaml b/ceph-osd/charm-helpers-hooks.yaml new file mode 100644 index 00000000..da52e9c8 --- /dev/null +++ b/ceph-osd/charm-helpers-hooks.yaml @@ -0,0 +1,27 @@ +repo: https://github.com/juju/charm-helpers +destination: hooks/charmhelpers +include: + - core + - cli + - osplatform + - fetch + - contrib.hahelpers: + - apache + - cluster + - contrib.python + - contrib.storage.linux + - contrib.openstack + - contrib.network.ip + - contrib.openstack: + - alternatives + - audits + - context + - exceptions + - ip + - neutron + - utils + - contrib.charmsupport + - contrib.hardening|inc=* + - contrib.hardware + - contrib.openstack.policyd + - contrib.openstack.templates|inc=*/section-ceph-bluestore-compression diff --git a/ceph-osd/charmcraft.yaml b/ceph-osd/charmcraft.yaml new file mode 100644 index 00000000..20675a9e --- /dev/null +++ b/ceph-osd/charmcraft.yaml @@ -0,0 +1,21 @@ +type: charm + +parts: + charm: + plugin: dump + source: . + +base: ubuntu@22.04 +platforms: + amd64: + build-on: amd64 + build-for: amd64 + arm64: + build-on: arm64 + build-for: arm64 + ppc64el: + build-on: ppc64el + build-for: ppc64el + s390x: + build-on: s390x + build-for: s390x diff --git a/ceph-osd/config.yaml b/ceph-osd/config.yaml new file mode 100644 index 00000000..838261fb --- /dev/null +++ b/ceph-osd/config.yaml @@ -0,0 +1,488 @@ +options: + loglevel: + type: int + default: 1 + description: OSD debug level. Max is 20. + source: + type: string + default: caracal + description: | + Optional configuration to support use of additional sources such as: + . + - ppa:myteam/ppa + - cloud:bionic-ussuri + - cloud:xenial-proposed/queens + - http://my.archive.com/ubuntu main + . + The last option should be used in conjunction with the key configuration + option. + key: + type: string + default: + description: | + Key ID to import to the apt keyring to support use with arbitrary source + configuration from outside of Launchpad archives or PPA's. + The accepted formats should be a GPG key in ASCII armor format, + including BEGIN and END markers or a keyid. + use-syslog: + type: boolean + default: False + description: | + If set to True, supporting services will log to syslog. + harden: + type: string + default: + description: | + Apply system hardening. Supports a space-delimited list of modules + to run. Supported modules currently include os, ssh, apache and mysql. + config-flags: + type: string + default: + description: | + User provided Ceph configuration. Supports a string representation of + a python dictionary where each top-level key represents a section in + the ceph.conf template. You may only use sections supported in the + template. + . 
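+      As an illustration (the option name and value are examples only),
+      setting config-flags to "{ osd: { 'osd max write size': 512 } }" would
+      render under the [osd] section of ceph.conf.
+      .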
+ WARNING: this is not the recommended way to configure the underlying + services that this charm installs and is used at the user's own risk. + This option is mainly provided as a stop-gap for users that either + want to test the effect of modifying some config or who have found + a critical bug in the way the charm has configured their services + and need it fixed immediately. We ask that whenever this is used, + that the user consider opening a bug on this charm at + http://bugs.launchpad.net/charms providing an explanation of why the + config was needed so that we may consider it for inclusion as a + natively supported config in the charm. + osd-devices: + type: string + default: + description: | + The devices to format and set up as OSD volumes, space separated. + . + These devices are the range of devices that will be checked for and + used across all service units, in addition to any volumes attached + via the --storage flag during deployment. + Any devices not found will be ignored. + . + For ceph < 14.2.0 (Nautilus) these can also be directories instead of + devices. If the value does not start with "/dev" then it will be + interpreted as a directory. + NOTE: if the value does not start with "/dev" then apparmor + "enforce" profile is not supported. + + bdev-enable-discard: + type: string + default: auto + description: | + Enables async discard on devices. This option will enable/disable both + bdev-enable-discard and bdev-async-discard options in ceph configuration + at the same time. The default value "auto" will try to autodetect and + should work in most cases. If you need to force a behaviour you can + set it to "enable" or "disable". Only applies for Ceph Mimic or later. + osd-journal: + type: string + default: + description: | + The devices to use as shared journal drives for all OSDs on a node, space separated. + By default a journal partition will be created on each OSD volume device for + use by that OSD. The default behaviour is also the fallback for the case + where the specified journal device does not exist on a node. + . + Only supported with ceph >= 0.48.3. + bluestore-wal: + type: string + default: + description: | + Path to BlueStore WAL block devices or files, space separated. + Should only be set if using + a separate physical device that is faster than the DB device (such as an + NVDIMM or faster SSD). Otherwise BlueStore automatically maintains the + WAL inside of the DB device. This block device is used as an LVM PV and + then space is allocated for each block device as needed based on the + bluestore-block-wal-size setting. + bluestore-db: + type: string + default: + description: | + Path to BlueStore WAL db block devices or files, space separated. + If you have a separate + physical device faster than the block device this will store all of the + filesystem metadata (RocksDB) there and also integrates the Write Ahead + Log (WAL) unless a further separate bluestore-wal device is configured + which is not needed unless it is faster again than the bluestore-db + device. This block device is used as an LVM PV and then space is + allocated for each block device as needed based on the + bluestore-block-db-size setting. + osd-journal-size: + type: int + default: 1024 + description: | + Ceph OSD journal size. The journal size should be at least twice the + product of the expected drive speed multiplied by filestore max sync + interval. 
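+      For example (illustrative numbers), a drive that sustains 100 MB/s
+      with a 5 s filestore max sync interval needs a journal of at least
+      2 * 100 MB/s * 5 s = 1000 MB.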
However, the most common practice is to partition the journal + drive (often an SSD), and mount it such that Ceph uses the entire + partition for the journal. + . + Only supported with ceph >= 0.48.3. + bluestore-block-wal-size: + type: int + default: 0 + description: | + Size (in bytes) of a partition, file or LV to use for + BlueStore WAL (RocksDB WAL), provided on a per backend device basis. + . + Example: 128 GB device, 8 data devices provided in "osd-devices" + gives 128 / 8 GB = 16 GB = 16000000000 bytes per device. + . + A default value is not set as it is calculated by ceph-disk (before Luminous) + or the charm itself, when ceph-volume is used (Luminous and above). + bluestore-block-db-size: + type: int + default: 0 + description: | + Size (in bytes) of a partition, file or LV to use for BlueStore + metadata or RocksDB SSTs, provided on a per backend device basis. + . + Example: 128 GB device, 8 data devices provided in "osd-devices" + gives 128 / 8 GB = 16 GB = 16000000000 bytes per device. + . + A default value is not set as it is calculated by ceph-disk (before Luminous) + or the charm itself, when ceph-volume is used (Luminous and above). + osd-format: + type: string + default: xfs + description: | + Format of filesystem to use for OSD devices. Supported formats include: + . + xfs (Default with >= ceph 0.48.3) + ext4 (Only option < ceph 0.48.3) + btrfs (experimental and not recommended) + . + Only supported with >= ceph 0.48.3. + . + Used with FileStore storage backend. + . + Always applies prior to ceph 12.2.0. Otherwise, only applies when the + "bluestore" option is False. + osd-encrypt: + type: boolean + default: False + description: | + By default, the charm will not encrypt Ceph OSD devices; however, by + setting osd-encrypt to True, Ceph's dmcrypt support will be used to + encrypt OSD devices. + . + Specifying this option on a running Ceph OSD node will have no effect + until new disks are added, at which point new disks will be encrypted. + osd-encrypt-keymanager: + type: string + default: ceph + description: | + Keymanager to use for storage of dm-crypt keys used for OSD devices; + by default 'ceph' itself will be used for storage of keys, making use + of the key/value storage provided by the ceph-mon cluster. + . + Alternatively 'vault' may be used for storage of dm-crypt keys. Both + approaches ensure that keys are never written to the local filesystem. + This also requires a relation to the vault charm. + crush-initial-weight: + type: float + default: + description: | + The initial crush weight for newly added osds into crushmap. Use this + option only if you wish to set the weight for newly added OSDs in order + to gradually increase the weight over time. Be very aware that setting + this overrides the default setting, which can lead to imbalance in the + cluster, especially if there are OSDs of different sizes in use. By + default, the initial crush weight for the newly added osd is set to its + volume size in TB. Leave this option unset to use the default provided + by Ceph itself. This option only affects NEW OSDs, not existing ones. + osd-max-backfills: + type: int + default: + description: | + The maximum number of backfills allowed to or from a single OSD. + . + Setting this option on a running Ceph OSD node will not affect running + OSD devices, but will add the setting to ceph.conf for the next restart. + osd-recovery-max-active: + type: int + default: + description: | + The number of active recovery requests per OSD at one time. 
More requests
+      will accelerate recovery, but the requests place an increased load on the
+      cluster.
+      .
+      Setting this option on a running Ceph OSD node will not affect running
+      OSD devices, but will add the setting to ceph.conf for the next restart.
+  tune-osd-memory-target:
+    type: string
+    default:
+    description: |
+      Set to tune the value of osd_memory_target.
+
+      If unset or set to an empty string,
+      the charm will not update the value for ceph.
+      This means that a new deployment with this value unset will default to ceph's default (4GB).
+      If a value was set but later unset, ceph will remain configured with the last set value.
+      This allows the value to be configured manually in ceph without interference from the charm.
+
+      If set to "{n}%" (where n is an integer), the value will be set as follows:
+
+      total ram * (n/100) / number of osds on the host
+
+      If set to "{n}GB" (n is an integer), osd_memory_target will be set per OSD directly.
+
+      Take care when choosing a value that it both provides enough memory for ceph
+      and leaves enough memory for the system and other workloads to function.
+      For common cases,
+      it is recommended to stay within the bounds of 4GB < value < 90% of system memory.
+      If these bounds are broken, a warning will be emitted by the charm,
+      but the value will still be set.
+  ignore-device-errors:
+    type: boolean
+    default: False
+    description: |
+      By default, the charm will raise errors if a whitelisted device is found,
+      but for some reason the charm is unable to initialize the device for use
+      by Ceph.
+      .
+      Setting this option to 'True' will result in the charm classifying such
+      problems as warnings only and will not result in a hook error.
+  ephemeral-unmount:
+    type: string
+    default:
+    description: |
+      Cloud instances provide ephemeral storage which is normally mounted
+      on /mnt.
+      .
+      Setting this option to the path of the ephemeral mountpoint will force
+      an unmount of the corresponding device so that it can be used as an OSD
+      storage device. This is useful for testing purposes (cloud deployment
+      is not a typical use case).
+  ceph-public-network:
+    type: string
+    default:
+    description: |
+      The IP address and netmask of the public (front-side) network (e.g.,
+      192.168.0.0/24).
+      .
+      If multiple networks are to be used, a space-delimited list of a.b.c.d/x
+      can be provided.
+  ceph-cluster-network:
+    type: string
+    default:
+    description: |
+      The IP address and netmask of the cluster (back-side) network (e.g.,
+      192.168.0.0/24).
+      .
+      If multiple networks are to be used, a space-delimited list of a.b.c.d/x
+      can be provided.
+  prefer-ipv6:
+    type: boolean
+    default: False
+    description: |
+      If True, enables IPv6 support. The charm will expect network interfaces
+      to be configured with an IPv6 address. If set to False (the default),
+      IPv4 is expected.
+      .
+      NOTE: these charms do not currently support the IPv6 privacy extension.
+      In order for this charm to function correctly, the privacy extension
+      must be disabled and a non-temporary address must be
+      configured/available on your network interface.
+  sysctl:
+    type: string
+    default: '{ kernel.pid_max : 2097152, vm.max_map_count : 524288,
+                kernel.threads-max: 2097152 }'
+    description: |
+      YAML-formatted associative array of sysctl key/value pairs to be set
+      persistently. By default we set pid_max, max_map_count and
+      threads-max to a high value to avoid problems with large numbers (>20)
+      of OSDs recovering. Very large clusters should set those values even
+      higher (e.g.
max for kernel.pid_max is 4194303). + customize-failure-domain: + type: boolean + default: false + description: | + Setting this to true will tell Ceph to replicate across Juju's + Availability Zone instead of specifically by host. + availability_zone: + type: string + default: + description: | + Custom availability zone to provide to Ceph for the OSD placement + max-sectors-kb: + type: int + default: 1048576 + description: | + This parameter will adjust every block device in your server to allow + greater IO operation sizes. If you have a RAID card with cache on it + consider tuning this much higher than the 1MB default. 1MB is a safe + default for spinning HDDs that don't have much cache. + nagios_context: + type: string + default: "juju" + description: | + Used by the nrpe-external-master subordinate charm. + A string that will be prepended to instance name to set the hostname + in nagios. So for instance the hostname would be something like: + . + juju-myservice-0 + . + If you're running multiple environments with the same services in them + this allows you to differentiate between them. + nagios_servicegroups: + type: string + default: "" + description: | + A comma-separated list of nagios servicegroups. + If left empty, the nagios_context will be used as the servicegroup + use-direct-io: + type: boolean + default: True + description: Configure use of direct IO for OSD journals. + autotune: + type: boolean + default: False + description: | + Enabling this option will attempt to tune your network card sysctls and + hard drive settings. This changes hard drive read ahead settings and + max_sectors_kb. For the network card this will detect the link speed + and make appropriate sysctl changes. + WARNING: This option is DEPRECATED and will be removed in the next release. + Exercise caution when enabling this feature; examine and + confirm sysctl values are appropriate for your environment. See + http://pad.lv/1798794 for a full discussion. + aa-profile-mode: + type: string + default: 'disable' + description: | + Enable apparmor profile. Valid settings: 'complain', 'enforce' or + 'disable'. + . + NOTE: changing the value of this option is disruptive to a running Ceph + cluster as all ceph-osd processes must be restarted as part of changing + the apparmor profile enforcement mode. Always test in pre-production + before enabling AppArmor on a live cluster. + NOTE: apparmor 'enforce' profile is supported only if osd-device + name starts with "/dev" + bluestore-compression-algorithm: + type: string + default: lz4 + description: | + The default compressor to use (if any) if the per-pool property + compression_algorithm is not set. + . + NOTE: The recommended approach is to adjust this configuration option on + the charm responsible for creating the specific pool you are interested + in tuning. Changing the configuration option on the ceph-osd charm will + affect ALL pools on the OSDs managed by the named application of the + ceph-osd charm in the Juju model. + bluestore-compression-mode: + type: string + default: + description: | + The default policy for using compression if the per-pool property + compression_mode is not set. 'none' means never use compression. + 'passive' means use compression when clients hint that data is + compressible. 'aggressive' means use compression unless clients hint that + data is not compressible. 'force' means use compression under all + circumstances even if the clients hint that the data is not compressible. + . 
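The NOTE repeated above and below for each bluestore-compression option recommends tuning the specific pool rather than the whole ceph-osd application. A minimal, hedged sketch of what such a per-pool override can look like (`ceph osd pool set` is the stock Ceph CLI; the pool name is an invented example):

```python
# Hedged sketch: set the per-pool compression_mode property instead of
# changing the ceph-osd charm option, per the NOTE in the descriptions.
import subprocess

POOL = "glance"  # example pool name, not something this charm creates itself

subprocess.check_call(
    ["ceph", "osd", "pool", "set", POOL, "compression_mode", "aggressive"])
```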
+      NOTE: The recommended approach is to adjust this configuration option on
+      the charm responsible for creating the specific pool you are interested
+      in tuning. Changing the configuration option on the ceph-osd charm will
+      affect ALL pools on the OSDs managed by the named application of the
+      ceph-osd charm in the Juju model.
+  bluestore-compression-required-ratio:
+    type: float
+    default:
+    description: |
+      The size of a data chunk after compression, relative to its original
+      size, must be no larger than this ratio for the compressed version to
+      be stored. The per-pool property `compression-required-ratio` overrides
+      this setting.
+      .
+      NOTE: The recommended approach is to adjust this configuration option on
+      the charm responsible for creating the specific pool you are interested
+      in tuning. Changing the configuration option on the ceph-osd charm will
+      affect ALL pools on the OSDs managed by the named application of the
+      ceph-osd charm in the Juju model.
+  bluestore-compression-min-blob-size:
+    type: int
+    default:
+    description: |
+      Chunks smaller than this are never compressed. The per-pool property
+      `compression_min_blob_size` overrides this setting.
+      .
+      NOTE: The recommended approach is to adjust this configuration option on
+      the charm responsible for creating the specific pool you are interested
+      in tuning. Changing the configuration option on the ceph-osd charm will
+      affect ALL pools on the OSDs managed by the named application of the
+      ceph-osd charm in the Juju model.
+  bluestore-compression-min-blob-size-hdd:
+    type: int
+    default:
+    description: |
+      Default value of bluestore compression min blob size for rotational
+      media. The per-pool property `compression-min-blob-size-hdd` overrides
+      this setting.
+      .
+      NOTE: The recommended approach is to adjust this configuration option on
+      the charm responsible for creating the specific pool you are interested
+      in tuning. Changing the configuration option on the ceph-osd charm will
+      affect ALL pools on the OSDs managed by the named application of the
+      ceph-osd charm in the Juju model.
+  bluestore-compression-min-blob-size-ssd:
+    type: int
+    default:
+    description: |
+      Default value of bluestore compression min blob size for solid state
+      media. The per-pool property `compression-min-blob-size-ssd` overrides
+      this setting.
+      .
+      NOTE: The recommended approach is to adjust this configuration option on
+      the charm responsible for creating the specific pool you are interested
+      in tuning. Changing the configuration option on the ceph-osd charm will
+      affect ALL pools on the OSDs managed by the named application of the
+      ceph-osd charm in the Juju model.
+  bluestore-compression-max-blob-size:
+    type: int
+    default:
+    description: |
+      Chunks larger than this are broken into smaller blobs of at most this
+      size before being compressed. The per-pool property
+      `compression_max_blob_size` overrides this setting.
+      .
+      NOTE: The recommended approach is to adjust this configuration option on
+      the charm responsible for creating the specific pool you are interested
+      in tuning. Changing the configuration option on the ceph-osd charm will
+      affect ALL pools on the OSDs managed by the named application of the
+      ceph-osd charm in the Juju model.
+  bluestore-compression-max-blob-size-hdd:
+    type: int
+    default:
+    description: |
+      Default value of bluestore compression max blob size for rotational
+      media. The per-pool property `compression-max-blob-size-hdd` overrides
+      this setting.
+      .
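To make the required-ratio semantics above concrete, here is a small hedged example (0.875 is the commonly cited Ceph default for this ratio, but the charm leaves it unset; the chunk sizes are invented):

```python
# Worked example for bluestore-compression-required-ratio (assumed values).
required_ratio = 0.875   # commonly cited Ceph default; the charm default is unset
chunk_kib = 64           # original chunk size (example)
compressed_kib = 50      # size after compression (example)

store_compressed = (compressed_kib / chunk_kib) <= required_ratio
print(store_compressed)  # True: 50/64 ~= 0.78, small enough to keep compressed
```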
+ NOTE: The recommended approach is to adjust this configuration option on + the charm responsible for creating the specific pool you are interested + in tuning. Changing the configuration option on the ceph-osd charm will + affect ALL pools on the OSDs managed by the named application of the + ceph-osd charm in the Juju model. + bluestore-compression-max-blob-size-ssd: + type: int + default: + description: | + Default value of bluestore compression max blob size for solid state + media. The per-pool property `compression-max-blob-size-ssd` overrides + this setting. + . + NOTE: The recommended approach is to adjust this configuration option on + the charm responsible for creating the specific pool you are interested + in tuning. Changing the configuration option on the ceph-osd charm will + affect ALL pools on the OSDs managed by the named application of the + ceph-osd charm in the Juju model. diff --git a/ceph-osd/copyright b/ceph-osd/copyright new file mode 100644 index 00000000..c801b143 --- /dev/null +++ b/ceph-osd/copyright @@ -0,0 +1,16 @@ +Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0 + +Files: * +Copyright: 2012, Canonical Ltd. +License: Apache-2.0 + Licensed under the Apache License, Version 2.0 (the "License"); you may + not use this file except in compliance with the License. You may obtain + a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + License for the specific language governing permissions and limitations + under the License. diff --git a/ceph-osd/files/apparmor/usr.bin.ceph-osd b/ceph-osd/files/apparmor/usr.bin.ceph-osd new file mode 100644 index 00000000..ac11e4d9 --- /dev/null +++ b/ceph-osd/files/apparmor/usr.bin.ceph-osd @@ -0,0 +1,107 @@ +# vim:syntax=apparmor +#include + +/usr/bin/ceph-osd { + #include + #include + #include + #include + + /usr/bin/ceph-osd mr, + + capability setgid, + capability setuid, + capability dac_override, + capability dac_read_search, + + network inet stream, + network inet6 stream, + + /etc/ceph/* r, + /var/lib/charm/*/ceph.conf r, + + owner @{PROC}/@{pids}/auxv r, + owner @{PROC}/@{pids}/net/dev r, + owner @{PROC}/@{pids}/task/*/comm rw, + + @{PROC}/loadavg r, + @{PROC}/1/cmdline r, + @{PROC}/partitions r, + @{PROC}/sys/kernel/random/uuid r, + + /var/lib/ceph/** rwkl, + /srv/ceph/** rwkl, + + /var/log/ceph/* rwk, + + /{,var/}run/ceph/* rwk, + /{,var/}tmp/ r, + + / r, + /dev/ r, + /dev/** rwk, + /run/udev/data/* r, + /sys/bus/nd/devices/ r, + /sys/bus/nd/devices/** r, + /sys/devices/** r, + + /run/blkid/blkid.tab r, + + /bin/dash rix, + + /usr/bin/lsb_release rix, + /usr/share/distro-info/** r, + /etc/lsb-release r, + /etc/debian_version r, + + /usr/bin/sudo Px -> ceph-osd-sudo, +} + +profile ceph-osd-sudo flags=(attach_disconnected) { + #include + #include + #include + #include + + capability audit_write, + capability setgid, + capability setuid, + capability sys_resource, + + /usr/bin/sudo r, + /usr/libexec/sudo/* mr, + + /etc/default/locale r, + /etc/environment r, + /etc/security/limits.d/ r, + /etc/security/limits.d/* r, + /etc/sudo.conf r, + /etc/sudoers r, + /etc/sudoers.d/ r, + /etc/sudoers.d/* r, + + owner @{PROC}/1/limits r, + owner @{PROC}/@{pids}/stat r, + + /usr/sbin/nvme Cx, + /usr/sbin/smartctl Cx, + + profile /usr/sbin/nvme { + #include + + /usr/sbin/nvme 
r, + } + + profile /usr/sbin/smartctl { + #include + + capability sys_admin, + capability sys_rawio, + + /usr/sbin/smartctl r, + /var/lib/smartmontools/** r, + + /dev/* r, + /sys/devices/** r, + } +} diff --git a/ceph-osd/files/nagios/check_ceph_osd_services.py b/ceph-osd/files/nagios/check_ceph_osd_services.py new file mode 100755 index 00000000..cff66541 --- /dev/null +++ b/ceph-osd/files/nagios/check_ceph_osd_services.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2018 Canonical +# All Rights Reserved +# Author: Alex Kavanagh + +import os +import sys +from datetime import datetime, timedelta + +CRON_CHECK_TMPFILE = 'ceph-osd-checks' +NAGIOS_HOME = '/var/lib/nagios' +CACHE_MAX_AGE = timedelta(minutes=10) + +STATE_OK = 0 +STATE_WARNING = 1 +STATE_CRITICAL = 2 +STATE_UNKNOWN = 3 + + +def run_main(): + """Process the CRON_CHECK_TMP_FILE and see if any line is not OK. + + If a line is not OK, the main returns STATE_CRITICAL. + If there are no lines, or the file doesn't exist, it returns STATE_UNKNOWN + Otherwise it returns STATE_OK. + + :returns: nagios state 0,2 or 3 + """ + _tmp_file = os.path.join(NAGIOS_HOME, CRON_CHECK_TMPFILE) + + if not os.path.isfile(_tmp_file): + print("File '{}' doesn't exist".format(_tmp_file)) + return STATE_UNKNOWN + + try: + s = os.stat(_tmp_file) + if datetime.now() - datetime.fromtimestamp(s.st_mtime) > CACHE_MAX_AGE: + print("Status file is older than {}".format(CACHE_MAX_AGE)) + return STATE_CRITICAL + except Exception as e: + print("Something went wrong grabbing stats for the file: {}".format( + str(e))) + return STATE_UNKNOWN + + try: + with open(_tmp_file, 'rt') as f: + lines = f.readlines() + except Exception as e: + print("Something went wrong reading the file: {}".format(str(e))) + return STATE_UNKNOWN + + if not lines: + print("checked status file is empty: {}".format(_tmp_file)) + return STATE_UNKNOWN + + # finally, check that the file contains all ok lines. 
Unfortunately, it's
+    # not consistent across releases, but what is consistent is that the check
+    # command in the collect phase does fail, and so the start of the line is
+    # 'Failed'
+    state = STATE_OK
+    for line in lines:
+        print(line, end='')
+        if line.startswith('Failed'):
+            state = STATE_CRITICAL
+
+    return state
+
+
+if __name__ == '__main__':
+    sys.exit(run_main())
diff --git a/ceph-osd/files/nagios/check_ceph_status.py b/ceph-osd/files/nagios/check_ceph_status.py
new file mode 100755
index 00000000..843391d7
--- /dev/null
+++ b/ceph-osd/files/nagios/check_ceph_status.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+
+# Copyright (C) 2014 Canonical
+# All Rights Reserved
+# Author: Jacek Nykis
+
+import re
+import argparse
+import subprocess
+import nagios_plugin
+
+
+def check_ceph_status(args):
+    if args.status_file:
+        nagios_plugin.check_file_freshness(args.status_file, 3600)
+        with open(args.status_file, "rt", encoding='UTF-8') as f:
+            lines = f.readlines()
+    else:
+        lines = (subprocess
+                 .check_output(["ceph", "status"])
+                 .decode('UTF-8')
+                 .split('\n'))
+    status_data = dict(
+        line.strip().split(' ', 1) for line in lines if len(line) > 1)
+
+    if ('health' not in status_data or
+            'monmap' not in status_data or
+            'osdmap' not in status_data):
+        raise nagios_plugin.UnknownError('UNKNOWN: status data is incomplete')
+
+    if status_data['health'] != 'HEALTH_OK':
+        msg = 'CRITICAL: ceph health status: "{}'.format(status_data['health'])
+        if (len(status_data['health'].split(' '))) == 1:
+            a = iter(lines)
+            for line in a:
+                if re.search('health', line) is not None:
+                    msg1 = next(a)
+                    msg += " "
+                    msg += msg1.strip()
+                    break
+        msg += '"'
+        raise nagios_plugin.CriticalError(msg)
+
+    osds = re.search(r"^.*: (\d+) osds: (\d+) up, (\d+) in",
+                     status_data['osdmap'])
+    # Compare the counts numerically; comparing the matched strings would
+    # misorder multi-digit values (e.g. "10" < "9").
+    if int(osds.group(1)) > int(osds.group(2)):  # not all OSDs are "up"
+        msg = 'CRITICAL: Some OSDs are not up.
Total: {}, up: {}'.format( + osds.group(1), osds.group(2)) + raise nagios_plugin.CriticalError(msg) + print("All OK") + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Check ceph status') + parser.add_argument('-f', + '--file', + dest='status_file', + default=False, + help='Optional file with "ceph status" output') + args = parser.parse_args() + nagios_plugin.try_check(check_ceph_status, args) diff --git a/ceph-osd/files/nagios/collect_ceph_osd_services.py b/ceph-osd/files/nagios/collect_ceph_osd_services.py new file mode 100755 index 00000000..633148a2 --- /dev/null +++ b/ceph-osd/files/nagios/collect_ceph_osd_services.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2018 Canonical +# All Rights Reserved +# Author: Alex Kavanagh + +import os +import subprocess +from pwd import getpwnam + +# fasteners only exists in Bionic, so this will fail on xenial and trusty +try: + import fasteners +except ImportError: + fasteners = None + +SYSTEMD_SYSTEM = '/run/systemd/system' +LOCKFILE = '/var/lock/check-osds.lock' +CRON_CHECK_TMPFILE = 'ceph-osd-checks' +NAGIOS_HOME = '/var/lib/nagios' + + +def init_is_systemd(): + """Return True if the host system uses systemd, False otherwise.""" + if lsb_release()['DISTRIB_CODENAME'] == 'trusty': + return False + return os.path.isdir(SYSTEMD_SYSTEM) + + +def lsb_release(): + """Return /etc/lsb-release in a dict""" + d = {} + with open('/etc/lsb-release', 'r') as lsb: + for el in lsb: + k, v = el.split('=') + d[k.strip()] = v.strip() + return d + + +def get_osd_units(): + """Returns a list of strings, one for each unit that is live""" + cmd = '/bin/cat /var/lib/ceph/osd/ceph-*/whoami' + try: + output = (subprocess + .check_output([cmd], shell=True).decode('utf-8') + .split('\n')) + return [u for u in output if u] + except subprocess.CalledProcessError: + return [] + + +def do_status(): + if init_is_systemd(): + cmd = "/usr/local/lib/nagios/plugins/check_systemd.py ceph-osd@{}" + else: + cmd = "/sbin/status ceph-osd id={}" + + lines = [] + + for unit in get_osd_units(): + try: + output = (subprocess + .check_output(cmd.format(unit).split(), + stderr=subprocess.STDOUT) + .decode('utf-8')) + except subprocess.CalledProcessError as e: + output = ("Failed: check command raised: {}" + .format(e.output.decode('utf-8'))) + lines.append(output) + + _tmp_file = os.path.join(NAGIOS_HOME, CRON_CHECK_TMPFILE) + with open(_tmp_file, 'wt') as f: + f.writelines(lines) + + # In cis hardened environments check_ceph_osd_services cannot + # read _tmp_file due to restrained permissions (#LP1879667). + # Changing the owner of the file to nagios solves this problem. + # check_ceph_osd_services.py removes this file, so make + # sure that we change permissions on a file that exists. 
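Before the ownership fix that follows, it may help to sketch the overall contract between this collector and check_ceph_osd_services.py shown earlier: the collector writes one status line per OSD, and the check returns CRITICAL if any line starts with 'Failed' (the status lines below are invented examples):

```python
# Hedged sketch of the collector/check contract (example lines only).
status_lines = [
    "active (running) ceph-osd@0\n",            # a healthy-looking unit
    "Failed: check command raised: timeout\n",  # an unhealthy unit
]

# check_ceph_osd_services.py walks the cached file and goes CRITICAL (2)
# on any 'Failed' prefix, else stays OK (0):
state = 2 if any(line.startswith("Failed") for line in status_lines) else 0
print(state)  # 2
```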
+ nagios_uid = getpwnam('nagios').pw_uid + nagios_gid = getpwnam('nagios').pw_gid + if os.path.isfile(_tmp_file): + os.chown(_tmp_file, nagios_uid, nagios_gid) + + +def run_main(): + # on bionic we can interprocess lock; we don't do it for older platforms + if fasteners is not None: + lock = fasteners.InterProcessLock(LOCKFILE) + + if lock.acquire(blocking=False): + try: + do_status() + finally: + lock.release() + else: + do_status() + + +if __name__ == '__main__': + run_main() diff --git a/ceph-osd/files/nagios/collect_ceph_status.sh b/ceph-osd/files/nagios/collect_ceph_status.sh new file mode 100755 index 00000000..dbdd3acf --- /dev/null +++ b/ceph-osd/files/nagios/collect_ceph_status.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# Copyright (C) 2014 Canonical +# All Rights Reserved +# Author: Jacek Nykis + +LOCK=/var/lock/ceph-status.lock +lockfile-create -r2 --lock-name $LOCK > /dev/null 2>&1 +if [ $? -ne 0 ]; then + exit 1 +fi +trap "rm -f $LOCK > /dev/null 2>&1" exit + +DATA_DIR="/var/lib/nagios" +if [ ! -d $DATA_DIR ]; then + mkdir -p $DATA_DIR +fi + +ceph status >${DATA_DIR}/cat-ceph-status.txt diff --git a/ceph-osd/files/systemd/crimson-osd@.service b/ceph-osd/files/systemd/crimson-osd@.service new file mode 100644 index 00000000..3982fc97 --- /dev/null +++ b/ceph-osd/files/systemd/crimson-osd@.service @@ -0,0 +1,9 @@ +[Unit] +Description=Ceph object storage daemon crimson-osd.%i + +[Service] +Environment=CLUSTER=ceph +ExecStart=/usr/bin/crimson-osd -i %i +ExecStop=/usr/bin/kill -QUIT $MAINPID +User=ceph +Group=ceph diff --git a/ceph-osd/files/udev/95-charm-ceph-osd.rules b/ceph-osd/files/udev/95-charm-ceph-osd.rules new file mode 100644 index 00000000..418cb976 --- /dev/null +++ b/ceph-osd/files/udev/95-charm-ceph-osd.rules @@ -0,0 +1,11 @@ +# OSD LV (ceph-osd charm layout) +ACTION=="add", SUBSYSTEM=="block", \ + ENV{DEVTYPE}=="disk", \ + ENV{DM_LV_NAME}=="osd-*", \ + ENV{DM_VG_NAME}=="ceph-*", \ + OWNER:="ceph", GROUP:="ceph", MODE:="660" +ACTION=="change", SUBSYSTEM=="block", \ + ENV{DEVTYPE}=="disk", \ + ENV{DM_LV_NAME}=="osd-*", \ + ENV{DM_VG_NAME}=="ceph-*", \ + OWNER="ceph", GROUP="ceph", MODE="660" diff --git a/ceph-osd/hardening.yaml b/ceph-osd/hardening.yaml new file mode 100644 index 00000000..314bb385 --- /dev/null +++ b/ceph-osd/hardening.yaml @@ -0,0 +1,5 @@ +# Overrides file for contrib.hardening. See README.hardening in +# contrib.hardening for info on how to use this file. +ssh: + server: + use_pam: 'yes' # juju requires this diff --git a/ceph-osd/hooks/add-storage b/ceph-osd/hooks/add-storage new file mode 100755 index 00000000..d7a82411 --- /dev/null +++ b/ceph-osd/hooks/add-storage @@ -0,0 +1,21 @@ +#!/bin/bash +# shim used to determine that the ceph packages have been installed +# before running hook execution. The add-storage hook fires before +# the install hook in order to provide storage for charms which need +# it at install time, however the storage added for the ceph-osd +# application will be used to create OSDs, which require the ceph +# binaries, bootstrapping the node, etc. +# +# Note: this doesn't wait to ensure that ceph is bootstrapped because +# that logic is already existing in the charm's hook. + +IFS='/' read -r -a array <<< "$JUJU_UNIT_NAME" +LOCAL_UNIT="${array[0]}" +charm_ceph_conf="/var/lib/charm/$LOCAL_UNIT/ceph.conf" + +if ! test -e $charm_ceph_conf; then + juju-log "Ceph not yet installed." 
+ exit 0 +fi + +exec ./hooks/storage.real diff --git a/ceph-osd/hooks/ceph_hooks.py b/ceph-osd/hooks/ceph_hooks.py new file mode 100755 index 00000000..37e58307 --- /dev/null +++ b/ceph-osd/hooks/ceph_hooks.py @@ -0,0 +1,1082 @@ +#!/usr/bin/env python3 +# +# Copyright 2016-2021 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import base64 +import glob +import json +import netifaces +import os +import re +import shutil +import socket +import subprocess +import sys +import traceback + +import utils + +sys.path.append('lib') +import charms_ceph.utils as ceph +from charmhelpers.core import hookenv +from charmhelpers.core.hookenv import ( + log, + DEBUG, + ERROR, + INFO, + WARNING, + config, + relation_ids, + related_units, + relation_get, + relation_set, + relations_of_type, + Hooks, + local_unit, + UnregisteredHookError, + service_name, + status_get, + status_set, + storage_get, + storage_list, + application_version_set, +) +from charmhelpers.core.host import ( + add_to_updatedb_prunepath, + cmp_pkgrevno, + is_container, + get_total_ram, + lsb_release, + mkdir, + service_reload, + service_restart, + umount, + write_file, + CompareHostReleases, + file_hash, +) +from charmhelpers.fetch import ( + add_source, + apt_install, + apt_update, + filter_installed_packages, + get_upstream_version, +) +from charmhelpers.core.sysctl import create as create_sysctl +import charmhelpers.contrib.openstack.context as ch_context +from charmhelpers.contrib.openstack.context import ( + AppArmorContext, +) +from utils import ( + is_osd_bootstrap_ready, + import_osd_bootstrap_key, + import_osd_upgrade_key, + import_osd_removal_key, + import_client_crash_key, + import_pending_key, + get_host_ip, + get_networks, + assert_charm_supports_ipv6, + render_template, + get_public_addr, + get_cluster_addr, + get_blacklist, + get_journal_devices, + should_enable_discard, + _upgrade_keyring, +) +from charmhelpers.contrib.openstack.alternatives import install_alternative +from charmhelpers.contrib.network.ip import ( + get_ipv6_addr, + format_ipv6_addr, + get_relation_ip, +) +import charmhelpers.contrib.storage.linux.ceph as ch_ceph +from charmhelpers.contrib.storage.linux.utils import ( + is_device_mounted, + is_block_device, +) +from charmhelpers.contrib.charmsupport import nrpe +from charmhelpers.contrib.hardening.harden import harden + +from charmhelpers.contrib.openstack.utils import ( + clear_unit_paused, + clear_unit_upgrading, + get_os_codename_install_source, + is_unit_paused_set, + is_unit_upgrading_set, + set_unit_paused, + set_unit_upgrading, +) + +from charmhelpers.core.unitdata import kv + +import charmhelpers.contrib.openstack.vaultlocker as vaultlocker + +hooks = Hooks() +STORAGE_MOUNT_PATH = '/var/lib/ceph' + +# cron.d related files +CRON_CEPH_CHECK_FILE = '/etc/cron.d/check-osd-services' + + +class AppArmorProfileNeverInstalledException(Exception): + pass + + +def check_for_upgrade(): + + if not os.path.exists(_upgrade_keyring): + log("Ceph upgrade keyring not detected, skipping upgrade checks.") 
+ return + + c = hookenv.config() + old_version = ceph.resolve_ceph_version(c.previous('source') or + 'distro') + log('old_version: {}'.format(old_version)) + new_version = ceph.resolve_ceph_version(hookenv.config('source') or + 'distro') + log('new_version: {}'.format(new_version)) + + old_version_os = get_os_codename_install_source(c.previous('source') or + 'distro') + new_version_os = get_os_codename_install_source(hookenv.config('source')) + + # If the new version is reef, and we detect that we are running FileStore + # bail out with an error message + filestore_osds = utils.find_filestore_osds() + if new_version == 'reef' and filestore_osds: + log("Refuse to upgrade to reef with FileStore OSDs present: {}".format( + filestore_osds), level=ERROR) + return + + # May be in a previous upgrade that was failed if the directories + # still need an ownership update. Check this condition. + resuming_upgrade = ceph.dirs_need_ownership_update('osd') + + if (ceph.UPGRADE_PATHS.get(old_version) == new_version) or\ + resuming_upgrade: + if old_version == new_version: + log('Attempting to resume possibly failed upgrade.', + INFO) + else: + log("{} to {} is a valid upgrade path. Proceeding.".format( + old_version, new_version)) + + emit_cephconf(upgrading=True) + ceph.roll_osd_cluster(new_version=new_version, + upgrade_key='osd-upgrade') + emit_cephconf(upgrading=False) + notify_mon_of_upgrade(new_version) + elif (old_version == new_version and + old_version_os < new_version_os): + # See LP: #1778823 + add_source(hookenv.config('source'), hookenv.config('key')) + log(("The installation source has changed yet there is no new major " + "version of Ceph in this new source. As a result no package " + "upgrade will take effect. Please upgrade manually if you need " + "to."), level=INFO) + else: + # Log a helpful error message + log("Invalid upgrade path from {} to {}. " + "Valid paths are: {}".format(old_version, + new_version, + ceph.pretty_print_upgrade_paths()), + level=ERROR) + + +def notify_mon_of_upgrade(release): + for relation_id in relation_ids('mon'): + log('Notifying relation {} of upgrade to {}'.format( + relation_id, release)) + relation_set(relation_id=relation_id, + relation_settings=dict(ceph_release=release)) + + +def tune_network_adapters(): + interfaces = netifaces.interfaces() + for interface in interfaces: + if interface == "lo": + # Skip the loopback + continue + log("Looking up {} for possible sysctl tuning.".format(interface)) + ceph.tune_nic(interface) + + +def check_aa_profile_needs_update(): + """ + Compares the hash of a new AA profile and the previously installed one, + if one exists. + """ + db = kv() + for x in glob.glob('files/apparmor/*'): + db_key = 'hash:{}'.format(x) + previous_hash = db.get(db_key) + if previous_hash is None: + raise AppArmorProfileNeverInstalledException() + new_hash = file_hash(x) + if new_hash != previous_hash: + return True + return False + + +def _set_pending_apparmor_update_status(): + # Setting to active to avoid impact of other workflows + status_set('active', + ('Pending update-apparmor-and-restart-osds action required,' + ' please refer to the action documentation.')) + + +def aa_profile_changed(service_name='ceph-osd-all'): + """ + Reload AA profile and restart OSD processes. 
+    """
+    log("Loading new AppArmor profile")
+    service_reload('apparmor')
+    if config('aa-profile-mode') == 'disable':
+        # No need to restart services if AppArmor is not enabled
+        return
+    log("Restarting ceph-osd services with new AppArmor profile")
+    if ceph.systemd():
+        service_restart('ceph-osd.target')
+    else:
+        service_restart(service_name)
+    assess_status()
+
+
+def copy_profile_into_place():
+    """
+    Copy the apparmor profiles included with the charm
+    into the /etc/apparmor.d directory.
+
+    Files are only copied if they have changed at source,
+    to avoid overwriting any aa-complain mode flags set.
+
+    :returns: flag indicating if any profiles were newly
+              installed or changed
+    :rtype: boolean
+    """
+    db = kv()
+    changes = False
+    apparmor_dir = os.path.join(os.sep, 'etc', 'apparmor.d')
+    for x in glob.glob('files/apparmor/*'):
+        db_key = 'hash:{}'.format(x)
+        new_hash = file_hash(x)
+        previous_hash = db.get(db_key)
+        if new_hash != previous_hash:
+            log('Installing apparmor profile for {}'
+                .format(os.path.basename(x)))
+            shutil.copy(x, apparmor_dir)
+            db.set(db_key, new_hash)
+            db.flush()
+            changes = True
+    return changes
+
+
+class CephOsdAppArmorContext(AppArmorContext):
+    """Apparmor context for ceph-osd binary"""
+    def __init__(self):
+        super(CephOsdAppArmorContext, self).__init__()
+        self.aa_profile = 'usr.bin.ceph-osd'
+
+    def __call__(self):
+        super(CephOsdAppArmorContext, self).__call__()
+        if not self.ctxt:
+            return self.ctxt
+        self._ctxt.update({'aa_profile': self.aa_profile})
+        return self.ctxt
+
+
+def use_vaultlocker():
+    """Determine whether vaultlocker should be used for OSD encryption
+
+    :returns: whether vaultlocker should be used for key management
+    :rtype: bool
+    :raises: ValueError if vaultlocker is enabled but ceph < 12.2.4"""
+    if (config('osd-encrypt') and
+            config('osd-encrypt-keymanager') == ceph.VAULT_KEY_MANAGER):
+        if cmp_pkgrevno('ceph', '12.2.4') < 0:
+            msg = ('vault usage only supported with ceph >= 12.2.4')
+            status_set('blocked', msg)
+            raise ValueError(msg)
+        else:
+            return True
+    return False
+
+
+def update_apparmor():
+    """Action: Proceed to updating the profile and restarting OSDs."""
+    changes = copy_profile_into_place()
+    # NOTE(jamespage): If any profiles were changed or
+    #                  freshly installed then force
+    #                  re-assertion of the current profile mode
+    #                  to avoid complain->enforce side effects
+    if changes or config().changed('aa-profile-mode'):
+        aa_context = CephOsdAppArmorContext()
+        aa_context.setup_aa_profile()
+        aa_profile_changed()
+
+
+def install_apparmor_profile():
+    """
+    Install ceph apparmor profiles and configure
+    based on current setting of 'aa-profile-mode'
+    configuration option.
+    """
+    changes = False
+    try:
+        changes = check_aa_profile_needs_update()
+    except AppArmorProfileNeverInstalledException:
+        update_apparmor()
+        return
+    if not changes:
+        return
+    if config('aa-profile-mode') != 'disable':
+        log("Deferring update of AppArmor profiles to avoid "
+            "restarting ceph-osd services all at the same time.")
+        _set_pending_apparmor_update_status()
+    else:
+        update_apparmor()
+
+
+def install_udev_rules():
+    """
+    Install and reload udev rules for ceph-volume LV
+    permissions
+    """
+    if is_container():
+        log('Skipping udev rule installation '
+            'as unit is in a container', level=DEBUG)
+        return
+    for x in glob.glob('files/udev/*'):
+        shutil.copy(x, '/lib/udev/rules.d')
+    subprocess.check_call(['udevadm', 'control',
+                           '--reload-rules'])
+
+
+@hooks.hook('install.real')
+@harden()
+def install():
+    add_source(config('source'), config('key'))
+    apt_update(fatal=True)
+    packages = ceph.determine_packages()
+    # TODO(chrome0): temp. fix for bug #2064717; remove once this has been
+    # fixed
+    packages.append('python3-packaging')
+    apt_install(packages=packages, fatal=True)
+    if config('autotune'):
+        log('The autotune config is deprecated and planned '
+            'for removal in the next release.', level=WARNING)
+        tune_network_adapters()
+    install_udev_rules()
+
+
+def az_info():
+    az_info = ""
+    config_az = config("availability_zone")
+    juju_az_info = os.environ.get('JUJU_AVAILABILITY_ZONE')
+    if juju_az_info:
+        # NOTE(jamespage): avoid conflicting key with root
+        #                  of crush hierarchy
+        if juju_az_info == 'default':
+            juju_az_info = 'default-rack'
+        az_info = "{} rack={}".format(az_info, juju_az_info)
+    if config_az:
+        # NOTE(jamespage): avoid conflicting key with root
+        #                  of crush hierarchy
+        if config_az == 'default':
+            config_az = 'default-row'
+        az_info = "{} row={}".format(az_info, config_az)
+    if az_info != "":
+        log("AZ Info: " + az_info)
+        return az_info
+
+
+def use_short_objects():
+    '''
+    Determine whether OSDs should be configured with
+    limited object name lengths.
+
+    @return: boolean indicating whether OSDs should be limited
+    '''
+    if cmp_pkgrevno('ceph', "10.2.0") >= 0:
+        # NOTE: one-element tuple; a bare ('ext4') would be a string and
+        # `in` would then do substring matching.
+        if config('osd-format') in ('ext4',):
+            return True
+        devices = config('osd-devices')
+        if not devices:
+            return False
+
+        for device in devices.split():
+            if device and not device.startswith('/dev'):
+                # TODO: determine format of directory based
+                #       OSD location
+                return True
+    return False
+
+
+def warn_if_memory_outside_bounds(value):
+    """
+    Log a warning if value < 4GB or (value * osds) > 90% total memory.
+
+    :param value: int - proposed value for osd_memory_target in bytes
+    """
+    ninety_percent = int(0.9 * get_total_ram())
+    four_GB = 4 * 1024 * 1024 * 1024
+    num_osds = len(kv().get("osd-devices", []))
+
+    # 4GB is the default value; we don't want to go lower than that,
+    # otherwise performance will be impacted.
+    if value < four_GB:
+        log("tune-osd-memory-target results in value < 4GB. "
+            "This is not recommended.", level=WARNING)
+
+    # 90% is a somewhat arbitrary upper limit,
+    # that should allow enough memory for the OS to function,
+    # while not limiting ceph too much.
+    elif (value * num_osds) > ninety_percent:
+        log("tune-osd-memory-target results in value > 90% of system ram.
" + "This is not recommended.", level=WARNING) + + +def is_tune_osd_memory_target_valid() -> bool: + """ + Check if the tune-osd-memory-target value is valid + + :returns: True if valid, else False + :rtype: bool + """ + # NOTE: keep this logic in sync with get_osd_memory_target() + value = config('tune-osd-memory-target') + return not value or bool(re.match(r"\d+(?:GB|%)$", value)) + + +def get_osd_memory_target(): + """ + Processes the config value of tune-osd-memory-target. + + Returns a safe value for osd_memory_target. + + :returns: integer value for osd_memory_target, converted to a string. + :rtype: string + """ + tune_osd_memory_target = config('tune-osd-memory-target') + + if not tune_osd_memory_target: + return "" + + match = re.match(r"(\d+)GB$", tune_osd_memory_target) + if match: + osd_memory_target = int(match.group(1)) * 1024 * 1024 * 1024 + warn_if_memory_outside_bounds(osd_memory_target) + return str(osd_memory_target) + + match = re.match(r"(\d+)%$", tune_osd_memory_target) + if match: + percentage = int(match.group(1)) / 100 + num_osds = len(kv().get("osd-devices", [])) + osd_memory_target = int(get_total_ram() * percentage / num_osds) + warn_if_memory_outside_bounds(osd_memory_target) + return str(osd_memory_target) + + log("tune-osd-memory-target value invalid," + " leaving the OSD memory target unchanged", level=ERROR) + return "" + + +def get_ceph_context(upgrading=False): + """Returns the current context dictionary for generating ceph.conf + + :param upgrading: bool - determines if the context is invoked as + part of an upgrade procedure. Setting this to true + causes settings useful during an upgrade to be + defined in the ceph.conf file + """ + mon_hosts = get_mon_hosts() + log('Monitor hosts are ' + repr(mon_hosts)) + + networks = get_networks('ceph-public-network') + public_network = ', '.join(networks) + + networks = get_networks('ceph-cluster-network') + cluster_network = ', '.join(networks) + + cephcontext = { + 'auth_supported': get_auth(), + 'mon_hosts': ' '.join(mon_hosts), + 'fsid': get_fsid(), + 'old_auth': cmp_pkgrevno('ceph', "0.51") < 0, + 'crush_initial_weight': config('crush-initial-weight'), + 'osd_journal_size': config('osd-journal-size'), + 'osd_max_backfills': config('osd-max-backfills'), + 'osd_recovery_max_active': config('osd-recovery-max-active'), + 'use_syslog': str(config('use-syslog')).lower(), + 'ceph_public_network': public_network, + 'ceph_cluster_network': cluster_network, + 'loglevel': config('loglevel'), + 'dio': str(config('use-direct-io')).lower(), + 'short_object_len': use_short_objects(), + 'upgrade_in_progress': upgrading, + 'bluestore_experimental': cmp_pkgrevno('ceph', '12.1.0') < 0, + 'bluestore_block_wal_size': config('bluestore-block-wal-size'), + 'bluestore_block_db_size': config('bluestore-block-db-size'), + } + + try: + cephcontext['bdev_discard'] = get_bdev_enable_discard() + except ValueError as ex: + # the user set bdev-enable-discard to a non valid value, so logging the + # issue as a warning and falling back to False/disable + log(str(ex), level=WARNING) + cephcontext['bdev_discard'] = False + + if config('prefer-ipv6'): + cephcontext['ms_bind_ipv4'] = False + cephcontext['ms_bind_ipv6'] = True + dynamic_ipv6_address = get_ipv6_addr()[0] + if not public_network: + cephcontext['public_addr'] = dynamic_ipv6_address + if not cluster_network: + cephcontext['cluster_addr'] = dynamic_ipv6_address + else: + cephcontext['public_addr'] = get_public_addr() + cephcontext['cluster_addr'] = get_cluster_addr() + + if 
config('customize-failure-domain'):
+        az = az_info()
+        if az:
+            cephcontext['crush_location'] = "root=default {} host={}" \
+                .format(az, socket.gethostname())
+        else:
+            log(
+                "Your Juju environment doesn't "
+                "have support for Availability Zones"
+            )
+
+    # NOTE(dosaboy): these sections must correspond to what is supported in the
+    #                config template.
+    sections = ['global', 'osd']
+    cephcontext.update(
+        ch_ceph.CephOSDConfContext(permitted_sections=sections)())
+    cephcontext.update(
+        ch_context.CephBlueStoreCompressionContext()())
+    return cephcontext
+
+
+def emit_cephconf(upgrading=False):
+    # Install ceph.conf as an alternative to support
+    # co-existence with other charms that write this file
+    charm_ceph_conf = "/var/lib/charm/{}/ceph.conf".format(service_name())
+    mkdir(os.path.dirname(charm_ceph_conf), owner=ceph.ceph_user(),
+          group=ceph.ceph_user())
+    context = get_ceph_context(upgrading)
+    write_file(charm_ceph_conf, render_template('ceph.conf', context),
+               ceph.ceph_user(), ceph.ceph_user(), 0o644)
+    install_alternative('ceph.conf', '/etc/ceph/ceph.conf',
+                        charm_ceph_conf, 90)
+
+
+@hooks.hook('config-changed')
+@harden()
+def config_changed():
+    # Determine whether vaultlocker is required and install
+    if use_vaultlocker():
+        installed = len(filter_installed_packages(['vaultlocker'])) == 0
+        if not installed:
+            apt_install('vaultlocker', fatal=True)
+
+    # Check if an upgrade was requested
+    check_for_upgrade()
+
+    # Preflight checks
+    if config('osd-format') not in ceph.DISK_FORMATS:
+        log('Invalid OSD disk format configuration specified', level=ERROR)
+        sys.exit(1)
+
+    if config('prefer-ipv6'):
+        assert_charm_supports_ipv6()
+
+    sysctl_dict = config('sysctl')
+    if sysctl_dict:
+        create_sysctl(sysctl_dict, '/etc/sysctl.d/50-ceph-osd-charm.conf')
+
+    for r_id in hookenv.relation_ids('mon'):
+        hookenv.relation_set(
+            relation_id=r_id,
+            relation_settings={
+                'osd-host': socket.gethostname(),
+                'osd-memory-target': get_osd_memory_target(),
+            }
+        )
+
+    e_mountpoint = config('ephemeral-unmount')
+    if e_mountpoint and ceph.filesystem_mounted(e_mountpoint):
+        umount(e_mountpoint)
+    prepare_disks_and_activate()
+    install_apparmor_profile()
+    add_to_updatedb_prunepath(STORAGE_MOUNT_PATH)
+
+
+@hooks.hook('storage.real')
+def prepare_disks_and_activate():
+    if use_vaultlocker():
+        # NOTE: vault/vaultlocker preflight check
+        vault_kv = vaultlocker.VaultKVContext(vaultlocker.VAULTLOCKER_BACKEND)
+        context = vault_kv()
+        if not vault_kv.complete:
+            log('Deferring OSD preparation as vault not ready',
+                level=DEBUG)
+            return
+        else:
+            log('Vault ready, writing vaultlocker configuration',
+                level=DEBUG)
+            vaultlocker.write_vaultlocker_conf(context)
+
+    osd_journal = get_journal_devices()
+    if not osd_journal.isdisjoint(set(get_devices())):
+        raise ValueError('`osd-journal` and `osd-devices` options must not '
+                         'overlap.')
+    log("got journal devs: {}".format(osd_journal), level=DEBUG)
+
+    # pre-flight check of eligible device pristinity
+    devices = get_devices()
+
+    # if a device has been previously touched we need to consider it as
+    # non-pristine. If it needs to be re-processed it has to be zapped
+    # via the respective action which also clears the unitdata entry.
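The unitdata entry mentioned in this comment is charmhelpers' unit-local key/value store; a minimal sketch of the bookkeeping pattern used for processed devices ('osd-devices' is the key the charm uses below; the device path is an invented example):

```python
# Hedged sketch of the unitdata bookkeeping for already-processed devices.
from charmhelpers.core.unitdata import kv

db = kv()
touched = db.get('osd-devices', [])   # devices this unit has already osdized
touched.append('/dev/vdb')            # example path only
db.set('osd-devices', touched)
db.flush()                            # persist immediately, outside hook auto-flush
```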
+ db = kv() + touched_devices = db.get('osd-devices', []) + devices = [dev for dev in devices if dev not in touched_devices] + log('Skipping osd devices previously processed by this unit: {}' + .format(touched_devices)) + # filter osd-devices that are file system paths + devices = [dev for dev in devices if dev.startswith('/dev')] + # filter osd-devices that does not exist on this unit + devices = [dev for dev in devices if os.path.exists(dev)] + # filter osd-devices that are already mounted + devices = [dev for dev in devices if not is_device_mounted(dev)] + # filter osd-devices that are active bluestore devices + devices = [dev for dev in devices + if not ceph.is_active_bluestore_device(dev)] + # filter osd-devices that are used as dmcrypt devices + devices = [dev for dev in devices + if not ceph.is_mapped_luks_device(dev)] + + log('Checking for pristine devices: "{}"'.format(devices), level=DEBUG) + if not all(ceph.is_pristine_disk(dev) for dev in devices): + status_set('blocked', + 'Non-pristine devices detected, consult ' + '`list-disks`, `zap-disk` and `blacklist-*` actions.') + return + + if is_osd_bootstrap_ready(): + log('ceph bootstrapped, rescanning disks') + emit_cephconf() + ceph.udevadm_settle() + for dev in get_devices(): + ceph.osdize(dev, config('osd-format'), + osd_journal, + config('ignore-device-errors'), + config('osd-encrypt'), + config('osd-encrypt-keymanager')) + # Make it fast! + if config('autotune'): + log('The autotune config is deprecated and planned ' + 'for removal in the next release.', level=WARNING) + ceph.tune_dev(dev) + ceph.start_osds(get_devices()) + + # Notify MON cluster as to how many OSD's this unit bootstrapped + # into the cluster + for r_id in relation_ids('mon'): + relation_set( + relation_id=r_id, + relation_settings={ + 'bootstrapped-osds': len(db.get('osd-devices', [])), + 'ceph_release': ceph.resolve_ceph_version( + hookenv.config('source') or 'distro' + ), + 'osd-host': socket.gethostname(), + 'osd-memory-target': get_osd_memory_target(), + } + ) + + +def get_mon_hosts(): + hosts = [] + for relid in relation_ids('mon'): + for unit in related_units(relid): + addr = \ + relation_get('ceph-public-address', + unit, + relid) or get_host_ip( + relation_get( + 'private-address', + unit, + relid)) + + if addr: + hosts.append('{}'.format(format_ipv6_addr(addr) or addr)) + + return sorted(hosts) + + +def get_fsid(): + return get_conf('fsid') + + +def get_auth(): + return get_conf('auth') + + +def get_conf(name): + for relid in relation_ids('mon'): + for unit in related_units(relid): + conf = relation_get(name, + unit, relid) + if conf: + return conf + return None + + +def get_devices(): + devices = [] + if config('osd-devices'): + for path in config('osd-devices').split(' '): + path = path.strip() + # Ensure that only block devices + # are considered for evaluation as block devices. + # This avoids issues with relative directories + # being passed via configuration, and ensures that + # the path to a block device provided by the user + # is used, rather than its target which may change + # between reboots in the case of bcache devices. 
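The comment above, about preferring the user-provided path over its resolved target, can be illustrated with a short hypothetical sketch (the alias path is invented; the behaviour described is the bcache quirk the comment refers to):

```python
# Why get_devices() records the provided alias rather than its realpath
# (hypothetical paths; bcache targets can change names across reboots).
import os

provided = '/dev/disk/by-dname/bcache0'  # stable user-facing alias (example)
resolved = os.path.realpath(provided)    # e.g. '/dev/bcache1' today

# The charm keeps `provided` when it is a block device, because `resolved`
# may point at a different /dev/bcacheN after the next reboot.
```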
+ if is_block_device(path): + devices.append(path) + # Make sure its a device which is specified using an + # absolute path so that the current working directory + # or any relative path under this directory is not used + elif os.path.isabs(path): + devices.append(os.path.realpath(path)) + + # List storage instances for the 'osd-devices' + # store declared for this charm too, and add + # their block device paths to the list. + storage_ids = storage_list('osd-devices') + devices.extend((storage_get('location', s) for s in storage_ids)) + + # Filter out any devices in the action managed unit-local device blacklist + _blacklist = get_blacklist() + return [device for device in devices if device not in _blacklist] + + +def get_bdev_enable_discard(): + bdev_enable_discard = config('bdev-enable-discard').lower() + if bdev_enable_discard in ['enable', 'enabled']: + return True + elif bdev_enable_discard == 'auto': + return should_enable_discard(get_devices()) + elif bdev_enable_discard in ['disable', 'disabled']: + return False + else: + raise ValueError(("Invalid value for configuration " + "bdev-enable-discard: %s") % bdev_enable_discard) + + +def handle_pending_key(pending_key): + for osd_id, key in json.loads(pending_key).items(): + if not os.path.exists('/var/lib/ceph/osd/ceph-%s' % osd_id): + continue + import_pending_key(key, osd_id) + service_restart('ceph-osd@%s' % osd_id) + + +@hooks.hook('mon-relation-changed') +def mon_relation(): + pending_key = relation_get('pending_key') + if pending_key: + handle_pending_key(pending_key) + return + + bootstrap_key = relation_get('osd_bootstrap_key') + upgrade_key = relation_get('osd_upgrade_key') + removal_key = relation_get('osd_disk_removal_key') + client_crash_key = relation_get('client_crash_key') + if get_fsid() and get_auth() and bootstrap_key: + log('mon has provided conf- scanning disks') + emit_cephconf() + import_osd_bootstrap_key(bootstrap_key) + import_osd_upgrade_key(upgrade_key) + if removal_key: + import_osd_removal_key(removal_key) + prepare_disks_and_activate() + _, settings, _ = (ch_ceph.CephOSDConfContext() + .filter_osd_from_mon_settings()) + ceph.apply_osd_settings(settings) + if client_crash_key: + import_client_crash_key(client_crash_key) + else: + log('mon cluster has not yet provided conf') + + +@hooks.hook('upgrade-charm.real') +@harden() +def upgrade_charm(): + apt_install(packages=filter_installed_packages(ceph.determine_packages()), + fatal=True) + if get_fsid() and get_auth(): + emit_cephconf() + install_udev_rules() + remap_resolved_targets() + maybe_refresh_nrpe_files() + # NOTE(jamespage): https://pad.lv/1861996 + # ensure number of bootstrapped OSD's is presented to ceph-mon + prepare_disks_and_activate() + + +def remap_resolved_targets(): + '''Remap any previous fully resolved target devices to provided names''' + # NOTE(jamespage): Deal with any prior provided dev to + # target device resolution which occurred in prior + # releases of the charm - the user provided value + # should be used in preference to the target path + # to the block device as in some instances this + # is not consistent between reboots (bcache). 
+ db = kv() + touched_devices = db.get('osd-devices', []) + osd_devices = get_devices() + for dev in osd_devices: + real_path = os.path.realpath(dev) + if real_path != dev and real_path in touched_devices: + log('Device {} already processed by charm using ' + 'actual device path {}, updating block device ' + 'usage with provided device path ' + 'and skipping'.format(dev, + real_path)) + touched_devices.remove(real_path) + touched_devices.append(dev) + db.set('osd-devices', touched_devices) + db.flush() + + +@hooks.hook('nrpe-external-master-relation-joined', + 'nrpe-external-master-relation-changed') +def update_nrpe_config(): + # python-dbus is used by check_upstart_job + # fasteners is used by apt_install collect_ceph_osd_services.py + pkgs = ['python3-dbus'] + if CompareHostReleases(lsb_release()['DISTRIB_CODENAME']) >= 'bionic': + pkgs.append('python3-fasteners') + apt_install(pkgs) + + # copy the check and collect files over to the plugins directory + charm_dir = os.environ.get('CHARM_DIR', '') + nagios_plugins = '/usr/local/lib/nagios/plugins' + # Grab nagios user/group ID's from original source + _dir = os.stat(nagios_plugins) + uid = _dir.st_uid + gid = _dir.st_gid + for name in ('collect_ceph_osd_services.py', 'check_ceph_osd_services.py'): + target = os.path.join(nagios_plugins, name) + shutil.copy(os.path.join(charm_dir, 'files', 'nagios', name), target) + os.chown(target, uid, gid) + + hostname = nrpe.get_nagios_hostname() + current_unit = nrpe.get_nagios_unit_name() + + # BUG#1810749 - the nagios user can't access /var/lib/ceph/.. and that's a + # GOOD THING, as it keeps ceph secure from Nagios. However, to check + # whether ceph is okay, the check_systemd.py or 'status ceph-osd' still + # needs to be called with the contents of ../osd/ceph-*/whoami files. To + # get around this conundrum, instead a cron.d job that runs as root will + # perform the checks every minute, and write to a temporary file the + # results, and the nrpe check will grep this file and error out (return 2) + # if the first 3 characters of a line are not 'OK:'. 
+ + cmd = ('MAILTO=""\n' + '* * * * * root ' + '/usr/local/lib/nagios/plugins/collect_ceph_osd_services.py' + ' 2>&1 | logger -t check-osd\n') + with open(CRON_CEPH_CHECK_FILE, "wt") as f: + f.write(cmd) + + nrpe_cmd = '/usr/local/lib/nagios/plugins/check_ceph_osd_services.py' + + nrpe_setup = nrpe.NRPE(hostname=hostname) + nrpe_setup.add_check( + shortname='ceph-osd', + description='process check {%s}' % current_unit, + check_cmd=nrpe_cmd + ) + nrpe_setup.write() + + +def maybe_refresh_nrpe_files(): + """if the nrpe-external-master relation exists then refresh the nrpe + configuration -- this is called during a charm upgrade + """ + if relations_of_type('nrpe-external-master'): + update_nrpe_config() + + +@hooks.hook('secrets-storage-relation-joined') +def secrets_storage_joined(relation_id=None): + relation_set(relation_id=relation_id, + secret_backend='charm-vaultlocker', + isolated=True, + access_address=get_relation_ip('secrets-storage'), + unit_name=local_unit(), + hostname=socket.gethostname()) + + +@hooks.hook('secrets-storage-relation-changed') +def secrets_storage_changed(): + vault_ca = relation_get('vault_ca') + if vault_ca: + vault_ca = base64.decodebytes(json.loads(vault_ca).encode()) + write_file('/usr/local/share/ca-certificates/vault-ca.crt', + vault_ca, perms=0o644) + subprocess.check_call(['update-ca-certificates', '--fresh']) + prepare_disks_and_activate() + + +VERSION_PACKAGE = 'ceph-common' + + +def assess_status(): + """Assess status of current unit""" + + if not is_tune_osd_memory_target_valid(): + status_set('blocked', 'tune-osd-memory-target config value is invalid') + return + + # check to see if the unit is paused. + application_version_set(get_upstream_version(VERSION_PACKAGE)) + if is_unit_upgrading_set(): + status_set("blocked", + "Ready for do-release-upgrade and reboot. " + "Set complete when finished.") + return + if is_unit_paused_set(): + status_set('maintenance', + "Paused. Use 'resume' action to resume normal service.") + return + # Check for mon relation + if len(relation_ids('mon')) < 1: + status_set('blocked', 'Missing relation: monitor') + return + + # Check for monitors with presented addresses + # Check for bootstrap key presentation + monitors = get_mon_hosts() + if len(monitors) < 1 or not get_conf('osd_bootstrap_key'): + status_set('waiting', 'Incomplete relation: monitor') + return + + # Check for vault + if use_vaultlocker(): + if not relation_ids('secrets-storage'): + status_set('blocked', 'Missing relation: vault') + return + try: + if not vaultlocker.vault_relation_complete(): + status_set('waiting', 'Incomplete relation: vault') + return + except Exception as e: + status_set('blocked', "Warning: couldn't verify vault relation") + log("Exception when verifying vault relation - maybe it was " + "offline?:\n{}".format(str(e))) + log("Traceback: {}".format(traceback.format_exc())) + + # Check for OSD device creation parity i.e. 
at least some devices + # must have been presented and used for this charm to be operational + (prev_status, prev_message) = status_get() + running_osds = ceph.get_running_osds() + if not prev_message.startswith('Non-pristine'): + if not running_osds: + status_set('blocked', + 'No block devices detected using current configuration') + else: + aa_needs_update = False + try: + aa_needs_update = check_aa_profile_needs_update() + except AppArmorProfileNeverInstalledException: + pass + if aa_needs_update and config('aa-profile-mode') != 'disable': + _set_pending_apparmor_update_status() + else: + status_set('active', + 'Unit is ready ({} OSD)'.format(len(running_osds))) + else: + pristine = True + # Check unmounted disks that should be configured but don't check + # journals or already processed devices + config_devices = (set(get_devices()) & set(ceph.unmounted_disks())) + osd_journals = set(get_journal_devices()) + touched_devices = set(kv().get('osd-devices', [])) + for dev in config_devices - osd_journals - touched_devices: + if (not ceph.is_active_bluestore_device(dev) and + not ceph.is_pristine_disk(dev) and + not ceph.is_mapped_luks_device(dev)): + pristine = False + break + if pristine: + status_set('active', + 'Unit is ready ({} OSD)'.format(len(running_osds))) + + try: + get_bdev_enable_discard() + except ValueError as ex: + status_set('blocked', str(ex)) + + try: + bluestore_compression = ch_context.CephBlueStoreCompressionContext() + bluestore_compression.validate() + except ValueError as e: + status_set('blocked', 'Invalid configuration: {}'.format(str(e))) + + +@hooks.hook('update-status') +@harden() +def update_status(): + log('Updating status.') + + +@hooks.hook('pre-series-upgrade') +def pre_series_upgrade(): + log("Running prepare series upgrade hook", "INFO") + # NOTE: The Ceph packages handle the series upgrade gracefully. + # In order to indicate the step of the series upgrade process for + # administrators and automated scripts, the charm sets the paused and + # upgrading states. + set_unit_paused() + set_unit_upgrading() + + +@hooks.hook('post-series-upgrade') +def post_series_upgrade(): + log("Running complete series upgrade hook", "INFO") + # In order to indicate the step of the series upgrade process for + # administrators and automated scripts, the charm clears the paused and + # upgrading states. + clear_unit_paused() + clear_unit_upgrading() + + +if __name__ == '__main__': + try: + hooks.execute(sys.argv) + except UnregisteredHookError as e: + log('Unknown hook {} - skipping.'.format(e)) + assess_status() diff --git a/ceph-osd/hooks/charmhelpers/__init__.py b/ceph-osd/hooks/charmhelpers/__init__.py new file mode 100644 index 00000000..ddf30450 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/__init__.py @@ -0,0 +1,84 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Bootstrap charm-helpers, installing its dependencies if necessary using +# only standard libraries. 
+
+import functools
+import inspect
+import subprocess
+
+
+try:
+    import yaml  # NOQA:F401
+except ImportError:
+    subprocess.check_call(['apt-get', 'install', '-y', 'python3-yaml'])
+    import yaml  # NOQA:F401
+
+
+# Holds a mapping of mangled function names that have been deprecated
+# using the @deprecate decorator below. This is so that the warning is only
+# printed once for each usage of the function.
+__deprecated_functions = {}
+
+
+def deprecate(warning, date=None, log=None):
+    """Add a deprecation warning the first time the function is used.
+
+    The date, a string in semi-ISO 8601 (YYYY-MM) format, indicates the
+    year-month that the function is officially going to be removed.
+
+    usage:
+
+    @deprecate('use core/fetch/add_source() instead', '2017-04')
+    def contributed_add_source_thing(...):
+        ...
+
+    And it then prints to the log ONCE that the function is deprecated.
+    The reason for passing the logging function (log) is so that hookenv.log
+    can be used for a charm if needed.
+
+    :param warning: String to indicate what is to be used instead.
+    :param date: Optional string in YYYY-MM format to indicate when the
+        function will definitely (probably) be removed.
+    :param log: The log function to call in order to log. If None, logs to
+        stdout
+    """
+    def wrap(f):
+
+        @functools.wraps(f)
+        def wrapped_f(*args, **kwargs):
+            try:
+                module = inspect.getmodule(f)
+                file = inspect.getsourcefile(f)
+                lines = inspect.getsourcelines(f)
+                f_name = "{}-{}-{}..{}-{}".format(
+                    module.__name__, file, lines[0], lines[-1], f.__name__)
+            except (IOError, TypeError):
+                # assume it was local, so just use the name of the function
+                f_name = f.__name__
+            if f_name not in __deprecated_functions:
+                __deprecated_functions[f_name] = True
+                s = "DEPRECATION WARNING: Function {} is being removed".format(
+                    f.__name__)
+                if date:
+                    s = "{} on/around {}".format(s, date)
+                if warning:
+                    s = "{} : {}".format(s, warning)
+                if log:
+                    log(s)
+                else:
+                    print(s)
+            return f(*args, **kwargs)
+        return wrapped_f
+    return wrap
diff --git a/ceph-osd/hooks/charmhelpers/cli/__init__.py b/ceph-osd/hooks/charmhelpers/cli/__init__.py
new file mode 100644
index 00000000..2b0c4b7a
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/cli/__init__.py
@@ -0,0 +1,187 @@
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
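+
+# Illustrative usage of this module (a minimal sketch; the 'unit-status'
+# command and its function are hypothetical, not part of charm-helpers):
+#
+#     from charmhelpers.cli import cmdline
+#
+#     @cmdline.subcommand('unit-status')
+#     def unit_status(name):
+#         """Return a status dict for the named unit."""
+#         return {'unit': name, 'status': 'active'}
+#
+#     if __name__ == '__main__':
+#         cmdline.run()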
+ +import inspect +import argparse +import sys + +import charmhelpers.core.unitdata + + +class OutputFormatter(object): + def __init__(self, outfile=sys.stdout): + self.formats = ( + "raw", + "json", + "py", + "yaml", + "csv", + "tab", + ) + self.outfile = outfile + + def add_arguments(self, argument_parser): + formatgroup = argument_parser.add_mutually_exclusive_group() + choices = self.supported_formats + formatgroup.add_argument("--format", metavar='FMT', + help="Select output format for returned data, " + "where FMT is one of: {}".format(choices), + choices=choices, default='raw') + for fmt in self.formats: + fmtfunc = getattr(self, fmt) + formatgroup.add_argument("-{}".format(fmt[0]), + "--{}".format(fmt), action='store_const', + const=fmt, dest='format', + help=fmtfunc.__doc__) + + @property + def supported_formats(self): + return self.formats + + def raw(self, output): + """Output data as raw string (default)""" + if isinstance(output, (list, tuple)): + output = '\n'.join(map(str, output)) + self.outfile.write(str(output)) + + def py(self, output): + """Output data as a nicely-formatted python data structure""" + import pprint + pprint.pprint(output, stream=self.outfile) + + def json(self, output): + """Output data in JSON format""" + import json + json.dump(output, self.outfile) + + def yaml(self, output): + """Output data in YAML format""" + import yaml + yaml.safe_dump(output, self.outfile) + + def csv(self, output): + """Output data as excel-compatible CSV""" + import csv + csvwriter = csv.writer(self.outfile) + csvwriter.writerows(output) + + def tab(self, output): + """Output data in excel-compatible tab-delimited format""" + import csv + csvwriter = csv.writer(self.outfile, dialect=csv.excel_tab) + csvwriter.writerows(output) + + def format_output(self, output, fmt='raw'): + fmtfunc = getattr(self, fmt) + fmtfunc(output) + + +class CommandLine(object): + argument_parser = None + subparsers = None + formatter = None + exit_code = 0 + + def __init__(self): + if not self.argument_parser: + self.argument_parser = argparse.ArgumentParser(description='Perform common charm tasks') + if not self.formatter: + self.formatter = OutputFormatter() + self.formatter.add_arguments(self.argument_parser) + if not self.subparsers: + self.subparsers = self.argument_parser.add_subparsers(help='Commands') + + def subcommand(self, command_name=None): + """ + Decorate a function as a subcommand. Use its arguments as the + command-line arguments""" + def wrapper(decorated): + cmd_name = command_name or decorated.__name__ + subparser = self.subparsers.add_parser(cmd_name, + description=decorated.__doc__) + for args, kwargs in describe_arguments(decorated): + subparser.add_argument(*args, **kwargs) + subparser.set_defaults(func=decorated) + return decorated + return wrapper + + def test_command(self, decorated): + """ + Subcommand is a boolean test function, so bool return values should be + converted to a 0/1 exit code. + """ + decorated._cli_test_command = True + return decorated + + def no_output(self, decorated): + """ + Subcommand is not expected to return a value, so don't print a spurious None. + """ + decorated._cli_no_output = True + return decorated + + def subcommand_builder(self, command_name, description=None): + """ + Decorate a function that builds a subcommand. 
Builders should accept a + single argument (the subparser instance) and return the function to be + run as the command.""" + def wrapper(decorated): + subparser = self.subparsers.add_parser(command_name) + func = decorated(subparser) + subparser.set_defaults(func=func) + subparser.description = description or func.__doc__ + return wrapper + + def run(self): + "Run cli, processing arguments and executing subcommands." + arguments = self.argument_parser.parse_args() + argspec = inspect.getfullargspec(arguments.func) + vargs = [] + for arg in argspec.args: + vargs.append(getattr(arguments, arg)) + if argspec.varargs: + vargs.extend(getattr(arguments, argspec.varargs)) + output = arguments.func(*vargs) + if getattr(arguments.func, '_cli_test_command', False): + self.exit_code = 0 if output else 1 + output = '' + if getattr(arguments.func, '_cli_no_output', False): + output = '' + self.formatter.format_output(output, arguments.format) + if charmhelpers.core.unitdata._KV: + charmhelpers.core.unitdata._KV.flush() + + +cmdline = CommandLine() + + +def describe_arguments(func): + """ + Analyze a function's signature and return a data structure suitable for + passing in as arguments to an argparse parser's add_argument() method.""" + + argspec = inspect.getfullargspec(func) + # we should probably raise an exception somewhere if func includes **kwargs + if argspec.defaults: + positional_args = argspec.args[:-len(argspec.defaults)] + keyword_names = argspec.args[-len(argspec.defaults):] + for arg, default in zip(keyword_names, argspec.defaults): + yield ('--{}'.format(arg),), {'default': default} + else: + positional_args = argspec.args + + for arg in positional_args: + yield (arg,), {} + if argspec.varargs: + yield (argspec.varargs,), {'nargs': '*'} diff --git a/ceph-osd/hooks/charmhelpers/cli/benchmark.py b/ceph-osd/hooks/charmhelpers/cli/benchmark.py new file mode 100644 index 00000000..303af14b --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/cli/benchmark.py @@ -0,0 +1,34 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . 
import cmdline +from charmhelpers.contrib.benchmark import Benchmark + + +@cmdline.subcommand(command_name='benchmark-start') +def start(): + Benchmark.start() + + +@cmdline.subcommand(command_name='benchmark-finish') +def finish(): + Benchmark.finish() + + +@cmdline.subcommand_builder('benchmark-composite', description="Set the benchmark composite score") +def service(subparser): + subparser.add_argument("value", help="The composite score.") + subparser.add_argument("units", help="The units the composite score represents, i.e., 'reads/sec'.") + subparser.add_argument("direction", help="'asc' if a lower score is better, 'desc' if a higher score is better.") + return Benchmark.set_composite_score diff --git a/ceph-osd/hooks/charmhelpers/cli/commands.py b/ceph-osd/hooks/charmhelpers/cli/commands.py new file mode 100644 index 00000000..b9310565 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/cli/commands.py @@ -0,0 +1,30 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This module loads sub-modules into the python runtime so they can be +discovered via the inspect module. In order to prevent flake8 from (rightfully) +telling us these are unused modules, throw a ' # noqa' at the end of each import +so that the warning is suppressed. +""" + +from . import CommandLine # noqa + +""" +Import the sub-modules which have decorated subcommands to register with chlp. +""" +from . import host # noqa +from . import benchmark # noqa +from . import unitdata # noqa +from . import hookenv # noqa diff --git a/ceph-osd/hooks/charmhelpers/cli/hookenv.py b/ceph-osd/hooks/charmhelpers/cli/hookenv.py new file mode 100644 index 00000000..bd72f448 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/cli/hookenv.py @@ -0,0 +1,21 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import cmdline +from charmhelpers.core import hookenv + + +cmdline.subcommand('relation-id')(hookenv.relation_id._wrapped) +cmdline.subcommand('service-name')(hookenv.service_name) +cmdline.subcommand('remote-service-name')(hookenv.remote_service_name._wrapped) diff --git a/ceph-osd/hooks/charmhelpers/cli/host.py b/ceph-osd/hooks/charmhelpers/cli/host.py new file mode 100644 index 00000000..40396849 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/cli/host.py @@ -0,0 +1,29 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import cmdline +from charmhelpers.core import host + + +@cmdline.subcommand() +def mounts(): + "List mounts" + return host.mounts() + + +@cmdline.subcommand_builder('service', description="Control system services") +def service(subparser): + subparser.add_argument("action", help="The action to perform (start, stop, etc...)") + subparser.add_argument("service_name", help="Name of the service to control") + return host.service diff --git a/ceph-osd/hooks/charmhelpers/cli/unitdata.py b/ceph-osd/hooks/charmhelpers/cli/unitdata.py new file mode 100644 index 00000000..acce846f --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/cli/unitdata.py @@ -0,0 +1,46 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import cmdline +from charmhelpers.core import unitdata + + +@cmdline.subcommand_builder('unitdata', description="Store and retrieve data") +def unitdata_cmd(subparser): + nested = subparser.add_subparsers() + + get_cmd = nested.add_parser('get', help='Retrieve data') + get_cmd.add_argument('key', help='Key to retrieve the value of') + get_cmd.set_defaults(action='get', value=None) + + getrange_cmd = nested.add_parser('getrange', help='Retrieve multiple data') + getrange_cmd.add_argument('key', metavar='prefix', + help='Prefix of the keys to retrieve') + getrange_cmd.set_defaults(action='getrange', value=None) + + set_cmd = nested.add_parser('set', help='Store data') + set_cmd.add_argument('key', help='Key to set') + set_cmd.add_argument('value', help='Value to store') + set_cmd.set_defaults(action='set') + + def _unitdata_cmd(action, key, value): + if action == 'get': + return unitdata.kv().get(key) + elif action == 'getrange': + return unitdata.kv().getrange(key) + elif action == 'set': + unitdata.kv().set(key, value) + unitdata.kv().flush() + return '' + return _unitdata_cmd diff --git a/ceph-osd/hooks/charmhelpers/contrib/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/__init__.py new file mode 100644 index 00000000..d7567b86 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-osd/hooks/charmhelpers/contrib/charmsupport/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/charmsupport/__init__.py new file mode 100644 index 00000000..d7567b86 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/charmsupport/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-osd/hooks/charmhelpers/contrib/charmsupport/nrpe.py b/ceph-osd/hooks/charmhelpers/contrib/charmsupport/nrpe.py new file mode 100644 index 00000000..ac002bc6 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/charmsupport/nrpe.py @@ -0,0 +1,576 @@ +# Copyright 2012-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Compatibility with the nrpe-external-master charm""" +# +# Authors: +# Matthew Wedgwood + +import glob +import grp +import json +import os +import pwd +import re +import shlex +import shutil +import subprocess +import yaml + +from charmhelpers.core.hookenv import ( + application_name, + config, + ERROR, + hook_name, + local_unit, + log, + relation_get, + relation_ids, + relation_set, + relations_of_type, +) + +from charmhelpers.core.host import service +from charmhelpers.core import host + +# This module adds compatibility with the nrpe-external-master and plain nrpe +# subordinate charms. To use it in your charm: +# +# 1. Update metadata.yaml +# +# provides: +# (...) +# nrpe-external-master: +# interface: nrpe-external-master +# scope: container +# +# and/or +# +# provides: +# (...) +# local-monitors: +# interface: local-monitors +# scope: container + +# +# 2. Add the following to config.yaml +# +# nagios_context: +# default: "juju" +# type: string +# description: | +# Used by the nrpe subordinate charms. +# A string that will be prepended to instance name to set the host name +# in nagios. So for instance the hostname would be something like: +# juju-myservice-0 +# If you're running multiple environments with the same services in them +# this allows you to differentiate between them. +# nagios_servicegroups: +# default: "" +# type: string +# description: | +# A comma-separated list of nagios servicegroups. +# If left empty, the nagios_context will be used as the servicegroup +# +# 3. Add custom checks (Nagios plugins) to files/nrpe-external-master +# +# 4. Update your hooks.py with something like this: +# +# from charmsupport.nrpe import NRPE +# (...) 
+# def update_nrpe_config(): +# nrpe_compat = NRPE() +# nrpe_compat.add_check( +# shortname = "myservice", +# description = "Check MyService", +# check_cmd = "check_http -w 2 -c 10 http://localhost" +# ) +# nrpe_compat.add_check( +# "myservice_other", +# "Check for widget failures", +# check_cmd = "/srv/myapp/scripts/widget_check" +# ) +# nrpe_compat.write() +# +# def config_changed(): +# (...) +# update_nrpe_config() +# +# def nrpe_external_master_relation_changed(): +# update_nrpe_config() +# +# def local_monitors_relation_changed(): +# update_nrpe_config() +# +# 4.a If your charm is a subordinate charm set primary=False +# +# from charmsupport.nrpe import NRPE +# (...) +# def update_nrpe_config(): +# nrpe_compat = NRPE(primary=False) +# +# 5. ln -s hooks.py nrpe-external-master-relation-changed +# ln -s hooks.py local-monitors-relation-changed + + +class CheckException(Exception): + pass + + +class Check(object): + shortname_re = '[A-Za-z0-9-_.@]+$' + service_template = (""" +#--------------------------------------------------- +# This file is Juju managed +#--------------------------------------------------- +define service {{ + use active-service + host_name {nagios_hostname} + service_description {nagios_hostname}[{shortname}] """ + """{description} + check_command check_nrpe!{command} + servicegroups {nagios_servicegroup} +{service_config_overrides} +}} +""") + + def __init__(self, shortname, description, check_cmd, max_check_attempts=None): + super(Check, self).__init__() + # XXX: could be better to calculate this from the service name + if not re.match(self.shortname_re, shortname): + raise CheckException("shortname must match {}".format( + Check.shortname_re)) + self.shortname = shortname + self.command = "check_{}".format(shortname) + # Note: a set of invalid characters is defined by the + # Nagios server config + # The default is: illegal_object_name_chars=`~!$%^&*"|'<>?,()= + self.description = description + self.check_cmd = self._locate_cmd(check_cmd) + self.max_check_attempts = max_check_attempts + + def _get_check_filename(self): + return os.path.join(NRPE.nrpe_confdir, '{}.cfg'.format(self.command)) + + def _get_service_filename(self, hostname): + return os.path.join(NRPE.nagios_exportdir, + 'service__{}_{}.cfg'.format(hostname, self.command)) + + def _locate_cmd(self, check_cmd): + search_path = ( + '/usr/lib/nagios/plugins', + '/usr/local/lib/nagios/plugins', + ) + parts = shlex.split(check_cmd) + for path in search_path: + if os.path.exists(os.path.join(path, parts[0])): + command = os.path.join(path, parts[0]) + if len(parts) > 1: + safe_args = [shlex.quote(arg) for arg in parts[1:]] + command += " " + " ".join(safe_args) + return command + log('Check command not found: {}'.format(parts[0])) + return '' + + def _remove_service_files(self): + if not os.path.exists(NRPE.nagios_exportdir): + return + for f in os.listdir(NRPE.nagios_exportdir): + if f.endswith('_{}.cfg'.format(self.command)): + os.remove(os.path.join(NRPE.nagios_exportdir, f)) + + def remove(self, hostname): + nrpe_check_file = self._get_check_filename() + if os.path.exists(nrpe_check_file): + os.remove(nrpe_check_file) + self._remove_service_files() + + def write(self, nagios_context, hostname, nagios_servicegroups): + nrpe_check_file = self._get_check_filename() + with open(nrpe_check_file, 'w') as nrpe_check_config: + nrpe_check_config.write("# check {}\n".format(self.shortname)) + if nagios_servicegroups: + nrpe_check_config.write( + "# The following header was added automatically by juju\n") + 
nrpe_check_config.write(
+                    "# Modifying it will affect nagios monitoring and alerting\n")
+                nrpe_check_config.write(
+                    "# servicegroups: {}\n".format(nagios_servicegroups))
+            nrpe_check_config.write("command[{}]={}\n".format(
+                self.command, self.check_cmd))
+
+        if not os.path.exists(NRPE.nagios_exportdir):
+            log('Not writing service config as {} is not accessible'.format(
+                NRPE.nagios_exportdir))
+        else:
+            self.write_service_config(nagios_context, hostname,
+                                      nagios_servicegroups)
+
+    def write_service_config(self, nagios_context, hostname,
+                             nagios_servicegroups):
+        self._remove_service_files()
+
+        if self.max_check_attempts:
+            service_config_overrides = '        max_check_attempts {}'.format(
+                self.max_check_attempts
+            )  # Note indentation is here rather than in the template to avoid trailing spaces
+        else:
+            service_config_overrides = ''  # empty string to avoid printing 'None'
+        templ_vars = {
+            'nagios_hostname': hostname,
+            'nagios_servicegroup': nagios_servicegroups,
+            'description': self.description,
+            'shortname': self.shortname,
+            'command': self.command,
+            'service_config_overrides': service_config_overrides,
+        }
+        nrpe_service_text = Check.service_template.format(**templ_vars)
+        nrpe_service_file = self._get_service_filename(hostname)
+        with open(nrpe_service_file, 'w') as nrpe_service_config:
+            nrpe_service_config.write(str(nrpe_service_text))
+
+    def run(self):
+        subprocess.call(self.check_cmd)
+
+
+class NRPE(object):
+    nagios_logdir = '/var/log/nagios'
+    nagios_exportdir = '/var/lib/nagios/export'
+    nrpe_confdir = '/etc/nagios/nrpe.d'
+    homedir = '/var/lib/nagios'  # home dir provided by nagios-nrpe-server
+
+    def __init__(self, hostname=None, primary=True):
+        super(NRPE, self).__init__()
+        self.config = config()
+        self.primary = primary
+        self.nagios_context = self.config['nagios_context']
+        if 'nagios_servicegroups' in self.config and self.config['nagios_servicegroups']:
+            self.nagios_servicegroups = self.config['nagios_servicegroups']
+        else:
+            self.nagios_servicegroups = self.nagios_context
+        self.unit_name = local_unit().replace('/', '-')
+        if hostname:
+            self.hostname = hostname
+        else:
+            nagios_hostname = get_nagios_hostname()
+            if nagios_hostname:
+                self.hostname = nagios_hostname
+            else:
+                self.hostname = "{}-{}".format(self.nagios_context, self.unit_name)
+        self.checks = []
+        # If in an nrpe-external-master relation hook, set primary status
+        relation = relation_ids('nrpe-external-master')
+        if relation:
+            log("Setting charm primary status {}".format(primary))
+            for rid in relation:
+                relation_set(relation_id=rid, relation_settings={'primary': self.primary})
+        self.remove_check_queue = set()
+
+    @classmethod
+    def does_nrpe_conf_dir_exist(cls):
+        """Return True if the nrpe_confdir directory exists."""
+        return os.path.isdir(cls.nrpe_confdir)
+
+    def add_check(self, *args, **kwargs):
+        shortname = None
+        if kwargs.get('shortname') is None:
+            if len(args) > 0:
+                shortname = args[0]
+        else:
+            shortname = kwargs['shortname']
+
+        self.checks.append(Check(*args, **kwargs))
+        try:
+            self.remove_check_queue.remove(shortname)
+        except KeyError:
+            pass
+
+    def remove_check(self, *args, **kwargs):
+        if kwargs.get('shortname') is None:
+            raise ValueError('shortname of check must be specified')
+
+        # Use sensible defaults if they're not specified - these are not
+        # actually used during removal, but they're required for constructing
+        # the Check object; check_disk is chosen because it's part of the
+        # nagios-plugins-basic package.
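+        # e.g. remove_check(shortname='check-foo') on its own is enough; the
+        # defaults filled in below exist only to satisfy the Check constructor.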
+        if kwargs.get('check_cmd') is None:
+            kwargs['check_cmd'] = 'check_disk'
+        if kwargs.get('description') is None:
+            kwargs['description'] = ''
+
+        check = Check(*args, **kwargs)
+        check.remove(self.hostname)
+        self.remove_check_queue.add(kwargs['shortname'])
+
+    def write(self):
+        try:
+            nagios_uid = pwd.getpwnam('nagios').pw_uid
+            nagios_gid = grp.getgrnam('nagios').gr_gid
+        except Exception:
+            log("Nagios user not set up, nrpe checks not updated")
+            return
+
+        if not os.path.exists(NRPE.nagios_logdir):
+            os.mkdir(NRPE.nagios_logdir)
+            os.chown(NRPE.nagios_logdir, nagios_uid, nagios_gid)
+
+        nrpe_monitors = {}
+        monitors = {"monitors": {"remote": {"nrpe": nrpe_monitors}}}
+
+        # check that the charm can write to the conf dir. If not, then nagios
+        # probably isn't installed, and we can defer.
+        if not self.does_nrpe_conf_dir_exist():
+            return
+
+        for nrpecheck in self.checks:
+            nrpecheck.write(self.nagios_context, self.hostname,
+                            self.nagios_servicegroups)
+            nrpe_monitors[nrpecheck.shortname] = {
+                "command": nrpecheck.command,
+            }
+            # If we were passed max_check_attempts, add that to the relation data
+            if nrpecheck.max_check_attempts is not None:
+                nrpe_monitors[nrpecheck.shortname]['max_check_attempts'] = nrpecheck.max_check_attempts
+
+        # update-status hooks are configured to fire every 5 minutes by
+        # default. When nagios-nrpe-server is restarted, the nagios server
+        # reports checks failing, causing unnecessary alerts. Let's not restart
+        # on update-status hooks.
+        if not hook_name() == 'update-status':
+            service('restart', 'nagios-nrpe-server')
+
+        monitor_ids = relation_ids("local-monitors") + \
+            relation_ids("nrpe-external-master")
+        for rid in monitor_ids:
+            reldata = relation_get(unit=local_unit(), rid=rid)
+            if 'monitors' in reldata:
+                # update the existing set of monitors with the new data
+                old_monitors = yaml.safe_load(reldata['monitors'])
+                old_nrpe_monitors = old_monitors['monitors']['remote']['nrpe']
+                # remove keys that are in the remove_check_queue
+                old_nrpe_monitors = {k: v for k, v in old_nrpe_monitors.items()
+                                     if k not in self.remove_check_queue}
+                # update/add nrpe_monitors
+                old_nrpe_monitors.update(nrpe_monitors)
+                old_monitors['monitors']['remote']['nrpe'] = old_nrpe_monitors
+                # write back to the relation
+                relation_set(relation_id=rid, monitors=yaml.dump(old_monitors))
+            else:
+                # write a brand new set of monitors, as there are no existing ones.
+                relation_set(relation_id=rid, monitors=yaml.dump(monitors))
+
+        self.remove_check_queue.clear()
+
+
+def get_nagios_hostcontext(relation_name='nrpe-external-master'):
+    """
+    Query relation with nrpe subordinate, return the nagios_host_context
+
+    :param str relation_name: Name of relation nrpe sub joined to
+    """
+    for rel in relations_of_type(relation_name):
+        if 'nagios_host_context' in rel:
+            return rel['nagios_host_context']
+
+
+def get_nagios_hostname(relation_name='nrpe-external-master'):
+    """
+    Query relation with nrpe subordinate, return the nagios_hostname
+
+    :param str relation_name: Name of relation nrpe sub joined to
+    """
+    for rel in relations_of_type(relation_name):
+        if 'nagios_hostname' in rel:
+            return rel['nagios_hostname']
+
+
+def get_nagios_unit_name(relation_name='nrpe-external-master'):
+    """
+    Return the nagios unit name prepended with host_context if needed
+
+    :param str relation_name: Name of relation nrpe sub joined to
+    """
+    host_context = get_nagios_hostcontext(relation_name)
+    if host_context:
+        unit = "%s:%s" % (host_context, local_unit())
+    else:
+        unit = local_unit()
+    return unit
+
+
+def add_init_service_checks(nrpe, services, unit_name, immediate_check=True):
+    """
+    Add checks for each service in list
+
+    :param NRPE nrpe: NRPE object to add check to
+    :param list services: List of services to check
+    :param str unit_name: Unit name to use in check description
+    :param bool immediate_check: For sysv init, run the service check immediately
+    """
+    # check_haproxy is redundant in the presence of check_crm. See LP Bug#1880601 for details.
+    # just remove check_haproxy if haproxy is added as an lsb resource in hacluster.
+    for rid in relation_ids("ha"):
+        ha_resources = relation_get("json_resources", rid=rid, unit=local_unit())
+        if ha_resources:
+            try:
+                ha_resources_parsed = json.loads(ha_resources)
+            except ValueError as e:
+                log('Could not parse JSON from ha resources. {}'.format(e), level=ERROR)
+                raise
+            if "lsb:haproxy" in ha_resources_parsed.values():
+                if "haproxy" in services:
+                    log("removed check_haproxy. This service will be monitored by check_crm")
+                    services.remove("haproxy")
+    for svc in services:
+        # Don't add a check for these services from neutron-gateway
+        if svc in ['ext-port', 'os-charm-phy-nic-mtu']:
+            continue
+
+        upstart_init = '/etc/init/%s.conf' % svc
+        sysv_init = '/etc/init.d/%s' % svc
+
+        if host.init_is_systemd(service_name=svc):
+            nrpe.add_check(
+                shortname=svc,
+                description='process check {%s}' % unit_name,
+                check_cmd='check_systemd.py %s' % svc
+            )
+        elif os.path.exists(upstart_init):
+            nrpe.add_check(
+                shortname=svc,
+                description='process check {%s}' % unit_name,
+                check_cmd='check_upstart_job %s' % svc
+            )
+        elif os.path.exists(sysv_init):
+            cronpath = '/etc/cron.d/nagios-service-check-%s' % svc
+            checkpath = '%s/service-check-%s.txt' % (nrpe.homedir, svc)
+            croncmd = (
+                '/usr/local/lib/nagios/plugins/check_exit_status.pl '
+                '-e -s /etc/init.d/%s status' % svc
+            )
+            cron_file = '*/5 * * * * root %s > %s\n' % (croncmd, checkpath)
+            f = open(cronpath, 'w')
+            f.write(cron_file)
+            f.close()
+            nrpe.add_check(
+                shortname=svc,
+                description='service check {%s}' % unit_name,
+                check_cmd='check_status_file.py -f %s' % checkpath,
+            )
+            # if /var/lib/nagios doesn't exist open(checkpath, 'w') will fail
+            # (LP: #1670223).
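+            # Hence the os.path.isdir() guard below: the status file is only
+            # primed with an immediate result when the nagios home dir exists.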
+ if immediate_check and os.path.isdir(nrpe.homedir): + f = open(checkpath, 'w') + subprocess.call( + croncmd.split(), + stdout=f, + stderr=subprocess.STDOUT + ) + f.close() + os.chmod(checkpath, 0o644) + + +def copy_nrpe_checks(nrpe_files_dir=None): + """ + Copy the nrpe checks into place + + """ + NAGIOS_PLUGINS = '/usr/local/lib/nagios/plugins' + if nrpe_files_dir is None: + # determine if "charmhelpers" is in CHARMDIR or CHARMDIR/hooks + for segment in ['.', 'hooks']: + nrpe_files_dir = os.path.abspath(os.path.join( + os.getenv('CHARM_DIR'), + segment, + 'charmhelpers', + 'contrib', + 'openstack', + 'files')) + if os.path.isdir(nrpe_files_dir): + break + else: + raise RuntimeError("Couldn't find charmhelpers directory") + if not os.path.exists(NAGIOS_PLUGINS): + os.makedirs(NAGIOS_PLUGINS) + for fname in glob.glob(os.path.join(nrpe_files_dir, "check_*")): + if os.path.isfile(fname): + shutil.copy2(fname, + os.path.join(NAGIOS_PLUGINS, os.path.basename(fname))) + + +def add_haproxy_checks(nrpe, unit_name): + """ + Add checks for each service in list + + :param NRPE nrpe: NRPE object to add check to + :param str unit_name: Unit name to use in check description + """ + nrpe.add_check( + shortname='haproxy_servers', + description='Check HAProxy {%s}' % unit_name, + check_cmd='check_haproxy.sh') + nrpe.add_check( + shortname='haproxy_queue', + description='Check HAProxy queue depth {%s}' % unit_name, + check_cmd='check_haproxy_queue_depth.sh') + + +def remove_deprecated_check(nrpe, deprecated_services): + """ + Remove checks for deprecated services in list + + :param nrpe: NRPE object to remove check from + :type nrpe: NRPE + :param deprecated_services: List of deprecated services that are removed + :type deprecated_services: list + """ + for dep_svc in deprecated_services: + log('Deprecated service: {}'.format(dep_svc)) + nrpe.remove_check(shortname=dep_svc) + + +def add_deferred_restarts_check(nrpe): + """ + Add NRPE check for services with deferred restarts. + + :param NRPE nrpe: NRPE object to add check to + """ + unit_name = local_unit().replace('/', '-') + shortname = unit_name + '_deferred_restarts' + check_cmd = 'check_deferred_restarts.py --application {}'.format( + application_name()) + + log('Adding deferred restarts nrpe check: {}'.format(shortname)) + nrpe.add_check( + shortname=shortname, + description='Check deferred service restarts {}'.format(unit_name), + check_cmd=check_cmd) + + +def remove_deferred_restarts_check(nrpe): + """ + Remove NRPE check for services with deferred service restarts. + + :param NRPE nrpe: NRPE object to remove check from + """ + unit_name = local_unit().replace('/', '-') + shortname = unit_name + '_deferred_restarts' + check_cmd = 'check_deferred_restarts.py --application {}'.format( + application_name()) + + log('Removing deferred restarts nrpe check: {}'.format(shortname)) + nrpe.remove_check( + shortname=shortname, + description='Check deferred service restarts {}'.format(unit_name), + check_cmd=check_cmd) diff --git a/ceph-osd/hooks/charmhelpers/contrib/charmsupport/volumes.py b/ceph-osd/hooks/charmhelpers/contrib/charmsupport/volumes.py new file mode 100644 index 00000000..f7c6fbdc --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/charmsupport/volumes.py @@ -0,0 +1,173 @@ +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+'''
+Functions for managing volumes in juju units. One volume is supported per unit.
+Subordinates may have their own storage, provided it is on its own partition.
+
+Configuration stanzas::
+
+  volume-ephemeral:
+    type: boolean
+    default: true
+    description: >
+      If false, a volume is mounted as specified in "volume-map"
+      If true, ephemeral storage will be used, meaning that log data
+      will only exist as long as the machine does. YOU HAVE BEEN WARNED.
+  volume-map:
+    type: string
+    default: {}
+    description: >
+      YAML map of units to device names, e.g.:
+      "{ rsyslog/0: /dev/vdb, rsyslog/1: /dev/vdb }"
+      Service units will raise a configure-error if volume-ephemeral
+      is 'true' and no volume-map value is set. Use 'juju set' to set a
+      value and 'juju resolved' to complete configuration.
+
+Usage::
+
+    from charmsupport.volumes import configure_volume, VolumeConfigurationError
+    from charmsupport.hookenv import log, ERROR
+    def pre_mount_hook():
+        stop_service('myservice')
+    def post_mount_hook():
+        start_service('myservice')
+
+    if __name__ == '__main__':
+        try:
+            configure_volume(before_change=pre_mount_hook,
+                             after_change=post_mount_hook)
+        except VolumeConfigurationError:
+            log('Storage could not be configured', ERROR)
+
+'''
+
+# XXX: Known limitations
+# - fstab is neither consulted nor updated
+
+import os
+from charmhelpers.core import hookenv
+from charmhelpers.core import host
+import yaml
+
+
+MOUNT_BASE = '/srv/juju/volumes'
+
+
+class VolumeConfigurationError(Exception):
+    '''Volume configuration data is missing or invalid'''
+    pass
+
+
+def get_config():
+    '''Gather and sanity-check volume configuration data'''
+    volume_config = {}
+    config = hookenv.config()
+
+    errors = False
+
+    if config.get('volume-ephemeral') in (True, 'True', 'true', 'Yes', 'yes'):
+        volume_config['ephemeral'] = True
+    else:
+        volume_config['ephemeral'] = False
+
+    try:
+        volume_map = yaml.safe_load(config.get('volume-map', '{}'))
+    except yaml.YAMLError as e:
+        hookenv.log("Error parsing YAML volume-map: {}".format(e),
+                    hookenv.ERROR)
+        errors = True
+    if volume_map is None:
+        # probably an empty string
+        volume_map = {}
+    elif not isinstance(volume_map, dict):
+        hookenv.log("Volume-map should be a dictionary, not {}".format(
+            type(volume_map)))
+        errors = True
+
+    volume_config['device'] = volume_map.get(os.environ['JUJU_UNIT_NAME'])
+    if volume_config['device'] and volume_config['ephemeral']:
+        # asked for ephemeral storage but also defined a volume ID
+        hookenv.log('A volume is defined for this unit, but ephemeral '
+                    'storage was requested', hookenv.ERROR)
+        errors = True
+    elif not volume_config['device'] and not volume_config['ephemeral']:
+        # asked for permanent storage but did not define volume ID
+        hookenv.log('Ephemeral storage was requested, but there is no volume '
+                    'defined for this unit.', hookenv.ERROR)
+        errors = True
+
+    unit_mount_name = hookenv.local_unit().replace('/', '-')
+    volume_config['mountpoint'] = os.path.join(MOUNT_BASE, unit_mount_name)
+
+    if errors:
+        return None
+    return volume_config
+
+
+def mount_volume(config):
+    if os.path.exists(config['mountpoint']):
+
if not os.path.isdir(config['mountpoint']): + hookenv.log('Not a directory: {}'.format(config['mountpoint'])) + raise VolumeConfigurationError() + else: + host.mkdir(config['mountpoint']) + if os.path.ismount(config['mountpoint']): + unmount_volume(config) + if not host.mount(config['device'], config['mountpoint'], persist=True): + raise VolumeConfigurationError() + + +def unmount_volume(config): + if os.path.ismount(config['mountpoint']): + if not host.umount(config['mountpoint'], persist=True): + raise VolumeConfigurationError() + + +def managed_mounts(): + '''List of all mounted managed volumes''' + return filter(lambda mount: mount[0].startswith(MOUNT_BASE), host.mounts()) + + +def configure_volume(before_change=lambda: None, after_change=lambda: None): + '''Set up storage (or don't) according to the charm's volume configuration. + Returns the mount point or "ephemeral". before_change and after_change + are optional functions to be called if the volume configuration changes. + ''' + + config = get_config() + if not config: + hookenv.log('Failed to read volume configuration', hookenv.CRITICAL) + raise VolumeConfigurationError() + + if config['ephemeral']: + if os.path.ismount(config['mountpoint']): + before_change() + unmount_volume(config) + after_change() + return 'ephemeral' + else: + # persistent storage + if os.path.ismount(config['mountpoint']): + mounts = dict(managed_mounts()) + if mounts.get(config['mountpoint']) != config['device']: + before_change() + unmount_volume(config) + mount_volume(config) + after_change() + else: + before_change() + mount_volume(config) + after_change() + return config['mountpoint'] diff --git a/ceph-osd/hooks/charmhelpers/contrib/hahelpers/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/hahelpers/__init__.py new file mode 100644 index 00000000..d7567b86 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hahelpers/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-osd/hooks/charmhelpers/contrib/hahelpers/apache.py b/ceph-osd/hooks/charmhelpers/contrib/hahelpers/apache.py new file mode 100644 index 00000000..a54702bc --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hahelpers/apache.py @@ -0,0 +1,90 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# Copyright 2012 Canonical Ltd. 
+#
+# This file is sourced from lp:openstack-charm-helpers
+#
+# Authors:
+#  James Page
+#  Adam Gandelman
+#
+
+import os
+
+from charmhelpers.core import host
+from charmhelpers.core.hookenv import (
+    config as config_get,
+    relation_get,
+    relation_ids,
+    related_units as relation_list,
+    log,
+    INFO,
+)
+
+# This file contains the CA cert from the charm's ssl_ca configuration
+# option; in future the file name should be updated to reflect that.
+CONFIG_CA_CERT_FILE = 'keystone_juju_ca_cert'
+
+
+def get_cert(cn=None):
+    # TODO: deal with multiple https endpoints via charm config
+    cert = config_get('ssl_cert')
+    key = config_get('ssl_key')
+    if not (cert and key):
+        log("Inspecting identity-service relations for SSL certificate.",
+            level=INFO)
+        cert = key = None
+        if cn:
+            ssl_cert_attr = 'ssl_cert_{}'.format(cn)
+            ssl_key_attr = 'ssl_key_{}'.format(cn)
+        else:
+            ssl_cert_attr = 'ssl_cert'
+            ssl_key_attr = 'ssl_key'
+        for r_id in relation_ids('identity-service'):
+            for unit in relation_list(r_id):
+                if not cert:
+                    cert = relation_get(ssl_cert_attr,
+                                        rid=r_id, unit=unit)
+                if not key:
+                    key = relation_get(ssl_key_attr,
+                                       rid=r_id, unit=unit)
+    return (cert, key)
+
+
+def get_ca_cert():
+    ca_cert = config_get('ssl_ca')
+    if ca_cert is None:
+        log("Inspecting identity-service relations for CA SSL certificate.",
+            level=INFO)
+        for r_id in (relation_ids('identity-service') +
+                     relation_ids('identity-credentials')):
+            for unit in relation_list(r_id):
+                if ca_cert is None:
+                    ca_cert = relation_get('ca_cert',
+                                           rid=r_id, unit=unit)
+    return ca_cert
+
+
+def retrieve_ca_cert(cert_file):
+    cert = None
+    if os.path.isfile(cert_file):
+        with open(cert_file, 'rb') as crt:
+            cert = crt.read()
+    return cert
+
+
+def install_ca_cert(ca_cert):
+    host.install_ca_cert(ca_cert, CONFIG_CA_CERT_FILE)
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hahelpers/cluster.py b/ceph-osd/hooks/charmhelpers/contrib/hahelpers/cluster.py
new file mode 100644
index 00000000..7b309256
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/hahelpers/cluster.py
@@ -0,0 +1,455 @@
+# Copyright 2014-2021 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Copyright 2012 Canonical Ltd.
+#
+# Authors:
+#  James Page
+#  Adam Gandelman
+#
+
+"""
+Helpers for clustering and determining "cluster leadership" and other
+clustering-related helpers.
+""" + +import functools +import subprocess +import os +import time + +from socket import gethostname as get_unit_hostname + +from charmhelpers.core.hookenv import ( + log, + relation_ids, + related_units as relation_list, + relation_get, + config as config_get, + INFO, + DEBUG, + WARNING, + unit_get, + is_leader as juju_is_leader, + status_set, +) +from charmhelpers.core.host import ( + modulo_distribution, +) +from charmhelpers.core.decorators import ( + retry_on_exception, +) +from charmhelpers.core.strutils import ( + bool_from_string, +) + +DC_RESOURCE_NAME = 'DC' + + +class HAIncompleteConfig(Exception): + pass + + +class HAIncorrectConfig(Exception): + pass + + +class CRMResourceNotFound(Exception): + pass + + +class CRMDCNotFound(Exception): + pass + + +def is_elected_leader(resource): + """ + Returns True if the charm executing this is the elected cluster leader. + + It relies on two mechanisms to determine leadership: + 1. If juju is sufficiently new and leadership election is supported, + the is_leader command will be used. + 2. If the charm is part of a corosync cluster, call corosync to + determine leadership. + 3. If the charm is not part of a corosync cluster, the leader is + determined as being "the alive unit with the lowest unit number". In + other words, the oldest surviving unit. + """ + try: + return juju_is_leader() + except NotImplementedError: + log('Juju leadership election feature not enabled' + ', using fallback support', + level=WARNING) + + if is_clustered(): + if not is_crm_leader(resource): + log('Deferring action to CRM leader.', level=INFO) + return False + else: + peers = peer_units() + if peers and not oldest_peer(peers): + log('Deferring action to oldest service unit.', level=INFO) + return False + return True + + +def is_clustered(): + for r_id in (relation_ids('ha') or []): + for unit in (relation_list(r_id) or []): + clustered = relation_get('clustered', + rid=r_id, + unit=unit) + if clustered: + return True + return False + + +def is_crm_dc(): + """ + Determine leadership by querying the pacemaker Designated Controller + """ + cmd = ['crm', 'status'] + try: + status = subprocess.check_output( + cmd, stderr=subprocess.STDOUT).decode('utf-8') + except subprocess.CalledProcessError as ex: + raise CRMDCNotFound(str(ex)) + + current_dc = '' + for line in status.split('\n'): + if line.startswith('Current DC'): + # Current DC: juju-lytrusty-machine-2 (168108163) + # - partition with quorum + current_dc = line.split(':')[1].split()[0] + if current_dc == get_unit_hostname(): + return True + elif current_dc == 'NONE': + raise CRMDCNotFound('Current DC: NONE') + + return False + + +@retry_on_exception(5, base_delay=2, + exc_type=(CRMResourceNotFound, CRMDCNotFound)) +def is_crm_leader(resource, retry=False): + """ + Returns True if the charm calling this is the elected corosync leader, + as returned by calling the external "crm" command. + + We allow this operation to be retried to avoid the possibility of getting a + false negative. See LP #1396246 for more info. 
+    """
+    if resource == DC_RESOURCE_NAME:
+        return is_crm_dc()
+    cmd = ['crm', 'resource', 'show', resource]
+    try:
+        status = subprocess.check_output(
+            cmd, stderr=subprocess.STDOUT).decode('utf-8')
+    except subprocess.CalledProcessError:
+        status = None
+
+    if status and get_unit_hostname() in status:
+        return True
+
+    if status and "resource %s is NOT running" % (resource) in status:
+        raise CRMResourceNotFound("CRM resource %s not found" % (resource))
+
+    return False
+
+
+def is_leader(resource):
+    log("is_leader is deprecated. Please consider using is_crm_leader "
+        "instead.", level=WARNING)
+    return is_crm_leader(resource)
+
+
+def peer_units(peer_relation="cluster"):
+    peers = []
+    for r_id in (relation_ids(peer_relation) or []):
+        for unit in (relation_list(r_id) or []):
+            peers.append(unit)
+    return peers
+
+
+def peer_ips(peer_relation='cluster', addr_key='private-address'):
+    '''Return a dict of peers and their private-address'''
+    peers = {}
+    for r_id in relation_ids(peer_relation):
+        for unit in relation_list(r_id):
+            peers[unit] = relation_get(addr_key, rid=r_id, unit=unit)
+    return peers
+
+
+def oldest_peer(peers):
+    """Determines who the oldest peer is by comparing unit numbers."""
+    local_unit_no = int(os.getenv('JUJU_UNIT_NAME').split('/')[1])
+    for peer in peers:
+        remote_unit_no = int(peer.split('/')[1])
+        if remote_unit_no < local_unit_no:
+            return False
+    return True
+
+
+def eligible_leader(resource):
+    log("eligible_leader is deprecated. Please consider using "
+        "is_elected_leader instead.", level=WARNING)
+    return is_elected_leader(resource)
+
+
+def https():
+    '''
+    Determines whether enough data has been provided in configuration
+    or relation data to configure HTTPS.
+
+    returns: boolean
+    '''
+    use_https = config_get('use-https')
+    if use_https and bool_from_string(use_https):
+        return True
+    if config_get('ssl_cert') and config_get('ssl_key'):
+        return True
+    # Local import to avoid circular dependency.
+    import charmhelpers.contrib.openstack.cert_utils as cert_utils
+    if (
+        cert_utils.get_certificate_request() and not
+        cert_utils.get_requests_for_local_unit("certificates")
+    ):
+        return False
+    for r_id in relation_ids('certificates'):
+        for unit in relation_list(r_id):
+            ca = relation_get('ca', rid=r_id, unit=unit)
+            if ca:
+                return True
+    for r_id in relation_ids('identity-service'):
+        for unit in relation_list(r_id):
+            # TODO - needs fixing for new helper as ssl_cert/key suffixes with CN
+            rel_state = [
+                relation_get('https_keystone', rid=r_id, unit=unit),
+                relation_get('ca_cert', rid=r_id, unit=unit),
+            ]
+            # NOTE: works around (LP: #1203241)
+            if (None not in rel_state) and ('' not in rel_state):
+                return True
+    return False
+
+
+def determine_api_port(public_port, singlenode_mode=False):
+    '''
+    Determine correct API server listening port based on
+    existence of HTTPS reverse proxy and/or haproxy.
+
+    public_port: int: standard public port for given service
+
+    singlenode_mode: boolean: Shuffle ports when only a single unit is present
+
+    returns: int: the correct listening port for the API service
+    '''
+    i = 0
+    if singlenode_mode:
+        i += 1
+    elif len(peer_units()) > 0 or is_clustered():
+        i += 1
+    if https():
+        i += 1
+    return public_port - (i * 10)
+
+
+def determine_apache_port(public_port, singlenode_mode=False):
+    '''
+    Description: Determine correct apache listening port based on public IP +
+    state of the cluster.
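+
+    Worked example (illustrative numbers): with public_port=9696 on a
+    clustered unit, apache should listen on 9696 - 10 = 9686, leaving the
+    public port free for haproxy.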
+
+    public_port: int: standard public port for given service
+
+    singlenode_mode: boolean: Shuffle ports when only a single unit is present
+
+    returns: int: the correct listening port for the HAProxy service
+    '''
+    i = 0
+    if singlenode_mode:
+        i += 1
+    elif len(peer_units()) > 0 or is_clustered():
+        i += 1
+    return public_port - (i * 10)
+
+
+determine_apache_port_single = functools.partial(
+    determine_apache_port, singlenode_mode=True)
+
+
+def get_hacluster_config(exclude_keys=None):
+    '''
+    Obtains all relevant configuration from charm configuration required
+    for initiating a relation to hacluster:
+
+        ha-bindiface, ha-mcastport, vip, os-internal-hostname,
+        os-admin-hostname, os-public-hostname, os-access-hostname
+
+    param: exclude_keys: list of setting key(s) to be excluded.
+    returns: dict: A dict containing settings keyed by setting name.
+    raises: HAIncompleteConfig if settings are missing or incorrect.
+    '''
+    settings = ['ha-bindiface', 'ha-mcastport', 'vip', 'os-internal-hostname',
+                'os-admin-hostname', 'os-public-hostname', 'os-access-hostname']
+    conf = {}
+    for setting in settings:
+        if exclude_keys and setting in exclude_keys:
+            continue
+
+        conf[setting] = config_get(setting)
+
+    if not valid_hacluster_config():
+        raise HAIncorrectConfig('Insufficient or incorrect config data to '
+                                'configure hacluster.')
+    return conf
+
+
+def valid_hacluster_config():
+    '''
+    Check that either vip or dns-ha is set. If dns-ha then one of os-*-hostname
+    must be set.
+
+    Note: ha-bindiface and ha-mcastport both have defaults and will always
+    be set. We only care that either vip or dns-ha is set.
+
+    :returns: boolean: valid config returns true.
+    raises: HAIncorrectConfig if settings conflict.
+    raises: HAIncompleteConfig if settings are missing.
+    '''
+    vip = config_get('vip')
+    dns = config_get('dns-ha')
+    if not (bool(vip) ^ bool(dns)):
+        msg = ('HA: Either vip or dns-ha must be set but not both in order to '
+               'use high availability')
+        status_set('blocked', msg)
+        raise HAIncorrectConfig(msg)
+
+    # If dns-ha then one of os-*-hostname must be set
+    if dns:
+        dns_settings = ['os-internal-hostname', 'os-admin-hostname',
+                        'os-public-hostname', 'os-access-hostname']
+        # At this point it is unknown if one or all of the possible
+        # network spaces are in HA. Validate at least one is set which is
+        # the minimum required.
+        for setting in dns_settings:
+            if config_get(setting):
+                log('DNS HA: At least one hostname is set {}: {}'
+                    ''.format(setting, config_get(setting)),
+                    level=DEBUG)
+                return True
+
+        msg = ('DNS HA: At least one os-*-hostname(s) must be set to use '
+               'DNS HA')
+        status_set('blocked', msg)
+        raise HAIncompleteConfig(msg)
+
+    log('VIP HA: VIP is set {}'.format(vip), level=DEBUG)
+    return True
+
+
+def canonical_url(configs, vip_setting='vip'):
+    '''
+    Returns the correct HTTP URL to this host given the state of HTTPS
+    configuration and hacluster.
+
+    :configs    : OSTemplateRenderer: A config templating object to inspect for
+                  a complete https context.
+
+    :vip_setting: str: Setting in charm config that specifies
+                  VIP address.
+    '''
+    scheme = 'http'
+    if 'https' in configs.complete_contexts():
+        scheme = 'https'
+    if is_clustered():
+        addr = config_get(vip_setting)
+    else:
+        addr = unit_get('private-address')
+    return '%s://%s' % (scheme, addr)
+
+
+def distributed_wait(modulo=None, wait=None, operation_name='operation'):
+    ''' Distribute operations by waiting based on modulo_distribution
+
+    If modulo and/or wait are not set, check config_get for those values.
+ If config values are not set, default to modulo=3 and wait=30. + + :param modulo: int The modulo number creates the group distribution + :param wait: int The constant time wait value + :param operation_name: string Operation name for status message + i.e. 'restart' + :side effect: Calls config_get() + :side effect: Calls log() + :side effect: Calls status_set() + :side effect: Calls time.sleep() + ''' + if modulo is None: + modulo = config_get('modulo-nodes') or 3 + if wait is None: + wait = config_get('known-wait') or 30 + if juju_is_leader(): + # The leader should never wait + calculated_wait = 0 + else: + # non_zero_wait=True guarantees the non-leader who gets modulo 0 + # will still wait + calculated_wait = modulo_distribution(modulo=modulo, wait=wait, + non_zero_wait=True) + msg = "Waiting {} seconds for {} ...".format(calculated_wait, + operation_name) + log(msg, DEBUG) + status_set('maintenance', msg) + time.sleep(calculated_wait) + + +def get_managed_services_and_ports(services, external_ports, + external_services=None, + port_conv_f=determine_apache_port_single): + """Get the services and ports managed by this charm. + + Return only the services and corresponding ports that are managed by this + charm. This excludes haproxy when there is a relation with hacluster. This + is because this charm passes responsibility for stopping and starting + haproxy to hacluster. + + Similarly, if a relation with hacluster exists then the ports returned by + this method correspond to those managed by the apache server rather than + haproxy. + + :param services: List of services. + :type services: List[str] + :param external_ports: List of ports managed by external services. + :type external_ports: List[int] + :param external_services: List of services to be removed if ha relation is + present. + :type external_services: List[str] + :param port_conv_f: Function to apply to ports to calculate the ports + managed by services controlled by this charm. + :type port_convert_func: f() + :returns: A tuple containing a list of services first followed by a list of + ports. + :rtype: Tuple[List[str], List[int]] + """ + if external_services is None: + external_services = ['haproxy'] + if relation_ids('ha'): + for svc in external_services: + try: + services.remove(svc) + except ValueError: + pass + external_ports = [port_conv_f(p) for p in external_ports] + return services, external_ports diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/README.hardening.md b/ceph-osd/hooks/charmhelpers/contrib/hardening/README.hardening.md new file mode 100644 index 00000000..91280c03 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/README.hardening.md @@ -0,0 +1,38 @@ +# Juju charm-helpers hardening library + +## Description + +This library provides multiple implementations of system and application +hardening that conform to the standards of http://hardening.io/. + +Current implementations include: + + * OS + * SSH + * MySQL + * Apache + +## Requirements + +* Juju Charms + +## Usage + +1. Synchronise this library into your charm and add the harden() decorator + (from contrib.hardening.harden) to any functions or methods you want to use + to trigger hardening of your application/system. + +2. Add a config option called 'harden' to your charm config.yaml and set it to + a space-delimited list of hardening modules you want to run e.g. "os ssh" + +3. 
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/README.hardening.md b/ceph-osd/hooks/charmhelpers/contrib/hardening/README.hardening.md
new file mode 100644
index 00000000..91280c03
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/README.hardening.md
@@ -0,0 +1,38 @@
+# Juju charm-helpers hardening library
+
+## Description
+
+This library provides multiple implementations of system and application
+hardening that conform to the standards of http://hardening.io/.
+
+Current implementations include:
+
+ * OS
+ * SSH
+ * MySQL
+ * Apache
+
+## Requirements
+
+* Juju Charms
+
+## Usage
+
+1. Synchronise this library into your charm and add the harden() decorator
+   (from contrib.hardening.harden) to any functions or methods you want to
+   use to trigger hardening of your application/system.
+
+2. Add a config option called 'harden' to your charm config.yaml and set it
+   to a space-delimited list of hardening modules you want to run e.g.
+   "os ssh"
+
+3. Override any config defaults (contrib.hardening.defaults) by adding a file
+   called hardening.yaml to your charm root containing the name(s) of the
+   modules whose settings you want to override at root level and then any
+   settings with overrides e.g.
+
+   os:
+     general:
+       desktop_enable: True
+
+4. Now just run your charm as usual and hardening will be applied each time
+   the hook runs.
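A minimal usage sketch may help tie the README steps together; this is illustrative only and not part of the diff. The hook name and the value of the 'harden' config option are assumptions.

```python
# Hypothetical charm hook using the decorator from step 1. With the
# 'harden' config option set to e.g. "os ssh", the named hardening
# modules run each time this hook fires, before the hook body executes.
from charmhelpers.contrib.hardening.harden import harden


@harden()
def config_changed():
    pass  # normal hook logic goes here
```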
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/__init__.py
new file mode 100644
index 00000000..30a3e943
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/apache/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/apache/__init__.py
new file mode 100644
index 00000000..58bebd84
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/apache/__init__.py
@@ -0,0 +1,17 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from os import path
+
+TEMPLATES_DIR = path.join(path.dirname(__file__), 'templates')
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/apache/checks/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/apache/checks/__init__.py
new file mode 100644
index 00000000..3bc2ebd4
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/apache/checks/__init__.py
@@ -0,0 +1,29 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from charmhelpers.core.hookenv import (
+    log,
+    DEBUG,
+)
+from charmhelpers.contrib.hardening.apache.checks import config
+
+
+def run_apache_checks():
+    log("Starting Apache hardening checks.", level=DEBUG)
+    checks = config.get_audits()
+    for check in checks:
+        log("Running '%s' check" % (check.__class__.__name__), level=DEBUG)
+        check.ensure_compliance()
+
+    log("Apache hardening checks complete.", level=DEBUG)
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/apache/checks/config.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/apache/checks/config.py
new file mode 100644
index 00000000..e81a5f0b
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/apache/checks/config.py
@@ -0,0 +1,101 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import re
+import subprocess
+
+
+from charmhelpers.core.hookenv import (
+    log,
+    INFO,
+)
+from charmhelpers.contrib.hardening.audits.file import (
+    FilePermissionAudit,
+    DirectoryPermissionAudit,
+    NoReadWriteForOther,
+    TemplatedFile,
+    DeletedFile
+)
+from charmhelpers.contrib.hardening.audits.apache import DisabledModuleAudit
+from charmhelpers.contrib.hardening.apache import TEMPLATES_DIR
+from charmhelpers.contrib.hardening import utils
+
+
+def get_audits():
+    """Get Apache hardening config audits.
+
+    :returns: list of audits
+    """
+    if subprocess.call(['which', 'apache2'], stdout=subprocess.PIPE) != 0:
+        log("Apache server does not appear to be installed on this node - "
+            "skipping apache hardening", level=INFO)
+        return []
+
+    context = ApacheConfContext()
+    settings = utils.get_settings('apache')
+    audits = [
+        FilePermissionAudit(paths=os.path.join(
+                            settings['common']['apache_dir'], 'apache2.conf'),
+                            user='root', group='root', mode=0o0640),
+
+        TemplatedFile(os.path.join(settings['common']['apache_dir'],
+                                   'mods-available/alias.conf'),
+                      context,
+                      TEMPLATES_DIR,
+                      mode=0o0640,
+                      user='root',
+                      service_actions=[{'service': 'apache2',
+                                        'actions': ['restart']}]),
+
+        TemplatedFile(os.path.join(settings['common']['apache_dir'],
+                                   'conf-enabled/99-hardening.conf'),
+                      context,
+                      TEMPLATES_DIR,
+                      mode=0o0640,
+                      user='root',
+                      service_actions=[{'service': 'apache2',
+                                        'actions': ['restart']}]),
+
+        DirectoryPermissionAudit(settings['common']['apache_dir'],
+                                 user='root',
+                                 group='root',
+                                 mode=0o0750),
+
+        DisabledModuleAudit(settings['hardening']['modules_to_disable']),
+
+        NoReadWriteForOther(settings['common']['apache_dir']),
+
+        DeletedFile(['/var/www/html/index.html'])
+    ]
+
+    return audits
+
+
+class ApacheConfContext(object):
+    """Defines the set of key/value pairs to set in an Apache config file.
+
+    This context, when called, will return a dictionary containing the
+    key/value pairs of settings to specify in the
+    /etc/apache2/conf-enabled/99-hardening.conf file.
+    """
+    def __call__(self):
+        settings = utils.get_settings('apache')
+        ctxt = settings['hardening']
+
+        out = subprocess.check_output(['apache2', '-v']).decode('utf-8')
+        ctxt['apache_version'] = re.search(r'.+version: Apache/(.+?)\s.+',
+                                           out).group(1)
+        ctxt['apache_icondir'] = '/usr/share/apache2/icons/'
+        return ctxt
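The get_audits() pattern above generalises: a charm can declare its own audit list from the same primitives. A hedged sketch follows; the config directory, file name, and mode are hypothetical.

```python
import os

from charmhelpers.contrib.hardening.audits.file import (
    DeletedFile,
    FilePermissionAudit,
)


def get_my_audits(conf_dir='/etc/mycharm'):  # hypothetical location
    """Declare a small audit set for a charm-managed config file."""
    return [
        FilePermissionAudit(paths=os.path.join(conf_dir, 'mycharm.conf'),
                            user='root', group='root', mode=0o640),
        DeletedFile(['/var/www/html/index.html']),
    ]


for audit in get_my_audits():
    audit.ensure_compliance()
```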
+ """ + def __call__(self): + settings = utils.get_settings('apache') + ctxt = settings['hardening'] + + out = subprocess.check_output(['apache2', '-v']).decode('utf-8') + ctxt['apache_version'] = re.search(r'.+version: Apache/(.+?)\s.+', + out).group(1) + ctxt['apache_icondir'] = '/usr/share/apache2/icons/' + return ctxt diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/apache/templates/99-hardening.conf b/ceph-osd/hooks/charmhelpers/contrib/hardening/apache/templates/99-hardening.conf new file mode 100644 index 00000000..22b68041 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/apache/templates/99-hardening.conf @@ -0,0 +1,32 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### + + + + # http://httpd.apache.org/docs/2.4/upgrading.html + {% if apache_version > '2.2' -%} + Require all granted + {% else -%} + Order Allow,Deny + Deny from all + {% endif %} + + + + + Options -Indexes -FollowSymLinks + AllowOverride None + + + + Options -Indexes -FollowSymLinks + AllowOverride None + + +TraceEnable {{ traceenable }} +ServerTokens {{ servertokens }} + +SSLHonorCipherOrder {{ honor_cipher_order }} +SSLCipherSuite {{ cipher_suite }} diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/apache/templates/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/apache/templates/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/apache/templates/alias.conf b/ceph-osd/hooks/charmhelpers/contrib/hardening/apache/templates/alias.conf new file mode 100644 index 00000000..e46a58a3 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/apache/templates/alias.conf @@ -0,0 +1,31 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### + + # + # Aliases: Add here as many aliases as you need (with no limit). The format is + # Alias fakename realname + # + # Note that if you include a trailing / on fakename then the server will + # require it to be present in the URL. So "/icons" isn't aliased in this + # example, only "/icons/". If the fakename is slash-terminated, then the + # realname must also be slash terminated, and if the fakename omits the + # trailing slash, the realname must also omit it. + # + # We include the /icons/ alias for FancyIndexed directory listings. If + # you do not use FancyIndexing, you may comment this out. + # + Alias /icons/ "{{ apache_icondir }}/" + + + Options -Indexes -MultiViews -FollowSymLinks + AllowOverride None +{% if apache_version == '2.4' -%} + Require all granted +{% else -%} + Order allow,deny + Allow from all +{% endif %} + + diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/audits/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/audits/__init__.py new file mode 100644 index 00000000..6dd5b05f --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/audits/__init__.py @@ -0,0 +1,54 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/audits/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/audits/__init__.py
new file mode 100644
index 00000000..6dd5b05f
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/audits/__init__.py
@@ -0,0 +1,54 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+class BaseAudit(object):  # NO-QA
+    """Base class for hardening checks.
+
+    The lifecycle of a hardening check is to first check to see if the system
+    is in compliance for the specified check. If it is not in compliance, the
+    check should take whatever action is needed to bring the system into
+    compliance (or raise an exception if it cannot).
+    """
+    def __init__(self, *args, **kwargs):
+        self.unless = kwargs.get('unless', None)
+        super(BaseAudit, self).__init__()
+
+    def ensure_compliance(self):
+        """Checks to see if the current hardening check is in compliance or
+        not.
+
+        If the check that is performed is not in compliance, then an exception
+        should be raised.
+        """
+        pass
+
+    def _take_action(self):
+        """Determines whether to perform the action or not.
+
+        Checks whether or not an action should be taken. This is determined by
+        the truthy value for the unless parameter. If unless is a callback
+        method, it will be invoked with no parameters in order to determine
+        whether or not the action should be taken. Otherwise, the truthy value
+        of the unless attribute will determine if the action should be
+        performed.
+        """
+        # Do the action if there isn't an unless override.
+        if self.unless is None:
+            return True
+
+        # Invoke the callback if there is one.
+        if hasattr(self.unless, '__call__'):
+            return not self.unless()
+
+        return not self.unless
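The 'unless' guard documented in _take_action() above behaves as follows; a minimal sketch (the NoopAudit subclass is hypothetical):

```python
from charmhelpers.contrib.hardening.audits import BaseAudit


class NoopAudit(BaseAudit):
    """Hypothetical audit that only reports whether it would act."""
    def ensure_compliance(self):
        print("take action:", self._take_action())


NoopAudit().ensure_compliance()                      # take action: True
NoopAudit(unless=True).ensure_compliance()           # take action: False
NoopAudit(unless=lambda: False).ensure_compliance()  # take action: True
```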
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/audits/apache.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/audits/apache.py
new file mode 100644
index 00000000..31db8f62
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/audits/apache.py
@@ -0,0 +1,101 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+import subprocess
+
+from charmhelpers.core.hookenv import (
+    log,
+    INFO,
+    ERROR,
+)
+
+from charmhelpers.contrib.hardening.audits import BaseAudit
+
+
+class DisabledModuleAudit(BaseAudit):
+    """Audits Apache2 modules.
+
+    Determines if the apache2 modules are enabled. If the modules are enabled
+    then they are removed in the ensure_compliance.
+    """
+    def __init__(self, modules):
+        # Initialise BaseAudit so the 'unless' guard is set up.
+        super(DisabledModuleAudit, self).__init__()
+        if modules is None:
+            self.modules = []
+        elif isinstance(modules, str):
+            self.modules = [modules]
+        else:
+            self.modules = modules
+
+    def ensure_compliance(self):
+        """Ensures that the modules are not loaded."""
+        if not self.modules:
+            return
+
+        try:
+            loaded_modules = self._get_loaded_modules()
+            non_compliant_modules = []
+            for module in self.modules:
+                if module in loaded_modules:
+                    log("Module '%s' is enabled but should not be." %
+                        (module), level=INFO)
+                    non_compliant_modules.append(module)
+
+            if len(non_compliant_modules) == 0:
+                return
+
+            for module in non_compliant_modules:
+                self._disable_module(module)
+            self._restart_apache()
+        except subprocess.CalledProcessError as e:
+            log('Error occurred auditing apache module compliance. '
+                'This may have been already reported. '
+                'Output is: %s' % e.output, level=ERROR)
+
+    @staticmethod
+    def _get_loaded_modules():
+        """Returns the modules which are enabled in Apache."""
+        output = subprocess.check_output(['apache2ctl', '-M']).decode('utf-8')
+        modules = []
+        for line in output.splitlines():
+            # Each line of the enabled module output looks like:
+            #  module_name (static|shared)
+            # Plus a header line at the top of the output which is stripped
+            # out by the regex.
+            matcher = re.search(r'^ (\S*)_module (\S*)', line)
+            if matcher:
+                modules.append(matcher.group(1))
+        return modules
+
+    @staticmethod
+    def _disable_module(module):
+        """Disables the specified module in Apache."""
+        try:
+            subprocess.check_call(['a2dismod', module])
+        except subprocess.CalledProcessError as e:
+            # Note: catch error here to allow the attempt of disabling
+            # multiple modules in one go rather than failing after the
+            # first module fails.
+            log('Error occurred disabling module %s. '
+                'Output is: %s' % (module, e.output), level=ERROR)
+
+    @staticmethod
+    def _restart_apache():
+        """Restarts the apache process."""
+        subprocess.check_output(['service', 'apache2', 'restart'])
+
+    @staticmethod
+    def is_ssl_enabled():
+        """Check if the SSL module is enabled or not."""
+        return 'ssl' in DisabledModuleAudit._get_loaded_modules()
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/audits/apt.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/audits/apt.py
new file mode 100644
index 00000000..1b22925b
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/audits/apt.py
@@ -0,0 +1,101 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from charmhelpers.fetch import (
+    apt_cache,
+    apt_purge
+)
+from charmhelpers.core.hookenv import (
+    log,
+    DEBUG,
+    WARNING,
+)
+from charmhelpers.contrib.hardening.audits import BaseAudit
+from charmhelpers.fetch import ubuntu_apt_pkg as apt_pkg
+
+
+class AptConfig(BaseAudit):
+
+    def __init__(self, config, **kwargs):
+        # Initialise BaseAudit so the 'unless' guard is set up.
+        super(AptConfig, self).__init__(**kwargs)
+        self.config = config
+
+    def verify_config(self):
+        apt_pkg.init()
+        for cfg in self.config:
+            value = apt_pkg.config.get(cfg['key'], cfg.get('default', ''))
+            if value and value != cfg['expected']:
+                log("APT config '%s' has unexpected value '%s' "
+                    "(expected='%s')" %
+                    (cfg['key'], value, cfg['expected']), level=WARNING)
+
+    def ensure_compliance(self):
+        self.verify_config()
+
+
+class RestrictedPackages(BaseAudit):
+    """Class used to audit restricted packages on the system."""
+
+    def __init__(self, pkgs, **kwargs):
+        super(RestrictedPackages, self).__init__(**kwargs)
+        if isinstance(pkgs, str) or not hasattr(pkgs, '__iter__'):
+            self.pkgs = pkgs.split()
+        else:
+            self.pkgs = pkgs
+
+    def ensure_compliance(self):
+        cache = apt_cache()
+
+        for p in self.pkgs:
+            if p not in cache:
+                continue
+
+            pkg = cache[p]
+            if not self.is_virtual_package(pkg):
+                if not pkg.current_ver:
+                    log("Package '%s' is not installed." % pkg.name,
+                        level=DEBUG)
+                    continue
+                else:
+                    log("Restricted package '%s' is installed" % pkg.name,
+                        level=WARNING)
+                    self.delete_package(cache, pkg)
+            else:
+                log("Checking restricted virtual package '%s' provides" %
+                    pkg.name, level=DEBUG)
+                self.delete_package(cache, pkg)
+
+    def delete_package(self, cache, pkg):
+        """Deletes the package from the system.
+
+        Deletes the package from the system, properly handling virtual
+        packages.
+
+        :param cache: the apt cache
+        :param pkg: the package to remove
+        """
+        if self.is_virtual_package(pkg):
+            log("Package '%s' appears to be virtual - purging provides" %
+                pkg.name, level=DEBUG)
+            for _p in pkg.provides_list:
+                self.delete_package(cache, _p[2].parent_pkg)
+        elif not pkg.current_ver:
+            log("Package '%s' not installed" % pkg.name, level=DEBUG)
+            return
+        else:
+            log("Purging package '%s'" % pkg.name, level=DEBUG)
+            apt_purge(pkg.name)
+
+    def is_virtual_package(self, pkg):
+        return (pkg.get('has_provides', False) and
+                not pkg.get('has_versions', False))
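Usage of RestrictedPackages is straightforward; a hedged sketch (the package names are arbitrary examples, and this needs to run inside a hook environment):

```python
from charmhelpers.contrib.hardening.audits.apt import RestrictedPackages

# Accepts a list or a whitespace-separated string (non-iterable input
# is split() in __init__).
audit = RestrictedPackages(['telnetd', 'rsh-server'])
audit.ensure_compliance()  # purges any of these found installed in the cache
```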
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/audits/file.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/audits/file.py
new file mode 100644
index 00000000..84cc2494
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/audits/file.py
@@ -0,0 +1,549 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import grp
+import os
+import pwd
+import re
+
+from subprocess import (
+    CalledProcessError,
+    check_output,
+    check_call,
+)
+from traceback import format_exc
+from stat import (
+    S_ISGID,
+    S_ISUID
+)
+
+from charmhelpers.core.hookenv import (
+    log,
+    DEBUG,
+    INFO,
+    WARNING,
+    ERROR,
+)
+from charmhelpers.core import unitdata
+from charmhelpers.core.host import file_hash
+from charmhelpers.contrib.hardening.audits import BaseAudit
+from charmhelpers.contrib.hardening.templating import (
+    get_template_path,
+    render_and_write,
+)
+from charmhelpers.contrib.hardening import utils
+
+
+class BaseFileAudit(BaseAudit):
+    """Base class for file audits.
+
+    Provides API stubs for the compliance check flow that must be implemented
+    by any class that inherits from this one.
+    """
+
+    def __init__(self, paths, always_comply=False, *args, **kwargs):
+        """
+        :param paths: string path or list of paths of files to which the
+                      compliance criteria will be applied.
+        :param always_comply: if True, compliance criteria are always
+                              applied; otherwise compliance is skipped for
+                              non-existent paths.
+        """
+        super(BaseFileAudit, self).__init__(*args, **kwargs)
+        self.always_comply = always_comply
+        if isinstance(paths, str) or not hasattr(paths, '__iter__'):
+            self.paths = [paths]
+        else:
+            self.paths = paths
+
+    def ensure_compliance(self):
+        """Ensure that all registered files comply with the registered
+        criteria.
+        """
+        for p in self.paths:
+            if os.path.exists(p):
+                if self.is_compliant(p):
+                    continue
+
+                log('File %s is not in compliance.' % p, level=INFO)
+            else:
+                if not self.always_comply:
+                    log("Non-existent path '%s' - skipping compliance check"
+                        % (p), level=INFO)
+                    continue
+
+            if self._take_action():
+                log("Applying compliance criteria to '%s'" % (p), level=INFO)
+                self.comply(p)
+
+    def is_compliant(self, path):
+        """Audits the path to see if it is in compliance.
+
+        :param path: the path to the file that should be checked.
+        """
+        raise NotImplementedError
+
+    def comply(self, path):
+        """Enforces the compliance of a path.
+
+        :param path: the path to the file that should be enforced.
+        """
+        raise NotImplementedError
+
+    @classmethod
+    def _get_stat(cls, path):
+        """Returns the POSIX stat information for the specified file path.
+
+        :param path: the path to get the stat information for.
+        :returns: an os.stat_result object for the path (the path is
+                  expected to exist when this is called).
+        """
+        return os.stat(path)
+
+
+class FilePermissionAudit(BaseFileAudit):
+    """Implements an audit for file permissions and ownership for a user.
+
+    This class implements functionality that ensures that a specific
+    user/group will own the file(s) specified and that the permissions
+    specified are applied properly to the file.
+    """
+    def __init__(self, paths, user, group=None, mode=0o600, **kwargs):
+        self.user = user
+        self.group = group
+        self.mode = mode
+        super(FilePermissionAudit, self).__init__(paths, user, group, mode,
+                                                  **kwargs)
+
+    @property
+    def user(self):
+        return self._user
+
+    @user.setter
+    def user(self, name):
+        try:
+            user = pwd.getpwnam(name)
+        except KeyError:
+            log('Unknown user %s' % name, level=ERROR)
+            user = None
+        self._user = user
+
+    @property
+    def group(self):
+        return self._group
+
+    @group.setter
+    def group(self, name):
+        try:
+            group = None
+            if name:
+                group = grp.getgrnam(name)
+            else:
+                group = grp.getgrgid(self.user.pw_gid)
+        except KeyError:
+            log('Unknown group %s' % name, level=ERROR)
+        self._group = group
+
+    def is_compliant(self, path):
+        """Checks if the path is in compliance.
+
+        Used to determine if the path specified meets the necessary
+        requirements to be in compliance with the check itself.
+
+        :param path: the file path to check
+        :returns: True if the path is compliant, False otherwise.
+        """
+        stat = self._get_stat(path)
+        user = self.user
+        group = self.group
+
+        compliant = True
+        if stat.st_uid != user.pw_uid or stat.st_gid != group.gr_gid:
+            log('File %s is not owned by %s:%s.' % (path, user.pw_name,
+                                                    group.gr_name),
+                level=INFO)
+            compliant = False
+
+        # POSIX refers to the st_mode bits as corresponding to both the
+        # file type and file permission bits, where the least significant 12
+        # bits (o7777) are the suid (11), sgid (10), sticky bits (9), and the
+        # file permission bits (8-0)
+        perms = stat.st_mode & 0o7777
+        if perms != self.mode:
+            log('File %s has incorrect permissions, currently set to %s' %
+                (path, oct(stat.st_mode & 0o7777)), level=INFO)
+            compliant = False
+
+        return compliant
+
+    def comply(self, path):
+        """Issues a chown and chmod to the file paths specified."""
+        utils.ensure_permissions(path, self.user.pw_name, self.group.gr_name,
+                                 self.mode)
+
+
+class DirectoryPermissionAudit(FilePermissionAudit):
+    """Performs a permission check for the specified directory path."""
+
+    def __init__(self, paths, user, group=None, mode=0o600,
+                 recursive=True, **kwargs):
+        super(DirectoryPermissionAudit, self).__init__(paths, user, group,
+                                                       mode, **kwargs)
+        self.recursive = recursive
+
+    def is_compliant(self, path):
+        """Checks if the directory is compliant.
+
+        Used to determine if the path specified and all of its children
+        directories are in compliance with the check itself.
+
+        :param path: the directory path to check
+        :returns: True if the directory tree is compliant, otherwise False.
+        """
+        if not os.path.isdir(path):
+            log('Path specified %s is not a directory.' % path, level=ERROR)
+            raise ValueError("%s is not a directory." % path)
+
+        if not self.recursive:
+            return super(DirectoryPermissionAudit, self).is_compliant(path)
+
+        compliant = True
+        for root, dirs, _ in os.walk(path):
+            if len(dirs) > 0:
+                continue
+
+            if not super(DirectoryPermissionAudit, self).is_compliant(root):
+                compliant = False
+                continue
+
+        return compliant
+
+    def comply(self, path):
+        for root, dirs, _ in os.walk(path):
+            if len(dirs) > 0:
+                super(DirectoryPermissionAudit, self).comply(root)
+
+
+class ReadOnly(BaseFileAudit):
+    """Audits that files and folders are read only."""
+    def __init__(self, paths, *args, **kwargs):
+        super(ReadOnly, self).__init__(paths=paths, *args, **kwargs)
+
+    def is_compliant(self, path):
+        try:
+            output = check_output(['find', path, '-perm', '-go+w',
+                                   '-type', 'f']).strip()
+
+            # The find above will find any files which have permission sets
+            # which allow too broad of write access. As such, the path is
+            # compliant if there is no output.
+            if output:
+                return False
+
+            return True
+        except CalledProcessError as e:
+            log('Error occurred while finding writable files for %s. '
+                'Error information is: command %s failed with returncode '
+                '%d and output %s.\n%s' % (path, e.cmd, e.returncode,
+                                           e.output, format_exc(e)),
+                level=ERROR)
+            return False
+
+    def comply(self, path):
+        try:
+            check_output(['chmod', 'go-w', '-R', path])
+        except CalledProcessError as e:
+            log('Error occurred removing writable permissions for %s. '
+                'Error information is: command %s failed with returncode '
+                '%d and output %s.\n%s' % (path, e.cmd, e.returncode,
+                                           e.output, format_exc(e)),
+                level=ERROR)
+
+
+class NoReadWriteForOther(BaseFileAudit):
+    """Ensures that the files found under the base path are not readable or
+    writable by anyone other than the owner or the group.
+    """
+    def __init__(self, paths):
+        super(NoReadWriteForOther, self).__init__(paths)
+
+    def is_compliant(self, path):
+        try:
+            cmd = ['find', path, '-perm', '-o+r', '-type', 'f', '-o',
+                   '-perm', '-o+w', '-type', 'f']
+            output = check_output(cmd).strip()
+
+            # The find above here will find any files which have read or
+            # write permissions for other, meaning there is too broad of
+            # access to read/write the file. As such, the path is compliant
+            # if there's no output.
+            if output:
+                return False
+
+            return True
+        except CalledProcessError as e:
+            log('Error occurred while finding files which are readable or '
+                'writable to the world in %s. '
+                'Command output is: %s.' % (path, e.output), level=ERROR)
+            return False
+
+    def comply(self, path):
+        try:
+            check_output(['chmod', '-R', 'o-rw', path])
+        except CalledProcessError as e:
+            log('Error occurred attempting to change modes of files under '
+                'path %s. Output of command is: %s' % (path, e.output),
+                level=ERROR)
+
+
+class NoSUIDSGIDAudit(BaseFileAudit):
+    """Audits that specified files do not have SUID/SGID bits set."""
+    def __init__(self, paths, *args, **kwargs):
+        super(NoSUIDSGIDAudit, self).__init__(paths=paths, *args, **kwargs)
+
+    def is_compliant(self, path):
+        stat = self._get_stat(path)
+        if (stat.st_mode & (S_ISGID | S_ISUID)) != 0:
+            return False
+
+        return True
+
+    def comply(self, path):
+        try:
+            log('Removing suid/sgid from %s.' % path, level=DEBUG)
+            check_output(['chmod', '-s', path])
+        except CalledProcessError as e:
+            log('Error occurred removing suid/sgid from %s. '
+                'Error information is: command %s failed with returncode '
+                '%d and output %s.\n%s' % (path, e.cmd, e.returncode,
                                           e.output, format_exc(e)),
+                level=ERROR)
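The permission arithmetic used by FilePermissionAudit and NoSUIDSGIDAudit above can be reproduced with the standard library alone; a short illustrative snippet (any existing path will do):

```python
import os
import stat

st = os.stat('/bin/su')                      # any existing path
perms = st.st_mode & 0o7777                  # suid/sgid/sticky + rwx bits
print(oct(perms))                            # e.g. 0o4755
print(bool(st.st_mode & (stat.S_ISUID | stat.S_ISGID)))  # suid/sgid set?
```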
+ """ + same_templates = self.templates_match(path) + same_content = self.contents_match(path) + same_permissions = self.permissions_match(path) + + if same_content and same_permissions and same_templates: + return True + + return False + + def run_service_actions(self): + """Run any actions on services requested.""" + if not self.service_actions: + return + + for svc_action in self.service_actions: + name = svc_action['service'] + actions = svc_action['actions'] + log("Running service '%s' actions '%s'" % (name, actions), + level=DEBUG) + for action in actions: + cmd = ['service', name, action] + try: + check_call(cmd) + except CalledProcessError as exc: + log("Service name='%s' action='%s' failed - %s" % + (name, action, exc), level=WARNING) + + def comply(self, path): + """Ensures the contents and the permissions of the file. + + :param path: the path to correct + """ + dirname = os.path.dirname(path) + if not os.path.exists(dirname): + os.makedirs(dirname) + + self.pre_write() + render_and_write(self.template_dir, path, self.context()) + utils.ensure_permissions(path, self.user, self.group, self.mode) + self.run_service_actions() + self.save_checksum(path) + self.post_write() + + def pre_write(self): + """Invoked prior to writing the template.""" + pass + + def post_write(self): + """Invoked after writing the template.""" + pass + + def templates_match(self, path): + """Determines if the template files are the same. + + The template file equality is determined by the hashsum of the + template files themselves. If there is no hashsum, then the content + cannot be sure to be the same so treat it as if they changed. + Otherwise, return whether or not the hashsums are the same. + + :param path: the path to check + :returns: boolean + """ + template_path = get_template_path(self.template_dir, path) + key = 'hardening:template:%s' % template_path + template_checksum = file_hash(template_path) + kv = unitdata.kv() + stored_tmplt_checksum = kv.get(key) + if not stored_tmplt_checksum: + kv.set(key, template_checksum) + kv.flush() + log('Saved template checksum for %s.' % template_path, + level=DEBUG) + # Since we don't have a template checksum, then assume it doesn't + # match and return that the template is different. + return False + elif stored_tmplt_checksum != template_checksum: + kv.set(key, template_checksum) + kv.flush() + log('Updated template checksum for %s.' % template_path, + level=DEBUG) + return False + + # Here the template hasn't changed based upon the calculated + # checksum of the template and what was previously stored. + return True + + def contents_match(self, path): + """Determines if the file content is the same. + + This is determined by comparing hashsum of the file contents and + the saved hashsum. If there is no hashsum, then the content cannot + be sure to be the same so treat them as if they are not the same. + Otherwise, return True if the hashsums are the same, False if they + are not the same. + + :param path: the file to check. + """ + checksum = file_hash(path) + + kv = unitdata.kv() + stored_checksum = kv.get('hardening:%s' % path) + if not stored_checksum: + # If the checksum hasn't been generated, return False to ensure + # the file is written and the checksum stored. + log('Checksum for %s has not been calculated.' % path, level=DEBUG) + return False + elif stored_checksum != checksum: + log('Checksum mismatch for %s.' 
+
+
+class DeletedFile(BaseFileAudit):
+    """Audit to ensure that a file is deleted."""
+    def __init__(self, paths):
+        super(DeletedFile, self).__init__(paths)
+
+    def is_compliant(self, path):
+        return not os.path.exists(path)
+
+    def comply(self, path):
+        os.remove(path)
+
+
+class FileContentAudit(BaseFileAudit):
+    """Audit the contents of a file."""
+    def __init__(self, paths, cases, **kwargs):
+        # Cases we expect to pass
+        self.pass_cases = cases.get('pass', [])
+        # Cases we expect to fail
+        self.fail_cases = cases.get('fail', [])
+        super(FileContentAudit, self).__init__(paths, **kwargs)
+
+    def is_compliant(self, path):
+        """
+        Given a set of content matching cases i.e. tuple(regex, bool) where
+        the bool value denotes whether or not the regex is expected to match,
+        check that all cases match as expected with the contents of the file.
+        Cases can be expected to pass or fail.
+
+        :param path: Path of file to check.
+        :returns: Boolean value representing whether or not all cases are
+                  found to be compliant.
+        """
+        log("Auditing contents of file '%s'" % (path), level=DEBUG)
+        with open(path, 'r') as fd:
+            contents = fd.read()
+
+        matches = 0
+        for pattern in self.pass_cases:
+            key = re.compile(pattern, flags=re.MULTILINE)
+            results = re.search(key, contents)
+            if results:
+                matches += 1
+            else:
+                log("Pattern '%s' was expected to pass but instead it failed"
+                    % (pattern), level=WARNING)
+
+        for pattern in self.fail_cases:
+            key = re.compile(pattern, flags=re.MULTILINE)
+            results = re.search(key, contents)
+            if not results:
+                matches += 1
+            else:
+                log("Pattern '%s' was expected to fail but instead it passed"
+                    % (pattern), level=WARNING)
+
+        total = len(self.pass_cases) + len(self.fail_cases)
+        log("Checked %s cases and %s passed" % (total, matches), level=DEBUG)
+        return matches == total
+
+    def comply(self, *args, **kwargs):
+        """NOOP since we just issue warnings. This is to avoid the
+        NotImplementedError.
+        """
+        log("Not applying any compliance criteria, only checks.", level=INFO)
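A short sketch of FileContentAudit with one expected-pass and one expected-fail regex; the file path and the patterns are illustrative only.

```python
from charmhelpers.contrib.hardening.audits.file import FileContentAudit

cases = {
    'pass': [r'^PermitRootLogin\s+no'],        # must match
    'fail': [r'^PermitEmptyPasswords\s+yes'],  # must not match
}
audit = FileContentAudit('/etc/ssh/sshd_config', cases)
print(audit.is_compliant('/etc/ssh/sshd_config'))
```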
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/defaults/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/defaults/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/defaults/apache.yaml b/ceph-osd/hooks/charmhelpers/contrib/hardening/defaults/apache.yaml
new file mode 100644
index 00000000..0f940d4c
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/defaults/apache.yaml
@@ -0,0 +1,16 @@
+# NOTE: this file contains the default configuration for the 'apache'
+#       hardening code. If you want to override any settings you must add
+#       them to a file called hardening.yaml in the root directory of your
+#       charm using the name 'apache' as the root key followed by any of the
+#       following with new values.
+
+common:
+    apache_dir: '/etc/apache2'
+
+hardening:
+    traceenable: 'off'
+    allowed_http_methods: "GET POST"
+    modules_to_disable: [ cgi, cgid ]
+    servertokens: 'Prod'
+    honor_cipher_order: 'on'
+    cipher_suite: 'ALL:+MEDIUM:+HIGH:!LOW:!MD5:!RC4:!eNULL:!aNULL:!3DES'
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/defaults/apache.yaml.schema b/ceph-osd/hooks/charmhelpers/contrib/hardening/defaults/apache.yaml.schema
new file mode 100644
index 00000000..c112137c
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/defaults/apache.yaml.schema
@@ -0,0 +1,12 @@
+# NOTE: this schema must contain all valid keys from its associated defaults
+#       file. It is used to validate user-provided overrides.
+common:
+    apache_dir:
+
+hardening:
+    traceenable:
+    allowed_http_methods:
+    modules_to_disable:
+    servertokens:
+    honor_cipher_order:
+    cipher_suite:
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/defaults/mysql.yaml b/ceph-osd/hooks/charmhelpers/contrib/hardening/defaults/mysql.yaml
new file mode 100644
index 00000000..682d22bf
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/defaults/mysql.yaml
@@ -0,0 +1,38 @@
+# NOTE: this file contains the default configuration for the 'mysql'
+#       hardening code. If you want to override any settings you must add
+#       them to a file called hardening.yaml in the root directory of your
+#       charm using the name 'mysql' as the root key followed by any of the
+#       following with new values.
+
+hardening:
+    mysql-conf: /etc/mysql/my.cnf
+    hardening-conf: /etc/mysql/conf.d/hardening.cnf
+
+security:
+    # @see http://www.symantec.com/connect/articles/securing-mysql-step-step
+    # @see http://dev.mysql.com/doc/refman/5.7/en/server-options.html#option_mysqld_chroot
+    chroot: None
+
+    # @see http://dev.mysql.com/doc/refman/5.7/en/server-options.html#option_mysqld_safe-user-create
+    safe-user-create: 1
+
+    # @see http://dev.mysql.com/doc/refman/5.7/en/server-options.html#option_mysqld_secure-auth
+    secure-auth: 1
+
+    # @see http://dev.mysql.com/doc/refman/5.7/en/server-options.html#option_mysqld_symbolic-links
+    skip-symbolic-links: 1
+
+    # @see http://dev.mysql.com/doc/refman/5.7/en/server-options.html#option_mysqld_skip-show-database
+    skip-show-database: True
+
+    # @see http://dev.mysql.com/doc/refman/5.7/en/server-system-variables.html#sysvar_local_infile
+    local-infile: 0
+
+    # @see https://dev.mysql.com/doc/refman/5.7/en/server-options.html#option_mysqld_allow-suspicious-udfs
+    allow-suspicious-udfs: 0
+
+    # @see https://dev.mysql.com/doc/refman/5.7/en/server-system-variables.html#sysvar_automatic_sp_privileges
+    automatic-sp-privileges: 0
+
+    # @see https://dev.mysql.com/doc/refman/5.7/en/server-options.html#option_mysqld_secure-file-priv
+    secure-file-priv: /tmp
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/defaults/mysql.yaml.schema b/ceph-osd/hooks/charmhelpers/contrib/hardening/defaults/mysql.yaml.schema
new file mode 100644
index 00000000..2edf325c
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/defaults/mysql.yaml.schema
@@ -0,0 +1,15 @@
+# NOTE: this schema must contain all valid keys from its associated defaults
+#       file. It is used to validate user-provided overrides.
+hardening:
+    mysql-conf:
+    hardening-conf:
+security:
+    chroot:
+    safe-user-create:
+    secure-auth:
+    skip-symbolic-links:
+    skip-show-database:
+    local-infile:
+    allow-suspicious-udfs:
+    automatic-sp-privileges:
+    secure-file-priv:
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/defaults/os.yaml b/ceph-osd/hooks/charmhelpers/contrib/hardening/defaults/os.yaml
new file mode 100644
index 00000000..9a8627b5
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/defaults/os.yaml
@@ -0,0 +1,68 @@
+# NOTE: this file contains the default configuration for the 'os' hardening
+#       code. If you want to override any settings you must add them to a
+#       file called hardening.yaml in the root directory of your charm using
+#       the name 'os' as the root key followed by any of the following with
+#       new values.
+
+general:
+    desktop_enable: False  # (type:boolean)
+
+environment:
+    extra_user_paths: []
+    umask: 027
+    root_path: /
+
+auth:
+    pw_max_age: 60
+    # discourage password cycling
+    pw_min_age: 7
+    retries: 5
+    lockout_time: 600
+    timeout: 60
+    allow_homeless: False  # (type:boolean)
+    pam_passwdqc_enable: True  # (type:boolean)
+    pam_passwdqc_options: 'min=disabled,disabled,16,12,8'
+    root_ttys:
+        console
+        tty1
+        tty2
+        tty3
+        tty4
+        tty5
+        tty6
+    uid_min: 1000
+    gid_min: 1000
+    sys_uid_min: 100
+    sys_uid_max: 999
+    sys_gid_min: 100
+    sys_gid_max: 999
+    chfn_restrict:
+
+security:
+    users_allow: []
+    suid_sgid_enforce: True  # (type:boolean)
+    # user-defined blacklist and whitelist
+    suid_sgid_blacklist: []
+    suid_sgid_whitelist: []
+    # if this is True, remove any suid/sgid bits from files that were not in the whitelist
+    suid_sgid_dry_run_on_unknown: False  # (type:boolean)
+    suid_sgid_remove_from_unknown: False  # (type:boolean)
+    # remove packages with known issues
+    packages_clean: True  # (type:boolean)
+    packages_list:
+        xinetd
+        inetd
+        ypserv
+        telnet-server
+        rsh-server
+        rsync
+    kernel_enable_module_loading: True  # (type:boolean)
+    kernel_enable_core_dump: False  # (type:boolean)
+    ssh_tmout: 300
+
+sysctl:
+    kernel_secure_sysrq: 244  # 4 + 16 + 32 + 64 + 128
+    kernel_enable_sysrq: False  # (type:boolean)
+    forwarding: False  # (type:boolean)
+    ipv6_enable: False  # (type:boolean)
+    arp_restricted: True  # (type:boolean)
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/defaults/os.yaml.schema b/ceph-osd/hooks/charmhelpers/contrib/hardening/defaults/os.yaml.schema
new file mode 100644
index 00000000..cc3b9c20
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/defaults/os.yaml.schema
@@ -0,0 +1,43 @@
+# NOTE: this schema must contain all valid keys from its associated defaults
+#       file. It is used to validate user-provided overrides.
+general:
+    desktop_enable:
+environment:
+    extra_user_paths:
+    umask:
+    root_path:
+auth:
+    pw_max_age:
+    pw_min_age:
+    retries:
+    lockout_time:
+    timeout:
+    allow_homeless:
+    pam_passwdqc_enable:
+    pam_passwdqc_options:
+    root_ttys:
+    uid_min:
+    gid_min:
+    sys_uid_min:
+    sys_uid_max:
+    sys_gid_min:
+    sys_gid_max:
+    chfn_restrict:
+security:
+    users_allow:
+    suid_sgid_enforce:
+    suid_sgid_blacklist:
+    suid_sgid_whitelist:
+    suid_sgid_dry_run_on_unknown:
+    suid_sgid_remove_from_unknown:
+    packages_clean:
+    packages_list:
+    kernel_enable_module_loading:
+    kernel_enable_core_dump:
+    ssh_tmout:
+sysctl:
+    kernel_secure_sysrq:
+    kernel_enable_sysrq:
+    forwarding:
+    ipv6_enable:
+    arp_restricted:
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/defaults/ssh.yaml b/ceph-osd/hooks/charmhelpers/contrib/hardening/defaults/ssh.yaml
new file mode 100644
index 00000000..cd529bca
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/defaults/ssh.yaml
@@ -0,0 +1,49 @@
+# NOTE: this file contains the default configuration for the 'ssh' hardening
+#       code. If you want to override any settings you must add them to a
+#       file called hardening.yaml in the root directory of your charm using
+#       the name 'ssh' as the root key followed by any of the following with
+#       new values.
+
+common:
+    service_name: 'ssh'
+    network_ipv6_enable: False  # (type:boolean)
+    ports: [22]
+    remote_hosts: []
+
+client:
+    package: 'openssh-client'
+    cbc_required: False  # (type:boolean)
+    weak_hmac: False  # (type:boolean)
+    weak_kex: False  # (type:boolean)
+    roaming: False
+    password_authentication: 'no'
+
+server:
+    host_key_files: ['/etc/ssh/ssh_host_rsa_key', '/etc/ssh/ssh_host_dsa_key',
+                     '/etc/ssh/ssh_host_ecdsa_key']
+    cbc_required: False  # (type:boolean)
+    weak_hmac: False  # (type:boolean)
+    weak_kex: False  # (type:boolean)
+    allow_root_with_key: False  # (type:boolean)
+    allow_tcp_forwarding: 'no'
+    allow_agent_forwarding: 'no'
+    allow_x11_forwarding: 'no'
+    use_privilege_separation: 'sandbox'
+    listen_to: ['0.0.0.0']
+    use_pam: 'no'
+    package: 'openssh-server'
+    password_authentication: 'no'
+    alive_interval: '600'
+    alive_count: '3'
+    sftp_enable: False  # (type:boolean)
+    sftp_group: 'sftponly'
+    sftp_chroot: '/home/%u'
+    deny_users: []
+    allow_users: []
+    deny_groups: []
+    allow_groups: []
+    print_motd: 'no'
+    print_last_log: 'no'
+    use_dns: 'no'
+    max_auth_tries: 2
+    max_sessions: 10
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/defaults/ssh.yaml.schema b/ceph-osd/hooks/charmhelpers/contrib/hardening/defaults/ssh.yaml.schema
new file mode 100644
index 00000000..d05e054b
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/defaults/ssh.yaml.schema
@@ -0,0 +1,42 @@
+# NOTE: this schema must contain all valid keys from its associated defaults
+#       file. It is used to validate user-provided overrides.
+common: + service_name: + network_ipv6_enable: + ports: + remote_hosts: +client: + package: + cbc_required: + weak_hmac: + weak_kex: + roaming: + password_authentication: +server: + host_key_files: + cbc_required: + weak_hmac: + weak_kex: + allow_root_with_key: + allow_tcp_forwarding: + allow_agent_forwarding: + allow_x11_forwarding: + use_privilege_separation: + listen_to: + use_pam: + package: + password_authentication: + alive_interval: + alive_count: + sftp_enable: + sftp_group: + sftp_chroot: + deny_users: + allow_users: + deny_groups: + allow_groups: + print_motd: + print_last_log: + use_dns: + max_auth_tries: + max_sessions: diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/harden.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/harden.py new file mode 100644 index 00000000..45ad076d --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/harden.py @@ -0,0 +1,93 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import OrderedDict + +from charmhelpers.core.hookenv import ( + config, + log, + DEBUG, + WARNING, +) +from charmhelpers.contrib.hardening.host.checks import run_os_checks +from charmhelpers.contrib.hardening.ssh.checks import run_ssh_checks +from charmhelpers.contrib.hardening.mysql.checks import run_mysql_checks +from charmhelpers.contrib.hardening.apache.checks import run_apache_checks + +_DISABLE_HARDENING_FOR_UNIT_TEST = False + + +def harden(overrides=None): + """Hardening decorator. + + This is the main entry point for running the hardening stack. In order to + run modules of the stack you must add this decorator to charm hook(s) and + ensure that your charm config.yaml contains the 'harden' option set to + one or more of the supported modules. Setting these will cause the + corresponding hardening code to be run when the hook fires. + + This decorator can and should be applied to more than one hook or function + such that hardening modules are called multiple times. This is because + subsequent calls will perform auditing checks that will report any changes + to resources hardened by the first run (and possibly perform compliance + actions as a result of any detected infractions). + + :param overrides: Optional list of stack modules used to override those + provided with 'harden' config. + :returns: Returns value returned by decorated function once executed. + """ + if overrides is None: + overrides = [] + + def _harden_inner1(f): + _logged = False + + def _harden_inner2(*args, **kwargs): + # knock out hardening via a config var; normally it won't get + # disabled. 
+ nonlocal _logged + if _DISABLE_HARDENING_FOR_UNIT_TEST: + return f(*args, **kwargs) + if not _logged: + log("Hardening function '%s'" % (f.__name__), level=DEBUG) + _logged = True + RUN_CATALOG = OrderedDict([('os', run_os_checks), + ('ssh', run_ssh_checks), + ('mysql', run_mysql_checks), + ('apache', run_apache_checks)]) + + enabled = overrides[:] or (config("harden") or "").split() + if enabled: + modules_to_run = [] + # modules will always be performed in the following order + for module, func in RUN_CATALOG.items(): + if module in enabled: + enabled.remove(module) + modules_to_run.append(func) + + if enabled: + log("Unknown hardening modules '%s' - ignoring" % + (', '.join(enabled)), level=WARNING) + + for hardener in modules_to_run: + log("Executing hardening module '%s'" % + (hardener.__name__), level=DEBUG) + hardener() + else: + log("No hardening applied to '%s'" % (f.__name__), level=DEBUG) + + return f(*args, **kwargs) + return _harden_inner2 + + return _harden_inner1 diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/host/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/__init__.py new file mode 100644 index 00000000..58bebd84 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from os import path + +TEMPLATES_DIR = path.join(path.dirname(__file__), 'templates') diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/__init__.py new file mode 100644 index 00000000..0e7e409f --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/__init__.py @@ -0,0 +1,48 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+from charmhelpers.core.hookenv import (
+    log,
+    DEBUG,
+)
+from charmhelpers.contrib.hardening.host.checks import (
+    apt,
+    limits,
+    login,
+    minimize_access,
+    pam,
+    profile,
+    securetty,
+    suid_sgid,
+    sysctl
+)
+
+
+def run_os_checks():
+    log("Starting OS hardening checks.", level=DEBUG)
+    checks = apt.get_audits()
+    checks.extend(limits.get_audits())
+    checks.extend(login.get_audits())
+    checks.extend(minimize_access.get_audits())
+    checks.extend(pam.get_audits())
+    checks.extend(profile.get_audits())
+    checks.extend(securetty.get_audits())
+    checks.extend(suid_sgid.get_audits())
+    checks.extend(sysctl.get_audits())
+
+    for check in checks:
+        log("Running '%s' check" % (check.__class__.__name__), level=DEBUG)
+        check.ensure_compliance()
+
+    log("OS hardening checks complete.", level=DEBUG)
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/apt.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/apt.py
new file mode 100644
index 00000000..7ce41b00
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/apt.py
@@ -0,0 +1,37 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from charmhelpers.contrib.hardening.utils import get_settings
+from charmhelpers.contrib.hardening.audits.apt import (
+    AptConfig,
+    RestrictedPackages,
+)
+
+
+def get_audits():
+    """Get OS hardening apt audits.
+
+    :returns: list of audits
+    """
+    audits = [AptConfig([{'key': 'APT::Get::AllowUnauthenticated',
+                          'expected': 'false'}])]
+
+    settings = get_settings('os')
+    clean_packages = settings['security']['packages_clean']
+    if clean_packages:
+        security_packages = settings['security']['packages_list']
+        if security_packages:
+            audits.append(RestrictedPackages(security_packages))
+
+    return audits
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/limits.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/limits.py
new file mode 100644
index 00000000..e94f5ebe
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/limits.py
@@ -0,0 +1,53 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from charmhelpers.contrib.hardening.audits.file import (
+    DirectoryPermissionAudit,
+    TemplatedFile,
+)
+from charmhelpers.contrib.hardening.host import TEMPLATES_DIR
+from charmhelpers.contrib.hardening import utils
+
+
+def get_audits():
+    """Get OS hardening security limits audits.
+
+    :returns: list of audits
+    """
+    audits = []
+    settings = utils.get_settings('os')
+
+    # Ensure that the /etc/security/limits.d directory is only writable
+    # by the root user, but others can execute and read.
+    audits.append(DirectoryPermissionAudit('/etc/security/limits.d',
+                                           user='root', group='root',
+                                           mode=0o755))
+
+    # If core dumps are not enabled, then don't allow core dumps to be
+    # created as they may contain sensitive information.
+    if not settings['security']['kernel_enable_core_dump']:
+        audits.append(TemplatedFile('/etc/security/limits.d/10.hardcore.conf',
+                                    SecurityLimitsContext(),
+                                    template_dir=TEMPLATES_DIR,
+                                    user='root', group='root', mode=0o0440))
+    return audits
+
+
+class SecurityLimitsContext(object):
+
+    def __call__(self):
+        settings = utils.get_settings('os')
+        ctxt = {'disable_core_dump':
+                not settings['security']['kernel_enable_core_dump']}
+        return ctxt
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/login.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/login.py
new file mode 100644
index 00000000..fd500c8b
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/login.py
@@ -0,0 +1,63 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from charmhelpers.contrib.hardening.audits.file import TemplatedFile
+from charmhelpers.contrib.hardening.host import TEMPLATES_DIR
+from charmhelpers.contrib.hardening import utils
+
+
+def get_audits():
+    """Get OS hardening login.defs audits.
+
+    :returns: list of audits
+    """
+    audits = [TemplatedFile('/etc/login.defs', LoginContext(),
+                            template_dir=TEMPLATES_DIR,
+                            user='root', group='root', mode=0o0444)]
+    return audits
+
+
+class LoginContext(object):
+
+    def __call__(self):
+        settings = utils.get_settings('os')
+
+        # Octal numbers in yaml end up being turned into decimal,
+        # so check if the umask is entered as a string (e.g. '027')
+        # or as an octal umask as we know it (e.g. 002). If it's not
+        # a string, assume it to be octal and turn it into an octal
+        # string.
+        umask = settings['environment']['umask']
+        if not isinstance(umask, str):
+            umask = '%s' % oct(umask)
+
+        ctxt = {
+            'additional_user_paths':
+            settings['environment']['extra_user_paths'],
+            'umask': umask,
+            'pwd_max_age': settings['auth']['pw_max_age'],
+            'pwd_min_age': settings['auth']['pw_min_age'],
+            'uid_min': settings['auth']['uid_min'],
+            'sys_uid_min': settings['auth']['sys_uid_min'],
+            'sys_uid_max': settings['auth']['sys_uid_max'],
+            'gid_min': settings['auth']['gid_min'],
+            'sys_gid_min': settings['auth']['sys_gid_min'],
+            'sys_gid_max': settings['auth']['sys_gid_max'],
+            'login_retries': settings['auth']['retries'],
+            'login_timeout': settings['auth']['timeout'],
+            'chfn_restrict': settings['auth']['chfn_restrict'],
+            'allow_login_without_home': settings['auth']['allow_homeless']
+        }
+
+        return ctxt
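The octal-umask caveat in LoginContext above is easy to trip over; this sketch shows what the YAML parse actually produces (assuming PyYAML, which follows YAML 1.1 semantics where a leading-zero scalar is read as octal):

```python
import yaml  # assumption: PyYAML available

val = yaml.safe_load('umask: 027')['umask']
print(val)       # 23 -- an int, not the string '027'
print(oct(val))  # '0o27' -- hence the isinstance(umask, str) check above
```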
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/minimize_access.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/minimize_access.py
new file mode 100644
index 00000000..6e64be00
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/minimize_access.py
@@ -0,0 +1,50 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from charmhelpers.contrib.hardening.audits.file import (
+    FilePermissionAudit,
+    ReadOnly,
+)
+from charmhelpers.contrib.hardening import utils
+
+
+def get_audits():
+    """Get OS hardening access audits.
+
+    :returns: list of audits
+    """
+    audits = []
+    settings = utils.get_settings('os')
+
+    # Remove write permissions from $PATH folders for all regular users.
+    # This prevents normal users from changing system-wide commands.
+    path_folders = {'/usr/local/sbin',
+                    '/usr/local/bin',
+                    '/usr/sbin',
+                    '/usr/bin',
+                    '/bin'}
+    extra_user_paths = settings['environment']['extra_user_paths']
+    path_folders.update(extra_user_paths)
+    audits.append(ReadOnly(path_folders))
+
+    # Only allow the root user to have access to the shadow file.
+    audits.append(FilePermissionAudit('/etc/shadow', 'root', 'root', 0o0600))
+
+    if 'change_user' not in settings['security']['users_allow']:
+        # su should only be accessible to user and group root, unless it is
+        # expressly defined to allow users to change to root via the
+        # security_users_allow config option.
+        audits.append(FilePermissionAudit('/bin/su', 'root', 'root', 0o750))
+
+    return audits
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/pam.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/pam.py
new file mode 100644
index 00000000..9b38d5f0
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/pam.py
@@ -0,0 +1,132 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from subprocess import (
+    check_output,
+    CalledProcessError,
+)
+
+from charmhelpers.core.hookenv import (
+    log,
+    DEBUG,
+    ERROR,
+)
+from charmhelpers.fetch import (
+    apt_install,
+    apt_purge,
+    apt_update,
+)
+from charmhelpers.contrib.hardening.audits.file import (
+    TemplatedFile,
+    DeletedFile,
+)
+from charmhelpers.contrib.hardening import utils
+from charmhelpers.contrib.hardening.host import TEMPLATES_DIR
+
+
+def get_audits():
+    """Get OS hardening PAM authentication audits.
+
+    :returns: list of audits
+    """
+    audits = []
+
+    settings = utils.get_settings('os')
+
+    if settings['auth']['pam_passwdqc_enable']:
+        audits.append(PasswdqcPAM('/etc/passwdqc.conf'))
+
+    if settings['auth']['retries']:
+        audits.append(Tally2PAM('/usr/share/pam-configs/tally2'))
+    else:
+        audits.append(DeletedFile('/usr/share/pam-configs/tally2'))
+
+    return audits
+
+
+class PasswdqcPAMContext(object):
+
+    def __call__(self):
+        ctxt = {}
+        settings = utils.get_settings('os')
+
+        ctxt['auth_pam_passwdqc_options'] = \
+            settings['auth']['pam_passwdqc_options']
+
+        return ctxt
+
+
+class PasswdqcPAM(TemplatedFile):
+    """The PAM Audit verifies the linux PAM settings."""
+    def __init__(self, path):
+        super(PasswdqcPAM, self).__init__(path=path,
+                                          template_dir=TEMPLATES_DIR,
+                                          context=PasswdqcPAMContext(),
+                                          user='root',
+                                          group='root',
+                                          mode=0o0640)
+
+    def pre_write(self):
+        # Always remove?
+        for pkg in ['libpam-ccreds', 'libpam-cracklib']:
+            log("Purging package '%s'" % pkg, level=DEBUG)
+            apt_purge(pkg)
+
+        apt_update(fatal=True)
+        for pkg in ['libpam-passwdqc']:
+            log("Installing package '%s'" % pkg, level=DEBUG)
+            apt_install(pkg)
+
+    def post_write(self):
+        """Updates the PAM configuration after the file has been written"""
+        try:
+            check_output(['pam-auth-update', '--package'])
+        except CalledProcessError as e:
+            log('Error calling pam-auth-update: %s' % e, level=ERROR)
+
+
+class Tally2PAMContext(object):
+
+    def __call__(self):
+        ctxt = {}
+        settings = utils.get_settings('os')
+
+        ctxt['auth_lockout_time'] = settings['auth']['lockout_time']
+        ctxt['auth_retries'] = settings['auth']['retries']
+
+        return ctxt
+
+
+class Tally2PAM(TemplatedFile):
+    """The PAM Audit verifies the linux PAM settings."""
+    def __init__(self, path):
+        super(Tally2PAM, self).__init__(path=path,
+                                        template_dir=TEMPLATES_DIR,
+                                        context=Tally2PAMContext(),
+                                        user='root',
+                                        group='root',
+                                        mode=0o0640)
+
+    def pre_write(self):
+        # Always remove?
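+        # (Annotation, not upstream text: libpam-ccreds caches credentials,
+        # so it is presumably purged here to stop cached authentication
+        # results from bypassing the pam_tally2 lockout configured below;
+        # libpam-modules ships pam_tally2 itself.)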
+ apt_purge('libpam-ccreds') + apt_update(fatal=True) + apt_install('libpam-modules') + + def post_write(self): + """Updates the PAM configuration after the file has been written""" + try: + check_output(['pam-auth-update', '--package']) + except CalledProcessError as e: + log('Error calling pam-auth-update: %s' % e, level=ERROR) diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/profile.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/profile.py new file mode 100644 index 00000000..2727428d --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/profile.py @@ -0,0 +1,49 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from charmhelpers.contrib.hardening.audits.file import TemplatedFile +from charmhelpers.contrib.hardening.host import TEMPLATES_DIR +from charmhelpers.contrib.hardening import utils + + +def get_audits(): + """Get OS hardening profile audits. + + :returns: dictionary of audits + """ + audits = [] + + settings = utils.get_settings('os') + # If core dumps are not enabled, then don't allow core dumps to be + # created as they may contain sensitive information. + if not settings['security']['kernel_enable_core_dump']: + audits.append(TemplatedFile('/etc/profile.d/pinerolo_profile.sh', + ProfileContext(), + template_dir=TEMPLATES_DIR, + mode=0o0755, user='root', group='root')) + if settings['security']['ssh_tmout']: + audits.append(TemplatedFile('/etc/profile.d/99-hardening.sh', + ProfileContext(), + template_dir=TEMPLATES_DIR, + mode=0o0644, user='root', group='root')) + return audits + + +class ProfileContext(object): + + def __call__(self): + settings = utils.get_settings('os') + ctxt = {'ssh_tmout': + settings['security']['ssh_tmout']} + return ctxt diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/securetty.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/securetty.py new file mode 100644 index 00000000..34cd0217 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/securetty.py @@ -0,0 +1,37 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from charmhelpers.contrib.hardening.audits.file import TemplatedFile +from charmhelpers.contrib.hardening.host import TEMPLATES_DIR +from charmhelpers.contrib.hardening import utils + + +def get_audits(): + """Get OS hardening Secure TTY audits. 
+ + :returns: dictionary of audits + """ + audits = [] + audits.append(TemplatedFile('/etc/securetty', SecureTTYContext(), + template_dir=TEMPLATES_DIR, + mode=0o0400, user='root', group='root')) + return audits + + +class SecureTTYContext(object): + + def __call__(self): + settings = utils.get_settings('os') + ctxt = {'ttys': settings['auth']['root_ttys']} + return ctxt diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/suid_sgid.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/suid_sgid.py new file mode 100644 index 00000000..bcbe3fde --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/suid_sgid.py @@ -0,0 +1,129 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import subprocess + +from charmhelpers.core.hookenv import ( + log, + INFO, +) +from charmhelpers.contrib.hardening.audits.file import NoSUIDSGIDAudit +from charmhelpers.contrib.hardening import utils + + +BLACKLIST = ['/usr/bin/rcp', '/usr/bin/rlogin', '/usr/bin/rsh', + '/usr/libexec/openssh/ssh-keysign', + '/usr/lib/openssh/ssh-keysign', + '/sbin/netreport', + '/usr/sbin/usernetctl', + '/usr/sbin/userisdnctl', + '/usr/sbin/pppd', + '/usr/bin/lockfile', + '/usr/bin/mail-lock', + '/usr/bin/mail-unlock', + '/usr/bin/mail-touchlock', + '/usr/bin/dotlockfile', + '/usr/bin/arping', + '/usr/sbin/uuidd', + '/usr/bin/mtr', + '/usr/lib/evolution/camel-lock-helper-1.2', + '/usr/lib/pt_chown', + '/usr/lib/eject/dmcrypt-get-device', + '/usr/lib/mc/cons.saver'] + +WHITELIST = ['/bin/mount', '/bin/ping', '/bin/su', '/bin/umount', + '/sbin/pam_timestamp_check', '/sbin/unix_chkpwd', '/usr/bin/at', + '/usr/bin/gpasswd', '/usr/bin/locate', '/usr/bin/newgrp', + '/usr/bin/passwd', '/usr/bin/ssh-agent', + '/usr/libexec/utempter/utempter', '/usr/sbin/lockdev', + '/usr/sbin/sendmail.sendmail', '/usr/bin/expiry', + '/bin/ping6', '/usr/bin/traceroute6.iputils', + '/sbin/mount.nfs', '/sbin/umount.nfs', + '/sbin/mount.nfs4', '/sbin/umount.nfs4', + '/usr/bin/crontab', + '/usr/bin/wall', '/usr/bin/write', + '/usr/bin/screen', + '/usr/bin/mlocate', + '/usr/bin/chage', '/usr/bin/chfn', '/usr/bin/chsh', + '/bin/fusermount', + '/usr/bin/pkexec', + '/usr/bin/sudo', '/usr/bin/sudoedit', + '/usr/sbin/postdrop', '/usr/sbin/postqueue', + '/usr/sbin/suexec', + '/usr/lib/squid/ncsa_auth', '/usr/lib/squid/pam_auth', + '/usr/kerberos/bin/ksu', + '/usr/sbin/ccreds_validate', + '/usr/bin/Xorg', + '/usr/bin/X', + '/usr/lib/dbus-1.0/dbus-daemon-launch-helper', + '/usr/lib/vte/gnome-pty-helper', + '/usr/lib/libvte9/gnome-pty-helper', + '/usr/lib/libvte-2.90-9/gnome-pty-helper'] + + +def get_audits(): + """Get OS hardening suid/sgid audits. + + :returns: dictionary of audits + """ + checks = [] + settings = utils.get_settings('os') + if not settings['security']['suid_sgid_enforce']: + log("Skipping suid/sgid hardening", level=INFO) + return checks + + # Build the blacklist and whitelist of files for suid/sgid checks. + # There are a total of 4 lists: + # 1. 
the system blacklist
+    # 2. the system whitelist
+    # 3. the user blacklist
+    # 4. the user whitelist
+    #
+    # The blacklist is the set of paths which should NOT have the suid/sgid bit
+    # set and the whitelist is the set of paths which MAY have the suid/sgid
+    # bit set. The user whitelist/blacklist effectively override the system
+    # whitelist/blacklist.
+    u_b = settings['security']['suid_sgid_blacklist']
+    u_w = settings['security']['suid_sgid_whitelist']
+
+    blacklist = set(BLACKLIST) - set(u_w + u_b)
+    whitelist = set(WHITELIST) - set(u_b + u_w)
+
+    checks.append(NoSUIDSGIDAudit(blacklist))
+
+    dry_run = settings['security']['suid_sgid_dry_run_on_unknown']
+
+    if settings['security']['suid_sgid_remove_from_unknown'] or dry_run:
+        # If the policy is a dry_run (e.g. complain only) or remove unknown
+        # suid/sgid bits then find all of the paths which have the suid/sgid
+        # bit set and then remove the whitelisted paths.
+        root_path = settings['environment']['root_path']
+        unknown_paths = find_paths_with_suid_sgid(root_path) - set(whitelist)
+        checks.append(NoSUIDSGIDAudit(unknown_paths, unless=dry_run))
+
+    return checks
+
+
+def find_paths_with_suid_sgid(root_path):
+    """Finds all paths/files which have an suid/sgid bit enabled.
+
+    Starting with the root_path, this will recursively find all paths which
+    have an suid or sgid bit set.
+    """
+    cmd = ['find', root_path, '-perm', '-4000', '-o', '-perm', '-2000',
+           '-type', 'f', '!', '-path', '/proc/*', '-print']
+
+    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    out, _ = p.communicate()
+    # communicate() returns bytes under Python 3; decode before splitting.
+    return set(out.decode('utf-8').split('\n'))
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/sysctl.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/sysctl.py
new file mode 100644
index 00000000..8a57d83d
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/checks/sysctl.py
@@ -0,0 +1,208 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
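+"""OS hardening sysctl audits.
+
+(Annotation, not upstream text.) The audits below follow the pattern used
+throughout this patch: a context class supplies template variables and a
+TemplatedFile audit renders and applies them via ensure_compliance(). A
+minimal driver, assuming this vendored layout, would be:
+
+    from charmhelpers.contrib.hardening.host.checks import sysctl
+
+    for audit in sysctl.get_audits():
+        audit.ensure_compliance()
+"""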
+ +import os +import platform +import re +import subprocess + +from charmhelpers.core.hookenv import ( + log, + INFO, + WARNING, +) +from charmhelpers.contrib.hardening import utils +from charmhelpers.contrib.hardening.audits.file import ( + FilePermissionAudit, + TemplatedFile, +) +from charmhelpers.contrib.hardening.host import TEMPLATES_DIR + + +SYSCTL_DEFAULTS = """net.ipv4.ip_forward=%(net_ipv4_ip_forward)s +net.ipv6.conf.all.forwarding=%(net_ipv6_conf_all_forwarding)s +net.ipv4.conf.all.rp_filter=1 +net.ipv4.conf.default.rp_filter=1 +net.ipv4.icmp_echo_ignore_broadcasts=1 +net.ipv4.icmp_ignore_bogus_error_responses=1 +net.ipv4.icmp_ratelimit=100 +net.ipv4.icmp_ratemask=88089 +net.ipv6.conf.all.disable_ipv6=%(net_ipv6_conf_all_disable_ipv6)s +net.ipv4.tcp_timestamps=%(net_ipv4_tcp_timestamps)s +net.ipv4.conf.all.arp_ignore=%(net_ipv4_conf_all_arp_ignore)s +net.ipv4.conf.all.arp_announce=%(net_ipv4_conf_all_arp_announce)s +net.ipv4.tcp_rfc1337=1 +net.ipv4.tcp_syncookies=1 +net.ipv4.conf.all.shared_media=1 +net.ipv4.conf.default.shared_media=1 +net.ipv4.conf.all.accept_source_route=0 +net.ipv4.conf.default.accept_source_route=0 +net.ipv4.conf.all.accept_redirects=0 +net.ipv4.conf.default.accept_redirects=0 +net.ipv6.conf.all.accept_redirects=0 +net.ipv6.conf.default.accept_redirects=0 +net.ipv4.conf.all.secure_redirects=0 +net.ipv4.conf.default.secure_redirects=0 +net.ipv4.conf.all.send_redirects=0 +net.ipv4.conf.default.send_redirects=0 +net.ipv4.conf.all.log_martians=0 +net.ipv6.conf.default.router_solicitations=0 +net.ipv6.conf.default.accept_ra_rtr_pref=0 +net.ipv6.conf.default.accept_ra_pinfo=0 +net.ipv6.conf.default.accept_ra_defrtr=0 +net.ipv6.conf.default.autoconf=0 +net.ipv6.conf.default.dad_transmits=0 +net.ipv6.conf.default.max_addresses=1 +net.ipv6.conf.all.accept_ra=0 +net.ipv6.conf.default.accept_ra=0 +kernel.modules_disabled=%(kernel_modules_disabled)s +kernel.sysrq=%(kernel_sysrq)s +fs.suid_dumpable=%(fs_suid_dumpable)s +kernel.randomize_va_space=2 +""" + + +def get_audits(): + """Get OS hardening sysctl audits. + + :returns: dictionary of audits + """ + audits = [] + settings = utils.get_settings('os') + + # Apply the sysctl settings which are configured to be applied. + audits.append(SysctlConf()) + # Make sure that only root has access to the sysctl.conf file, and + # that it is read-only. 
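+    # (Annotation, not upstream text: SysctlConf renders SYSCTL_DEFAULTS
+    # above with %-interpolation, e.g. the line
+    #     net.ipv4.ip_forward=%(net_ipv4_ip_forward)s
+    # becomes net.ipv4.ip_forward=0 unless the 'forwarding' setting flips
+    # the value in SysCtlHardeningContext below.)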
+    audits.append(FilePermissionAudit('/etc/sysctl.conf',
+                                      user='root',
+                                      group='root', mode=0o0440))
+    # If module loading is not enabled, then ensure that the modules
+    # file has the appropriate permissions and rebuild the initramfs
+    if not settings['security']['kernel_enable_module_loading']:
+        audits.append(ModulesTemplate())
+
+    return audits
+
+
+class ModulesContext(object):
+
+    def __call__(self):
+        settings = utils.get_settings('os')
+        with open('/proc/cpuinfo', 'r') as fd:
+            cpuinfo = fd.readlines()
+
+        vendor = ''  # guard against /proc/cpuinfo without a vendor_id line
+        for line in cpuinfo:
+            match = re.search(r"^vendor_id\s+:\s+(.+)", line)
+            if match:
+                vendor = match.group(1)
+
+        if vendor == "GenuineIntel":
+            vendor = "intel"
+        elif vendor == "AuthenticAMD":
+            vendor = "amd"
+
+        ctxt = {'arch': platform.processor(),
+                'cpuVendor': vendor,
+                'desktop_enable': settings['general']['desktop_enable']}
+
+        return ctxt
+
+
+class ModulesTemplate(TemplatedFile):
+
+    def __init__(self):
+        super(ModulesTemplate, self).__init__('/etc/initramfs-tools/modules',
+                                              ModulesContext(),
+                                              template_dir=TEMPLATES_DIR,
+                                              user='root', group='root',
+                                              mode=0o0440)
+
+    def post_write(self):
+        subprocess.check_call(['update-initramfs', '-u'])
+
+
+class SysCtlHardeningContext(object):
+    def __call__(self):
+        settings = utils.get_settings('os')
+        ctxt = {'sysctl': {}}
+
+        log("Applying sysctl settings", level=INFO)
+        extras = {'net_ipv4_ip_forward': 0,
+                  'net_ipv6_conf_all_forwarding': 0,
+                  'net_ipv6_conf_all_disable_ipv6': 1,
+                  'net_ipv4_tcp_timestamps': 0,
+                  'net_ipv4_conf_all_arp_ignore': 0,
+                  'net_ipv4_conf_all_arp_announce': 0,
+                  'kernel_sysrq': 0,
+                  'fs_suid_dumpable': 0,
+                  'kernel_modules_disabled': 1}
+
+        if settings['sysctl']['ipv6_enable']:
+            extras['net_ipv6_conf_all_disable_ipv6'] = 0
+
+        if settings['sysctl']['forwarding']:
+            extras['net_ipv4_ip_forward'] = 1
+            extras['net_ipv6_conf_all_forwarding'] = 1
+
+        if settings['sysctl']['arp_restricted']:
+            extras['net_ipv4_conf_all_arp_ignore'] = 1
+            extras['net_ipv4_conf_all_arp_announce'] = 2
+
+        if settings['security']['kernel_enable_module_loading']:
+            extras['kernel_modules_disabled'] = 0
+
+        if settings['sysctl']['kernel_enable_sysrq']:
+            sysrq_val = settings['sysctl']['kernel_secure_sysrq']
+            extras['kernel_sysrq'] = sysrq_val
+
+        if settings['security']['kernel_enable_core_dump']:
+            extras['fs_suid_dumpable'] = 1
+
+        settings.update(extras)
+        for d in (SYSCTL_DEFAULTS % settings).split():
+            d = d.strip().partition('=')
+            key = d[0].strip()
+            path = os.path.join('/proc/sys', key.replace('.', '/'))
+            if not os.path.exists(path):
+                log("Skipping '%s' since '%s' does not exist" % (key, path),
+                    level=WARNING)
+                continue
+
+            ctxt['sysctl'][key] = d[2] or None
+
+        return {
+            'sysctl_settings': [(k, v) for k, v in ctxt['sysctl'].items()]
+        }
+
+
+class SysctlConf(TemplatedFile):
+    """An audit check for sysctl settings."""
+    def __init__(self):
+        self.conffile = '/etc/sysctl.d/99-juju-hardening.conf'
+        super(SysctlConf, self).__init__(self.conffile,
+                                         SysCtlHardeningContext(),
+                                         template_dir=TEMPLATES_DIR,
+                                         user='root', group='root',
+                                         mode=0o0440)
+
+    def post_write(self):
+        try:
+            subprocess.check_call(['sysctl', '-p', self.conffile])
+        except subprocess.CalledProcessError as e:
+            # NOTE: on some systems if sysctl cannot apply all settings it
+            # will return non-zero as well.
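+            # (Annotation, not upstream text: a common case is an
+            # unprivileged container, where kernel-wide keys such as
+            # kernel.modules_disabled under /proc/sys are read-only and
+            # sysctl exits non-zero even though other keys were applied.)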
+ log("sysctl command returned an error (maybe some " + "keys could not be set) - %s" % (e), + level=WARNING) diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/10.hardcore.conf b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/10.hardcore.conf new file mode 100644 index 00000000..0014191f --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/10.hardcore.conf @@ -0,0 +1,8 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### +{% if disable_core_dump -%} +# Prevent core dumps for all users. These are usually only needed by developers and may contain sensitive information. +* hard core 0 +{% endif %} \ No newline at end of file diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/99-hardening.sh b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/99-hardening.sh new file mode 100644 index 00000000..616cef46 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/99-hardening.sh @@ -0,0 +1,5 @@ +TMOUT={{ tmout }} +readonly TMOUT +export TMOUT + +readonly HISTFILE diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/99-juju-hardening.conf b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/99-juju-hardening.conf new file mode 100644 index 00000000..101f1e1d --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/99-juju-hardening.conf @@ -0,0 +1,7 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### +{% for key, value in sysctl_settings -%} +{{ key }}={{ value }} +{% endfor -%} diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/login.defs b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/login.defs new file mode 100644 index 00000000..7d107637 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/login.defs @@ -0,0 +1,349 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### +# +# /etc/login.defs - Configuration control definitions for the login package. +# +# Three items must be defined: MAIL_DIR, ENV_SUPATH, and ENV_PATH. +# If unspecified, some arbitrary (and possibly incorrect) value will +# be assumed. All other items are optional - if not specified then +# the described action or option will be inhibited. +# +# Comment lines (lines beginning with "#") and blank lines are ignored. +# +# Modified for Linux. --marekm + +# REQUIRED for useradd/userdel/usermod +# Directory where mailboxes reside, _or_ name of file, relative to the +# home directory. If you _do_ define MAIL_DIR and MAIL_FILE, +# MAIL_DIR takes precedence. 
+# +# Essentially: +# - MAIL_DIR defines the location of users mail spool files +# (for mbox use) by appending the username to MAIL_DIR as defined +# below. +# - MAIL_FILE defines the location of the users mail spool files as the +# fully-qualified filename obtained by prepending the user home +# directory before $MAIL_FILE +# +# NOTE: This is no more used for setting up users MAIL environment variable +# which is, starting from shadow 4.0.12-1 in Debian, entirely the +# job of the pam_mail PAM modules +# See default PAM configuration files provided for +# login, su, etc. +# +# This is a temporary situation: setting these variables will soon +# move to /etc/default/useradd and the variables will then be +# no more supported +MAIL_DIR /var/mail +#MAIL_FILE .mail + +# +# Enable logging and display of /var/log/faillog login failure info. +# This option conflicts with the pam_tally PAM module. +# +FAILLOG_ENAB yes + +# +# Enable display of unknown usernames when login failures are recorded. +# +# WARNING: Unknown usernames may become world readable. +# See #290803 and #298773 for details about how this could become a security +# concern +LOG_UNKFAIL_ENAB no + +# +# Enable logging of successful logins +# +LOG_OK_LOGINS yes + +# +# Enable "syslog" logging of su activity - in addition to sulog file logging. +# SYSLOG_SG_ENAB does the same for newgrp and sg. +# +SYSLOG_SU_ENAB yes +SYSLOG_SG_ENAB yes + +# +# If defined, all su activity is logged to this file. +# +#SULOG_FILE /var/log/sulog + +# +# If defined, file which maps tty line to TERM environment parameter. +# Each line of the file is in a format something like "vt100 tty01". +# +#TTYTYPE_FILE /etc/ttytype + +# +# If defined, login failures will be logged here in a utmp format +# last, when invoked as lastb, will read /var/log/btmp, so... +# +FTMP_FILE /var/log/btmp + +# +# If defined, the command name to display when running "su -". For +# example, if this is defined as "su" then a "ps" will display the +# command is "-su". If not defined, then "ps" would display the +# name of the shell actually being run, e.g. something like "-sh". +# +SU_NAME su + +# +# If defined, file which inhibits all the usual chatter during the login +# sequence. If a full pathname, then hushed mode will be enabled if the +# user's name or shell are found in the file. If not a full pathname, then +# hushed mode will be enabled if the file exists in the user's home directory. +# +HUSHLOGIN_FILE .hushlogin +#HUSHLOGIN_FILE /etc/hushlogins + +# +# *REQUIRED* The default PATH settings, for superuser and normal users. +# +# (they are minimal, add the rest in the shell startup files) +ENV_SUPATH PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin +ENV_PATH PATH=/usr/local/bin:/usr/bin:/bin{% if additional_user_paths %}{{ additional_user_paths }}{% endif %} + +# +# Terminal permissions +# +# TTYGROUP Login tty will be assigned this group ownership. +# TTYPERM Login tty will be set to this permission. +# +# If you have a "write" program which is "setgid" to a special group +# which owns the terminals, define TTYGROUP to the group number and +# TTYPERM to 0620. Otherwise leave TTYGROUP commented out and assign +# TTYPERM to either 622 or 600. +# +# In Debian /usr/bin/bsd-write or similar programs are setgid tty +# However, the default and recommended value for TTYPERM is still 0600 +# to not allow anyone to write to anyone else console or terminal + +# Users can still allow other people to write them by issuing +# the "mesg y" command. 
+
+TTYGROUP	tty
+TTYPERM		0600
+
+#
+# Login configuration initializations:
+#
+#	ERASECHAR	Terminal ERASE character ('\010' = backspace).
+#	KILLCHAR	Terminal KILL character ('\025' = CTRL/U).
+#	UMASK		Default "umask" value.
+#
+# The ERASECHAR and KILLCHAR are used only on System V machines.
+#
+# UMASK is the default umask value for pam_umask and is used by
+# useradd and newusers to set the mode of the new home directories.
+# 022 is the "historical" value in Debian for UMASK
+# 027, or even 077, could be considered better for privacy
+# There is no One True Answer here: each sysadmin must make up his/her
+# mind.
+#
+# If USERGROUPS_ENAB is set to "yes", that will modify this UMASK default value
+# for private user groups, i. e. the uid is the same as gid, and username is
+# the same as the primary group name: for these, the user permissions will be
+# used as group permissions, e. g. 022 will become 002.
+#
+# Prefix these values with "0" to get octal, "0x" to get hexadecimal.
+#
+ERASECHAR	0177
+KILLCHAR	025
+UMASK		{{ umask }}
+
+# Enable setting of the umask group bits to be the same as owner bits (examples: `022` -> `002`, `077` -> `007`) for non-root users, if the uid is the same as gid, and username is the same as the primary group name.
+# If set to yes, userdel will remove the user's group if it contains no more members, and useradd will create by default a group with the name of the user.
+USERGROUPS_ENAB yes
+
+#
+# Password aging controls:
+#
+#	PASS_MAX_DAYS	Maximum number of days a password may be used.
+#	PASS_MIN_DAYS	Minimum number of days allowed between password changes.
+#	PASS_WARN_AGE	Number of days warning given before a password expires.
+#
+PASS_MAX_DAYS	{{ pwd_max_age }}
+PASS_MIN_DAYS	{{ pwd_min_age }}
+PASS_WARN_AGE	7
+
+#
+# Min/max values for automatic uid selection in useradd
+#
+UID_MIN		{{ uid_min }}
+UID_MAX		60000
+# System accounts
+SYS_UID_MIN	{{ sys_uid_min }}
+SYS_UID_MAX	{{ sys_uid_max }}
+
+# Min/max values for automatic gid selection in groupadd
+GID_MIN		{{ gid_min }}
+GID_MAX		60000
+# System accounts
+SYS_GID_MIN	{{ sys_gid_min }}
+SYS_GID_MAX	{{ sys_gid_max }}
+
+#
+# Max number of login retries if password is bad. This will most likely be
+# overridden by PAM, since the default pam_unix module has its own built-in
+# limit of 3 retries. However, this is a safe fallback in case you are using
+# an authentication module that does not enforce PAM_MAXTRIES.
+#
+LOGIN_RETRIES	{{ login_retries }}
+
+#
+# Max time in seconds for login
+#
+LOGIN_TIMEOUT	{{ login_timeout }}
+
+#
+# Which fields may be changed by regular users using chfn - use
+# any combination of letters "frwh" (full name, room number, work
+# phone, home phone). If not defined, no changes are allowed.
+# For backward compatibility, "yes" = "rwh" and "no" = "frwh".
+#
+{% if chfn_restrict %}
+CHFN_RESTRICT	{{ chfn_restrict }}
+{% endif %}
+
+#
+# Should login be allowed if we can't cd to the home directory?
+# Default is no.
+#
+DEFAULT_HOME	{% if allow_login_without_home %} yes {% else %} no {% endif %}
+
+#
+# If defined, this command is run when removing a user.
+# It should remove any at/cron/print jobs etc. owned by
+# the user to be removed (passed as the first argument).
+#
+#USERDEL_CMD	/usr/sbin/userdel_local
+
+#
+# Enable setting of the umask group bits to be the same as owner bits
+# (examples: 022 -> 002, 077 -> 007) for non-root users, if the uid is
+# the same as gid, and username is the same as the primary group name.
+#
+# If set to yes, userdel will remove the user's group if it contains no
+# more members, and useradd will create by default a group with the name
+# of the user.
+#
+USERGROUPS_ENAB yes
+
+#
+# Instead of the real user shell, the program specified by this parameter
+# will be launched, although its visible name (argv[0]) will be the shell's.
+# The program may do whatever it wants (logging, additional authentication,
+# banner, ...) before running the actual shell.
+#
+# FAKE_SHELL /bin/fakeshell
+
+#
+# If defined, either full pathname of a file containing device names or
+# a ":" delimited list of device names. Root logins will be allowed only
+# upon these devices.
+#
+# This variable is used by login and su.
+#
+#CONSOLE	/etc/consoles
+#CONSOLE	console:tty01:tty02:tty03:tty04
+
+#
+# List of groups to add to the user's supplementary group set
+# when logging in on the console (as determined by the CONSOLE
+# setting). Default is none.
+#
+# Use with caution - it is possible for users to gain permanent
+# access to these groups, even when not logged in on the console.
+# How to do it is left as an exercise for the reader...
+#
+# This variable is used by login and su.
+#
+#CONSOLE_GROUPS		floppy:audio:cdrom
+
+#
+# If set to "yes", new passwords will be encrypted using the MD5-based
+# algorithm compatible with the one used by recent releases of FreeBSD.
+# It supports passwords of unlimited length and longer salt strings.
+# Set to "no" if you need to copy encrypted passwords to other systems
+# which don't understand the new algorithm. Default is "no".
+#
+# This variable is deprecated. You should use ENCRYPT_METHOD.
+#
+MD5_CRYPT_ENAB no
+
+#
+# If set to MD5, MD5-based algorithm will be used for encrypting password
+# If set to SHA256, SHA256-based algorithm will be used for encrypting password
+# If set to SHA512, SHA512-based algorithm will be used for encrypting password
+# If set to DES, DES-based algorithm will be used for encrypting password (default)
+# Overrides the MD5_CRYPT_ENAB option
+#
+# Note: It is recommended to use a value consistent with
+# the PAM modules configuration.
+#
+ENCRYPT_METHOD SHA512
+
+#
+# Only used if ENCRYPT_METHOD is set to SHA256 or SHA512.
+#
+# Define the number of SHA rounds.
+# With a lot of rounds, it is more difficult to brute-force the password.
+# But note also that more CPU resources will be needed to authenticate
+# users.
+#
+# If not specified, the libc will choose the default number of rounds (5000).
+# The values must be inside the 1000-999999999 range.
+# If only one of the MIN or MAX values is set, then this value will be used.
+# If MIN > MAX, the highest value will be used.
+#
+# SHA_CRYPT_MIN_ROUNDS 5000
+# SHA_CRYPT_MAX_ROUNDS 5000
+
+################# OBSOLETED BY PAM ###############
+#                                                #
+# These options are now handled by PAM. Please   #
+# edit the appropriate file in /etc/pam.d/ to    #
+# enable the equivalents of them.                #
+#                                                #
+##################################################
+
+#MOTD_FILE
+#DIALUPS_CHECK_ENAB
+#LASTLOG_ENAB
+#MAIL_CHECK_ENAB
+#OBSCURE_CHECKS_ENAB
+#PORTTIME_CHECKS_ENAB
+#SU_WHEEL_ONLY
+#CRACKLIB_DICTPATH
+#PASS_CHANGE_TRIES
+#PASS_ALWAYS_WARN
+#ENVIRON_FILE
+#NOLOGINS_FILE
+#ISSUE_FILE
+#PASS_MIN_LEN
+#PASS_MAX_LEN
+#ULIMIT
+#ENV_HZ
+#CHFN_AUTH
+#CHSH_AUTH
+#FAIL_DELAY
+
+################# OBSOLETED ########################
+#                                                  #
+# These options are no longer handled by shadow.   #
+#                                                  #
+# Shadow utilities will display a warning if they  #
+# still appear.
# +# # +################################################### + +# CLOSE_SESSIONS +# LOGIN_STRING +# NO_PASSWORD_CONSOLE +# QMAIL_DIR + + + diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/modules b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/modules new file mode 100644 index 00000000..ef0354ee --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/modules @@ -0,0 +1,117 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### +# /etc/modules: kernel modules to load at boot time. +# +# This file contains the names of kernel modules that should be loaded +# at boot time, one per line. Lines beginning with "#" are ignored. +# Parameters can be specified after the module name. + +# Arch +# ---- +# +# Modules for certains builds, contains support modules and some CPU-specific optimizations. + +{% if arch == "x86_64" -%} +# Optimize for x86_64 cryptographic features +twofish-x86_64-3way +twofish-x86_64 +aes-x86_64 +salsa20-x86_64 +blowfish-x86_64 +{% endif -%} + +{% if cpuVendor == "intel" -%} +# Intel-specific optimizations +ghash-clmulni-intel +aesni-intel +kvm-intel +{% endif -%} + +{% if cpuVendor == "amd" -%} +# AMD-specific optimizations +kvm-amd +{% endif -%} + +kvm + + +# Crypto +# ------ + +# Some core modules which comprise strong cryptography. +blowfish_common +blowfish_generic +ctr +cts +lrw +lzo +rmd160 +rmd256 +rmd320 +serpent +sha512_generic +twofish_common +twofish_generic +xts +zlib + + +# Drivers +# ------- + +# Basics +lp +rtc +loop + +# Filesystems +ext2 +btrfs + +{% if desktop_enable -%} +# Desktop +psmouse +snd +snd_ac97_codec +snd_intel8x0 +snd_page_alloc +snd_pcm +snd_timer +soundcore +usbhid +{% endif -%} + +# Lib +# --- +xz + + +# Net +# --- + +# All packets needed for netfilter rules (ie iptables, ebtables). +ip_tables +x_tables +iptable_filter +iptable_nat + +# Targets +ipt_LOG +ipt_REJECT + +# Modules +xt_connlimit +xt_tcpudp +xt_recent +xt_limit +xt_conntrack +nf_conntrack +nf_conntrack_ipv4 +nf_defrag_ipv4 +xt_state +nf_nat + +# Addons +xt_pknock \ No newline at end of file diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/passwdqc.conf b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/passwdqc.conf new file mode 100644 index 00000000..f98d14e5 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/passwdqc.conf @@ -0,0 +1,11 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. 
+############################################################################### +Name: passwdqc password strength enforcement +Default: yes +Priority: 1024 +Conflicts: cracklib +Password-Type: Primary +Password: + requisite pam_passwdqc.so {{ auth_pam_passwdqc_options }} diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/pinerolo_profile.sh b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/pinerolo_profile.sh new file mode 100644 index 00000000..fd2de791 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/pinerolo_profile.sh @@ -0,0 +1,8 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### +# Disable core dumps via soft limits for all users. Compliance to this setting +# is voluntary and can be modified by users up to a hard limit. This setting is +# a sane default. +ulimit -S -c 0 > /dev/null 2>&1 diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/securetty b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/securetty new file mode 100644 index 00000000..15b18d4e --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/securetty @@ -0,0 +1,11 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### +# A list of TTYs, from which root can log in +# see `man securetty` for reference +{% if ttys -%} +{% for tty in ttys -%} +{{ tty }} +{% endfor -%} +{% endif -%} diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/tally2 b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/tally2 new file mode 100644 index 00000000..d9620299 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/host/templates/tally2 @@ -0,0 +1,14 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### +Name: tally2 lockout after failed attempts enforcement +Default: yes +Priority: 1024 +Conflicts: cracklib +Auth-Type: Primary +Auth-Initial: + required pam_tally2.so deny={{ auth_retries }} onerr=fail unlock_time={{ auth_lockout_time }} +Account-Type: Primary +Account-Initial: + required pam_tally2.so diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/mysql/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/mysql/__init__.py new file mode 100644 index 00000000..58bebd84 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/mysql/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from os import path + +TEMPLATES_DIR = path.join(path.dirname(__file__), 'templates') diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/mysql/checks/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/mysql/checks/__init__.py new file mode 100644 index 00000000..1990d851 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/mysql/checks/__init__.py @@ -0,0 +1,29 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from charmhelpers.core.hookenv import ( + log, + DEBUG, +) +from charmhelpers.contrib.hardening.mysql.checks import config + + +def run_mysql_checks(): + log("Starting MySQL hardening checks.", level=DEBUG) + checks = config.get_audits() + for check in checks: + log("Running '%s' check" % (check.__class__.__name__), level=DEBUG) + check.ensure_compliance() + + log("MySQL hardening checks complete.", level=DEBUG) diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/mysql/checks/config.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/mysql/checks/config.py new file mode 100644 index 00000000..8bf9f36c --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/mysql/checks/config.py @@ -0,0 +1,86 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import subprocess + +from charmhelpers.core.hookenv import ( + log, + WARNING, +) +from charmhelpers.contrib.hardening.audits.file import ( + FilePermissionAudit, + DirectoryPermissionAudit, + TemplatedFile, +) +from charmhelpers.contrib.hardening.mysql import TEMPLATES_DIR +from charmhelpers.contrib.hardening import utils + + +def get_audits(): + """Get MySQL hardening config audits. 
+ + :returns: dictionary of audits + """ + if subprocess.call(['which', 'mysql'], stdout=subprocess.PIPE) != 0: + log("MySQL does not appear to be installed on this node - " + "skipping mysql hardening", level=WARNING) + return [] + + settings = utils.get_settings('mysql') + hardening_settings = settings['hardening'] + my_cnf = hardening_settings['mysql-conf'] + + audits = [ + FilePermissionAudit(paths=[my_cnf], user='root', + group='root', mode=0o0600), + + TemplatedFile(hardening_settings['hardening-conf'], + MySQLConfContext(), + TEMPLATES_DIR, + mode=0o0750, + user='mysql', + group='root', + service_actions=[{'service': 'mysql', + 'actions': ['restart']}]), + + # MySQL and Percona charms do not allow configuration of the + # data directory, so use the default. + DirectoryPermissionAudit('/var/lib/mysql', + user='mysql', + group='mysql', + recursive=False, + mode=0o755), + + DirectoryPermissionAudit('/etc/mysql', + user='root', + group='root', + recursive=False, + mode=0o700), + ] + + return audits + + +class MySQLConfContext(object): + """Defines the set of key/value pairs to set in a mysql config file. + + This context, when called, will return a dictionary containing the + key/value pairs of setting to specify in the + /etc/mysql/conf.d/hardening.cnf file. + """ + def __call__(self): + settings = utils.get_settings('mysql') + return { + 'mysql_settings': [(k, v) for k, v in settings['security'].items()] + } diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/mysql/templates/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/mysql/templates/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/mysql/templates/hardening.cnf b/ceph-osd/hooks/charmhelpers/contrib/hardening/mysql/templates/hardening.cnf new file mode 100644 index 00000000..8242586c --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/mysql/templates/hardening.cnf @@ -0,0 +1,12 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### +[mysqld] +{% for setting, value in mysql_settings -%} +{% if value == 'True' -%} +{{ setting }} +{% elif value != 'None' and value != None -%} +{{ setting }} = {{ value }} +{% endif -%} +{% endfor -%} diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/ssh/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/ssh/__init__.py new file mode 100644 index 00000000..58bebd84 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/ssh/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from os import path + +TEMPLATES_DIR = path.join(path.dirname(__file__), 'templates') diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/ssh/checks/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/ssh/checks/__init__.py new file mode 100644 index 00000000..edaf484b --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/ssh/checks/__init__.py @@ -0,0 +1,29 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from charmhelpers.core.hookenv import ( + log, + DEBUG, +) +from charmhelpers.contrib.hardening.ssh.checks import config + + +def run_ssh_checks(): + log("Starting SSH hardening checks.", level=DEBUG) + checks = config.get_audits() + for check in checks: + log("Running '%s' check" % (check.__class__.__name__), level=DEBUG) + check.ensure_compliance() + + log("SSH hardening checks complete.", level=DEBUG) diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/ssh/checks/config.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/ssh/checks/config.py new file mode 100644 index 00000000..41bed2d1 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/ssh/checks/config.py @@ -0,0 +1,435 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from charmhelpers.contrib.network.ip import ( + get_address_in_network, + get_iface_addr, + is_ip, +) +from charmhelpers.core.hookenv import ( + log, + DEBUG, +) +from charmhelpers.fetch import ( + apt_install, + apt_update, +) +from charmhelpers.core.host import ( + lsb_release, + CompareHostReleases, +) +from charmhelpers.contrib.hardening.audits.file import ( + TemplatedFile, + FileContentAudit, +) +from charmhelpers.contrib.hardening.ssh import TEMPLATES_DIR +from charmhelpers.contrib.hardening import utils + + +def get_audits(): + """Get SSH hardening config audits. 
+
+    :returns: list of audits
+    """
+    audits = [SSHConfig(), SSHDConfig(), SSHConfigFileContentAudit(),
+              SSHDConfigFileContentAudit()]
+    return audits
+
+
+class SSHConfigContext(object):
+
+    type = 'client'
+
+    def get_macs(self, allow_weak_mac):
+        if allow_weak_mac:
+            weak_macs = 'weak'
+        else:
+            weak_macs = 'default'
+
+        default = 'hmac-sha2-512,hmac-sha2-256,hmac-ripemd160'
+        macs = {'default': default,
+                'weak': default + ',hmac-sha1'}
+
+        default = ('hmac-sha2-512-etm@openssh.com,'
+                   'hmac-sha2-256-etm@openssh.com,'
+                   'hmac-ripemd160-etm@openssh.com,umac-128-etm@openssh.com,'
+                   'hmac-sha2-512,hmac-sha2-256,hmac-ripemd160')
+        macs_66 = {'default': default,
+                   'weak': default + ',hmac-sha1'}
+
+        # Use newer MACs on Ubuntu Trusty and above
+        _release = lsb_release()['DISTRIB_CODENAME'].lower()
+        if CompareHostReleases(_release) >= 'trusty':
+            log("Detected Ubuntu 14.04 or newer, using new macs", level=DEBUG)
+            macs = macs_66
+
+        return macs[weak_macs]
+
+    def get_kexs(self, allow_weak_kex):
+        if allow_weak_kex:
+            weak_kex = 'weak'
+        else:
+            weak_kex = 'default'
+
+        default = 'diffie-hellman-group-exchange-sha256'
+        weak = (default + ',diffie-hellman-group14-sha1,'
+                'diffie-hellman-group-exchange-sha1,'
+                'diffie-hellman-group1-sha1')
+        kex = {'default': default,
+               'weak': weak}
+
+        default = ('curve25519-sha256@libssh.org,'
+                   'diffie-hellman-group-exchange-sha256')
+        weak = (default + ',diffie-hellman-group14-sha1,'
+                'diffie-hellman-group-exchange-sha1,'
+                'diffie-hellman-group1-sha1')
+        kex_66 = {'default': default,
+                  'weak': weak}
+
+        # Use newer kex on Ubuntu Trusty and above
+        _release = lsb_release()['DISTRIB_CODENAME'].lower()
+        if CompareHostReleases(_release) >= 'trusty':
+            log('Detected Ubuntu 14.04 or newer, using new key exchange '
+                'algorithms', level=DEBUG)
+            kex = kex_66
+
+        return kex[weak_kex]
+
+    def get_ciphers(self, cbc_required):
+        if cbc_required:
+            weak_ciphers = 'weak'
+        else:
+            weak_ciphers = 'default'
+
+        default = 'aes256-ctr,aes192-ctr,aes128-ctr'
+        cipher = {'default': default,
+                  'weak': default + ',aes256-cbc,aes192-cbc,aes128-cbc'}
+
+        default = ('chacha20-poly1305@openssh.com,aes256-gcm@openssh.com,'
+                   'aes128-gcm@openssh.com,aes256-ctr,aes192-ctr,aes128-ctr')
+        ciphers_66 = {'default': default,
+                      'weak': default + ',aes256-cbc,aes192-cbc,aes128-cbc'}
+
+        # Use newer ciphers on Ubuntu Trusty and above
+        _release = lsb_release()['DISTRIB_CODENAME'].lower()
+        if CompareHostReleases(_release) >= 'trusty':
+            log('Detected Ubuntu 14.04 or newer, using new ciphers',
+                level=DEBUG)
+            cipher = ciphers_66
+
+        return cipher[weak_ciphers]
+
+    def get_listening(self, listen=['0.0.0.0']):
+        """Returns a list of addresses SSH can listen on
+
+        Turns input into a sensible list of IPs SSH can listen on. Input
+        must be a Python list of interface names, IPs and/or CIDRs.
+
+        :param listen: list of IPs, CIDRs, interface names
+
+        :returns: list of IPs available on the host
+        """
+        if listen == ['0.0.0.0']:
+            return listen
+
+        value = []
+        for network in listen:
+            try:
+                ip = get_address_in_network(network=network, fatal=True)
+            except ValueError:
+                if is_ip(network):
+                    ip = network
+                else:
+                    try:
+                        ip = get_iface_addr(iface=network, fatal=False)[0]
+                    except IndexError:
+                        continue
+            value.append(ip)
+        if value == []:
+            return ['0.0.0.0']
+        return value
+
+    def __call__(self):
+        settings = utils.get_settings('ssh')
+        if settings['common']['network_ipv6_enable']:
+            addr_family = 'any'
+        else:
+            addr_family = 'inet'
+
+        ctxt = {
+            'addr_family': addr_family,
+            'remote_hosts': settings['common']['remote_hosts'],
+            'password_auth_allowed':
+            settings['client']['password_authentication'],
+            'ports': settings['common']['ports'],
+            'ciphers': self.get_ciphers(settings['client']['cbc_required']),
+            'macs': self.get_macs(settings['client']['weak_hmac']),
+            'kexs': self.get_kexs(settings['client']['weak_kex']),
+            'roaming': settings['client']['roaming'],
+        }
+        return ctxt
+
+
+class SSHConfig(TemplatedFile):
+    def __init__(self):
+        path = '/etc/ssh/ssh_config'
+        super(SSHConfig, self).__init__(path=path,
+                                        template_dir=TEMPLATES_DIR,
+                                        context=SSHConfigContext(),
+                                        user='root',
+                                        group='root',
+                                        mode=0o0644)
+
+    def pre_write(self):
+        settings = utils.get_settings('ssh')
+        apt_update(fatal=True)
+        apt_install(settings['client']['package'])
+        if not os.path.exists('/etc/ssh'):
+            os.makedirs('/etc/ssh')
+        # NOTE: don't recurse
+        utils.ensure_permissions('/etc/ssh', 'root', 'root', 0o0755,
+                                 maxdepth=0)
+
+    def post_write(self):
+        # NOTE: don't recurse
+        utils.ensure_permissions('/etc/ssh', 'root', 'root', 0o0755,
+                                 maxdepth=0)
+
+
+class SSHDConfigContext(SSHConfigContext):
+
+    type = 'server'
+
+    def __call__(self):
+        settings = utils.get_settings('ssh')
+        if settings['common']['network_ipv6_enable']:
+            addr_family = 'any'
+        else:
+            addr_family = 'inet'
+
+        ctxt = {
+            'ssh_ip': self.get_listening(settings['server']['listen_to']),
+            'password_auth_allowed':
+            settings['server']['password_authentication'],
+            'ports': settings['common']['ports'],
+            'addr_family': addr_family,
+            'ciphers': self.get_ciphers(settings['server']['cbc_required']),
+            'macs': self.get_macs(settings['server']['weak_hmac']),
+            'kexs': self.get_kexs(settings['server']['weak_kex']),
+            'host_key_files': settings['server']['host_key_files'],
+            'allow_root_with_key': settings['server']['allow_root_with_key'],
+            'password_authentication':
+            settings['server']['password_authentication'],
+            'use_priv_sep': settings['server']['use_privilege_separation'],
+            'use_pam': settings['server']['use_pam'],
+            'allow_x11_forwarding': settings['server']['allow_x11_forwarding'],
+            'print_motd': settings['server']['print_motd'],
+            'print_last_log': settings['server']['print_last_log'],
+            'client_alive_interval':
+            settings['server']['alive_interval'],
+            'client_alive_count': settings['server']['alive_count'],
+            'allow_tcp_forwarding': settings['server']['allow_tcp_forwarding'],
+            'allow_agent_forwarding':
+            settings['server']['allow_agent_forwarding'],
+            'deny_users': settings['server']['deny_users'],
+            'allow_users': settings['server']['allow_users'],
+            'deny_groups': settings['server']['deny_groups'],
+            'allow_groups': settings['server']['allow_groups'],
+            'use_dns': settings['server']['use_dns'],
+            'sftp_enable': settings['server']['sftp_enable'],
+            'sftp_group': settings['server']['sftp_group'],
+            'sftp_chroot': settings['server']['sftp_chroot'],
+            'max_auth_tries': settings['server']['max_auth_tries'],
+            'max_sessions': settings['server']['max_sessions'],
+        }
+        return ctxt
+
+
+class SSHDConfig(TemplatedFile):
+    def __init__(self):
+        path = '/etc/ssh/sshd_config'
+        super(SSHDConfig, self).__init__(path=path,
+                                         template_dir=TEMPLATES_DIR,
+                                         context=SSHDConfigContext(),
+                                         user='root',
+                                         group='root',
+                                         mode=0o0600,
+                                         service_actions=[{'service': 'ssh',
+                                                           'actions':
+                                                           ['restart']}])
+
+    def pre_write(self):
+        settings = utils.get_settings('ssh')
+        apt_update(fatal=True)
+        apt_install(settings['server']['package'])
+        if not os.path.exists('/etc/ssh'):
+            os.makedirs('/etc/ssh')
+        # NOTE: don't recurse
+        utils.ensure_permissions('/etc/ssh', 'root', 'root', 0o0755,
+                                 maxdepth=0)
+
+    def post_write(self):
+        # NOTE: don't recurse
+        utils.ensure_permissions('/etc/ssh', 'root', 'root', 0o0755,
+                                 maxdepth=0)
+
+
+class SSHConfigFileContentAudit(FileContentAudit):
+    def __init__(self):
+        self.path = '/etc/ssh/ssh_config'
+        super(SSHConfigFileContentAudit, self).__init__(self.path, {})
+
+    def is_compliant(self, *args, **kwargs):
+        self.pass_cases = []
+        self.fail_cases = []
+        settings = utils.get_settings('ssh')
+
+        _release = lsb_release()['DISTRIB_CODENAME'].lower()
+        if CompareHostReleases(_release) >= 'trusty':
+            if not settings['client']['weak_hmac']:
+                self.pass_cases.append(r'^MACs.+,hmac-ripemd160$')
+            else:
+                self.pass_cases.append(r'^MACs.+,hmac-sha1$')
+
+            if settings['client']['weak_kex']:
+                self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha256[,\s]?')  # noqa
+                self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group14-sha1[,\s]?')  # noqa
+                self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha1[,\s]?')  # noqa
+                self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group1-sha1[,\s]?')  # noqa
+            else:
+                self.pass_cases.append(r'^KexAlgorithms.+,diffie-hellman-group-exchange-sha256$')  # noqa
+                self.fail_cases.append(r'^KexAlgorithms.*diffie-hellman-group14-sha1[,\s]?')  # noqa
+
+            if settings['client']['cbc_required']:
+                self.pass_cases.append(r'^Ciphers\s.*-cbc[,\s]?')
+                self.fail_cases.append(r'^Ciphers\s.*aes128-ctr[,\s]?')
+                self.fail_cases.append(r'^Ciphers\s.*aes192-ctr[,\s]?')
+                self.fail_cases.append(r'^Ciphers\s.*aes256-ctr[,\s]?')
+            else:
+                self.fail_cases.append(r'^Ciphers\s.*-cbc[,\s]?')
+                self.pass_cases.append(r'^Ciphers\schacha20-poly1305@openssh.com,.+')  # noqa
+                self.pass_cases.append(r'^Ciphers\s.*aes128-ctr$')
+                self.pass_cases.append(r'^Ciphers\s.*aes192-ctr[,\s]?')
+                self.pass_cases.append(r'^Ciphers\s.*aes256-ctr[,\s]?')
+        else:
+            if not settings['client']['weak_hmac']:
+                self.fail_cases.append(r'^MACs.+,hmac-sha1$')
+            else:
+                self.pass_cases.append(r'^MACs.+,hmac-sha1$')
+
+            if settings['client']['weak_kex']:
+                self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha256[,\s]?')  # noqa
+                self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group14-sha1[,\s]?')  # noqa
+                self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha1[,\s]?')  # noqa
+                self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group1-sha1[,\s]?')  # noqa
+            else:
+                self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha256$')  # noqa
+                self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group14-sha1[,\s]?')  # noqa
+                self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha1[,\s]?')  # noqa
+                self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group1-sha1[,\s]?')  # noqa
+
+            if
settings['client']['cbc_required']: + self.pass_cases.append(r'^Ciphers\s.*-cbc[,\s]?') + self.fail_cases.append(r'^Ciphers\s.*aes128-ctr[,\s]?') + self.fail_cases.append(r'^Ciphers\s.*aes192-ctr[,\s]?') + self.fail_cases.append(r'^Ciphers\s.*aes256-ctr[,\s]?') + else: + self.fail_cases.append(r'^Ciphers\s.*-cbc[,\s]?') + self.pass_cases.append(r'^Ciphers\s.*aes128-ctr[,\s]?') + self.pass_cases.append(r'^Ciphers\s.*aes192-ctr[,\s]?') + self.pass_cases.append(r'^Ciphers\s.*aes256-ctr[,\s]?') + + if settings['client']['roaming']: + self.pass_cases.append(r'^UseRoaming yes$') + else: + self.fail_cases.append(r'^UseRoaming yes$') + + return super(SSHConfigFileContentAudit, self).is_compliant(*args, + **kwargs) + + +class SSHDConfigFileContentAudit(FileContentAudit): + def __init__(self): + self.path = '/etc/ssh/sshd_config' + super(SSHDConfigFileContentAudit, self).__init__(self.path, {}) + + def is_compliant(self, *args, **kwargs): + self.pass_cases = [] + self.fail_cases = [] + settings = utils.get_settings('ssh') + + _release = lsb_release()['DISTRIB_CODENAME'].lower() + if CompareHostReleases(_release) >= 'trusty': + if not settings['server']['weak_hmac']: + self.pass_cases.append(r'^MACs.+,hmac-ripemd160$') + else: + self.pass_cases.append(r'^MACs.+,hmac-sha1$') + + if settings['server']['weak_kex']: + self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha256[,\s]?') # noqa + self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group14-sha1[,\s]?') # noqa + self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha1[,\s]?') # noqa + self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group1-sha1[,\s]?') # noqa + else: + self.pass_cases.append(r'^KexAlgorithms.+,diffie-hellman-group-exchange-sha256$') # noqa + self.fail_cases.append(r'^KexAlgorithms.*diffie-hellman-group14-sha1[,\s]?') # noqa + + if settings['server']['cbc_required']: + self.pass_cases.append(r'^Ciphers\s.*-cbc[,\s]?') + self.fail_cases.append(r'^Ciphers\s.*aes128-ctr[,\s]?') + self.fail_cases.append(r'^Ciphers\s.*aes192-ctr[,\s]?') + self.fail_cases.append(r'^Ciphers\s.*aes256-ctr[,\s]?') + else: + self.fail_cases.append(r'^Ciphers\s.*-cbc[,\s]?') + self.pass_cases.append(r'^Ciphers\schacha20-poly1305@openssh.com,.+') # noqa + self.pass_cases.append(r'^Ciphers\s.*aes128-ctr$') + self.pass_cases.append(r'^Ciphers\s.*aes192-ctr[,\s]?') + self.pass_cases.append(r'^Ciphers\s.*aes256-ctr[,\s]?') + else: + if not settings['server']['weak_hmac']: + self.pass_cases.append(r'^MACs.+,hmac-ripemd160$') + else: + self.pass_cases.append(r'^MACs.+,hmac-sha1$') + + if settings['server']['weak_kex']: + self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha256[,\s]?') # noqa + self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group14-sha1[,\s]?') # noqa + self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha1[,\s]?') # noqa + self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group1-sha1[,\s]?') # noqa + else: + self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha256$') # noqa + self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group14-sha1[,\s]?') # noqa + self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha1[,\s]?') # noqa + self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group1-sha1[,\s]?') # noqa + + if settings['server']['cbc_required']: + self.pass_cases.append(r'^Ciphers\s.*-cbc[,\s]?') + self.fail_cases.append(r'^Ciphers\s.*aes128-ctr[,\s]?') + 
self.fail_cases.append(r'^Ciphers\s.*aes192-ctr[,\s]?') + self.fail_cases.append(r'^Ciphers\s.*aes256-ctr[,\s]?') + else: + self.fail_cases.append(r'^Ciphers\s.*-cbc[,\s]?') + self.pass_cases.append(r'^Ciphers\s.*aes128-ctr[,\s]?') + self.pass_cases.append(r'^Ciphers\s.*aes192-ctr[,\s]?') + self.pass_cases.append(r'^Ciphers\s.*aes256-ctr[,\s]?') + + if settings['server']['sftp_enable']: + self.pass_cases.append(r'^Subsystem\ssftp') + else: + self.fail_cases.append(r'^Subsystem\ssftp') + + return super(SSHDConfigFileContentAudit, self).is_compliant(*args, + **kwargs) diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/ssh/templates/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/ssh/templates/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/ssh/templates/ssh_config b/ceph-osd/hooks/charmhelpers/contrib/hardening/ssh/templates/ssh_config new file mode 100644 index 00000000..9742d8e2 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/ssh/templates/ssh_config @@ -0,0 +1,70 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### +# This is the ssh client system-wide configuration file. See +# ssh_config(5) for more information. This file provides defaults for +# users, and the values can be changed in per-user configuration files +# or on the command line. + +# Configuration data is parsed as follows: +# 1. command line options +# 2. user-specific file +# 3. system-wide file +# Any configuration value is only changed the first time it is set. +# Thus, host-specific definitions should be at the beginning of the +# configuration file, and defaults at the end. + +# Site-wide defaults for some commonly used options. For a comprehensive +# list of available options, their meanings and defaults, please see the +# ssh_config(5) man page. + +# Restrict the following configuration to be limited to this Host. 
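+# As a hedged illustration (hypothetical values): with
+# remote_hosts == ['10.1.0.4', '10.1.0.5'] the block below renders as
+# "Host 10.1.0.4 10.1.0.5", scoping the options that follow to those
+# hosts; with an empty list no Host line is emitted and the options
+# apply to all hosts.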
+{% if remote_hosts -%} +Host {{ ' '.join(remote_hosts) }} +{% endif %} +ForwardAgent no +ForwardX11 no +ForwardX11Trusted yes +RhostsRSAAuthentication no +RSAAuthentication yes +PasswordAuthentication {{ password_auth_allowed }} +HostbasedAuthentication no +GSSAPIAuthentication no +GSSAPIDelegateCredentials no +GSSAPIKeyExchange no +GSSAPITrustDNS no +BatchMode no +CheckHostIP yes +AddressFamily {{ addr_family }} +ConnectTimeout 0 +StrictHostKeyChecking ask +IdentityFile ~/.ssh/identity +IdentityFile ~/.ssh/id_rsa +IdentityFile ~/.ssh/id_dsa +# The port at the destination should be defined +{% for port in ports -%} +Port {{ port }} +{% endfor %} +Protocol 2 +Cipher 3des +{% if ciphers -%} +Ciphers {{ ciphers }} +{%- endif %} +{% if macs -%} +MACs {{ macs }} +{%- endif %} +{% if kexs -%} +KexAlgorithms {{ kexs }} +{%- endif %} +EscapeChar ~ +Tunnel no +TunnelDevice any:any +PermitLocalCommand no +VisualHostKey no +RekeyLimit 1G 1h +SendEnv LANG LC_* +HashKnownHosts yes +{% if roaming -%} +UseRoaming {{ roaming }} +{% endif %} diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/ssh/templates/sshd_config b/ceph-osd/hooks/charmhelpers/contrib/hardening/ssh/templates/sshd_config new file mode 100644 index 00000000..5f87298a --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/ssh/templates/sshd_config @@ -0,0 +1,159 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### +# Package generated configuration file +# See the sshd_config(5) manpage for details + +# What ports, IPs and protocols we listen for +{% for port in ports -%} +Port {{ port }} +{% endfor -%} +AddressFamily {{ addr_family }} +# Use these options to restrict which interfaces/protocols sshd will bind to +{% if ssh_ip -%} +{% for ip in ssh_ip -%} +ListenAddress {{ ip }} +{% endfor %} +{%- else -%} +ListenAddress :: +ListenAddress 0.0.0.0 +{% endif -%} +Protocol 2 +{% if ciphers -%} +Ciphers {{ ciphers }} +{% endif -%} +{% if macs -%} +MACs {{ macs }} +{% endif -%} +{% if kexs -%} +KexAlgorithms {{ kexs }} +{% endif -%} +# HostKeys for protocol version 2 +{% for keyfile in host_key_files -%} +HostKey {{ keyfile }} +{% endfor -%} + +# Privilege Separation is turned on for security +{% if use_priv_sep -%} +UsePrivilegeSeparation {{ use_priv_sep }} +{% endif -%} + +# Lifetime and size of ephemeral version 1 server key +KeyRegenerationInterval 3600 +ServerKeyBits 1024 + +# Logging +SyslogFacility AUTH +LogLevel VERBOSE + +# Authentication: +LoginGraceTime 30s +{% if allow_root_with_key -%} +PermitRootLogin without-password +{% else -%} +PermitRootLogin no +{% endif %} +PermitTunnel no +PermitUserEnvironment no +StrictModes yes + +RSAAuthentication yes +PubkeyAuthentication yes +AuthorizedKeysFile %h/.ssh/authorized_keys + +# Don't read the user's ~/.rhosts and ~/.shosts files +IgnoreRhosts yes +# For this to work you will also need host keys in /etc/ssh_known_hosts +RhostsRSAAuthentication no +# similar for protocol version 2 +HostbasedAuthentication no +# Uncomment if you don't trust ~/.ssh/known_hosts for RhostsRSAAuthentication +IgnoreUserKnownHosts yes + +# To enable empty passwords, change to yes (NOT RECOMMENDED) +PermitEmptyPasswords no + +# Change to yes to enable challenge-response passwords (beware issues with +# some PAM modules and threads) +ChallengeResponseAuthentication no + +# Change to no to 
disable tunnelled clear text passwords
+PasswordAuthentication {{ password_authentication }}
+
+# Kerberos options
+KerberosAuthentication no
+KerberosGetAFSToken no
+KerberosOrLocalPasswd no
+KerberosTicketCleanup yes
+
+# GSSAPI options
+GSSAPIAuthentication no
+GSSAPICleanupCredentials yes
+
+X11Forwarding {{ allow_x11_forwarding }}
+X11DisplayOffset 10
+X11UseLocalhost yes
+GatewayPorts no
+PrintMotd {{ print_motd }}
+PrintLastLog {{ print_last_log }}
+TCPKeepAlive no
+UseLogin no
+
+ClientAliveInterval {{ client_alive_interval }}
+ClientAliveCountMax {{ client_alive_count }}
+AllowTcpForwarding {{ allow_tcp_forwarding }}
+AllowAgentForwarding {{ allow_agent_forwarding }}
+
+MaxStartups 10:30:100
+#Banner /etc/issue.net
+
+# Allow client to pass locale environment variables
+AcceptEnv LANG LC_*
+
+# Set this to 'yes' to enable PAM authentication, account processing,
+# and session processing. If this is enabled, PAM authentication will
+# be allowed through the ChallengeResponseAuthentication and
+# PasswordAuthentication. Depending on your PAM configuration,
+# PAM authentication via ChallengeResponseAuthentication may bypass
+# the setting of "PermitRootLogin without-password".
+# If you just want the PAM account and session checks to run without
+# PAM authentication, then enable this but set PasswordAuthentication
+# and ChallengeResponseAuthentication to 'no'.
+UsePAM {{ use_pam }}
+
+{% if deny_users -%}
+DenyUsers {{ deny_users }}
+{% endif -%}
+{% if allow_users -%}
+AllowUsers {{ allow_users }}
+{% endif -%}
+{% if deny_groups -%}
+DenyGroups {{ deny_groups }}
+{% endif -%}
+{% if allow_groups -%}
+AllowGroups {{ allow_groups }}
+{% endif -%}
+UseDNS {{ use_dns }}
+MaxAuthTries {{ max_auth_tries }}
+MaxSessions {{ max_sessions }}
+
+{% if sftp_enable -%}
+# Configuration, in case SFTP is used
+## override default of no subsystems
+## Subsystem sftp /opt/app/openssh5/libexec/sftp-server
+Subsystem sftp internal-sftp -l VERBOSE
+
+## These lines must appear at the *end* of sshd_config
+Match Group {{ sftp_group }}
+ForceCommand internal-sftp -l VERBOSE
+ChrootDirectory {{ sftp_chroot }}
+{% else -%}
+# Configuration, in case SFTP is used
+## override default of no subsystems
+## Subsystem sftp /opt/app/openssh5/libexec/sftp-server
+## These lines must appear at the *end* of sshd_config
+Match Group sftponly
+ForceCommand internal-sftp -l VERBOSE
+ChrootDirectory /sftpchroot/home/%u
+{% endif %}
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/templating.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/templating.py
new file mode 100644
index 00000000..4dee5465
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/templating.py
@@ -0,0 +1,69 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
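+
+# Illustrative usage sketch (hypothetical paths; not part of the original
+# module): render_and_write('templates', '/etc/ssh/sshd_config', ctxt)
+# loads templates/sshd_config, renders it with the ctxt dict and writes
+# the result to /etc/ssh/sshd_config.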
+
+import os
+
+from charmhelpers.core.hookenv import (
+    log,
+    DEBUG,
+    WARNING,
+)
+
+try:
+    from jinja2 import FileSystemLoader, Environment
+except ImportError:
+    from charmhelpers.fetch import apt_install
+    from charmhelpers.fetch import apt_update
+    apt_update(fatal=True)
+    apt_install('python3-jinja2', fatal=True)
+    from jinja2 import FileSystemLoader, Environment
+
+
+# NOTE: function separated from main rendering code to facilitate easier
+# mocking in unit tests.
+def write(path, data):
+    with open(path, 'wb') as out:
+        out.write(data)
+
+
+def get_template_path(template_dir, path):
+    """Returns the template file which would be used to render the path.
+
+    The path to the template file is returned.
+    :param template_dir: the directory the templates are located in
+    :param path: the file path to be written to.
+    :returns: path to the template file
+    """
+    return os.path.join(template_dir, os.path.basename(path))
+
+
+def render_and_write(template_dir, path, context):
+    """Renders the specified template into the file.
+
+    :param template_dir: the directory to load the template from
+    :param path: the path to write the templated contents to
+    :param context: the parameters to pass to the rendering engine
+    """
+    env = Environment(loader=FileSystemLoader(template_dir))
+    template_file = os.path.basename(path)
+    template = env.get_template(template_file)
+    log('Rendering from template: %s' % template.name, level=DEBUG)
+    rendered_content = template.render(context)
+    if not rendered_content:
+        log("Render returned None - skipping '%s'" % path,
+            level=WARNING)
+        return
+
+    write(path, rendered_content.encode('utf-8').strip())
+    log('Wrote template %s' % path, level=DEBUG)
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardening/utils.py b/ceph-osd/hooks/charmhelpers/contrib/hardening/utils.py
new file mode 100644
index 00000000..f93851a9
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/hardening/utils.py
@@ -0,0 +1,154 @@
+# Copyright 2016-2021 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import glob
+import grp
+import os
+import pwd
+import yaml
+
+from charmhelpers.core.hookenv import (
+    log,
+    DEBUG,
+    INFO,
+    WARNING,
+    ERROR,
+)
+
+
+# Global settings cache. Since each hook fire entails a fresh module import it
+# is safe to hold this in memory and not risk missing config changes (since
+# they will result in a new hook fire and thus re-import).
+__SETTINGS__ = {}
+
+
+def _get_defaults(modules):
+    """Load the default config for the provided modules.
+
+    :param modules: stack modules config defaults to lookup.
+    :returns: modules default config dictionary.
+    """
+    default = os.path.join(os.path.dirname(__file__),
+                           'defaults/%s.yaml' % (modules))
+    return yaml.safe_load(open(default))
+
+
+def _get_schema(modules):
+    """Load the config schema for the provided modules.
+
+    NOTE: this schema is intended to have a 1-1 relationship with the keys
+    in the default config and is used as a means to verify valid overrides
+    provided by the user.
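+
+    For example (hypothetical value): modules='ssh' resolves to
+    'defaults/ssh.yaml.schema' relative to this module.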
+
+    :param modules: stack modules config schema to lookup.
+    :returns: modules default schema dictionary.
+    """
+    schema = os.path.join(os.path.dirname(__file__),
+                          'defaults/%s.yaml.schema' % (modules))
+    return yaml.safe_load(open(schema))
+
+
+def _get_user_provided_overrides(modules):
+    """Load user-provided config overrides.
+
+    :param modules: stack modules to lookup in user overrides yaml file.
+    :returns: overrides dictionary.
+    """
+    overrides = os.path.join(os.environ['JUJU_CHARM_DIR'],
+                             'hardening.yaml')
+    if os.path.exists(overrides):
+        log("Found user-provided config overrides file '%s'" %
+            (overrides), level=DEBUG)
+        settings = yaml.safe_load(open(overrides))
+        if settings and settings.get(modules):
+            log("Applying '%s' overrides" % (modules), level=DEBUG)
+            return settings.get(modules)
+
+        log("No overrides found for '%s'" % (modules), level=DEBUG)
+    else:
+        log("No hardening config overrides file '%s' found in charm "
+            "root dir" % (overrides), level=DEBUG)
+
+    return {}
+
+
+def _apply_overrides(settings, overrides, schema):
+    """Get overrides config overlaid onto modules defaults.
+
+    :param settings: modules default config.
+    :param overrides: user-provided config overrides.
+    :param schema: config schema used to validate override keys.
+    :returns: dictionary of modules config with user overrides applied.
+    """
+    if overrides:
+        for k, v in overrides.items():
+            if k in schema:
+                if schema[k] is None:
+                    settings[k] = v
+                elif type(schema[k]) is dict:
+                    settings[k] = _apply_overrides(settings[k], overrides[k],
+                                                   schema[k])
+                else:
+                    raise Exception("Unexpected type found in schema '%s'" %
+                                    type(schema[k]))
+            else:
+                log("Unknown override key '%s' - ignoring" % (k), level=INFO)
+
+    return settings
+
+
+def get_settings(modules):
+    global __SETTINGS__
+    if modules in __SETTINGS__:
+        return __SETTINGS__[modules]
+
+    schema = _get_schema(modules)
+    settings = _get_defaults(modules)
+    overrides = _get_user_provided_overrides(modules)
+    __SETTINGS__[modules] = _apply_overrides(settings, overrides, schema)
+    return __SETTINGS__[modules]
+
+
+def ensure_permissions(path, user, group, permissions, maxdepth=-1):
+    """Ensure permissions for path.
+
+    If path is a file, apply to file and return. If path is a directory,
+    apply recursively (if required) to directory contents and return.
+
+    :param path: path to the file or directory to set permissions on.
+    :param user: user name
+    :param group: group name
+    :param permissions: octal permissions
+    :param maxdepth: maximum recursion depth. A negative maxdepth allows
+                     infinite recursion and maxdepth=0 means no recursion.
+    :returns: None
+    """
+    if not os.path.exists(path):
+        log("File '%s' does not exist - cannot set permissions" % (path),
+            level=WARNING)
+        return
+
+    _user = pwd.getpwnam(user)
+    os.chown(path, _user.pw_uid, grp.getgrnam(group).gr_gid)
+    os.chmod(path, permissions)
+
+    if maxdepth == 0:
+        log("Max recursion depth reached - skipping further recursion",
+            level=DEBUG)
+        return
+    elif maxdepth > 0:
+        maxdepth -= 1
+
+    if os.path.isdir(path):
+        contents = glob.glob("%s/*" % (path))
+        for c in contents:
+            ensure_permissions(c, user=user, group=group,
+                               permissions=permissions, maxdepth=maxdepth)
diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardware/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/hardware/__init__.py
new file mode 100644
index 00000000..474a8f3b
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/hardware/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2022 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-osd/hooks/charmhelpers/contrib/hardware/pci.py b/ceph-osd/hooks/charmhelpers/contrib/hardware/pci.py new file mode 100644 index 00000000..f6b1789a --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/hardware/pci.py @@ -0,0 +1,288 @@ +#!/usr/bin/env python3 +# +# Copyright 2016-2022 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import glob +import itertools +import logging +import os +import re +import shlex +import subprocess +import typing + + +def format_pci_addr(pci_addr: str) -> str: + """Format a PCI address with 0 fill for parts + + :param: pci_addr: unformatted PCI address + :type: str + :returns: formatted PCI address + :rtype: str + """ + domain, bus, slot_func = pci_addr.split(":") + slot, func = slot_func.split(".") + return "{}:{}:{}.{}".format( + domain.zfill(4), bus.zfill(2), slot.zfill(2), func + ) + + +def get_sysnet_interfaces_and_macs() -> list: + """Catalog interface information from local system + + each device dict contains: + + interface: logical name + mac_address: MAC address + pci_address: PCI address + state: Current interface state (up/down) + sriov: Boolean indicating whether interface is an SR-IOV + capable device. 
+        sriov_totalvfs: Total VF capacity of device
+        sriov_numvfs: Configured VF capacity of device
+
+    :returns: array of dict objects containing details of each interface
+    :rtype: list
+    """
+    net_devs = []
+    for sdir in itertools.chain(
+            glob.glob("/sys/bus/pci/devices/*/net/../"),
+            glob.glob("/sys/bus/pci/devices/*/virtio*/net/../")):
+        fq_path = os.path.realpath(sdir)
+        path = fq_path.split("/")
+        if "virtio" in path[-1]:
+            pci_address = path[-2]
+        else:
+            pci_address = path[-1]
+        ifname = get_sysnet_interface(sdir)
+        if not ifname:
+            logging.warning("Unable to determine interface name for PCI "
+                            "device {}".format(pci_address))
+            continue
+        device = {
+            "interface": ifname,
+            "mac_address": get_sysnet_mac(sdir, ifname),
+            "pci_address": pci_address,
+            "state": get_sysnet_device_state(sdir, ifname),
+            "sriov": is_sriov(sdir),
+        }
+        if device["sriov"]:
+            device["sriov_totalvfs"] = get_sriov_totalvfs(sdir)
+            device["sriov_numvfs"] = get_sriov_numvfs(sdir)
+        net_devs.append(device)
+
+    return net_devs
+
+
+def get_sysnet_mac(sysdir: str, ifname: str) -> str:
+    """Determine MAC address for a device
+
+    :param: sysdir: path to device /sys directory
+    :type: str
+    :returns: MAC address of device
+    :rtype: str
+    """
+    mac_addr_file = os.path.join(sysdir, "net", ifname, "address")
+    with open(mac_addr_file, "r") as f:
+        read_data = f.read()
+    return read_data.strip()
+
+
+def get_sysnet_device_state(sysdir: str, ifname: str) -> str:
+    """Read operational state of a device
+
+    :param: sysdir: path to device /sys directory
+    :type: str
+    :returns: current device state
+    :rtype: str
+    """
+    state_file = os.path.join(sysdir, "net", ifname, "operstate")
+    with open(state_file, "r") as f:
+        read_data = f.read()
+    return read_data.strip()
+
+
+def is_sriov(sysdir: str) -> bool:
+    """Determine whether a device is SR-IOV capable
+
+    :param: sysdir: path to device /sys directory
+    :type: str
+    :returns: whether device is SR-IOV capable or not
+    :rtype: bool
+    """
+    return os.path.exists(os.path.join(sysdir, "sriov_totalvfs"))
+
+
+def get_sriov_totalvfs(sysdir: str) -> int:
+    """Read total VF capacity for a device
+
+    :param: sysdir: path to device /sys directory
+    :type: str
+    :returns: number of VF's the device supports
+    :rtype: int
+    """
+    sriov_totalvfs_file = os.path.join(sysdir, "sriov_totalvfs")
+    with open(sriov_totalvfs_file, "r") as f:
+        read_data = f.read()
+    return int(read_data.strip())
+
+
+def get_sriov_numvfs(sysdir: str) -> int:
+    """Read configured VF capacity for a device
+
+    :param: sysdir: path to device /sys directory
+    :type: str
+    :returns: number of VF's the device is configured with
+    :rtype: int
+    """
+    sriov_numvfs_file = os.path.join(sysdir, "sriov_numvfs")
+    with open(sriov_numvfs_file, "r") as f:
+        read_data = f.read()
+    return int(read_data.strip())
+
+
+# https://github.com/libvirt/libvirt/commit/5b1c525b1f3608156884aed0dc5e925306c1e260
+PF_PHYS_PORT_NAME_REGEX = re.compile(r"(p[0-9]+$)|(p[0-9]+s[0-9]+$)",
+                                     re.IGNORECASE)
+
+
+def _phys_port_name_is_pf(sysnetdir: str) -> typing.Optional[bool]:
+    try:
+        with open(os.path.join(sysnetdir, "phys_port_name"), "r") as fin:
+            return (PF_PHYS_PORT_NAME_REGEX.match(fin.read().strip())
+                    is not None)
+    except OSError:
+        return
+
+
+def get_sysnet_interface(sysdir: str) -> typing.Optional[str]:
+    sysnetdir = os.path.join(sysdir, "net")
+    netdevs = os.listdir(sysnetdir)
+    # Return early in case the PCI device only has one netdev
+    if len(netdevs) == 1:
+        return netdevs[0]
+
+    # When a PCI device has multiple netdevs we need to figure out which one
+    # represents the PF
+    for netdev in netdevs:
+        if _phys_port_name_is_pf(os.path.join(sysnetdir, netdev)):
+            return netdev
+
+
+def get_pci_ethernet_addresses() -> list:
+    """Generate list of PCI addresses for all network adapters
+
+    :returns: list of PCI addresses
+    :rtype: list
+    """
+    cmd = ["lspci", "-m", "-D"]
+    lspci_output = subprocess.check_output(cmd).decode("UTF-8")
+    pci_addresses = []
+    for line in lspci_output.split("\n"):
+        columns = shlex.split(line)
+        if len(columns) > 1 and columns[1] == "Ethernet controller":
+            pci_address = columns[0]
+            pci_addresses.append(format_pci_addr(pci_address))
+    return pci_addresses
+
+
+class PCINetDevice(object):
+    def __init__(self, pci_address):
+        self.pci_address = pci_address
+        self.interface_name = None
+        self.mac_address = None
+        self.state = None
+        self.sriov = False
+        self.sriov_totalvfs = None
+        self.sriov_numvfs = None
+        self.update_attributes()
+
+    def update_attributes(self):
+        self.update_interface_info()
+
+    def update_interface_info(self):
+        net_devices = get_sysnet_interfaces_and_macs()
+        for interface in net_devices:
+            if self.pci_address == interface["pci_address"]:
+                self.interface_name = interface["interface"]
+                self.mac_address = interface["mac_address"]
+                self.state = interface["state"]
+                self.sriov = interface["sriov"]
+                if self.sriov:
+                    self.sriov_totalvfs = interface["sriov_totalvfs"]
+                    self.sriov_numvfs = interface["sriov_numvfs"]
+
+    def _set_sriov_numvfs(self, numvfs: int):
+        sdevice = os.path.join(
+            "/sys/bus/pci/devices", self.pci_address, "sriov_numvfs"
+        )
+        with open(sdevice, "w") as sh:
+            sh.write(str(numvfs))
+        self.update_attributes()
+
+    def set_sriov_numvfs(self, numvfs: int) -> bool:
+        """Set the number of VF devices for a SR-IOV PF
+
+        Assuming the device is an SR-IOV device, this function will attempt
+        to change the number of VF's created by the PF.
+
+        @param numvfs: integer to set the current number of VF's to
+        @returns boolean indicating whether any changes were made
+        """
+        if self.sriov and numvfs != self.sriov_numvfs:
+            # NOTE(fnordahl): run-time change of numvfs is disallowed
+            # without resetting to 0 first.
+            self._set_sriov_numvfs(0)
+            self._set_sriov_numvfs(numvfs)
+            return True
+        return False
+
+
+class PCINetDevices(object):
+    def __init__(self):
+        self.pci_devices = [
+            PCINetDevice(dev) for dev in get_pci_ethernet_addresses()
+        ]
+
+    def update_devices(self):
+        for pcidev in self.pci_devices:
+            pcidev.update_attributes()
+
+    def get_macs(self) -> list:
+        macs = []
+        for pcidev in self.pci_devices:
+            if pcidev.mac_address:
+                macs.append(pcidev.mac_address)
+        return macs
+
+    def get_device_from_mac(self, mac: str) -> PCINetDevice:
+        for pcidev in self.pci_devices:
+            if pcidev.mac_address == mac:
+                return pcidev
+        return None
+
+    def get_device_from_pci_address(self, pci_addr: str) -> PCINetDevice:
+        for pcidev in self.pci_devices:
+            if pcidev.pci_address == pci_addr:
+                return pcidev
+        return None
+
+    def get_device_from_interface_name(
+        self, interface_name: str
+    ) -> PCINetDevice:
+        for pcidev in self.pci_devices:
+            if pcidev.interface_name == interface_name:
+                return pcidev
+        return None
diff --git a/ceph-osd/hooks/charmhelpers/contrib/network/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/network/__init__.py
new file mode 100644
index 00000000..d7567b86
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/network/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/ceph-osd/hooks/charmhelpers/contrib/network/ip.py b/ceph-osd/hooks/charmhelpers/contrib/network/ip.py
new file mode 100644
index 00000000..f3b4864f
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/network/ip.py
@@ -0,0 +1,628 @@
+# Copyright 2014-2021 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import glob
+import re
+import subprocess
+import socket
+import ssl
+
+from functools import partial
+
+from charmhelpers.fetch import apt_install, apt_update
+from charmhelpers.core.hookenv import (
+    config,
+    log,
+    network_get_primary_address,
+    unit_get,
+    WARNING,
+    NoNetworkBinding,
+)
+
+from charmhelpers.core.host import (
+    lsb_release,
+    CompareHostReleases,
+)
+
+try:
+    import netifaces
+except ImportError:
+    apt_update(fatal=True)
+    apt_install('python3-netifaces', fatal=True)
+    import netifaces
+
+try:
+    import netaddr
+except ImportError:
+    apt_update(fatal=True)
+    apt_install('python3-netaddr', fatal=True)
+    import netaddr
+
+
+def _validate_cidr(network):
+    try:
+        netaddr.IPNetwork(network)
+    except (netaddr.core.AddrFormatError, ValueError):
+        raise ValueError("Network (%s) is not in CIDR presentation format" %
+                         network)
+
+
+def no_ip_found_error_out(network):
+    errmsg = ("No IP address found in network(s): %s" % network)
+    raise ValueError(errmsg)
+
+
+def _get_ipv6_network_from_address(address):
+    """Get an netaddr.IPNetwork for the given IPv6 address
+
+    :param address: a dict as returned by netifaces.ifaddresses
+    :returns netaddr.IPNetwork: None if the address is a link local or
+                                loopback address
+    """
+    if address['addr'].startswith('fe80') or address['addr'] == "::1":
+        return None
+
+    prefix = address['netmask'].split("/")
+    if len(prefix) > 1:
+        netmask = prefix[1]
+    else:
+        netmask = address['netmask']
+    return netaddr.IPNetwork("%s/%s" % (address['addr'],
+                                        netmask))
+
+
+def get_address_in_network(network, fallback=None, fatal=False):
+    """Get an IPv4 or IPv6 address within the network from the host.
+
+    :param network (str): CIDR presentation format. For example,
+        '192.168.1.0/24'. Supports multiple networks as a space-delimited
+        list.
+    :param fallback (str): If no address is found, return fallback.
+    :param fatal (boolean): If no address is found, fallback is not
+        set and fatal is True, then raise ValueError.
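+
+    Worked example (hypothetical interface state): with eth0 holding
+    192.168.1.5/24, get_address_in_network('10.0.0.0/8 192.168.1.0/24')
+    checks each listed CIDR in turn and returns '192.168.1.5'.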
+ """ + if network is None: + if fallback is not None: + return fallback + + if fatal: + no_ip_found_error_out(network) + else: + return None + + networks = network.split() or [network] + for network in networks: + _validate_cidr(network) + network = netaddr.IPNetwork(network) + for iface in netifaces.interfaces(): + try: + addresses = netifaces.ifaddresses(iface) + except ValueError: + # If an instance was deleted between + # netifaces.interfaces() run and now, its interfaces are gone + continue + if network.version == 4 and netifaces.AF_INET in addresses: + for addr in addresses[netifaces.AF_INET]: + cidr = netaddr.IPNetwork("%s/%s" % (addr['addr'], + addr['netmask'])) + if cidr in network: + return str(cidr.ip) + + if network.version == 6 and netifaces.AF_INET6 in addresses: + for addr in addresses[netifaces.AF_INET6]: + cidr = _get_ipv6_network_from_address(addr) + if cidr and cidr in network: + return str(cidr.ip) + + if fallback is not None: + return fallback + + if fatal: + no_ip_found_error_out(network) + + return None + + +def is_ipv6(address): + """Determine whether provided address is IPv6 or not.""" + try: + address = netaddr.IPAddress(address) + except netaddr.AddrFormatError: + # probably a hostname - so not an address at all! + return False + + return address.version == 6 + + +def is_address_in_network(network, address): + """ + Determine whether the provided address is within a network range. + + :param network (str): CIDR presentation format. For example, + '192.168.1.0/24'. + :param address: An individual IPv4 or IPv6 address without a net + mask or subnet prefix. For example, '192.168.1.1'. + :returns boolean: Flag indicating whether address is in network. + """ + try: + network = netaddr.IPNetwork(network) + except (netaddr.core.AddrFormatError, ValueError): + raise ValueError("Network (%s) is not in CIDR presentation format" % + network) + + try: + address = netaddr.IPAddress(address) + except (netaddr.core.AddrFormatError, ValueError): + raise ValueError("Address (%s) is not in correct presentation format" % + address) + + if address in network: + return True + else: + return False + + +def _get_for_address(address, key): + """Retrieve an attribute of or the physical interface that + the IP address provided could be bound to. + + :param address (str): An individual IPv4 or IPv6 address without a net + mask or subnet prefix. For example, '192.168.1.1'. + :param key: 'iface' for the physical interface name or an attribute + of the configured interface, for example 'netmask'. + :returns str: Requested attribute or None if address is not bindable. 
+ """ + address = netaddr.IPAddress(address) + for iface in netifaces.interfaces(): + addresses = netifaces.ifaddresses(iface) + if address.version == 4 and netifaces.AF_INET in addresses: + addr = addresses[netifaces.AF_INET][0]['addr'] + netmask = addresses[netifaces.AF_INET][0]['netmask'] + network = netaddr.IPNetwork("%s/%s" % (addr, netmask)) + cidr = network.cidr + if address in cidr: + if key == 'iface': + return iface + else: + return addresses[netifaces.AF_INET][0][key] + + if address.version == 6 and netifaces.AF_INET6 in addresses: + for addr in addresses[netifaces.AF_INET6]: + network = _get_ipv6_network_from_address(addr) + if not network: + continue + + cidr = network.cidr + if address in cidr: + if key == 'iface': + return iface + elif key == 'netmask' and cidr: + return str(cidr).split('/')[1] + else: + return addr[key] + return None + + +get_iface_for_address = partial(_get_for_address, key='iface') + + +get_netmask_for_address = partial(_get_for_address, key='netmask') + + +def resolve_network_cidr(ip_address): + ''' + Resolves the full address cidr of an ip_address based on + configured network interfaces + ''' + netmask = get_netmask_for_address(ip_address) + return str(netaddr.IPNetwork("%s/%s" % (ip_address, netmask)).cidr) + + +def format_ipv6_addr(address): + """If address is IPv6, wrap it in '[]' otherwise return None. + + This is required by most configuration files when specifying IPv6 + addresses. + """ + if is_ipv6(address): + return "[%s]" % address + + return None + + +def is_ipv6_disabled(): + try: + result = subprocess.check_output( + ['sysctl', 'net.ipv6.conf.all.disable_ipv6'], + stderr=subprocess.STDOUT, + universal_newlines=True) + except subprocess.CalledProcessError: + return True + + return "net.ipv6.conf.all.disable_ipv6 = 1" in result + + +def get_iface_addr(iface='eth0', inet_type='AF_INET', inc_aliases=False, + fatal=True, exc_list=None): + """Return the assigned IP address for a given interface, if any. + + :param iface: network interface on which address(es) are expected to + be found. + :param inet_type: inet address family + :param inc_aliases: include alias interfaces in search + :param fatal: if True, raise exception if address not found + :param exc_list: list of addresses to ignore + :return: list of ip addresses + """ + # Extract nic if passed /dev/ethX + if '/' in iface: + iface = iface.split('/')[-1] + + if not exc_list: + exc_list = [] + + try: + inet_num = getattr(netifaces, inet_type) + except AttributeError: + raise Exception("Unknown inet type '%s'" % str(inet_type)) + + interfaces = netifaces.interfaces() + if inc_aliases: + ifaces = [] + for _iface in interfaces: + if iface == _iface or _iface.split(':')[0] == iface: + ifaces.append(_iface) + + if fatal and not ifaces: + raise Exception("Invalid interface '%s'" % iface) + + ifaces.sort() + else: + if iface not in interfaces: + if fatal: + raise Exception("Interface '%s' not found " % (iface)) + else: + return [] + + else: + ifaces = [iface] + + addresses = [] + for netiface in ifaces: + net_info = netifaces.ifaddresses(netiface) + if inet_num in net_info: + for entry in net_info[inet_num]: + if 'addr' in entry and entry['addr'] not in exc_list: + addresses.append(entry['addr']) + + if fatal and not addresses: + raise Exception("Interface '%s' doesn't have any %s addresses." 
% + (iface, inet_type)) + + return sorted(addresses) + + +get_ipv4_addr = partial(get_iface_addr, inet_type='AF_INET') + + +def get_iface_from_addr(addr): + """Work out on which interface the provided address is configured.""" + for iface in netifaces.interfaces(): + addresses = netifaces.ifaddresses(iface) + for inet_type in addresses: + for _addr in addresses[inet_type]: + _addr = _addr['addr'] + # link local + ll_key = re.compile("(.+)%.*") + raw = re.match(ll_key, _addr) + if raw: + _addr = raw.group(1) + + if _addr == addr: + log("Address '%s' is configured on iface '%s'" % + (addr, iface)) + return iface + + msg = "Unable to infer net iface on which '%s' is configured" % (addr) + raise Exception(msg) + + +def sniff_iface(f): + """Ensure decorated function is called with a value for iface. + + If no iface provided, inject net iface inferred from unit private address. + """ + def iface_sniffer(*args, **kwargs): + if not kwargs.get('iface', None): + kwargs['iface'] = get_iface_from_addr(unit_get('private-address')) + + return f(*args, **kwargs) + + return iface_sniffer + + +@sniff_iface +def get_ipv6_addr(iface=None, inc_aliases=False, fatal=True, exc_list=None, + dynamic_only=True): + """Get assigned IPv6 address for a given interface. + + Returns list of addresses found. If no address found, returns empty list. + + If iface is None, we infer the current primary interface by doing a reverse + lookup on the unit private-address. + + We currently only support scope global IPv6 addresses i.e. non-temporary + addresses. If no global IPv6 address is found, return the first one found + in the ipv6 address list. + + :param iface: network interface on which ipv6 address(es) are expected to + be found. + :param inc_aliases: include alias interfaces in search + :param fatal: if True, raise exception if address not found + :param exc_list: list of addresses to ignore + :param dynamic_only: only recognise dynamic addresses + :return: list of ipv6 addresses + """ + addresses = get_iface_addr(iface=iface, inet_type='AF_INET6', + inc_aliases=inc_aliases, fatal=fatal, + exc_list=exc_list) + + if addresses: + global_addrs = [] + for addr in addresses: + key_scope_link_local = re.compile("^fe80::..(.+)%(.+)") + m = re.match(key_scope_link_local, addr) + if m: + eui_64_mac = m.group(1) + iface = m.group(2) + else: + global_addrs.append(addr) + + if global_addrs: + # Make sure any found global addresses are not temporary + cmd = ['ip', 'addr', 'show', iface] + out = subprocess.check_output( + cmd).decode('UTF-8', errors='replace') + if dynamic_only: + key = re.compile("inet6 (.+)/[0-9]+ scope global.* dynamic.*") + else: + key = re.compile("inet6 (.+)/[0-9]+ scope global.*") + + addrs = [] + for line in out.split('\n'): + line = line.strip() + m = re.match(key, line) + if m and 'temporary' not in line: + # Return the first valid address we find + for addr in global_addrs: + if m.group(1) == addr: + if not dynamic_only or \ + m.group(1).endswith(eui_64_mac): + addrs.append(addr) + + if addrs: + return addrs + + if fatal: + raise Exception("Interface '%s' does not have a scope global " + "non-temporary ipv6 address." 
% iface) + + return [] + + +def get_bridges(vnic_dir='/sys/devices/virtual/net'): + """Return a list of bridges on the system.""" + b_regex = "%s/*/bridge" % vnic_dir + return [x.replace(vnic_dir, '').split('/')[1] for x in glob.glob(b_regex)] + + +def get_bridge_nics(bridge, vnic_dir='/sys/devices/virtual/net'): + """Return a list of nics comprising a given bridge on the system.""" + brif_regex = "%s/%s/brif/*" % (vnic_dir, bridge) + return [x.split('/')[-1] for x in glob.glob(brif_regex)] + + +def is_bridge_member(nic): + """Check if a given nic is a member of a bridge.""" + for bridge in get_bridges(): + if nic in get_bridge_nics(bridge): + return True + + return False + + +def is_ip(address): + """ + Returns True if address is a valid IP address. + """ + try: + # Test to see if already an IPv4/IPv6 address + address = netaddr.IPAddress(address) + return True + except (netaddr.AddrFormatError, ValueError): + return False + + +def ns_query(address): + try: + import dns.resolver + except ImportError: + apt_install('python3-dnspython', fatal=True) + import dns.resolver + + if isinstance(address, dns.name.Name): + rtype = 'PTR' + elif isinstance(address, str): + rtype = 'A' + else: + return None + + try: + answers = dns.resolver.query(address, rtype) + except (dns.resolver.NXDOMAIN, dns.resolver.NoNameservers): + return None + + if answers: + return str(answers[0]) + return None + + +def get_host_ip(hostname, fallback=None): + """ + Resolves the IP for a given hostname, or returns + the input if it is already an IP. + """ + if is_ip(hostname): + return hostname + + ip_addr = ns_query(hostname) + if not ip_addr: + try: + ip_addr = socket.gethostbyname(hostname) + except Exception: + log("Failed to resolve hostname '%s'" % (hostname), + level=WARNING) + return fallback + return ip_addr + + +def get_hostname(address, fqdn=True): + """ + Resolves hostname for given IP, or returns the input + if it is already a hostname. + """ + if is_ip(address): + try: + import dns.reversename + except ImportError: + apt_install("python3-dnspython", fatal=True) + import dns.reversename + + rev = dns.reversename.from_address(address) + result = ns_query(rev) + + if not result: + try: + result = socket.gethostbyaddr(address)[0] + except Exception: + return None + else: + result = address + + if fqdn: + # strip trailing . + if result.endswith('.'): + return result[:-1] + else: + return result + else: + return result.split('.')[0] + + +class SSLPortCheckInfo(object): + + def __init__(self, key, cert, ca_cert, check_hostname=False): + self.key = key + self.cert = cert + self.ca_cert = ca_cert + # NOTE: by default we do not check hostname since the port check is + # typically performed using 0.0.0.0 which will not match the + # certificate. Hence the default for this is False. + self.check_hostname = check_hostname + + @property + def ssl_context(self): + context = ssl.create_default_context() + context.check_hostname = self.check_hostname + context.load_cert_chain(self.cert, self.key) + context.load_verify_locations(self.ca_cert) + return context + + +def port_has_listener(address, port, sslinfo=None): + """ + Returns True if the address:port is open and being listened to, + else False. By default uses netcat to check ports but if sslinfo is + provided will use an SSL connection instead. + + @param address: an IP address or hostname + @param port: integer port + @param sslinfo: optional SSLPortCheckInfo object. + If provided, the check is performed using an ssl + connection. 
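+    Example (hypothetical endpoint): port_has_listener('10.0.0.1', 5000)
+    runs 'nc -z 10.0.0.1 5000' and returns True on a zero exit code.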
+
+    Note calls 'nc' via a subprocess call
+    """
+    if not sslinfo:
+        cmd = ['nc', '-z', address, str(port)]
+        result = subprocess.call(cmd)
+        return not (bool(result))
+
+    try:
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0) as sock:
+            ssock = sslinfo.ssl_context.wrap_socket(sock,
+                                                    server_hostname=address)
+            ssock.connect((address, port))
+            # this bit is crucial to ensure tls close_notify is sent
+            ssock.unwrap()
+
+        return True
+    except ConnectionRefusedError:
+        return False
+
+
+def assert_charm_supports_ipv6():
+    """Check whether we are able to support charms ipv6."""
+    release = lsb_release()['DISTRIB_CODENAME'].lower()
+    if CompareHostReleases(release) < "trusty":
+        raise Exception("IPv6 is not supported in the charms for Ubuntu "
+                        "versions less than Trusty 14.04")
+
+
+def get_relation_ip(interface, cidr_network=None):
+    """Return this unit's IP for the given interface.
+
+    Allow for an arbitrary interface to use with network-get to select an IP.
+    Handle all address selection options including passed cidr network and
+    IPv6.
+
+    Usage: get_relation_ip('amqp', cidr_network='10.0.0.0/8')
+
+    @param interface: string name of the relation.
+    @param cidr_network: string CIDR Network to select an address from.
+    @raises Exception if prefer-ipv6 is configured but IPv6 unsupported.
+    @returns IPv6 or IPv4 address
+    """
+    # Select the interface address first
+    # For possible use as a fallback below with get_address_in_network
+    try:
+        # Get the interface specific IP
+        address = network_get_primary_address(interface)
+    except NotImplementedError:
+        # If network-get is not available
+        address = get_host_ip(unit_get('private-address'))
+    except NoNetworkBinding:
+        log("No network binding for {}".format(interface), WARNING)
+        address = get_host_ip(unit_get('private-address'))
+
+    if config('prefer-ipv6'):
+        # Currently IPv6 has priority, eventually we want IPv6 to just be
+        # another network space.
+        assert_charm_supports_ipv6()
+        return get_ipv6_addr()[0]
+    elif cidr_network:
+        # If a specific CIDR network is passed get the address from that
+        # network.
+        return get_address_in_network(cidr_network, address)
+
+    # Return the interface address
+    return address
diff --git a/ceph-osd/hooks/charmhelpers/contrib/openstack/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/openstack/__init__.py
new file mode 100644
index 00000000..d7567b86
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/openstack/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/ceph-osd/hooks/charmhelpers/contrib/openstack/alternatives.py b/ceph-osd/hooks/charmhelpers/contrib/openstack/alternatives.py
new file mode 100644
index 00000000..547de09c
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/openstack/alternatives.py
@@ -0,0 +1,44 @@
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +''' Helper for managing alternatives for file conflict resolution ''' + +import subprocess +import shutil +import os + + +def install_alternative(name, target, source, priority=50): + ''' Install alternative configuration ''' + if (os.path.exists(target) and not os.path.islink(target)): + # Move existing file/directory away before installing + shutil.move(target, '{}.bak'.format(target)) + cmd = [ + 'update-alternatives', '--force', '--install', + target, name, source, str(priority) + ] + subprocess.check_call(cmd) + + +def remove_alternative(name, source): + """Remove an installed alternative configuration file + + :param name: string name of the alternative to remove + :param source: string full path to alternative to remove + """ + cmd = [ + 'update-alternatives', '--remove', + name, source + ] + subprocess.check_call(cmd) diff --git a/ceph-osd/hooks/charmhelpers/contrib/openstack/audits/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/openstack/audits/__init__.py new file mode 100644 index 00000000..7f7e5f79 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/openstack/audits/__init__.py @@ -0,0 +1,212 @@ +# Copyright 2019 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""OpenStack Security Audit code""" + +import collections +from enum import Enum +import traceback + +from charmhelpers.core.host import cmp_pkgrevno +import charmhelpers.contrib.openstack.utils as openstack_utils +import charmhelpers.core.hookenv as hookenv + + +class AuditType(Enum): + OpenStackSecurityGuide = 1 + + +_audits = {} + +Audit = collections.namedtuple('Audit', 'func filters') + + +def audit(*args): + """Decorator to register an audit. + + These are used to generate audits that can be run on a + deployed system that matches the given configuration + + :param args: List of functions to filter tests against + :type args: List[Callable[Dict]] + """ + def wrapper(f): + test_name = f.__name__ + if _audits.get(test_name): + raise RuntimeError( + "Test name '{}' used more than once" + .format(test_name)) + non_callables = [fn for fn in args if not callable(fn)] + if non_callables: + raise RuntimeError( + "Configuration includes non-callable filters: {}" + .format(non_callables)) + _audits[test_name] = Audit(func=f, filters=args) + return f + return wrapper + + +def is_audit_type(*args): + """This audit is included in the specified kinds of audits. 
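+
+    For example, @audit(is_audit_type(AuditType.OpenStackSecurityGuide))
+    registers an audit that runs only when that audit type is requested
+    via audit_options.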
+
+    :param *args: List of AuditTypes to include this audit in
+    :type args: List[AuditType]
+    :rtype: Callable[Dict]
+    """
+    def _is_audit_type(audit_options):
+        if audit_options.get('audit_type') in args:
+            return True
+        else:
+            return False
+    return _is_audit_type
+
+
+def since_package(pkg, pkg_version):
+    """This audit should be run after the specified package version (incl).
+
+    :param pkg: Package name to compare
+    :type pkg: str
+    :param pkg_version: The package version
+    :type pkg_version: str
+    :rtype: Callable[Dict]
+    """
+    def _since_package(audit_options=None):
+        return cmp_pkgrevno(pkg, pkg_version) >= 0
+
+    return _since_package
+
+
+def before_package(pkg, pkg_version):
+    """This audit should be run before the specified package version (excl).
+
+    :param pkg: Package name to compare
+    :type pkg: str
+    :param pkg_version: The package version
+    :type pkg_version: str
+    :rtype: Callable[Dict]
+    """
+    def _before_package(audit_options=None):
+        return not since_package(pkg, pkg_version)()
+
+    return _before_package
+
+
+def since_openstack_release(pkg, release):
+    """This audit should run after the specified OpenStack version (incl).
+
+    :param pkg: Package name to compare
+    :type pkg: str
+    :param release: The OpenStack release codename
+    :type release: str
+    :rtype: Callable[Dict]
+    """
+    def _since_openstack_release(audit_options=None):
+        _release = openstack_utils.get_os_codename_package(pkg)
+        return openstack_utils.CompareOpenStackReleases(_release) >= release
+
+    return _since_openstack_release
+
+
+def before_openstack_release(pkg, release):
+    """This audit should run before the specified OpenStack version (excl).
+
+    :param pkg: Package name to compare
+    :type pkg: str
+    :param release: The OpenStack release codename
+    :type release: str
+    :rtype: Callable[Dict]
+    """
+    def _before_openstack_release(audit_options=None):
+        return not since_openstack_release(pkg, release)()
+
+    return _before_openstack_release
+
+
+def it_has_config(config_key):
+    """This audit should be run based on specified config keys.
+
+    :param config_key: Config key to look for
+    :type config_key: str
+    :rtype: Callable[Dict]
+    """
+    def _it_has_config(audit_options):
+        return audit_options.get(config_key) is not None
+
+    return _it_has_config
+
+
+def run(audit_options):
+    """Run the configured audits with the specified audit_options.
+
+    :param audit_options: Configuration for the audit
+    :type audit_options: Config
+
+    :rtype: Dict[str, str]
+    """
+    errors = {}
+    results = {}
+    for name, audit in sorted(_audits.items()):
+        result_name = name.replace('_', '-')
+        if result_name in audit_options.get('excludes', []):
+            print(
+                "Skipping {} because it is "
+                "excluded in audit config"
+                .format(result_name))
+            continue
+        if all(p(audit_options) for p in audit.filters):
+            try:
+                audit.func(audit_options)
+                print("{}: PASS".format(name))
+                results[result_name] = {
+                    'success': True,
+                }
+            except AssertionError as e:
+                print("{}: FAIL ({})".format(name, e))
+                results[result_name] = {
+                    'success': False,
+                    'message': e,
+                }
+            except Exception as e:
+                print("{}: ERROR ({})".format(name, e))
+                errors[name] = e
+                results[result_name] = {
+                    'success': False,
+                    'message': e,
+                }
+    for name, error in errors.items():
+        print("=" * 20)
+        print("Error in {}: ".format(name))
+        traceback.print_tb(error.__traceback__)
+        print()
+    return results
+
+
+def action_parse_results(result):
+    """Parse the result of `run` in the context of an action.
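+
+    Each test outcome is surfaced via hookenv.action_set as PASS or
+    'FAIL - <message>', and the action is failed if any test did not pass.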
+ + :param result: The result of running the security-checklist + action on a unit + :type result: Dict[str, Dict[str, str]] + :rtype: int + """ + passed = True + for test, result in result.items(): + if result['success']: + hookenv.action_set({test: 'PASS'}) + else: + hookenv.action_set({test: 'FAIL - {}'.format(result['message'])}) + passed = False + if not passed: + hookenv.action_fail("One or more tests failed") + return 0 if passed else 1 diff --git a/ceph-osd/hooks/charmhelpers/contrib/openstack/audits/openstack_security_guide.py b/ceph-osd/hooks/charmhelpers/contrib/openstack/audits/openstack_security_guide.py new file mode 100644 index 00000000..79740ed0 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/openstack/audits/openstack_security_guide.py @@ -0,0 +1,270 @@ +# Copyright 2019 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import collections +import configparser +import glob +import os.path +import subprocess + +from charmhelpers.contrib.openstack.audits import ( + audit, + AuditType, + # filters + is_audit_type, + it_has_config, +) + +from charmhelpers.core.hookenv import ( + cached, +) + +""" +The Security Guide suggests a specific list of files inside the +config directory for the service having 640 specifically, but +by ensuring the containing directory is 750, only the owner can +write, and only the group can read files within the directory. + +By restricting access to the containing directory, we can more +effectively ensure that there is no accidental leakage if a new +file is added to the service without being added to the security +guide, and to this check. 
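+
+As an illustration, an entry in FILE_ASSERTIONS below such as
+'/etc/keystone': {'owner': 'keystone', 'group': 'keystone', 'mode': '750'}
+asserts both the ownership and the mode of that directory for the
+keystone charm.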
+""" +FILE_ASSERTIONS = { + 'barbican': { + '/etc/barbican': {'group': 'barbican', 'mode': '750'}, + }, + 'ceph-mon': { + '/var/lib/charm/ceph-mon/ceph.conf': + {'owner': 'root', 'group': 'root', 'mode': '644'}, + '/etc/ceph/ceph.client.admin.keyring': + {'owner': 'ceph', 'group': 'ceph'}, + '/etc/ceph/rbdmap': {'mode': '644'}, + '/var/lib/ceph': {'owner': 'ceph', 'group': 'ceph', 'mode': '750'}, + '/var/lib/ceph/bootstrap-*/ceph.keyring': + {'owner': 'ceph', 'group': 'ceph', 'mode': '600'} + }, + 'ceph-osd': { + '/var/lib/charm/ceph-osd/ceph.conf': + {'owner': 'ceph', 'group': 'ceph', 'mode': '644'}, + '/var/lib/ceph': {'owner': 'ceph', 'group': 'ceph', 'mode': '750'}, + '/var/lib/ceph/*': {'owner': 'ceph', 'group': 'ceph', 'mode': '755'}, + '/var/lib/ceph/bootstrap-*/ceph.keyring': + {'owner': 'ceph', 'group': 'ceph', 'mode': '600'}, + '/var/lib/ceph/radosgw': + {'owner': 'ceph', 'group': 'ceph', 'mode': '755'}, + }, + 'cinder': { + '/etc/cinder': {'group': 'cinder', 'mode': '750'}, + }, + 'glance': { + '/etc/glance': {'group': 'glance', 'mode': '750'}, + }, + 'keystone': { + '/etc/keystone': + {'owner': 'keystone', 'group': 'keystone', 'mode': '750'}, + }, + 'manilla': { + '/etc/manila': {'group': 'manilla', 'mode': '750'}, + }, + 'neutron-gateway': { + '/etc/neutron': {'group': 'neutron', 'mode': '750'}, + }, + 'neutron-api': { + '/etc/neutron/': {'group': 'neutron', 'mode': '750'}, + }, + 'nova-cloud-controller': { + '/etc/nova': {'group': 'nova', 'mode': '750'}, + }, + 'nova-compute': { + '/etc/nova/': {'group': 'nova', 'mode': '750'}, + }, + 'openstack-dashboard': { + # From security guide + '/etc/openstack-dashboard/local_settings.py': + {'group': 'horizon', 'mode': '640'}, + }, +} + +Ownership = collections.namedtuple('Ownership', 'owner group mode') + + +@cached +def _stat(file): + """ + Get the Ownership information from a file. + + :param file: The path to a file to stat + :type file: str + :returns: owner, group, and mode of the specified file + :rtype: Ownership + :raises subprocess.CalledProcessError: If the underlying stat fails + """ + out = subprocess.check_output( + ['stat', '-c', '%U %G %a', file]).decode('utf-8') + return Ownership(*out.strip().split(' ')) + + +@cached +def _config_ini(path): + """ + Parse an ini file + + :param path: The path to a file to parse + :type file: str + :returns: Configuration contained in path + :rtype: Dict + """ + # When strict is enabled, duplicate options are not allowed in the + # parsed INI; however, Oslo allows duplicate values. This change + # causes us to ignore the duplicate values which is acceptable as + # long as we don't validate any multi-value options + conf = configparser.ConfigParser(strict=False) + conf.read(path) + return dict(conf) + + +def _validate_file_ownership(owner, group, file_name, optional=False): + """ + Validate that a specified file is owned by `owner:group`. 
+
+    :param owner: Name of the owner
+    :type owner: str
+    :param group: Name of the group
+    :type group: str
+    :param file_name: Path to the file to verify
+    :type file_name: str
+    :param optional: Is this file optional, i.e. should the check pass
+        when the file is missing
+    :type optional: bool
+    """
+    try:
+        ownership = _stat(file_name)
+    except subprocess.CalledProcessError as e:
+        print("Error reading file: {}".format(e))
+        if not optional:
+            assert False, "Specified file does not exist: {}".format(file_name)
+        return  # Optional and missing; skip the remaining checks
+    assert owner == ownership.owner, \
+        "{} has an incorrect owner: {} should be {}".format(
+            file_name, ownership.owner, owner)
+    assert group == ownership.group, \
+        "{} has an incorrect group: {} should be {}".format(
+            file_name, ownership.group, group)
+    print("Validate ownership of {}: PASS".format(file_name))
+
+
+def _validate_file_mode(mode, file_name, optional=False):
+    """
+    Validate that a specified file has the specified permissions.
+
+    :param mode: The desired file mode
+    :type mode: str
+    :param file_name: Path to the file to verify
+    :type file_name: str
+    :param optional: Is this file optional, i.e. should the check pass
+        when the file is missing
+    :type optional: bool
+    """
+    try:
+        ownership = _stat(file_name)
+    except subprocess.CalledProcessError as e:
+        print("Error reading file: {}".format(e))
+        if not optional:
+            assert False, "Specified file does not exist: {}".format(file_name)
+        return  # Optional and missing; skip the remaining checks
+    assert mode == ownership.mode, \
+        "{} has an incorrect mode: {} should be {}".format(
+            file_name, ownership.mode, mode)
+    print("Validate mode of {}: PASS".format(file_name))
+
+
+@cached
+def _config_section(config, section):
+    """Read the configuration file and return a section."""
+    path = os.path.join(config.get('config_path'), config.get('config_file'))
+    conf = _config_ini(path)
+    return conf.get(section)
+
+
+@audit(is_audit_type(AuditType.OpenStackSecurityGuide),
+       it_has_config('files'))
+def validate_file_ownership(config):
+    """Verify that configuration files are owned by the correct user/group."""
+    files = config.get('files', {})
+    for file_name, options in files.items():
+        for key in options.keys():
+            if key not in ["owner", "group", "mode"]:
+                raise RuntimeError(
+                    "Invalid ownership configuration: {}".format(key))
+        owner = options.get('owner', config.get('owner', 'root'))
+        group = options.get('group', config.get('group', 'root'))
+        optional = options.get('optional', config.get('optional', False))
+        if '*' in file_name:
+            for file in glob.glob(file_name):
+                if file not in files.keys():
+                    if os.path.isfile(file):
+                        _validate_file_ownership(owner, group, file, optional)
+        else:
+            if os.path.isfile(file_name):
+                _validate_file_ownership(owner, group, file_name, optional)
+
+
+@audit(is_audit_type(AuditType.OpenStackSecurityGuide),
+       it_has_config('files'))
+def validate_file_permissions(config):
+    """Verify that permissions on configuration files are secure enough."""
+    files = config.get('files', {})
+    for file_name, options in files.items():
+        for key in options.keys():
+            if key not in ["owner", "group", "mode"]:
+                raise RuntimeError(
+                    "Invalid ownership configuration: {}".format(key))
+        mode = options.get('mode', config.get('permissions', '600'))
+        optional = options.get('optional', config.get('optional', False))
+        if '*' in file_name:
+            for file in glob.glob(file_name):
+                if file not in files.keys():
+                    if os.path.isfile(file):
+                        _validate_file_mode(mode, file, optional)
+        else:
+            if os.path.isfile(file_name):
+                _validate_file_mode(mode, file_name, optional)
+
+
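+# A minimal sketch of the 'files' structure the two audits above consume
+# (the paths and modes here are purely illustrative):
+#
+#     audit_options = {
+#         'files': {
+#             '/etc/example/example.conf':
+#                 {'owner': 'root', 'group': 'root', 'mode': '640'},
+#             '/var/lib/example/*': {'mode': '755'},
+#         },
+#     }
+#     validate_file_ownership(audit_options)
+#     validate_file_permissions(audit_options)
+
+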
+@audit(is_audit_type(AuditType.OpenStackSecurityGuide)) +def validate_uses_keystone(audit_options): + """Validate that the service uses Keystone for authentication.""" + section = _config_section(audit_options, 'api') or _config_section(audit_options, 'DEFAULT') + assert section is not None, "Missing section 'api / DEFAULT'" + assert section.get('auth_strategy') == "keystone", \ + "Application is not using Keystone" + + +@audit(is_audit_type(AuditType.OpenStackSecurityGuide)) +def validate_uses_tls_for_keystone(audit_options): + """Verify that TLS is used to communicate with Keystone.""" + section = _config_section(audit_options, 'keystone_authtoken') + assert section is not None, "Missing section 'keystone_authtoken'" + assert not section.get('insecure') and \ + "https://" in section.get("auth_uri"), \ + "TLS is not used for Keystone" + + +@audit(is_audit_type(AuditType.OpenStackSecurityGuide)) +def validate_uses_tls_for_glance(audit_options): + """Verify that TLS is used to communicate with Glance.""" + section = _config_section(audit_options, 'glance') + assert section is not None, "Missing section 'glance'" + assert not section.get('insecure') and \ + "https://" in section.get("api_servers"), \ + "TLS is not used for Glance" diff --git a/ceph-osd/hooks/charmhelpers/contrib/openstack/cert_utils.py b/ceph-osd/hooks/charmhelpers/contrib/openstack/cert_utils.py new file mode 100644 index 00000000..6620f59f --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/openstack/cert_utils.py @@ -0,0 +1,463 @@ +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Common python helper functions used for OpenStack charm certificates. 
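+#
+# Typical flow: a charm publishes the output of get_certificate_request()
+# on the 'certificates' relation and, once the provider responds, calls
+# process_certificates() to install the returned certificates and CA
+# under /etc/apache2/ssl/<service_name>.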
+ +import os +import json +from base64 import b64decode + +from charmhelpers.contrib.network.ip import ( + get_hostname, + resolve_network_cidr, +) +from charmhelpers.core.hookenv import ( + local_unit, + network_get_primary_address, + config, + related_units, + relation_get, + relation_ids, + remote_service_name, + NoNetworkBinding, + log, + WARNING, + INFO, +) +from charmhelpers.contrib.openstack.ip import ( + resolve_address, + get_vip_in_network, + ADDRESS_MAP, + get_default_api_bindings, + local_address, +) +from charmhelpers.contrib.network.ip import ( + get_relation_ip, +) + +from charmhelpers.core.host import ( + ca_cert_absolute_path, + install_ca_cert, + mkdir, + write_file, +) + +from charmhelpers.contrib.hahelpers.apache import ( + CONFIG_CA_CERT_FILE, +) + + +class CertRequest(object): + + """Create a request for certificates to be generated + """ + + def __init__(self, json_encode=True): + self.entries = [] + self.hostname_entry = None + self.json_encode = json_encode + + def add_entry(self, net_type, cn, addresses): + """Add a request to the batch + + :param net_type: str network space name request is for + :param cn: str Canonical Name for certificate + :param addresses: [] List of addresses to be used as SANs + """ + self.entries.append({ + 'cn': cn, + 'addresses': addresses}) + + def add_hostname_cn(self): + """Add a request for the hostname of the machine""" + ip = local_address(unit_get_fallback='private-address') + addresses = [ip] + # If a vip is being used without os-hostname config or + # network spaces then we need to ensure the local units + # cert has the appropriate vip in the SAN list + vip = get_vip_in_network(resolve_network_cidr(ip)) + if vip: + addresses.append(vip) + self.hostname_entry = { + 'cn': get_hostname(ip), + 'addresses': addresses} + + def add_hostname_cn_ip(self, addresses): + """Add an address to the SAN list for the hostname request + + :param addr: [] List of address to be added + """ + for addr in addresses: + if addr not in self.hostname_entry['addresses']: + self.hostname_entry['addresses'].append(addr) + + def get_request(self): + """Generate request from the batched up entries + + """ + if self.hostname_entry: + self.entries.append(self.hostname_entry) + request = {} + for entry in self.entries: + sans = sorted(list(set(entry['addresses']))) + request[entry['cn']] = {'sans': sans} + if self.json_encode: + req = {'cert_requests': json.dumps(request, sort_keys=True)} + else: + req = {'cert_requests': request} + req['unit_name'] = local_unit().replace('/', '_') + return req + + +def get_certificate_request(json_encode=True, bindings=None): + """Generate a certificate requests based on the network configuration + + :param json_encode: Encode request in JSON or not. Used for setting + directly on a relation. + :type json_encode: boolean + :param bindings: List of bindings to check in addition to default api + bindings. + :type bindings: list of strings + :returns: CertRequest request as dictionary or JSON string. 
+ :rtype: Union[dict, json] + """ + if bindings: + # Add default API bindings to bindings list + bindings = list(bindings + get_default_api_bindings()) + else: + # Use default API bindings + bindings = get_default_api_bindings() + req = CertRequest(json_encode=json_encode) + req.add_hostname_cn() + # Add os-hostname entries + _sans = get_certificate_sans(bindings=bindings) + + # Handle specific hostnames per binding + for binding in bindings: + try: + hostname_override = config(ADDRESS_MAP[binding]['override']) + except KeyError: + hostname_override = None + try: + try: + net_addr = resolve_address(endpoint_type=binding) + except KeyError: + net_addr = None + ip = network_get_primary_address(binding) + addresses = [net_addr, ip] + vip = get_vip_in_network(resolve_network_cidr(ip)) + if vip: + addresses.append(vip) + + # Clear any Nones or duplicates + addresses = list(set([i for i in addresses if i])) + # Add hostname certificate request + if hostname_override: + req.add_entry( + binding, + hostname_override, + addresses) + # Remove hostname specific addresses from _sans + for addr in addresses: + try: + _sans.remove(addr) + except (ValueError, KeyError): + pass + + except NoNetworkBinding: + log("Skipping request for certificate for ip in {} space, no " + "local address found".format(binding), WARNING) + # Guarantee all SANs are covered + # These are network addresses with no corresponding hostname. + # Add the ips to the hostname cert to allow for this. + req.add_hostname_cn_ip(_sans) + return req.get_request() + + +def get_certificate_sans(bindings=None): + """Get all possible IP addresses for certificate SANs. + + :param bindings: List of bindings to check in addition to default api + bindings. + :type bindings: list of strings + :returns: List of binding string names + :rtype: List[str] + """ + _sans = [local_address(unit_get_fallback='private-address')] + if bindings: + # Add default API bindings to bindings list + bindings = list(bindings + get_default_api_bindings()) + else: + # Use default API bindings + bindings = get_default_api_bindings() + + for binding in bindings: + # Check for config override + try: + net_config = config(ADDRESS_MAP[binding]['config']) + except KeyError: + # There is no configuration network for this binding name + net_config = None + # Using resolve_address is likely redundant. Keeping it here in + # case there is an edge case it handles. + try: + net_addr = resolve_address(endpoint_type=binding) + except KeyError: + net_addr = None + ip = get_relation_ip(binding, cidr_network=net_config) + _sans = _sans + [net_addr, ip] + vip = get_vip_in_network(resolve_network_cidr(ip)) + if vip: + _sans.append(vip) + # Clear any Nones and duplicates + return list(set([i for i in _sans if i])) + + +def create_ip_cert_links(ssl_dir, custom_hostname_link=None, bindings=None): + """Create symlinks for SAN records + + :param ssl_dir: str Directory to create symlinks in + :param custom_hostname_link: str Additional link to be created + :param bindings: List of bindings to check in addition to default api + bindings. 
+
+    :type bindings: list of strings
+    """
+
+    if bindings:
+        # Add default API bindings to bindings list
+        bindings = list(bindings + get_default_api_bindings())
+    else:
+        # Use default API bindings
+        bindings = get_default_api_bindings()
+
+    # This includes the hostname cert and any specific binding certs:
+    # admin, internal, public
+    req = get_certificate_request(json_encode=False,
+                                  bindings=bindings)["cert_requests"]
+    # Specific certs
+    for cert_req in req.keys():
+        requested_cert = os.path.join(
+            ssl_dir,
+            'cert_{}'.format(cert_req))
+        requested_key = os.path.join(
+            ssl_dir,
+            'key_{}'.format(cert_req))
+        for addr in req[cert_req]['sans']:
+            cert = os.path.join(ssl_dir, 'cert_{}'.format(addr))
+            key = os.path.join(ssl_dir, 'key_{}'.format(addr))
+            if os.path.isfile(requested_cert) and not os.path.isfile(cert):
+                os.symlink(requested_cert, cert)
+                os.symlink(requested_key, key)
+
+    # Handle custom hostnames
+    hostname = get_hostname(local_address(unit_get_fallback='private-address'))
+    hostname_cert = os.path.join(
+        ssl_dir,
+        'cert_{}'.format(hostname))
+    hostname_key = os.path.join(
+        ssl_dir,
+        'key_{}'.format(hostname))
+    if custom_hostname_link:
+        custom_cert = os.path.join(
+            ssl_dir,
+            'cert_{}'.format(custom_hostname_link))
+        custom_key = os.path.join(
+            ssl_dir,
+            'key_{}'.format(custom_hostname_link))
+        if os.path.isfile(hostname_cert) and not os.path.isfile(custom_cert):
+            os.symlink(hostname_cert, custom_cert)
+            os.symlink(hostname_key, custom_key)
+
+
+def install_certs(ssl_dir, certs, chain=None, user='root', group='root'):
+    """Install the certs passed into the ssl dir and append the chain if
+    provided.
+
+    :param ssl_dir: str Directory to create symlinks in
+    :param certs: {} {'cn': {'cert': 'CERT', 'key': 'KEY'}}
+    :param chain: str Chain to be appended to certs
+    :param user: (Optional) Owner of certificate files. Defaults to 'root'
+    :type user: str
+    :param group: (Optional) Group of certificate files. Defaults to 'root'
+    :type group: str
+    """
+    for cn, bundle in certs.items():
+        cert_filename = 'cert_{}'.format(cn)
+        key_filename = 'key_{}'.format(cn)
+        cert_data = bundle['cert']
+        if chain:
+            # Append chain file so that clients that trust the root CA will
+            # trust certs signed by an intermediate in the chain
+            cert_data = cert_data + os.linesep + chain
+        write_file(
+            path=os.path.join(ssl_dir, cert_filename), owner=user, group=group,
+            content=cert_data, perms=0o640)
+        write_file(
+            path=os.path.join(ssl_dir, key_filename), owner=user, group=group,
+            content=bundle['key'], perms=0o640)
+
+
+def get_cert_relation_ca_name(cert_relation_id=None):
+    """Determine CA certificate name as provided by relation.
+
+    The filename on disk depends on the name chosen for the application on the
+    providing end of the certificates relation.
+
+    :param cert_relation_id: (Optional) Relation id providing the certs
+    :type cert_relation_id: str
+    :returns: CA certificate filename without path or extension
+    :rtype: str
+    """
+    if cert_relation_id is None:
+        try:
+            cert_relation_id = relation_ids('certificates')[0]
+        except IndexError:
+            return ''
+    return '{}_juju_ca_cert'.format(
+        remote_service_name(relid=cert_relation_id))
+
+
+def _manage_ca_certs(ca, cert_relation_id):
+    """Manage CA certs.
+
+    :param ca: CA Certificate from certificate relation.
+ :type ca: str + :param cert_relation_id: Relation id providing the certs + :type cert_relation_id: str + """ + config_ssl_ca = config('ssl_ca') + config_cert_file = ca_cert_absolute_path(CONFIG_CA_CERT_FILE) + if config_ssl_ca: + log("Installing CA certificate from charm ssl_ca config to {}".format( + config_cert_file), INFO) + install_ca_cert( + b64decode(config_ssl_ca).rstrip(), + name=CONFIG_CA_CERT_FILE) + elif os.path.exists(config_cert_file): + log("Removing CA certificate {}".format(config_cert_file), INFO) + os.remove(config_cert_file) + log("Installing CA certificate from certificate relation", INFO) + install_ca_cert( + ca.encode(), + name=get_cert_relation_ca_name(cert_relation_id)) + + +def process_certificates(service_name, relation_id, unit, + custom_hostname_link=None, user='root', group='root', + bindings=None): + """Process the certificates supplied down the relation + + :param service_name: str Name of service the certificates are for. + :param relation_id: str Relation id providing the certs + :param unit: str Unit providing the certs + :param custom_hostname_link: str Name of custom link to create + :param user: (Optional) Owner of certificate files. Defaults to 'root' + :type user: str + :param group: (Optional) Group of certificate files. Defaults to 'root' + :type group: str + :param bindings: List of bindings to check in addition to default api + bindings. + :type bindings: list of strings + :returns: True if certificates processed for local unit or False + :rtype: bool + """ + if bindings: + # Add default API bindings to bindings list + bindings = list(bindings + get_default_api_bindings()) + else: + # Use default API bindings + bindings = get_default_api_bindings() + + data = relation_get(rid=relation_id, unit=unit) + ssl_dir = os.path.join('/etc/apache2/ssl/', service_name) + mkdir(path=ssl_dir) + name = local_unit().replace('/', '_') + certs = data.get('{}.processed_requests'.format(name)) + chain = data.get('chain') + ca = data.get('ca') + if certs: + certs = json.loads(certs) + _manage_ca_certs(ca, relation_id) + install_certs(ssl_dir, certs, chain, user=user, group=group) + create_ip_cert_links( + ssl_dir, + custom_hostname_link=custom_hostname_link, + bindings=bindings) + return True + return False + + +def get_requests_for_local_unit(relation_name=None): + """Extract any certificates data targeted at this unit down relation_name. + + :param relation_name: str Name of relation to check for data. + :returns: List of bundles of certificates. + :rtype: List of dicts + """ + local_name = local_unit().replace('/', '_') + raw_certs_key = '{}.processed_requests'.format(local_name) + relation_name = relation_name or 'certificates' + bundles = [] + for rid in relation_ids(relation_name): + sent = relation_get(rid=rid, unit=local_unit()) + legacy_keys = ['certificate_name', 'common_name'] + is_legacy_request = set(sent).intersection(legacy_keys) + for unit in related_units(rid): + data = relation_get(rid=rid, unit=unit) + # Note: Bug#2028683 - data may not be available if the certificates + # relation hasn't been populated by the providing charm. If no 'ca' + # in the data then don't attempt the bundle at all. 
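+            # The current format carries a JSON blob of requests under
+            # '<unit>.processed_requests', while the legacy format uses
+            # flat '<unit>.server.cert'/'<unit>.server.key' keys plus the
+            # 'common_name' this unit sent with its request.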
+ if data.get('ca'): + if data.get(raw_certs_key): + bundles.append({ + 'ca': data['ca'], + 'chain': data.get('chain'), + 'certs': json.loads(data[raw_certs_key]) + }) + elif is_legacy_request: + bundles.append({ + 'ca': data['ca'], + 'chain': data.get('chain'), + 'certs': { + sent['common_name']: { + 'cert': data.get(local_name + '.server.cert'), + 'key': data.get(local_name + '.server.key') + } + } + }) + + return bundles + + +def get_bundle_for_cn(cn, relation_name=None): + """Extract certificates for the given cn. + + :param cn: str Canonical Name on certificate. + :param relation_name: str Relation to check for certificates down. + :returns: Dictionary of certificate data, + :rtype: dict. + """ + entries = get_requests_for_local_unit(relation_name) + cert_bundle = {} + for entry in entries: + for _cn, bundle in entry['certs'].items(): + if _cn == cn: + cert_bundle = { + 'cert': bundle['cert'], + 'key': bundle['key'], + 'chain': entry['chain'], + 'ca': entry['ca']} + break + if cert_bundle: + break + return cert_bundle diff --git a/ceph-osd/hooks/charmhelpers/contrib/openstack/context.py b/ceph-osd/hooks/charmhelpers/contrib/openstack/context.py new file mode 100644 index 00000000..cd70b55c --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/openstack/context.py @@ -0,0 +1,3467 @@ +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
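+
+# Context generators used to render OpenStack charm configuration files
+# from charm config, relation data, and host state.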
+ +import collections +import copy +import enum +import glob +import hashlib +import json +import math +import os +import re +import socket +import time + +from base64 import b64decode +from distutils.version import LooseVersion +from subprocess import ( + check_call, + check_output, + CalledProcessError) + +import charmhelpers.contrib.storage.linux.ceph as ch_ceph + +from charmhelpers.contrib.openstack.audits.openstack_security_guide import ( + _config_ini as config_ini +) + +from charmhelpers.fetch import ( + apt_install, + filter_installed_packages, + get_installed_version, +) +from charmhelpers.core.hookenv import ( + NoNetworkBinding, + config, + is_relation_made, + local_unit, + log, + relation_get, + relation_ids, + related_units, + relation_set, + unit_private_ip, + charm_name, + DEBUG, + INFO, + ERROR, + status_set, + network_get_primary_address, + WARNING, + service_name, + remote_service_name, +) + +from charmhelpers.core.sysctl import create as sysctl_create +from charmhelpers.core.strutils import bool_from_string +from charmhelpers.contrib.openstack.exceptions import OSContextError + +from charmhelpers.core.host import ( + get_bond_master, + is_phy_iface, + list_nics, + get_nic_hwaddr, + mkdir, + write_file, + pwgen, + lsb_release, + CompareHostReleases, +) +from charmhelpers.contrib.hahelpers.cluster import ( + determine_apache_port, + determine_api_port, + https, + is_clustered, +) +from charmhelpers.contrib.hahelpers.apache import ( + get_cert, + get_ca_cert, + install_ca_cert, +) +from charmhelpers.contrib.openstack.neutron import ( + neutron_plugin_attribute, + parse_data_port_mappings, +) +from charmhelpers.contrib.openstack.ip import ( + resolve_address, + INTERNAL, + ADMIN, + PUBLIC, + ADDRESS_MAP, + local_address, +) +from charmhelpers.contrib.network.ip import ( + get_address_in_network, + get_ipv4_addr, + get_ipv6_addr, + get_netmask_for_address, + format_ipv6_addr, + is_bridge_member, + is_ipv6_disabled, + get_relation_ip, +) +from charmhelpers.contrib.openstack.utils import ( + config_flags_parser, + get_os_codename_install_source, + enable_memcache, + CompareOpenStackReleases, + os_release, +) +from charmhelpers.core.unitdata import kv + +from charmhelpers.contrib.hardware import pci + +try: + import psutil +except ImportError: + apt_install('python3-psutil', fatal=True) + import psutil + +CA_CERT_PATH = '/usr/local/share/ca-certificates/keystone_juju_ca_cert.crt' +ADDRESS_TYPES = ['admin', 'internal', 'public'] +HAPROXY_RUN_DIR = '/var/run/haproxy/' +DEFAULT_OSLO_MESSAGING_DRIVER = "messagingv2" +DEFAULT_HAPROXY_EXPORTER_STATS_PORT = 8404 + + +def ensure_packages(packages): + """Install but do not upgrade required plugin packages.""" + required = filter_installed_packages(packages) + if required: + apt_install(required, fatal=True) + + +def context_complete(ctxt): + _missing = [k for k, v in ctxt.items() if v is None or v == ''] + + if _missing: + log('Missing required data: %s' % ' '.join(_missing), level=INFO) + return False + + return True + + +class OSContextGenerator(object): + """Base class for all context generators.""" + interfaces = [] + related = False + complete = False + missing_data = [] + + def __call__(self): + raise NotImplementedError + + def context_complete(self, ctxt): + """Check for missing data for the required context data. + Set self.missing_data if it exists and return False. + Set self.complete if no missing data and return True. 
+ """ + # Fresh start + self.complete = False + self.missing_data = [] + for k, v in ctxt.items(): + if v is None or v == '': + if k not in self.missing_data: + self.missing_data.append(k) + + if self.missing_data: + self.complete = False + log('Missing required data: %s' % ' '.join(self.missing_data), + level=INFO) + else: + self.complete = True + return self.complete + + def get_related(self): + """Check if any of the context interfaces have relation ids. + Set self.related and return True if one of the interfaces + has relation ids. + """ + # Fresh start + self.related = False + try: + for interface in self.interfaces: + if relation_ids(interface): + self.related = True + return self.related + except AttributeError as e: + log("{} {}" + "".format(self, e), 'INFO') + return self.related + + +class KeystoneAuditMiddleware(OSContextGenerator): + def __init__(self, service: str) -> None: + self.service_name = service + + def __call__(self): + """Return context dictionary containing configuration status of + audit-middleware and the charm service name. + """ + ctxt = { + 'audit_middleware': config('audit-middleware') or False, + 'service_name': self.service_name + } + return ctxt + + +class SharedDBContext(OSContextGenerator): + interfaces = ['shared-db'] + + def __init__(self, database=None, user=None, relation_prefix=None, + ssl_dir=None, relation_id=None): + """Allows inspecting relation for settings prefixed with + relation_prefix. This is useful for parsing access for multiple + databases returned via the shared-db interface (eg, nova_password, + quantum_password) + """ + self.relation_prefix = relation_prefix + self.database = database + self.user = user + self.ssl_dir = ssl_dir + self.rel_name = self.interfaces[0] + self.relation_id = relation_id + + def __call__(self): + self.database = self.database or config('database') + self.user = self.user or config('database-user') + if None in [self.database, self.user]: + log("Could not generate shared_db context. Missing required charm " + "config options. (database name and user)", level=ERROR) + raise OSContextError + + ctxt = {} + + # NOTE(jamespage) if mysql charm provides a network upon which + # access to the database should be made, reconfigure relation + # with the service units local address and defer execution + access_network = relation_get('access-network') + if access_network is not None: + if self.relation_prefix is not None: + hostname_key = "{}_hostname".format(self.relation_prefix) + else: + hostname_key = "hostname" + access_hostname = get_address_in_network( + access_network, + local_address(unit_get_fallback='private-address')) + set_hostname = relation_get(attribute=hostname_key, + unit=local_unit()) + if set_hostname != access_hostname: + relation_set(relation_settings={hostname_key: access_hostname}) + return None # Defer any further hook execution for now.... 
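+                # Returning None yields an incomplete context, so rendering
+                # is skipped this hook; once the database reacts to the
+                # hostname set above, a later relation-changed hook re-runs
+                # this generator with access granted.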
+ + password_setting = 'password' + if self.relation_prefix: + password_setting = self.relation_prefix + '_password' + + if self.relation_id: + rids = [self.relation_id] + else: + rids = relation_ids(self.interfaces[0]) + + rel = (get_os_codename_install_source(config('openstack-origin')) or + 'icehouse') + for rid in rids: + self.related = True + for unit in related_units(rid): + rdata = relation_get(rid=rid, unit=unit) + host = rdata.get('db_host') + host = format_ipv6_addr(host) or host + ctxt = { + 'database_host': host, + 'database': self.database, + 'database_user': self.user, + 'database_password': rdata.get(password_setting), + 'database_type': 'mysql+pymysql' + } + # Port is being introduced with LP Bug #1876188 + # but it not currently required and may not be set in all + # cases, particularly in classic charms. + port = rdata.get('db_port') + if port: + ctxt['database_port'] = port + if CompareOpenStackReleases(rel) < 'queens': + ctxt['database_type'] = 'mysql' + if self.context_complete(ctxt): + db_ssl(rdata, ctxt, self.ssl_dir) + return ctxt + return {} + + +class PostgresqlDBContext(OSContextGenerator): + interfaces = ['pgsql-db'] + + def __init__(self, database=None): + self.database = database + + def __call__(self): + self.database = self.database or config('database') + if self.database is None: + log('Could not generate postgresql_db context. Missing required ' + 'charm config options. (database name)', level=ERROR) + raise OSContextError + + ctxt = {} + for rid in relation_ids(self.interfaces[0]): + self.related = True + for unit in related_units(rid): + rel_host = relation_get('host', rid=rid, unit=unit) + rel_user = relation_get('user', rid=rid, unit=unit) + rel_passwd = relation_get('password', rid=rid, unit=unit) + ctxt = {'database_host': rel_host, + 'database': self.database, + 'database_user': rel_user, + 'database_password': rel_passwd, + 'database_type': 'postgresql'} + if self.context_complete(ctxt): + return ctxt + + return {} + + +def db_ssl(rdata, ctxt, ssl_dir): + if 'ssl_ca' in rdata and ssl_dir: + ca_path = os.path.join(ssl_dir, 'db-client.ca') + with open(ca_path, 'wb') as fh: + fh.write(b64decode(rdata['ssl_ca'])) + + ctxt['database_ssl_ca'] = ca_path + elif 'ssl_ca' in rdata: + log("Charm not setup for ssl support but ssl ca found", level=INFO) + return ctxt + + if 'ssl_cert' in rdata: + cert_path = os.path.join( + ssl_dir, 'db-client.cert') + if not os.path.exists(cert_path): + log("Waiting 1m for ssl client cert validity", level=INFO) + time.sleep(60) + + with open(cert_path, 'wb') as fh: + fh.write(b64decode(rdata['ssl_cert'])) + + ctxt['database_ssl_cert'] = cert_path + key_path = os.path.join(ssl_dir, 'db-client.key') + with open(key_path, 'wb') as fh: + fh.write(b64decode(rdata['ssl_key'])) + + ctxt['database_ssl_key'] = key_path + + return ctxt + + +class IdentityServiceContext(OSContextGenerator): + + _forward_compat_remaps = { + 'admin_user': 'admin-user-name', + 'service_username': 'service-user-name', + 'service_tenant': 'service-project-name', + 'service_tenant_id': 'service-project-id', + 'service_domain': 'service-domain-name', + } + + def __init__(self, + service=None, + service_user=None, + rel_name='identity-service'): + self.service = service + self.service_user = service_user + self.rel_name = rel_name + self.interfaces = [self.rel_name] + + def _setup_pki_cache(self): + if self.service and self.service_user: + # This is required for pki token signing if we don't want /tmp to + # be used. 
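+            # The path created here is surfaced to templates as the
+            # 'signing_dir' context value in __call__() below.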
+ cachedir = '/var/cache/%s' % (self.service) + if not os.path.isdir(cachedir): + log("Creating service cache dir %s" % (cachedir), level=DEBUG) + mkdir(path=cachedir, owner=self.service_user, + group=self.service_user, perms=0o700) + + return cachedir + return None + + def _get_pkg_name(self, python_name='keystonemiddleware'): + """Get corresponding distro installed package for python + package name. + + :param python_name: nameof the python package + :type: string + """ + pkg_names = map(lambda x: x + python_name, ('python3-', 'python-')) + + for pkg in pkg_names: + if not filter_installed_packages((pkg,)): + return pkg + + return None + + def _get_keystone_authtoken_ctxt(self, ctxt, keystonemiddleware_os_rel): + """Build Jinja2 context for full rendering of [keystone_authtoken] + section with variable names included. Re-constructed from former + template 'section-keystone-auth-mitaka'. + + :param ctxt: Jinja2 context returned from self.__call__() + :type: dict + :param keystonemiddleware_os_rel: OpenStack release name of + keystonemiddleware package installed + """ + c = collections.OrderedDict((('auth_type', 'password'),)) + + # 'www_authenticate_uri' replaced 'auth_uri' since Stein, + # see keystonemiddleware upstream sources for more info + if CompareOpenStackReleases(keystonemiddleware_os_rel) >= 'stein': + if 'public_auth_url' in ctxt: + c.update(( + ('www_authenticate_uri', '{}/v3'.format( + ctxt.get('public_auth_url'))),)) + else: + c.update(( + ('www_authenticate_uri', "{}://{}:{}/v3".format( + ctxt.get('service_protocol', ''), + ctxt.get('service_host', ''), + ctxt.get('service_port', ''))),)) + else: + c.update(( + ('auth_uri', "{}://{}:{}/v3".format( + ctxt.get('service_protocol', ''), + ctxt.get('service_host', ''), + ctxt.get('service_port', ''))),)) + + if 'internal_auth_url' in ctxt: + c.update(( + ('auth_url', ctxt.get('internal_auth_url')),)) + else: + c.update(( + ('auth_url', "{}://{}:{}/v3".format( + ctxt.get('auth_protocol', ''), + ctxt.get('auth_host', ''), + ctxt.get('auth_port', ''))),)) + + c.update(( + ('project_domain_name', ctxt.get('admin_domain_name', '')), + ('user_domain_name', ctxt.get('admin_domain_name', '')), + ('project_name', ctxt.get('admin_tenant_name', '')), + ('username', ctxt.get('admin_user', '')), + ('password', ctxt.get('admin_password', '')), + ('signing_dir', ctxt.get('signing_dir', '')),)) + + if ctxt.get('service_type'): + c.update((('service_type', ctxt.get('service_type')),)) + + return c + + def __call__(self): + log('Generating template context for ' + self.rel_name, level=DEBUG) + ctxt = {} + + keystonemiddleware_os_release = None + if self._get_pkg_name(): + keystonemiddleware_os_release = os_release(self._get_pkg_name()) + + cachedir = self._setup_pki_cache() + if cachedir: + ctxt['signing_dir'] = cachedir + + for rid in relation_ids(self.rel_name): + self.related = True + for unit in related_units(rid): + rdata = {} + # NOTE(jamespage): + # forwards compat with application data + # bag driven approach to relation. 
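+                # Values from the application data bag, when present, are
+                # preferred over the per-unit data read below (see the
+                # _resolve() helper further down).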
+ _adata = relation_get(rid=rid, app=remote_service_name(rid)) + adata = {} + # if no app data bag presented - fallback + # to legacy unit based relation data + rdata = relation_get(rid=rid, unit=unit) + if _adata: + # New app data bag uses - instead of _ + # in key names - remap for compat with + # existing relation data keys + for key, value in _adata.items(): + if key == 'api-version': + adata[key.replace('-', '_')] = value.strip('v') + else: + adata[key.replace('-', '_')] = value + # Re-map some keys for backwards compatibility + for target, source in self._forward_compat_remaps.items(): + adata[target] = _adata.get(source) + # Now preferentially get data from the app data bag, but if + # it's not available, get it from the legacy based relation + # data. + + def _resolve(key): + return adata.get(key) or rdata.get(key) + + serv_host = _resolve('service_host') + serv_host = format_ipv6_addr(serv_host) or serv_host + auth_host = _resolve('auth_host') + auth_host = format_ipv6_addr(auth_host) or auth_host + int_host = _resolve('internal_host',) + int_host = format_ipv6_addr(int_host) or int_host + svc_protocol = _resolve('service_protocol') or 'http' + auth_protocol = _resolve('auth_protocol') or 'http' + admin_role = _resolve('admin_role') or 'Admin' + int_protocol = _resolve('internal_protocol') or 'http' + api_version = _resolve('api_version') or '2.0' + ctxt.update({'service_port': _resolve('service_port'), + 'service_host': serv_host, + 'auth_host': auth_host, + 'auth_port': _resolve('auth_port'), + 'internal_host': int_host, + 'internal_port': _resolve('internal_port'), + 'admin_tenant_name': _resolve('service_tenant'), + 'admin_user': _resolve('service_username'), + 'admin_password': _resolve('service_password'), + 'admin_role': admin_role, + 'service_protocol': svc_protocol, + 'auth_protocol': auth_protocol, + 'internal_protocol': int_protocol, + 'api_version': api_version}) + + service_type = _resolve('service_type') + if service_type: + ctxt['service_type'] = service_type + + if float(api_version) > 2: + ctxt.update({ + 'admin_domain_name': _resolve('service_domain'), + 'service_project_id': _resolve('service_tenant_id'), + 'service_domain_id': _resolve('service_domain_id')}) + + # NOTE: + # keystone-k8s operator presents full URLS + # for all three endpoints - public and internal are + # externally addressable for machine based charm + public_auth_url = _resolve('public_auth_url') + # if 'public_auth_url' in rdata: + if public_auth_url: + ctxt.update({ + 'public_auth_url': public_auth_url, + }) + internal_auth_url = _resolve('internal_auth_url') + # if 'internal_auth_url' in rdata: + if internal_auth_url: + ctxt.update({ + 'internal_auth_url': internal_auth_url, + }) + + # we keep all variables in ctxt for compatibility and + # add nested dictionary for keystone_authtoken generic + # templating + if keystonemiddleware_os_release: + ctxt['keystone_authtoken'] = \ + self._get_keystone_authtoken_ctxt( + ctxt, keystonemiddleware_os_release) + + if self.context_complete(ctxt): + # NOTE(jamespage) this is required for >= icehouse + # so a missing value just indicates keystone needs + # upgrading + ctxt['admin_user_id'] = _resolve('service_user_id') + ctxt['admin_tenant_id'] = _resolve('service_tenant_id') + ctxt['admin_domain_id'] = _resolve('service_domain_id') + return ctxt + + return {} + + +class IdentityCredentialsContext(IdentityServiceContext): + '''Context for identity-credentials interface type''' + + def __init__(self, + service=None, + service_user=None, + 
+                 rel_name='identity-credentials'):
+        super(IdentityCredentialsContext, self).__init__(service,
+                                                         service_user,
+                                                         rel_name)
+
+    def __call__(self):
+        log('Generating template context for ' + self.rel_name, level=DEBUG)
+        ctxt = {}
+
+        cachedir = self._setup_pki_cache()
+        if cachedir:
+            ctxt['signing_dir'] = cachedir
+
+        for rid in relation_ids(self.rel_name):
+            self.related = True
+            for unit in related_units(rid):
+                rdata = relation_get(rid=rid, unit=unit)
+                credentials_host = rdata.get('credentials_host')
+                credentials_host = (
+                    format_ipv6_addr(credentials_host) or credentials_host
+                )
+                auth_host = rdata.get('auth_host')
+                auth_host = format_ipv6_addr(auth_host) or auth_host
+                svc_protocol = rdata.get('credentials_protocol') or 'http'
+                auth_protocol = rdata.get('auth_protocol') or 'http'
+                api_version = rdata.get('api_version') or '2.0'
+                ctxt.update({
+                    'service_port': rdata.get('credentials_port'),
+                    'service_host': credentials_host,
+                    'auth_host': auth_host,
+                    'auth_port': rdata.get('auth_port'),
+                    'admin_tenant_name': rdata.get('credentials_project'),
+                    'admin_tenant_id': rdata.get('credentials_project_id'),
+                    'admin_user': rdata.get('credentials_username'),
+                    'admin_password': rdata.get('credentials_password'),
+                    'service_protocol': svc_protocol,
+                    'auth_protocol': auth_protocol,
+                    'api_version': api_version
+                })
+
+                if rdata.get('service_type'):
+                    ctxt['service_type'] = rdata.get('service_type')
+
+                if float(api_version) > 2:
+                    ctxt.update({'admin_domain_name':
+                                 rdata.get('domain')})
+
+                if self.context_complete(ctxt):
+                    return ctxt
+
+        return {}
+
+
+class NovaVendorMetadataContext(OSContextGenerator):
+    """Context used for configuring nova vendor metadata on nova.conf file."""
+
+    def __init__(self, os_release_pkg, interfaces=None):
+        """Initialize the NovaVendorMetadataContext object.
+
+        :param os_release_pkg: the package name to extract the OpenStack
+                               release codename from.
+        :type os_release_pkg: str
+        :param interfaces: list of string values to be used as the Context's
+                           relation interfaces.
+        :type interfaces: List[str]
+        """
+        self.os_release_pkg = os_release_pkg
+        if interfaces is not None:
+            self.interfaces = interfaces
+
+    def __call__(self):
+        cmp_os_release = CompareOpenStackReleases(
+            os_release(self.os_release_pkg))
+        ctxt = {'vendor_data': False}
+
+        vdata_providers = []
+        vdata = config('vendor-data')
+        vdata_url = config('vendor-data-url')
+
+        if vdata:
+            try:
+                # validate the JSON. If invalid, we do not set anything here
+                json.loads(vdata)
+            except (TypeError, ValueError) as e:
+                log('Error decoding vendor-data. {}'.format(e), level=ERROR)
+            else:
+                ctxt['vendor_data'] = True
+                # Mitaka does not support DynamicJSON
+                # so vendordata_providers is not needed
+                if cmp_os_release > 'mitaka':
+                    vdata_providers.append('StaticJSON')
+
+        if vdata_url:
+            if cmp_os_release > 'mitaka':
+                ctxt['vendor_data_url'] = vdata_url
+                vdata_providers.append('DynamicJSON')
+            else:
+                log('Dynamic vendor data unsupported'
+                    ' for {}.'.format(cmp_os_release), level=ERROR)
+        if vdata_providers:
+            ctxt['vendordata_providers'] = ','.join(vdata_providers)
+
+        return ctxt
+
+
+class NovaVendorMetadataJSONContext(OSContextGenerator):
+    """Context used for writing nova vendor metadata json file."""
+
+    def __init__(self, os_release_pkg):
+        """Initialize the NovaVendorMetadataJSONContext object.
+
+        :param os_release_pkg: the package name to extract the OpenStack
+                               release codename from.
+ :type os_release_pkg: str + """ + self.os_release_pkg = os_release_pkg + + def __call__(self): + ctxt = {'vendor_data_json': '{}'} + + vdata = config('vendor-data') + if vdata: + try: + # validate the JSON. If invalid, we return empty. + json.loads(vdata) + except (TypeError, ValueError) as e: + log('Error decoding vendor-data. {}'.format(e), level=ERROR) + else: + ctxt['vendor_data_json'] = vdata + + return ctxt + + +class AMQPContext(OSContextGenerator): + + def __init__(self, ssl_dir=None, rel_name='amqp', relation_prefix=None, + relation_id=None): + self.ssl_dir = ssl_dir + self.rel_name = rel_name + self.relation_prefix = relation_prefix + self.interfaces = [rel_name] + self.relation_id = relation_id + + def __call__(self): + log('Generating template context for amqp', level=DEBUG) + conf = config() + if self.relation_prefix: + user_setting = '%s-rabbit-user' % (self.relation_prefix) + vhost_setting = '%s-rabbit-vhost' % (self.relation_prefix) + else: + user_setting = 'rabbit-user' + vhost_setting = 'rabbit-vhost' + + try: + username = conf[user_setting] + vhost = conf[vhost_setting] + except KeyError as e: + log('Could not generate shared_db context. Missing required charm ' + 'config options: %s.' % e, level=ERROR) + raise OSContextError + + ctxt = {} + if self.relation_id: + rids = [self.relation_id] + else: + rids = relation_ids(self.rel_name) + for rid in rids: + ha_vip_only = False + self.related = True + transport_hosts = None + rabbitmq_port = '5672' + for unit in related_units(rid): + if relation_get('clustered', rid=rid, unit=unit): + ctxt['clustered'] = True + vip = relation_get('vip', rid=rid, unit=unit) + vip = format_ipv6_addr(vip) or vip + ctxt['rabbitmq_host'] = vip + transport_hosts = [vip] + else: + host = relation_get('private-address', rid=rid, unit=unit) + host = format_ipv6_addr(host) or host + ctxt['rabbitmq_host'] = host + transport_hosts = [host] + + ctxt.update({ + 'rabbitmq_user': username, + 'rabbitmq_password': relation_get('password', rid=rid, + unit=unit), + 'rabbitmq_virtual_host': vhost, + }) + + ssl_port = relation_get('ssl_port', rid=rid, unit=unit) + if ssl_port: + ctxt['rabbit_ssl_port'] = ssl_port + rabbitmq_port = ssl_port + + ssl_ca = relation_get('ssl_ca', rid=rid, unit=unit) + if ssl_ca: + ctxt['rabbit_ssl_ca'] = ssl_ca + + if relation_get('ha_queues', rid=rid, unit=unit) is not None: + ctxt['rabbitmq_ha_queues'] = True + + ha_vip_only = relation_get('ha-vip-only', + rid=rid, unit=unit) is not None + + if self.context_complete(ctxt): + if 'rabbit_ssl_ca' in ctxt: + if not self.ssl_dir: + log("Charm not setup for ssl support but ssl ca " + "found", level=INFO) + break + + ca_path = os.path.join( + self.ssl_dir, 'rabbit-client-ca.pem') + with open(ca_path, 'wb') as fh: + fh.write(b64decode(ctxt['rabbit_ssl_ca'])) + ctxt['rabbit_ssl_ca'] = ca_path + + # Sufficient information found = break out! 
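+                    # The active/active host list below is still assembled
+                    # from all related units when more than one is present.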
+ break + + # Used for active/active rabbitmq >= grizzly + if (('clustered' not in ctxt or ha_vip_only) and + len(related_units(rid)) > 1): + rabbitmq_hosts = [] + for unit in related_units(rid): + host = relation_get('private-address', rid=rid, unit=unit) + if not relation_get('password', rid=rid, unit=unit): + log( + ("Skipping {} password not sent which indicates " + "unit is not ready.".format(host)), + level=DEBUG) + continue + host = format_ipv6_addr(host) or host + rabbitmq_hosts.append(host) + + rabbitmq_hosts = sorted(rabbitmq_hosts) + ctxt['rabbitmq_hosts'] = ','.join(rabbitmq_hosts) + transport_hosts = rabbitmq_hosts + + if transport_hosts: + transport_url_hosts = ','.join([ + "{}:{}@{}:{}".format(ctxt['rabbitmq_user'], + ctxt['rabbitmq_password'], + host_, + rabbitmq_port) + for host_ in transport_hosts]) + ctxt['transport_url'] = "rabbit://{}/{}".format( + transport_url_hosts, vhost) + + oslo_messaging_flags = conf.get('oslo-messaging-flags', None) + if oslo_messaging_flags: + ctxt['oslo_messaging_flags'] = config_flags_parser( + oslo_messaging_flags) + + oslo_messaging_driver = conf.get( + 'oslo-messaging-driver', DEFAULT_OSLO_MESSAGING_DRIVER) + if oslo_messaging_driver: + ctxt['oslo_messaging_driver'] = oslo_messaging_driver + + notification_format = conf.get('notification-format', None) + if notification_format: + ctxt['notification_format'] = notification_format + + notification_topics = conf.get('notification-topics', None) + if notification_topics: + ctxt['notification_topics'] = notification_topics + + send_notifications_to_logs = conf.get('send-notifications-to-logs', None) + if send_notifications_to_logs: + ctxt['send_notifications_to_logs'] = send_notifications_to_logs + + if not self.complete: + return {} + + return ctxt + + +class CephContext(OSContextGenerator): + """Generates context for /etc/ceph/ceph.conf templates.""" + interfaces = ['ceph'] + + def __call__(self): + if not relation_ids('ceph'): + return {} + + log('Generating template context for ceph', level=DEBUG) + mon_hosts = [] + ctxt = { + 'use_syslog': str(config('use-syslog')).lower() + } + for rid in relation_ids('ceph'): + for unit in related_units(rid): + if not ctxt.get('auth'): + ctxt['auth'] = relation_get('auth', rid=rid, unit=unit) + if not ctxt.get('key'): + ctxt['key'] = relation_get('key', rid=rid, unit=unit) + if not ctxt.get('rbd_features'): + default_features = relation_get('rbd-features', rid=rid, unit=unit) + if default_features is not None: + ctxt['rbd_features'] = default_features + + ceph_addrs = relation_get('ceph-public-address', rid=rid, + unit=unit) + if ceph_addrs: + for addr in ceph_addrs.split(' '): + mon_hosts.append(format_ipv6_addr(addr) or addr) + else: + priv_addr = relation_get('private-address', rid=rid, + unit=unit) + mon_hosts.append(format_ipv6_addr(priv_addr) or priv_addr) + + ctxt['mon_hosts'] = ' '.join(sorted(mon_hosts)) + + if config('pool-type') and config('pool-type') == 'erasure-coded': + base_pool_name = config('rbd-pool') or config('rbd-pool-name') + if not base_pool_name: + base_pool_name = service_name() + ctxt['rbd_default_data_pool'] = base_pool_name + + if not os.path.isdir('/etc/ceph'): + os.mkdir('/etc/ceph') + + if not self.context_complete(ctxt): + return {} + + ensure_packages(['ceph-common']) + return ctxt + + def context_complete(self, ctxt): + """Overridden here to ensure the context is actually complete. 
+
+        We set `key` and `auth` to None here, by default, to ensure
+        that the context will always evaluate to incomplete until the
+        Ceph relation has actually sent these details; otherwise,
+        there is a potential race condition between the relation
+        appearing and the first unit actually setting this data on the
+        relation.
+
+        :param ctxt: The current context members
+        :type ctxt: Dict[str, ANY]
+        :returns: True if the context is complete
+        :rtype: bool
+        """
+        if 'auth' not in ctxt or 'key' not in ctxt:
+            return False
+        return super(CephContext, self).context_complete(ctxt)
+
+
+class HAProxyContext(OSContextGenerator):
+    """Provides half a context for the haproxy template, which describes
+    all peers to be included in the cluster.  Each charm needs to include
+    its own context generator that describes the port mapping.
+
+    :side effect: mkdir is called on HAPROXY_RUN_DIR
+    """
+    interfaces = ['cluster']
+
+    def __init__(self, singlenode_mode=False,
+                 address_types=None,
+                 exporter_stats_port=DEFAULT_HAPROXY_EXPORTER_STATS_PORT):
+        if address_types is None:
+            address_types = ADDRESS_TYPES[:]
+
+        self.address_types = address_types
+        self.singlenode_mode = singlenode_mode
+        self.exporter_stats_port = exporter_stats_port
+
+    def __call__(self):
+        if not os.path.isdir(HAPROXY_RUN_DIR):
+            mkdir(path=HAPROXY_RUN_DIR)
+        if not relation_ids('cluster') and not self.singlenode_mode:
+            return {}
+
+        l_unit = local_unit().replace('/', '-')
+        cluster_hosts = collections.OrderedDict()
+
+        # NOTE(jamespage): build out map of configured network endpoints
+        # and associated backends
+        for addr_type in self.address_types:
+            cfg_opt = 'os-{}-network'.format(addr_type)
+            # NOTE(thedac) For some reason the ADDRESS_MAP uses 'int' rather
+            # than 'internal'
+            if addr_type == 'internal':
+                _addr_map_type = INTERNAL
+            else:
+                _addr_map_type = addr_type
+            # Network spaces aware
+            laddr = get_relation_ip(ADDRESS_MAP[_addr_map_type]['binding'],
+                                    config(cfg_opt))
+            if laddr:
+                netmask = get_netmask_for_address(laddr)
+                cluster_hosts[laddr] = {
+                    'network': "{}/{}".format(laddr,
+                                              netmask),
+                    'backends': collections.OrderedDict([(l_unit,
+                                                          laddr)])
+                }
+                for rid in relation_ids('cluster'):
+                    for unit in sorted(related_units(rid)):
+                        # API Charms will need to set {addr_type}-address with
+                        # get_relation_ip(addr_type)
+                        _laddr = relation_get('{}-address'.format(addr_type),
+                                              rid=rid, unit=unit)
+                        if _laddr:
+                            _unit = unit.replace('/', '-')
+                            cluster_hosts[laddr]['backends'][_unit] = _laddr
+
+        # NOTE(jamespage) add backend based on get_relation_ip - this
+        # will either be the only backend or the fallback if no acls
+        # match in the frontend
+        # Network spaces aware
+        addr = get_relation_ip('cluster')
+        cluster_hosts[addr] = {}
+        netmask = get_netmask_for_address(addr)
+        cluster_hosts[addr] = {
+            'network': "{}/{}".format(addr, netmask),
+            'backends': collections.OrderedDict([(l_unit,
+                                                  addr)])
+        }
+        for rid in relation_ids('cluster'):
+            for unit in sorted(related_units(rid)):
+                # API Charms will need to set their private-address with
+                # get_relation_ip('cluster')
+                _laddr = relation_get('private-address',
+                                      rid=rid, unit=unit)
+                if _laddr:
+                    _unit = unit.replace('/', '-')
+                    cluster_hosts[addr]['backends'][_unit] = _laddr
+
+        ctxt = {
+            'frontends': cluster_hosts,
+            'default_backend': addr
+        }
+
+        if config('haproxy-server-timeout'):
+            ctxt['haproxy_server_timeout'] = config('haproxy-server-timeout')
+
+        if config('haproxy-client-timeout'):
+            ctxt['haproxy_client_timeout'] = config('haproxy-client-timeout')
+
+        if config('haproxy-queue-timeout'):
+            ctxt['haproxy_queue_timeout'] = config('haproxy-queue-timeout')
+
+        if config('haproxy-connect-timeout'):
+            ctxt['haproxy_connect_timeout'] = config('haproxy-connect-timeout')
+
+        if config('prefer-ipv6'):
+            ctxt['local_host'] = 'ip6-localhost'
+            ctxt['haproxy_host'] = '::'
+        else:
+            ctxt['local_host'] = '127.0.0.1'
+            ctxt['haproxy_host'] = '0.0.0.0'
+
+        ctxt['ipv6_enabled'] = not is_ipv6_disabled()
+
+        ctxt['stat_port'] = '8888'
+
+        db = kv()
+        ctxt['stat_password'] = db.get('stat-password')
+        if not ctxt['stat_password']:
+            ctxt['stat_password'] = db.set('stat-password', pwgen(32))
+            db.flush()
+
+        # NOTE(rgildein): configure prometheus exporter for haproxy > 2.0.0
+        #                 New bind will be created and a prometheus-exporter
+        #                 will be used for path /metrics. At the same time,
+        #                 prometheus-exporter avoids using auth.
+        haproxy_version = get_installed_version("haproxy")
+        if (haproxy_version and
+                haproxy_version.ver_str >= LooseVersion("2.0.0") and
+                is_relation_made("haproxy-exporter")):
+            ctxt["stats_exporter_host"] = get_relation_ip("haproxy-exporter")
+            ctxt["stats_exporter_port"] = self.exporter_stats_port
+
+        for frontend in cluster_hosts:
+            if (len(cluster_hosts[frontend]['backends']) > 1 or
+                    self.singlenode_mode):
+                # Enable haproxy when we have enough peers.
+                log('Ensuring haproxy enabled in /etc/default/haproxy.',
+                    level=DEBUG)
+                with open('/etc/default/haproxy', 'w') as out:
+                    out.write('ENABLED=1\n')
+
+                return ctxt
+
+        log('HAProxy context is incomplete, this unit has no peers.',
+            level=INFO)
+        return {}
+
+
+class ImageServiceContext(OSContextGenerator):
+    interfaces = ['image-service']
+
+    def __call__(self):
+        """Obtains the glance API server from the image-service relation.
+        Useful in nova and cinder (currently).
+        """
+        log('Generating template context for image-service.', level=DEBUG)
+        rids = relation_ids('image-service')
+        if not rids:
+            return {}
+
+        for rid in rids:
+            for unit in related_units(rid):
+                api_server = relation_get('glance-api-server',
+                                          rid=rid, unit=unit)
+                if api_server:
+                    return {'glance_api_servers': api_server}
+
+        log("ImageService context is incomplete. Missing required relation "
+            "data.", level=INFO)
+        return {}
+
+
+class ApacheSSLContext(OSContextGenerator):
+    """Generates a context for an apache vhost configuration that configures
+    HTTPS reverse proxying for one or many endpoints. Generated context
+    looks something like::
+
+        {
+            'namespace': 'cinder',
+            'private_address': 'iscsi.mycinderhost.com',
+            'endpoints': [(8776, 8766), (8777, 8767)]
+        }
+
+    The endpoints list consists of tuples mapping external ports
+    to internal ports.
+    """
+    interfaces = ['https']
+
+    # charms should inherit this context and set external ports
+    # and service namespace accordingly.
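+    #
+    # A minimal sketch of such a subclass (the class name and port are
+    # hypothetical):
+    #
+    #     class CinderSSLContext(ApacheSSLContext):
+    #         service_namespace = 'cinder'
+    #         external_ports = [8776]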
+
+    external_ports = []
+    service_namespace = None
+    user = group = 'root'
+
+    def enable_modules(self):
+        cmd = ['a2enmod', 'ssl', 'proxy', 'proxy_http', 'headers']
+        check_call(cmd)
+
+    def configure_cert(self, cn=None):
+        ssl_dir = os.path.join('/etc/apache2/ssl/', self.service_namespace)
+        mkdir(path=ssl_dir)
+        cert, key = get_cert(cn)
+        if cert and key:
+            if cn:
+                cert_filename = 'cert_{}'.format(cn)
+                key_filename = 'key_{}'.format(cn)
+            else:
+                cert_filename = 'cert'
+                key_filename = 'key'
+
+            write_file(path=os.path.join(ssl_dir, cert_filename),
+                       content=b64decode(cert), owner=self.user,
+                       group=self.group, perms=0o640)
+            write_file(path=os.path.join(ssl_dir, key_filename),
+                       content=b64decode(key), owner=self.user,
+                       group=self.group, perms=0o640)
+
+    def configure_ca(self):
+        ca_cert = get_ca_cert()
+        if ca_cert:
+            install_ca_cert(b64decode(ca_cert))
+
+    def canonical_names(self):
+        """Figure out which canonical names clients will access this service.
+        """
+        cns = []
+        for r_id in relation_ids('identity-service'):
+            for unit in related_units(r_id):
+                rdata = relation_get(rid=r_id, unit=unit)
+                for k in rdata:
+                    if k.startswith('ssl_key_'):
+                        # Strip the 'ssl_key_' prefix (str.lstrip would
+                        # strip a character set, not the prefix).
+                        cns.append(k[len('ssl_key_'):])
+
+        return sorted(list(set(cns)))
+
+    def get_network_addresses(self):
+        """For each network configured, return corresponding address and
+           hostname or vip (if available).
+
+        Returns a list of tuples of the form:
+
+            [(address_in_net_a, hostname_in_net_a),
+             (address_in_net_b, hostname_in_net_b),
+             ...]
+
+            or, if no hostname(s) available:
+
+            [(address_in_net_a, vip_in_net_a),
+             (address_in_net_b, vip_in_net_b),
+             ...]
+
+            or, if no vip(s) available:
+
+            [(address_in_net_a, address_in_net_a),
+             (address_in_net_b, address_in_net_b),
+             ...]
+        """
+        addresses = []
+        for net_type in [INTERNAL, ADMIN, PUBLIC]:
+            net_config = config(ADDRESS_MAP[net_type]['config'])
+            # NOTE(jamespage): Fallback must always be private address
+            #                  as this is used to bind services on the
+            #                  local unit.
+            fallback = local_address(unit_get_fallback="private-address")
+            if net_config:
+                addr = get_address_in_network(net_config,
+                                              fallback)
+            else:
+                try:
+                    addr = network_get_primary_address(
+                        ADDRESS_MAP[net_type]['binding']
+                    )
+                except (NotImplementedError, NoNetworkBinding):
+                    addr = fallback
+
+            endpoint = resolve_address(net_type)
+            addresses.append((addr, endpoint))
+
+        # Log the set of addresses to have a trail log and capture if tuples
+        # change over time in the same unit (LP: #1952414).
+ sorted_addresses = sorted(set(addresses)) + log('get_network_addresses: {}'.format(sorted_addresses)) + return sorted_addresses + + def __call__(self): + if isinstance(self.external_ports, str): + self.external_ports = [self.external_ports] + + if not self.external_ports or not https(): + return {} + + use_keystone_ca = True + for rid in relation_ids('certificates'): + if related_units(rid): + use_keystone_ca = False + + if use_keystone_ca: + self.configure_ca() + + self.enable_modules() + + ctxt = {'namespace': self.service_namespace, + 'endpoints': [], + 'ext_ports': []} + + if use_keystone_ca: + cns = self.canonical_names() + if cns: + for cn in cns: + self.configure_cert(cn) + else: + # Expect cert/key provided in config (currently assumed that ca + # uses ip for cn) + for net_type in (INTERNAL, ADMIN, PUBLIC): + cn = resolve_address(endpoint_type=net_type) + self.configure_cert(cn) + + addresses = self.get_network_addresses() + for address, endpoint in addresses: + for api_port in self.external_ports: + ext_port = determine_apache_port(api_port, + singlenode_mode=True) + int_port = determine_api_port(api_port, singlenode_mode=True) + portmap = (address, endpoint, int(ext_port), int(int_port)) + ctxt['endpoints'].append(portmap) + ctxt['ext_ports'].append(int(ext_port)) + + ctxt['ext_ports'] = sorted(list(set(ctxt['ext_ports']))) + return ctxt + + +class NeutronContext(OSContextGenerator): + interfaces = [] + + @property + def plugin(self): + return None + + @property + def network_manager(self): + return None + + @property + def packages(self): + return neutron_plugin_attribute(self.plugin, 'packages', + self.network_manager) + + @property + def neutron_security_groups(self): + return None + + def _ensure_packages(self): + for pkgs in self.packages: + ensure_packages(pkgs) + + def ovs_ctxt(self): + driver = neutron_plugin_attribute(self.plugin, 'driver', + self.network_manager) + config = neutron_plugin_attribute(self.plugin, 'config', + self.network_manager) + ovs_ctxt = {'core_plugin': driver, + 'neutron_plugin': 'ovs', + 'neutron_security_groups': self.neutron_security_groups, + 'local_ip': unit_private_ip(), + 'config': config} + + return ovs_ctxt + + def nuage_ctxt(self): + driver = neutron_plugin_attribute(self.plugin, 'driver', + self.network_manager) + config = neutron_plugin_attribute(self.plugin, 'config', + self.network_manager) + nuage_ctxt = {'core_plugin': driver, + 'neutron_plugin': 'vsp', + 'neutron_security_groups': self.neutron_security_groups, + 'local_ip': unit_private_ip(), + 'config': config} + + return nuage_ctxt + + def nvp_ctxt(self): + driver = neutron_plugin_attribute(self.plugin, 'driver', + self.network_manager) + config = neutron_plugin_attribute(self.plugin, 'config', + self.network_manager) + nvp_ctxt = {'core_plugin': driver, + 'neutron_plugin': 'nvp', + 'neutron_security_groups': self.neutron_security_groups, + 'local_ip': unit_private_ip(), + 'config': config} + + return nvp_ctxt + + def n1kv_ctxt(self): + driver = neutron_plugin_attribute(self.plugin, 'driver', + self.network_manager) + n1kv_config = neutron_plugin_attribute(self.plugin, 'config', + self.network_manager) + n1kv_user_config_flags = config('n1kv-config-flags') + restrict_policy_profiles = config('n1kv-restrict-policy-profiles') + n1kv_ctxt = {'core_plugin': driver, + 'neutron_plugin': 'n1kv', + 'neutron_security_groups': self.neutron_security_groups, + 'local_ip': unit_private_ip(), + 'config': n1kv_config, + 'vsm_ip': config('n1kv-vsm-ip'), + 'vsm_username': 
config('n1kv-vsm-username'), + 'vsm_password': config('n1kv-vsm-password'), + 'restrict_policy_profiles': restrict_policy_profiles} + + if n1kv_user_config_flags: + flags = config_flags_parser(n1kv_user_config_flags) + n1kv_ctxt['user_config_flags'] = flags + + return n1kv_ctxt + + def calico_ctxt(self): + driver = neutron_plugin_attribute(self.plugin, 'driver', + self.network_manager) + config = neutron_plugin_attribute(self.plugin, 'config', + self.network_manager) + calico_ctxt = {'core_plugin': driver, + 'neutron_plugin': 'Calico', + 'neutron_security_groups': self.neutron_security_groups, + 'local_ip': unit_private_ip(), + 'config': config} + + return calico_ctxt + + def neutron_ctxt(self): + if https(): + proto = 'https' + else: + proto = 'http' + + if is_clustered(): + host = config('vip') + else: + host = local_address(unit_get_fallback='private-address') + + ctxt = {'network_manager': self.network_manager, + 'neutron_url': '%s://%s:%s' % (proto, host, '9696')} + return ctxt + + def pg_ctxt(self): + driver = neutron_plugin_attribute(self.plugin, 'driver', + self.network_manager) + config = neutron_plugin_attribute(self.plugin, 'config', + self.network_manager) + ovs_ctxt = {'core_plugin': driver, + 'neutron_plugin': 'plumgrid', + 'neutron_security_groups': self.neutron_security_groups, + 'local_ip': unit_private_ip(), + 'config': config} + return ovs_ctxt + + def midonet_ctxt(self): + driver = neutron_plugin_attribute(self.plugin, 'driver', + self.network_manager) + midonet_config = neutron_plugin_attribute(self.plugin, 'config', + self.network_manager) + mido_ctxt = {'core_plugin': driver, + 'neutron_plugin': 'midonet', + 'neutron_security_groups': self.neutron_security_groups, + 'local_ip': unit_private_ip(), + 'config': midonet_config} + + return mido_ctxt + + def __call__(self): + if self.network_manager not in ['quantum', 'neutron']: + return {} + + if not self.plugin: + return {} + + ctxt = self.neutron_ctxt() + + if self.plugin == 'ovs': + ctxt.update(self.ovs_ctxt()) + elif self.plugin in ['nvp', 'nsx']: + ctxt.update(self.nvp_ctxt()) + elif self.plugin == 'n1kv': + ctxt.update(self.n1kv_ctxt()) + elif self.plugin == 'Calico': + ctxt.update(self.calico_ctxt()) + elif self.plugin == 'vsp': + ctxt.update(self.nuage_ctxt()) + elif self.plugin == 'plumgrid': + ctxt.update(self.pg_ctxt()) + elif self.plugin == 'midonet': + ctxt.update(self.midonet_ctxt()) + + alchemy_flags = config('neutron-alchemy-flags') + if alchemy_flags: + flags = config_flags_parser(alchemy_flags) + ctxt['neutron_alchemy_flags'] = flags + + return ctxt + + +class NeutronPortContext(OSContextGenerator): + + def resolve_ports(self, ports): + """Resolve NICs not yet bound to bridge(s) + + If hwaddress provided then returns resolved hwaddress otherwise NIC. 
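+
+        Example (illustrative; the NIC names and MAC are hypothetical)::
+
+            >>> self.resolve_ports(['eth2', 'a0:36:9f:dd:37:a8'])
+            ['eth2', 'eth5']   # the MAC resolved to its unbound NIC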
+ """ + if not ports: + return None + + hwaddr_to_nic = {} + hwaddr_to_ip = {} + extant_nics = list_nics() + + for nic in extant_nics: + # Ignore virtual interfaces (bond masters will be identified from + # their slaves) + if not is_phy_iface(nic): + continue + + _nic = get_bond_master(nic) + if _nic: + log("Replacing iface '%s' with bond master '%s'" % (nic, _nic), + level=DEBUG) + nic = _nic + + hwaddr = get_nic_hwaddr(nic) + hwaddr_to_nic[hwaddr] = nic + addresses = get_ipv4_addr(nic, fatal=False) + addresses += get_ipv6_addr(iface=nic, fatal=False) + hwaddr_to_ip[hwaddr] = addresses + + resolved = [] + mac_regex = re.compile(r'([0-9A-F]{2}[:-]){5}([0-9A-F]{2})', re.I) + for entry in ports: + if re.match(mac_regex, entry): + # NIC is in known NICs and does NOT have an IP address + if entry in hwaddr_to_nic and not hwaddr_to_ip[entry]: + # If the nic is part of a bridge then don't use it + if is_bridge_member(hwaddr_to_nic[entry]): + continue + + # Entry is a MAC address for a valid interface that doesn't + # have an IP address assigned yet. + resolved.append(hwaddr_to_nic[entry]) + elif entry in extant_nics: + # If the passed entry is not a MAC address and the interface + # exists, assume it's a valid interface, and that the user put + # it there on purpose (we can trust it to be the real external + # network). + resolved.append(entry) + + # Ensure no duplicates + return list(set(resolved)) + + +class OSConfigFlagContext(OSContextGenerator): + """Provides support for user-defined config flags. + + Users can define a comma-seperated list of key=value pairs + in the charm configuration and apply them at any point in + any file by using a template flag. + + Sometimes users might want config flags inserted within a + specific section so this class allows users to specify the + template flag name, allowing for multiple template flags + (sections) within the same context. + + NOTE: the value of config-flags may be a comma-separated list of + key=value pairs and some Openstack config files support + comma-separated lists as values. + """ + + def __init__(self, charm_flag='config-flags', + template_flag='user_config_flags'): + """ + :param charm_flag: config flags in charm configuration. + :param template_flag: insert point for user-defined flags in template + file. + """ + super(OSConfigFlagContext, self).__init__() + self._charm_flag = charm_flag + self._template_flag = template_flag + + def __call__(self): + config_flags = config(self._charm_flag) + if not config_flags: + return {} + + return {self._template_flag: + config_flags_parser(config_flags)} + + +class LibvirtConfigFlagsContext(OSContextGenerator): + """ + This context provides support for extending + the libvirt section through user-defined flags. + """ + def __call__(self): + ctxt = {} + libvirt_flags = config('libvirt-flags') + if libvirt_flags: + ctxt['libvirt_flags'] = config_flags_parser( + libvirt_flags) + return ctxt + + +class SubordinateConfigContext(OSContextGenerator): + + """ + Responsible for inspecting relations to subordinates that + may be exporting required config via a json blob. + + The subordinate interface allows subordinates to export their + configuration requirements to the principle for multiple config + files and multiple services. 
+    For example, a subordinate that has interfaces
+    to both glance and nova may export the following YAML blob as JSON::
+
+        glance:
+            /etc/glance/glance-api.conf:
+                sections:
+                    DEFAULT:
+                        - [key1, value1]
+            /etc/glance/glance-registry.conf:
+                MYSECTION:
+                    - [key2, value2]
+        nova:
+            /etc/nova/nova.conf:
+                sections:
+                    DEFAULT:
+                        - [key3, value3]
+
+
+    It is then up to the principal charms to subscribe this context to
+    the service+config file it is interested in. Configuration data will
+    be available in the template context, in glance's case, as::
+
+        ctxt = {
+            ... other context ...
+            'subordinate_configuration': {
+                'DEFAULT': {
+                    'key1': 'value1',
+                },
+                'MYSECTION': {
+                    'key2': 'value2',
+                },
+            }
+        }
+    """
+
+    def __init__(self, service, config_file, interface):
+        """
+        :param service     : Service name key to query in any subordinate
+                             data found
+        :param config_file : Service's config file to query sections
+        :param interface   : Subordinate interface to inspect
+        """
+        self.config_file = config_file
+        if isinstance(service, list):
+            self.services = service
+        else:
+            self.services = [service]
+        if isinstance(interface, list):
+            self.interfaces = interface
+        else:
+            self.interfaces = [interface]
+
+    def __call__(self):
+        ctxt = {'sections': {}}
+        rids = []
+        for interface in self.interfaces:
+            rids.extend(relation_ids(interface))
+        for rid in rids:
+            for unit in related_units(rid):
+                sub_config = relation_get('subordinate_configuration',
+                                          rid=rid, unit=unit)
+                if sub_config and sub_config != '':
+                    try:
+                        sub_config = json.loads(sub_config)
+                    except Exception:
+                        log('Could not parse JSON from '
+                            'subordinate_configuration setting from %s'
+                            % rid, level=ERROR)
+                        continue
+
+                    for service in self.services:
+                        if service not in sub_config:
+                            log('Found subordinate_configuration on %s but it '
+                                'contained nothing for %s service'
+                                % (rid, service), level=INFO)
+                            continue
+
+                        sub_config = sub_config[service]
+                        if self.config_file not in sub_config:
+                            log('Found subordinate_configuration on %s but it '
+                                'contained nothing for %s'
+                                % (rid, self.config_file), level=INFO)
+                            continue
+
+                        sub_config = sub_config[self.config_file]
+                        for k, v in sub_config.items():
+                            if k == 'sections':
+                                for section, config_list in v.items():
+                                    log("adding section '%s'" % (section),
+                                        level=DEBUG)
+                                    if ctxt[k].get(section):
+                                        ctxt[k][section].extend(config_list)
+                                    else:
+                                        ctxt[k][section] = config_list
+                            else:
+                                ctxt[k] = v
+        if self.context_complete(ctxt):
+            log("%d section(s) found" % (len(ctxt['sections'])), level=DEBUG)
+            return ctxt
+        else:
+            return {}
+
+    def context_complete(self, ctxt):
+        """Overridden here to ensure the context is actually complete.
+
+        :param ctxt: The current context members
+        :type ctxt: Dict[str, ANY]
+        :returns: True if the context is complete
+        :rtype: bool
+        """
+        if not ctxt.get('sections'):
+            return False
+        return super(SubordinateConfigContext, self).context_complete(ctxt)
+
+
+class LogLevelContext(OSContextGenerator):
+
+    def __call__(self):
+        ctxt = {}
+        ctxt['debug'] = \
+            False if config('debug') is None else config('debug')
+        ctxt['verbose'] = \
+            False if config('verbose') is None else config('verbose')
+
+        return ctxt
+
+
+class SyslogContext(OSContextGenerator):
+
+    def __call__(self):
+        ctxt = {'use_syslog': config('use-syslog')}
+        return ctxt
+
+
+class BindHostContext(OSContextGenerator):
+
+    def __call__(self):
+        if config('prefer-ipv6'):
+            return {'bind_host': '::'}
+        else:
+            return {'bind_host': '0.0.0.0'}
+
+
+MAX_DEFAULT_WORKERS = 4
+DEFAULT_MULTIPLIER = 2
+
+
+def _calculate_workers():
+    '''
+    Determine the number of worker processes based on the CPU
+    count of the unit containing the application.
+
+    Workers will be limited to MAX_DEFAULT_WORKERS in
+    container environments where no worker-multiplier configuration
+    option has been set.
+
+    @returns int: number of worker processes to use
+    '''
+    multiplier = config('worker-multiplier')
+
+    # distinguish between an empty config and an explicit config of 0.0
+    if multiplier is None:
+        multiplier = DEFAULT_MULTIPLIER
+
+    count = int(_num_cpus() * multiplier)
+    if count <= 0:
+        # assign at least one worker
+        count = 1
+
+    if config('worker-multiplier') is None:
+        # NOTE(jamespage): Limit unconfigured worker-multiplier
+        #                  to MAX_DEFAULT_WORKERS to avoid insane
+        #                  worker configuration on large servers
+        # Reference: https://pad.lv/1665270
+        count = min(count, MAX_DEFAULT_WORKERS)
+
+    return count
+
+
+def _num_cpus():
+    '''
+    Compatibility wrapper for calculating the number of CPUs
+    a unit has.
+
+    @returns: int: number of CPU cores detected
+    '''
+    try:
+        return psutil.cpu_count()
+    except AttributeError:
+        return psutil.NUM_CPUS
+
+
+class WorkerConfigContext(OSContextGenerator):
+
+    def __call__(self):
+        ctxt = {"workers": _calculate_workers()}
+        return ctxt
+
+
+class WSGIWorkerConfigContext(WorkerConfigContext):
+
+    def __init__(self, name=None, script=None, admin_script=None,
+                 public_script=None, user=None, group=None,
+                 process_weight=1.00,
+                 admin_process_weight=0.25, public_process_weight=0.75):
+        self.service_name = name
+        self.user = user or name
+        self.group = group or name
+        self.script = script
+        self.admin_script = admin_script
+        self.public_script = public_script
+        self.process_weight = process_weight
+        self.admin_process_weight = admin_process_weight
+        self.public_process_weight = public_process_weight
+
+    def __call__(self):
+        total_processes = _calculate_workers()
+        enable_wsgi_socket_rotation = config('wsgi-socket-rotation')
+        if enable_wsgi_socket_rotation is None:
+            enable_wsgi_socket_rotation = True
+        ctxt = {
+            "service_name": self.service_name,
+            "user": self.user,
+            "group": self.group,
+            "script": self.script,
+            "admin_script": self.admin_script,
+            "public_script": self.public_script,
+            "processes": int(math.ceil(self.process_weight * total_processes)),
+            "admin_processes": int(math.ceil(self.admin_process_weight *
+                                             total_processes)),
+            "public_processes": int(math.ceil(self.public_process_weight *
+                                              total_processes)),
+            "threads": 1,
+            "wsgi_socket_rotation": enable_wsgi_socket_rotation,
+        }
+        return ctxt
+
+
+class ZeroMQContext(OSContextGenerator):
+    interfaces = ['zeromq-configuration']
+
+    def __call__(self):
+        ctxt = {}
+        if is_relation_made('zeromq-configuration', 'host'):
+            for rid in relation_ids('zeromq-configuration'):
+                for unit in related_units(rid):
+                    ctxt['zmq_nonce'] = relation_get('nonce', unit, rid)
+                    ctxt['zmq_host'] = relation_get('host', unit, rid)
+                    ctxt['zmq_redis_address'] = relation_get(
+                        'zmq_redis_address', unit, rid)
+
+        return ctxt
+
+
+class NotificationDriverContext(OSContextGenerator):
+
+    def __init__(self, zmq_relation='zeromq-configuration',
+                 amqp_relation='amqp'):
+        """
+        :param zmq_relation: Name of Zeromq relation to check
+        """
+        self.zmq_relation = zmq_relation
+        self.amqp_relation = amqp_relation
+
+    def __call__(self):
+        ctxt = {'notifications': 'False'}
+        if is_relation_made(self.amqp_relation):
+            ctxt['notifications'] = "True"
+
+        return ctxt
+
+
+class SysctlContext(OSContextGenerator):
+    """This context checks if the 'sysctl' option exists in configuration
+    and, if so, creates a file with the loaded contents."""
+    def __call__(self):
+        sysctl_dict = config('sysctl')
+        if sysctl_dict:
+            sysctl_create(sysctl_dict,
+                          '/etc/sysctl.d/50-{0}.conf'.format(charm_name()))
+        return {'sysctl': sysctl_dict}
+
+
+class NeutronAPIContext(OSContextGenerator):
+    '''
+    Inspects the current neutron-plugin-api relation for neutron settings.
+    Returns defaults if it is not present.
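+
+    Illustrative example of a subset of the context returned when no
+    relation data is present (the values are the defaults defined in
+    ``neutron_defaults`` below)::
+
+        {'l2_population': False, 'overlay_network_type': 'gre',
+         'neutron_security_groups': False, 'global_physnet_mtu': 1500}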
+ ''' + interfaces = ['neutron-plugin-api'] + + def __call__(self): + self.neutron_defaults = { + 'l2_population': { + 'rel_key': 'l2-population', + 'default': False, + }, + 'overlay_network_type': { + 'rel_key': 'overlay-network-type', + 'default': 'gre', + }, + 'neutron_security_groups': { + 'rel_key': 'neutron-security-groups', + 'default': False, + }, + 'network_device_mtu': { + 'rel_key': 'network-device-mtu', + 'default': None, + }, + 'enable_dvr': { + 'rel_key': 'enable-dvr', + 'default': False, + }, + 'enable_l3ha': { + 'rel_key': 'enable-l3ha', + 'default': False, + }, + 'dns_domain': { + 'rel_key': 'dns-domain', + 'default': None, + }, + 'polling_interval': { + 'rel_key': 'polling-interval', + 'default': 2, + }, + 'rpc_response_timeout': { + 'rel_key': 'rpc-response-timeout', + 'default': 60, + }, + 'report_interval': { + 'rel_key': 'report-interval', + 'default': 30, + }, + 'enable_qos': { + 'rel_key': 'enable-qos', + 'default': False, + }, + 'enable_nsg_logging': { + 'rel_key': 'enable-nsg-logging', + 'default': False, + }, + 'enable_nfg_logging': { + 'rel_key': 'enable-nfg-logging', + 'default': False, + }, + 'enable_port_forwarding': { + 'rel_key': 'enable-port-forwarding', + 'default': False, + }, + 'enable_fwaas': { + 'rel_key': 'enable-fwaas', + 'default': False, + }, + 'global_physnet_mtu': { + 'rel_key': 'global-physnet-mtu', + 'default': 1500, + }, + 'physical_network_mtus': { + 'rel_key': 'physical-network-mtus', + 'default': None, + }, + } + ctxt = self.get_neutron_options({}) + for rid in relation_ids('neutron-plugin-api'): + for unit in related_units(rid): + rdata = relation_get(rid=rid, unit=unit) + # The l2-population key is used by the context as a way of + # checking if the api service on the other end is sending data + # in a recent format. + if 'l2-population' in rdata: + ctxt.update(self.get_neutron_options(rdata)) + + extension_drivers = [] + + if ctxt['enable_qos']: + extension_drivers.append('qos') + + if ctxt['enable_nsg_logging']: + extension_drivers.append('log') + + ctxt['extension_drivers'] = ','.join(extension_drivers) + + l3_extension_plugins = [] + + if ctxt['enable_port_forwarding']: + l3_extension_plugins.append('port_forwarding') + + if ctxt['enable_fwaas']: + l3_extension_plugins.append('fwaas_v2') + if ctxt['enable_nfg_logging']: + l3_extension_plugins.append('fwaas_v2_log') + + ctxt['l3_extension_plugins'] = l3_extension_plugins + + return ctxt + + def get_neutron_options(self, rdata): + settings = {} + for nkey in self.neutron_defaults.keys(): + defv = self.neutron_defaults[nkey]['default'] + rkey = self.neutron_defaults[nkey]['rel_key'] + if rkey in rdata.keys(): + if type(defv) is bool: + settings[nkey] = bool_from_string(rdata[rkey]) + else: + settings[nkey] = rdata[rkey] + else: + settings[nkey] = defv + return settings + + +class ExternalPortContext(NeutronPortContext): + + def __call__(self): + ctxt = {} + ports = config('ext-port') + if ports: + ports = [p.strip() for p in ports.split()] + ports = self.resolve_ports(ports) + if ports: + ctxt = {"ext_port": ports[0]} + napi_settings = NeutronAPIContext()() + mtu = napi_settings.get('network_device_mtu') + if mtu: + ctxt['ext_port_mtu'] = mtu + + return ctxt + + +class DataPortContext(NeutronPortContext): + + def __call__(self): + ports = config('data-port') + if ports: + # Map of {bridge:port/mac} + portmap = parse_data_port_mappings(ports) + ports = portmap.keys() + # Resolve provided ports or mac addresses and filter out those + # already attached to a bridge. 
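+            # e.g. (illustrative values) a 'data-port' value of
+            # "br-ex:eth1 br-data:a0:36:9f:dd:37:a8" parses to
+            # {'eth1': 'br-ex', 'a0:36:9f:dd:37:a8': 'br-data'}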
+ resolved = self.resolve_ports(ports) + # Rebuild port index using resolved and filtered ports. + normalized = {get_nic_hwaddr(port): port for port in resolved + if port not in ports} + normalized.update({port: port for port in resolved + if port in ports}) + if resolved: + return { + normalized[port]: bridge + for port, bridge in portmap.items() + if port in normalized.keys() + } + + return None + + +class PhyNICMTUContext(DataPortContext): + + def __call__(self): + ctxt = {} + mappings = super(PhyNICMTUContext, self).__call__() + if mappings and mappings.keys(): + ports = sorted(mappings.keys()) + napi_settings = NeutronAPIContext()() + mtu = napi_settings.get('network_device_mtu') + all_ports = set() + # If any of ports is a vlan device, its underlying device must have + # mtu applied first. + for port in ports: + for lport in glob.glob("/sys/class/net/%s/lower_*" % port): + lport = os.path.basename(lport) + all_ports.add(lport.split('_')[1]) + + all_ports = list(all_ports) + all_ports.extend(ports) + if mtu: + ctxt["devs"] = '\\n'.join(all_ports) + ctxt['mtu'] = mtu + + return ctxt + + +class NetworkServiceContext(OSContextGenerator): + + def __init__(self, rel_name='quantum-network-service'): + self.rel_name = rel_name + self.interfaces = [rel_name] + + def __call__(self): + for rid in relation_ids(self.rel_name): + for unit in related_units(rid): + rdata = relation_get(rid=rid, unit=unit) + ctxt = { + 'keystone_host': rdata.get('keystone_host'), + 'service_port': rdata.get('service_port'), + 'auth_port': rdata.get('auth_port'), + 'service_tenant': rdata.get('service_tenant'), + 'service_username': rdata.get('service_username'), + 'service_password': rdata.get('service_password'), + 'quantum_host': rdata.get('quantum_host'), + 'quantum_port': rdata.get('quantum_port'), + 'quantum_url': rdata.get('quantum_url'), + 'region': rdata.get('region'), + 'service_protocol': + rdata.get('service_protocol') or 'http', + 'auth_protocol': + rdata.get('auth_protocol') or 'http', + 'api_version': + rdata.get('api_version') or '2.0', + } + if self.context_complete(ctxt): + return ctxt + return {} + + +class InternalEndpointContext(OSContextGenerator): + """Internal endpoint context. + + This context provides the endpoint type used for communication between + services e.g. between Nova and Cinder internally. Openstack uses Public + endpoints by default so this allows admins to optionally use internal + endpoints. + """ + def __call__(self): + return {'use_internal_endpoints': config('use-internal-endpoints')} + + +class VolumeAPIContext(InternalEndpointContext): + """Volume API context. + + This context provides information regarding the volume endpoint to use + when communicating between services. It determines which version of the + API is appropriate for use. + + This value will be determined in the resulting context dictionary + returned from calling the VolumeAPIContext object. Information provided + by this context is as follows: + + volume_api_version: the volume api version to use, currently + 'v2' or 'v3' + volume_catalog_info: the information to use for a cinder client + configuration that consumes API endpoints from the keystone + catalog. This is defined as the type:name:endpoint_type string. + """ + # FIXME(wolsen) This implementation is based on the provider being able + # to specify the package version to check but does not guarantee that the + # volume service api version selected is available. 
In practice, it is + # quite likely the volume service *is* providing the v3 volume service. + # This should be resolved when the service-discovery spec is implemented. + def __init__(self, pkg): + """ + Creates a new VolumeAPIContext for use in determining which version + of the Volume API should be used for communication. A package codename + should be supplied for determining the currently installed OpenStack + version. + + :param pkg: the package codename to use in order to determine the + component version (e.g. nova-common). See + charmhelpers.contrib.openstack.utils.PACKAGE_CODENAMES for more. + """ + super(VolumeAPIContext, self).__init__() + self._ctxt = None + if not pkg: + raise ValueError('package name must be provided in order to ' + 'determine current OpenStack version.') + self.pkg = pkg + + @property + def ctxt(self): + if self._ctxt is not None: + return self._ctxt + self._ctxt = self._determine_ctxt() + return self._ctxt + + def _determine_ctxt(self): + """Determines the Volume API endpoint information. + + Determines the appropriate version of the API that should be used + as well as the catalog_info string that would be supplied. Returns + a dict containing the volume_api_version and the volume_catalog_info. + """ + rel = os_release(self.pkg) + version = '2' + if CompareOpenStackReleases(rel) >= 'pike': + version = '3' + + service_type = 'volumev{version}'.format(version=version) + service_name = 'cinderv{version}'.format(version=version) + endpoint_type = 'publicURL' + if config('use-internal-endpoints'): + endpoint_type = 'internalURL' + catalog_info = '{type}:{name}:{endpoint}'.format( + type=service_type, name=service_name, endpoint=endpoint_type) + + return { + 'volume_api_version': version, + 'volume_catalog_info': catalog_info, + } + + def __call__(self): + return self.ctxt + + +class AppArmorContext(OSContextGenerator): + """Base class for apparmor contexts.""" + + def __init__(self, profile_name=None): + self._ctxt = None + self.aa_profile = profile_name + self.aa_utils_packages = ['apparmor-utils'] + + @property + def ctxt(self): + if self._ctxt is not None: + return self._ctxt + self._ctxt = self._determine_ctxt() + return self._ctxt + + def _determine_ctxt(self): + """ + Validate aa-profile-mode settings is disable, enforce, or complain. + + :return ctxt: Dictionary of the apparmor profile or None + """ + if config('aa-profile-mode') in ['disable', 'enforce', 'complain']: + ctxt = {'aa_profile_mode': config('aa-profile-mode'), + 'ubuntu_release': lsb_release()['DISTRIB_RELEASE']} + if self.aa_profile: + ctxt['aa_profile'] = self.aa_profile + else: + ctxt = None + return ctxt + + def __call__(self): + return self.ctxt + + def install_aa_utils(self): + """ + Install packages required for apparmor configuration. + """ + log("Installing apparmor utils.") + ensure_packages(self.aa_utils_packages) + + def manually_disable_aa_profile(self): + """ + Manually disable an apparmor profile. + + If aa-profile-mode is set to disabled (default) this is required as the + template has been written but apparmor is yet unaware of the profile + and aa-disable aa-profile fails. Without this the profile would kick + into enforce mode on the next service restart. 
+ + """ + profile_path = '/etc/apparmor.d' + disable_path = '/etc/apparmor.d/disable' + if not os.path.lexists(os.path.join(disable_path, self.aa_profile)): + os.symlink(os.path.join(profile_path, self.aa_profile), + os.path.join(disable_path, self.aa_profile)) + + def setup_aa_profile(self): + """ + Setup an apparmor profile. + The ctxt dictionary will contain the apparmor profile mode and + the apparmor profile name. + Makes calls out to aa-disable, aa-complain, or aa-enforce to setup + the apparmor profile. + """ + self() + if not self.ctxt: + log("Not enabling apparmor Profile") + return + self.install_aa_utils() + cmd = ['aa-{}'.format(self.ctxt['aa_profile_mode'])] + cmd.append(self.ctxt['aa_profile']) + log("Setting up the apparmor profile for {} in {} mode." + "".format(self.ctxt['aa_profile'], self.ctxt['aa_profile_mode'])) + try: + check_call(cmd) + except CalledProcessError as e: + # If aa-profile-mode is set to disabled (default) manual + # disabling is required as the template has been written but + # apparmor is yet unaware of the profile and aa-disable aa-profile + # fails. If aa-disable learns to read profile files first this can + # be removed. + if self.ctxt['aa_profile_mode'] == 'disable': + log("Manually disabling the apparmor profile for {}." + "".format(self.ctxt['aa_profile'])) + self.manually_disable_aa_profile() + return + status_set('blocked', "Apparmor profile {} failed to be set to {}." + "".format(self.ctxt['aa_profile'], + self.ctxt['aa_profile_mode'])) + raise e + + +class MemcacheContext(OSContextGenerator): + """Memcache context + + This context provides options for configuring a local memcache client and + server for both IPv4 and IPv6 + """ + + def __init__(self, package=None): + """ + @param package: Package to examine to extrapolate OpenStack release. + Used when charms have no openstack-origin config + option (ie subordinates) + """ + self.package = package + + def __call__(self): + ctxt = {} + ctxt['use_memcache'] = enable_memcache(package=self.package) + if ctxt['use_memcache']: + # Trusty version of memcached does not support ::1 as a listen + # address so use host file entry instead + release = lsb_release()['DISTRIB_CODENAME'].lower() + if is_ipv6_disabled(): + if CompareHostReleases(release) > 'trusty': + ctxt['memcache_server'] = '127.0.0.1' + else: + ctxt['memcache_server'] = 'localhost' + ctxt['memcache_server_formatted'] = '127.0.0.1' + ctxt['memcache_port'] = '11211' + ctxt['memcache_url'] = '{}:{}'.format( + ctxt['memcache_server_formatted'], + ctxt['memcache_port']) + else: + if CompareHostReleases(release) > 'trusty': + ctxt['memcache_server'] = '::1' + else: + ctxt['memcache_server'] = 'ip6-localhost' + ctxt['memcache_server_formatted'] = '[::1]' + ctxt['memcache_port'] = '11211' + ctxt['memcache_url'] = 'inet6:{}:{}'.format( + ctxt['memcache_server_formatted'], + ctxt['memcache_port']) + return ctxt + + +class EnsureDirContext(OSContextGenerator): + ''' + Serves as a generic context to create a directory as a side-effect. + + Useful for software that supports drop-in files (.d) in conjunction + with config option-based templates. Examples include: + * OpenStack oslo.policy drop-in files; + * systemd drop-in config files; + * other software that supports overriding defaults with .d files + + Another use-case is when a subordinate generates a configuration for + primary to render in a separate directory. + + Some software requires a user to create a target directory to be + scanned for drop-in files with a specific format. 
+    This is why this
+    context is needed to do that before rendering a template.
+    '''
+
+    def __init__(self, dirname, **kwargs):
+        '''Used merely to ensure that a given directory exists.'''
+        self.dirname = dirname
+        self.kwargs = kwargs
+
+    def __call__(self):
+        mkdir(self.dirname, **self.kwargs)
+        return {}
+
+
+class VersionsContext(OSContextGenerator):
+    """Context to return the openstack and operating system versions."""
+
+    def __init__(self, pkg='python-keystone'):
+        """Initialise context.
+
+        :param pkg: Package to extrapolate openstack version from.
+        :type pkg: str
+        """
+        self.pkg = pkg
+
+    def __call__(self):
+        ostack = os_release(self.pkg)
+        osystem = lsb_release()['DISTRIB_CODENAME'].lower()
+        return {
+            'openstack_release': ostack,
+            'operating_system_release': osystem}
+
+
+class LogrotateContext(OSContextGenerator):
+    """Common context generator for logrotate."""
+
+    def __init__(self, location, interval, count):
+        """
+        :param location: Absolute path for the logrotate config file
+        :type location: str
+        :param interval: The interval for the rotations. Valid values are
+                         'daily', 'weekly', 'monthly', 'yearly'
+        :type interval: str
+        :param count: The logrotate 'count' option; the number of times the
+                      log files are rotated before being removed
+        :type count: int
+        """
+        self.location = location
+        self.interval = interval
+        self.count = 'rotate {}'.format(count)
+
+    def __call__(self):
+        ctxt = {
+            'logrotate_logs_location': self.location,
+            'logrotate_interval': self.interval,
+            'logrotate_count': self.count,
+        }
+        return ctxt
+
+
+class HostInfoContext(OSContextGenerator):
+    """Context to provide host information."""
+
+    def __init__(self, use_fqdn_hint_cb=None):
+        """Initialize HostInfoContext
+
+        :param use_fqdn_hint_cb: Callback whose return value is used to
+                                 populate `use_fqdn_hint`
+        :type use_fqdn_hint_cb: Callable[[], bool]
+        """
+        # Store callback used to get hint for whether FQDN should be used
+
+        # Depending on the workload a charm manages, the use of FQDN vs.
+        # shortname may be a deploy-time decision, i.e. behaviour can not
+        # change on charm upgrade or post-deployment configuration change.
+
+        # The hint is passed on as a flag in the context to allow the decision
+        # to be made in the Jinja2 configuration template.
+        self.use_fqdn_hint_cb = use_fqdn_hint_cb
+
+    def _get_canonical_name(self, name=None):
+        """Get the official FQDN of the host
+
+        The implementation of ``socket.getfqdn()`` in the standard Python
+        library does not exhaust all methods of getting the official name
+        of a host, ref Python issue https://bugs.python.org/issue5004
+
+        This function mimics the behaviour of a call to ``hostname -f`` to
+        get the official FQDN but returns an empty string if it is
+        unsuccessful.
+
+        :param name: Shortname to get FQDN on
+        :type name: Optional[str]
+        :returns: The official FQDN for host or empty string ('')
+        :rtype: str
+        """
+        name = name or socket.gethostname()
+        fqdn = ''
+
+        try:
+            addrs = socket.getaddrinfo(
+                name, None, 0, socket.SOCK_DGRAM, 0, socket.AI_CANONNAME)
+        except OSError:
+            pass
+        else:
+            for addr in addrs:
+                if addr[3]:
+                    if '.' in addr[3]:
+                        fqdn = addr[3]
+                        break
+        return fqdn
+
+    def __call__(self):
+        name = socket.gethostname()
+        ctxt = {
+            'host_fqdn': self._get_canonical_name(name) or name,
+            'host': name,
+            'use_fqdn_hint': (
+                self.use_fqdn_hint_cb() if self.use_fqdn_hint_cb else False)
+        }
+        return ctxt
+
+
+def validate_ovs_use_veth(*args, **kwargs):
+    """Validate OVS use veth setting for dhcp agents
+
+    The ovs_use_veth setting is considered immutable as it will break existing
+    deployments. Historically, we set ovs_use_veth=True in dhcp_agent.ini. It
+    turns out this is no longer necessary. Ideally, all new deployments would
+    have this set to False.
+
+    This function validates that the config value does not conflict with
+    previously deployed settings in dhcp_agent.ini.
+
+    See LP Bug#1831935 for details.
+
+    :returns: Status state and message
+    :rtype: Union[(None, None), (string, string)]
+    """
+    existing_ovs_use_veth = (
+        DHCPAgentContext.get_existing_ovs_use_veth())
+    config_ovs_use_veth = DHCPAgentContext.parse_ovs_use_veth()
+
+    # Check settings are set and not None
+    if existing_ovs_use_veth is not None and config_ovs_use_veth is not None:
+        # Check for mismatch between existing config ini and juju config
+        if existing_ovs_use_veth != config_ovs_use_veth:
+            # Stop the line to avoid breakage
+            msg = (
+                "The existing setting for dhcp_agent.ini ovs_use_veth, {}, "
+                "does not match the juju config setting, {}. This may lead to "
+                "VMs being unable to receive a DHCP IP. Either change the "
+                "juju config setting or dhcp agents may need to be recreated."
+                .format(existing_ovs_use_veth, config_ovs_use_veth))
+            log(msg, ERROR)
+            return (
+                "blocked",
+                "Mismatched existing and configured ovs-use-veth. See log.")
+
+    # Everything is OK
+    return None, None
+
+
+class DHCPAgentContext(OSContextGenerator):
+
+    def __call__(self):
+        """Return the DHCPAgentContext.
+
+        Return all DHCP agent INI related configuration, including the
+        'dns_domain' from the neutron-plugin-api relation (if one is set).
+
+        :returns: Dictionary context
+        :rtype: Dict
+        """
+
+        ctxt = {}
+        dnsmasq_flags = config('dnsmasq-flags')
+        if dnsmasq_flags:
+            ctxt['dnsmasq_flags'] = config_flags_parser(dnsmasq_flags)
+        ctxt['dns_servers'] = config('dns-servers')
+
+        neutron_api_settings = NeutronAPIContext()()
+
+        ctxt['debug'] = config('debug')
+        ctxt['instance_mtu'] = config('instance-mtu')
+        ctxt['ovs_use_veth'] = self.get_ovs_use_veth()
+
+        ctxt['enable_metadata_network'] = config('enable-metadata-network')
+        ctxt['enable_isolated_metadata'] = config('enable-isolated-metadata')
+
+        if neutron_api_settings.get('dns_domain'):
+            ctxt['dns_domain'] = neutron_api_settings.get('dns_domain')
+
+        # Override user supplied config for these plugins as these settings
+        # are mandatory
+        if config('plugin') in ['nvp', 'nsx', 'n1kv']:
+            ctxt['enable_metadata_network'] = True
+            ctxt['enable_isolated_metadata'] = True
+
+        ctxt['append_ovs_config'] = False
+        cmp_release = CompareOpenStackReleases(
+            os_release('neutron-common', base='icehouse'))
+        if cmp_release >= 'queens' and config('enable-dpdk'):
+            ctxt['append_ovs_config'] = True
+
+        return ctxt
+
+    @staticmethod
+    def get_existing_ovs_use_veth():
+        """Return the existing ovs_use_veth setting from dhcp_agent.ini.
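+
+        For example (illustrative): an uncommented ``ovs_use_veth = True``
+        under ``[DEFAULT]`` in dhcp_agent.ini yields True here; a commented
+        or absent setting yields None.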
+
+        :returns: Boolean value of existing ovs_use_veth setting or None
+        :rtype: Optional[Bool]
+        """
+        DHCP_AGENT_INI = "/etc/neutron/dhcp_agent.ini"
+        existing_ovs_use_veth = None
+        # If there is a dhcp_agent.ini file read the current setting
+        if os.path.isfile(DHCP_AGENT_INI):
+            # config_ini does the right thing and returns None if the setting
+            # is commented.
+            existing_ovs_use_veth = (
+                config_ini(DHCP_AGENT_INI)["DEFAULT"].get("ovs_use_veth"))
+        # Convert to Bool if necessary
+        if isinstance(existing_ovs_use_veth, str):
+            return bool_from_string(existing_ovs_use_veth)
+        return existing_ovs_use_veth
+
+    @staticmethod
+    def parse_ovs_use_veth():
+        """Parse the ovs-use-veth config setting.
+
+        Parse the string config setting for ovs-use-veth and return a boolean
+        or None.
+
+        bool_from_string will raise a ValueError if the string is not falsy or
+        truthy.
+
+        :raises: ValueError for invalid input
+        :returns: Boolean value of ovs-use-veth or None
+        :rtype: Optional[Bool]
+        """
+        _config = config("ovs-use-veth")
+        # An unset parameter returns None. Just in case we will also check for
+        # an empty string: "". Note that (the problem we are trying to avoid)
+        # the string "False" is truthy while "" is falsy.
+        if _config is None or not _config:
+            # Return None
+            return
+        # bool_from_string handles many variations of true and false strings
+        # as well as upper and lowercases including:
+        # ['y', 'yes', 'true', 't', 'on', 'n', 'no', 'false', 'f', 'off']
+        return bool_from_string(_config)
+
+    def get_ovs_use_veth(self):
+        """Return the correct ovs_use_veth setting for use in dhcp_agent.ini.
+
+        Get the right value from config or the existing dhcp_agent.ini file.
+        Existing has precedence. Attempt to default to "False" without
+        disrupting existing deployments. Handle existing deployments and
+        upgrades safely. See LP Bug#1831935
+
+        :returns: Value to use for ovs_use_veth setting
+        :rtype: Bool
+        """
+        _existing = self.get_existing_ovs_use_veth()
+        if _existing is not None:
+            return _existing
+
+        _config = self.parse_ovs_use_veth()
+        if _config is None:
+            # New better default
+            return False
+        else:
+            return _config
+
+
+EntityMac = collections.namedtuple('EntityMac', ['entity', 'mac'])
+
+
+def resolve_pci_from_mapping_config(config_key):
+    """Resolve local PCI devices from MAC addresses in mapping config.
+
+    Note that this function keeps a record of mac->PCI address lookups
+    in the local unit db as the devices will disappear from the system
+    once bound.
+
+    :param config_key: Configuration option key to parse data from
+    :type config_key: str
+    :returns: PCI device address to Tuple(entity, mac) map
+    :rtype: collections.OrderedDict[str,Tuple[str,str]]
+    """
+    devices = pci.PCINetDevices()
+    resolved_devices = collections.OrderedDict()
+    db = kv()
+    # Note that ``parse_data_port_mappings`` returns Dict regardless of input
+    for mac, entity in parse_data_port_mappings(config(config_key)).items():
+        pcidev = devices.get_device_from_mac(mac)
+        if pcidev:
+            # NOTE: store mac->pci allocation as post binding
+            #       it disappears from PCIDevices.
+            db.set(mac, pcidev.pci_address)
+            db.flush()
+
+        pci_address = db.get(mac)
+        if pci_address:
+            resolved_devices[pci_address] = EntityMac(entity, mac)
+
+    return resolved_devices
+
+
+class DPDKDeviceContext(OSContextGenerator):
+
+    def __init__(self, driver_key=None, bridges_key=None, bonds_key=None):
+        """Initialize DPDKDeviceContext.
+
+        :param driver_key: Key to use when retrieving driver config.
+ :type driver_key: str + :param bridges_key: Key to use when retrieving bridge config. + :type bridges_key: str + :param bonds_key: Key to use when retrieving bonds config. + :type bonds_key: str + """ + self.driver_key = driver_key or 'dpdk-driver' + self.bridges_key = bridges_key or 'data-port' + self.bonds_key = bonds_key or 'dpdk-bond-mappings' + + def __call__(self): + """Populate context. + + :returns: context + :rtype: Dict[str,Union[str,collections.OrderedDict[str,str]]] + """ + driver = config(self.driver_key) + if driver is None: + return {} + # Resolve PCI devices for both directly used devices (_bridges) + # and devices for use in dpdk bonds (_bonds) + pci_devices = resolve_pci_from_mapping_config(self.bridges_key) + pci_devices.update(resolve_pci_from_mapping_config(self.bonds_key)) + return {'devices': pci_devices, + 'driver': driver} + + +class OVSDPDKDeviceContext(OSContextGenerator): + + def __init__(self, bridges_key=None, bonds_key=None): + """Initialize OVSDPDKDeviceContext. + + :param bridges_key: Key to use when retrieving bridge config. + :type bridges_key: str + :param bonds_key: Key to use when retrieving bonds config. + :type bonds_key: str + """ + self.bridges_key = bridges_key or 'data-port' + self.bonds_key = bonds_key or 'dpdk-bond-mappings' + + @staticmethod + def _parse_cpu_list(cpulist): + """Parses a linux cpulist for a numa node + + :returns: list of cores + :rtype: List[int] + """ + cores = [] + if cpulist and re.match(r"^[0-9,\-^]*$", cpulist): + ranges = cpulist.split(',') + for cpu_range in ranges: + if "-" in cpu_range: + cpu_min_max = cpu_range.split('-') + cores += range(int(cpu_min_max[0]), + int(cpu_min_max[1]) + 1) + elif "^" in cpu_range: + cpu_rm = cpu_range.split('^') + cores.remove(int(cpu_rm[1])) + else: + cores.append(int(cpu_range)) + return cores + + def _numa_node_cores(self): + """Get map of numa node -> cpu core + + :returns: map of numa node -> cpu core + :rtype: Dict[str,List[int]] + """ + nodes = {} + node_regex = '/sys/devices/system/node/node*' + for node in glob.glob(node_regex): + index = node.lstrip('/sys/devices/system/node/node') + with open(os.path.join(node, 'cpulist')) as cpulist: + nodes[index] = self._parse_cpu_list(cpulist.read().strip()) + return nodes + + def cpu_mask(self): + """Get hex formatted CPU mask + The mask is based on using the first config:dpdk-socket-cores + cores of each NUMA node in the unit. + :returns: hex formatted CPU mask + :rtype: str + """ + num_cores = config('dpdk-socket-cores') + mask = 0 + for cores in self._numa_node_cores().values(): + for core in cores[:num_cores]: + mask = mask | 1 << core + return format(mask, '#04x') + + @classmethod + def pmd_cpu_mask(cls): + """Get hex formatted pmd CPU mask + + The mask is based on config:pmd-cpu-set. + :returns: hex formatted CPU mask + :rtype: str + """ + mask = 0 + cpu_list = cls._parse_cpu_list(config('pmd-cpu-set')) + if cpu_list: + for core in cpu_list: + mask = mask | 1 << core + return format(mask, '#x') + + def socket_memory(self): + """Formatted list of socket memory configuration per socket. + + :returns: socket memory configuration per socket. + :rtype: str + """ + lscpu_out = check_output( + ['lscpu', '-p=socket']).decode('UTF-8').strip() + sockets = set() + for line in lscpu_out.split('\n'): + try: + sockets.add(int(line)) + except ValueError: + # lscpu output is headed by comments so ignore them. 
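+                # e.g. 'lscpu -p=socket' output looks like (illustrative):
+                #   # The following is the parsable format, ...
+                #   0
+                #   0
+                #   1
+                # the leading comment line fails int() and is skipped here.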
+ pass + sm_size = config('dpdk-socket-memory') + mem_list = [str(sm_size) for _ in sockets] + if mem_list: + return ','.join(mem_list) + else: + return str(sm_size) + + def devices(self): + """List of PCI devices for use by DPDK + + :returns: List of PCI devices for use by DPDK + :rtype: collections.OrderedDict[str,str] + """ + pci_devices = resolve_pci_from_mapping_config(self.bridges_key) + pci_devices.update(resolve_pci_from_mapping_config(self.bonds_key)) + return pci_devices + + def _formatted_whitelist(self, flag): + """Flag formatted list of devices to whitelist + + :param flag: flag format to use + :type flag: str + :rtype: str + """ + whitelist = [] + for device in self.devices(): + whitelist.append(flag.format(device=device)) + return ' '.join(whitelist) + + def device_whitelist(self): + """Formatted list of devices to whitelist for dpdk + + using the old style '-w' flag + + :returns: devices to whitelist prefixed by '-w ' + :rtype: str + """ + return self._formatted_whitelist('-w {device}') + + def pci_whitelist(self): + """Formatted list of devices to whitelist for dpdk + + using the new style '--pci-whitelist' flag + + :returns: devices to whitelist prefixed by '--pci-whitelist ' + :rtype: str + """ + return self._formatted_whitelist('--pci-whitelist {device}') + + def __call__(self): + """Populate context. + + :returns: context + :rtype: Dict[str,Union[bool,str]] + """ + ctxt = {} + whitelist = self.device_whitelist() + if whitelist: + ctxt['dpdk_enabled'] = config('enable-dpdk') + ctxt['device_whitelist'] = self.device_whitelist() + ctxt['socket_memory'] = self.socket_memory() + ctxt['cpu_mask'] = self.cpu_mask() + ctxt['pmd_cpu_mask'] = self.pmd_cpu_mask() + return ctxt + + +class BridgePortInterfaceMap(object): + """Build a map of bridge ports and interfaces from charm configuration. + + NOTE: the handling of this detail in the charm is pre-deprecated. + + The long term goal is for network connectivity detail to be modelled in + the server provisioning layer (such as MAAS) which in turn will provide + a Netplan YAML description that will be used to drive Open vSwitch. + + Until we get to that reality the charm will need to configure this + detail based on application level configuration options. + + There is a established way of mapping interfaces to ports and bridges + in the ``neutron-openvswitch`` and ``neutron-gateway`` charms and we + will carry that forward. + + The relationship between bridge, port and interface(s). + +--------+ + | bridge | + +--------+ + | + +----------------+ + | port aka. bond | + +----------------+ + | | + +-+ +-+ + |i| |i| + |n| |n| + |t| |t| + |0| |N| + +-+ +-+ + """ + class interface_type(enum.Enum): + """Supported interface types. + + Supported interface types can be found in the ``iface_types`` column + in the ``Open_vSwitch`` table on a running system. + """ + dpdk = 'dpdk' + internal = 'internal' + system = 'system' + + def __str__(self): + """Return string representation of value. + + :returns: string representation of value. + :rtype: str + """ + return self.value + + def __init__(self, bridges_key=None, bonds_key=None, enable_dpdk_key=None, + global_mtu=None): + """Initialize map. 
+ + :param bridges_key: Name of bridge:interface/port map config key + (default: 'data-port') + :type bridges_key: Optional[str] + :param bonds_key: Name of port-name:interface map config key + (default: 'dpdk-bond-mappings') + :type bonds_key: Optional[str] + :param enable_dpdk_key: Name of DPDK toggle config key + (default: 'enable-dpdk') + :type enable_dpdk_key: Optional[str] + :param global_mtu: Set a MTU on all interfaces at map initialization. + + The default is to have Open vSwitch get this from the underlying + interface as set up by bare metal provisioning. + + Note that you can augment the MTU on an individual interface basis + like this: + + ifdatamap = bpi.get_ifdatamap(bridge, port) + ifdatamap = { + port: { + **ifdata, + **{'mtu-request': my_individual_mtu_map[port]}, + } + for port, ifdata in ifdatamap.items() + } + :type global_mtu: Optional[int] + """ + bridges_key = bridges_key or 'data-port' + bonds_key = bonds_key or 'dpdk-bond-mappings' + enable_dpdk_key = enable_dpdk_key or 'enable-dpdk' + self._map = collections.defaultdict( + lambda: collections.defaultdict(dict)) + self._ifname_mac_map = collections.defaultdict(list) + self._mac_ifname_map = {} + self._mac_pci_address_map = {} + + # First we iterate over the list of physical interfaces visible to the + # system and update interface name to mac and mac to interface name map + for ifname in list_nics(): + if not is_phy_iface(ifname): + continue + mac = get_nic_hwaddr(ifname) + self._ifname_mac_map[ifname] = [mac] + self._mac_ifname_map[mac] = ifname + + # check if interface is part of a linux bond + _bond_name = get_bond_master(ifname) + if _bond_name and _bond_name != ifname: + log('Add linux bond "{}" to map for physical interface "{}" ' + 'with mac "{}".'.format(_bond_name, ifname, mac), + level=DEBUG) + # for bonds we want to be able to get a list of the mac + # addresses for the physical interfaces the bond is made up of. + if self._ifname_mac_map.get(_bond_name): + self._ifname_mac_map[_bond_name].append(mac) + else: + self._ifname_mac_map[_bond_name] = [mac] + + # In light of the pre-deprecation notice in the docstring of this + # class we will expose the ability to configure OVS bonds as a + # DPDK-only feature, but generally use the data structures internally. + if config(enable_dpdk_key): + # resolve PCI address of interfaces listed in the bridges and bonds + # charm configuration options. Note that for already bound + # interfaces the helper will retrieve MAC address from the unit + # KV store as the information is no longer available in sysfs. 
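+            # e.g. (illustrative values) a 'data-port' value of
+            # "br-ex:a0:36:9f:dd:37:a8" resolves here to
+            # {'0000:05:00.0': EntityMac('br-ex', 'a0:36:9f:dd:37:a8')}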
+ _pci_bridge_mac = resolve_pci_from_mapping_config( + bridges_key) + _pci_bond_mac = resolve_pci_from_mapping_config( + bonds_key) + + for pci_address, bridge_mac in _pci_bridge_mac.items(): + if bridge_mac.mac in self._mac_ifname_map: + # if we already have the interface name in our map it is + # visible to the system and therefore not bound to DPDK + continue + ifname = 'dpdk-{}'.format( + hashlib.sha1( + pci_address.encode('UTF-8')).hexdigest()[:7]) + self._ifname_mac_map[ifname] = [bridge_mac.mac] + self._mac_ifname_map[bridge_mac.mac] = ifname + self._mac_pci_address_map[bridge_mac.mac] = pci_address + + for pci_address, bond_mac in _pci_bond_mac.items(): + # for bonds we want to be able to get a list of macs from + # the bond name and also get at the interface name made up + # of the hash of the PCI address + ifname = 'dpdk-{}'.format( + hashlib.sha1( + pci_address.encode('UTF-8')).hexdigest()[:7]) + self._ifname_mac_map[bond_mac.entity].append(bond_mac.mac) + self._mac_ifname_map[bond_mac.mac] = ifname + self._mac_pci_address_map[bond_mac.mac] = pci_address + + config_bridges = config(bridges_key) or '' + for bridge, ifname_or_mac in ( + pair.split(':', 1) + for pair in config_bridges.split()): + if ':' in ifname_or_mac: + try: + ifname = self.ifname_from_mac(ifname_or_mac) + except KeyError: + # The interface is destined for a different unit in the + # deployment. + continue + macs = [ifname_or_mac] + else: + ifname = ifname_or_mac + macs = self.macs_from_ifname(ifname_or_mac) + + portname = ifname + for mac in macs: + try: + pci_address = self.pci_address_from_mac(mac) + iftype = self.interface_type.dpdk + ifname = self.ifname_from_mac(mac) + except KeyError: + pci_address = None + iftype = self.interface_type.system + + self.add_interface( + bridge, portname, ifname, iftype, pci_address, global_mtu) + + if not macs: + # We have not mapped the interface and it is probably some sort + # of virtual interface. Our user have put it in the config with + # a purpose so let's carry out their wish. LP: #1884743 + log('Add unmapped interface from config: name "{}" bridge "{}"' + .format(ifname, bridge), + level=DEBUG) + self.add_interface( + bridge, ifname, ifname, self.interface_type.system, None, + global_mtu) + + def __getitem__(self, key): + """Provide a Dict-like interface, get value of item. + + :param key: Key to look up value from. + :type key: any + :returns: Value + :rtype: any + """ + return self._map.__getitem__(key) + + def __iter__(self): + """Provide a Dict-like interface, iterate over keys. + + :returns: Iterator + :rtype: Iterator[any] + """ + return self._map.__iter__() + + def __len__(self): + """Provide a Dict-like interface, measure the length of internal map. + + :returns: Length + :rtype: int + """ + return len(self._map) + + def items(self): + """Provide a Dict-like interface, iterate over items. + + :returns: Key Value pairs + :rtype: Iterator[any, any] + """ + return self._map.items() + + def keys(self): + """Provide a Dict-like interface, iterate over keys. 
+ + :returns: Iterator + :rtype: Iterator[any] + """ + return self._map.keys() + + def ifname_from_mac(self, mac): + """ + :returns: Name of interface + :rtype: str + :raises: KeyError + """ + return (get_bond_master(self._mac_ifname_map[mac]) or + self._mac_ifname_map[mac]) + + def macs_from_ifname(self, ifname): + """ + :returns: List of hardware address (MAC) of interface + :rtype: List[str] + :raises: KeyError + """ + return self._ifname_mac_map[ifname] + + def pci_address_from_mac(self, mac): + """ + :param mac: Hardware address (MAC) of interface + :type mac: str + :returns: PCI address of device associated with mac + :rtype: str + :raises: KeyError + """ + return self._mac_pci_address_map[mac] + + def add_interface(self, bridge, port, ifname, iftype, + pci_address, mtu_request): + """Add an interface to the map. + + :param bridge: Name of bridge on which the bond will be added + :type bridge: str + :param port: Name of port which will represent the bond on bridge + :type port: str + :param ifname: Name of interface that will make up the bonded port + :type ifname: str + :param iftype: Type of interface + :type iftype: BridgeBondMap.interface_type + :param pci_address: PCI address of interface + :type pci_address: Optional[str] + :param mtu_request: MTU to request for interface + :type mtu_request: Optional[int] + """ + self._map[bridge][port][ifname] = { + 'type': str(iftype), + } + if pci_address: + self._map[bridge][port][ifname].update({ + 'pci-address': pci_address, + }) + if mtu_request is not None: + self._map[bridge][port][ifname].update({ + 'mtu-request': str(mtu_request) + }) + + def get_ifdatamap(self, bridge, port): + """Get structure suitable for charmhelpers.contrib.network.ovs helpers. + + :param bridge: Name of bridge on which the port will be added + :type bridge: str + :param port: Name of port which will represent one or more interfaces + :type port: str + """ + for _bridge, _ports in self.items(): + for _port, _interfaces in _ports.items(): + if _bridge == bridge and _port == port: + ifdatamap = {} + for name, data in _interfaces.items(): + ifdatamap.update({ + name: { + 'type': data['type'], + }, + }) + if data.get('mtu-request') is not None: + ifdatamap[name].update({ + 'mtu_request': data['mtu-request'], + }) + if data.get('pci-address'): + ifdatamap[name].update({ + 'options': { + 'dpdk-devargs': data['pci-address'], + }, + }) + return ifdatamap + + +class BondConfig(object): + """Container and helpers for bond configuration options. + + Data is put into a dictionary and a convenient config get interface is + provided. + """ + + DEFAULT_LACP_CONFIG = { + 'mode': 'balance-tcp', + 'lacp': 'active', + 'lacp-time': 'fast' + } + ALL_BONDS = 'ALL_BONDS' + + BOND_MODES = ['active-backup', 'balance-slb', 'balance-tcp'] + BOND_LACP = ['active', 'passive', 'off'] + BOND_LACP_TIME = ['fast', 'slow'] + + def __init__(self, config_key=None): + """Parse specified configuration option. 
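+
+        The value is expected to be a space separated list of
+        [bond]:[mode]:[lacp]:[lacp-time] entries, where an empty bond
+        name applies to all bonds, e.g. (illustrative)::
+
+            dpdk-bond0:balance-slb:off:slow :active-backup::fast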
+
+        :param config_key: Configuration key to retrieve data from
+                           (default: ``dpdk-bond-config``)
+        :type config_key: Optional[str]
+        """
+        self.config_key = config_key or 'dpdk-bond-config'
+
+        self.lacp_config = {
+            self.ALL_BONDS: copy.deepcopy(self.DEFAULT_LACP_CONFIG)
+        }
+
+        lacp_config = config(self.config_key)
+        if lacp_config:
+            lacp_config_map = lacp_config.split()
+            for entry in lacp_config_map:
+                bond, entry = entry.partition(':')[0:3:2]
+                if not bond:
+                    bond = self.ALL_BONDS
+
+                mode, entry = entry.partition(':')[0:3:2]
+                if not mode:
+                    mode = self.DEFAULT_LACP_CONFIG['mode']
+                assert mode in self.BOND_MODES, \
+                    "Bond mode {} is invalid".format(mode)
+
+                lacp, entry = entry.partition(':')[0:3:2]
+                if not lacp:
+                    lacp = self.DEFAULT_LACP_CONFIG['lacp']
+                assert lacp in self.BOND_LACP, \
+                    "Bond lacp {} is invalid".format(lacp)
+
+                lacp_time, entry = entry.partition(':')[0:3:2]
+                if not lacp_time:
+                    lacp_time = self.DEFAULT_LACP_CONFIG['lacp-time']
+                assert lacp_time in self.BOND_LACP_TIME, \
+                    "Bond lacp-time {} is invalid".format(lacp_time)
+
+                self.lacp_config[bond] = {
+                    'mode': mode,
+                    'lacp': lacp,
+                    'lacp-time': lacp_time
+                }
+
+    def get_bond_config(self, bond):
+        """Get the LACP configuration for a bond
+
+        :param bond: the bond name
+        :return: a dictionary with the configuration of the bond
+        :rtype: Dict[str,Dict[str,str]]
+        """
+        return self.lacp_config.get(bond, self.lacp_config[self.ALL_BONDS])
+
+    def get_ovs_portdata(self, bond):
+        """Get structure suitable for charmhelpers.contrib.network.ovs helpers.
+
+        :param bond: the bond name
+        :return: a dictionary with the configuration of the bond
+        :rtype: Dict[str,Union[str,Dict[str,str]]]
+        """
+        bond_config = self.get_bond_config(bond)
+        return {
+            'bond_mode': bond_config['mode'],
+            'lacp': bond_config['lacp'],
+            'other_config': {
+                'lacp-time': bond_config['lacp-time'],
+            },
+        }
+
+
+class SRIOVContext(OSContextGenerator):
+    """Provide context for configuring SR-IOV devices."""
+
+    class sriov_config_mode(enum.Enum):
+        """Mode in which SR-IOV is configured.
+
+        The configuration option identified by the ``numvfs_key`` parameter
+        is overloaded and defines in which mode the charm should interpret
+        the other SR-IOV-related configuration options.
+        """
+        auto = 'auto'
+        blanket = 'blanket'
+        explicit = 'explicit'
+
+    PCIDeviceNumVFs = collections.namedtuple(
+        'PCIDeviceNumVFs', ['device', 'numvfs'])
+
+    def _determine_numvfs(self, device, sriov_numvfs):
+        """Determine number of Virtual Functions (VFs) configured for device.
+
+        :param device: Object describing a PCI Network interface card (NIC).
+        :type device: contrib.hardware.pci.PCINetDevice
+        :param sriov_numvfs: Number of VFs requested for blanket configuration.
+        :type sriov_numvfs: int
+        :returns: Number of VFs to configure for device
+        :rtype: Optional[int]
+        """
+
+        def _get_capped_numvfs(requested):
+            """Get a number of VFs that does not exceed individual card limits.
+
+            Depending on the make and model of NIC, the number of VFs
+            supported varies. Requesting more VFs than a card supports
+            would be a fatal error, so cap the requested number at the
+            total number of VFs each individual card supports.
+
+            :param requested: Number of VFs requested
+            :type requested: int
+            :returns: Number of VFs allowed
+            :rtype: int
+            """
+            actual = min(int(requested), int(device.sriov_totalvfs))
+            if actual < int(requested):
+                log('Requested VFs ({}) too high for device {}.
Falling back ' + 'to value supported by device: {}' + .format(requested, device.interface_name, + device.sriov_totalvfs), + level=WARNING) + return actual + + if self._sriov_config_mode == self.sriov_config_mode.auto: + # auto-mode + # + # If device mapping configuration is present, return information + # on cards with mapping. + # + # If no device mapping configuration is present, return information + # for all cards. + # + # The maximum number of VFs supported by card will be used. + if (self._sriov_mapped_devices and + device.interface_name not in self._sriov_mapped_devices): + log('SR-IOV configured in auto mode: No device mapping for {}' + .format(device.interface_name), + level=DEBUG) + return + return _get_capped_numvfs(device.sriov_totalvfs) + elif self._sriov_config_mode == self.sriov_config_mode.blanket: + # blanket-mode + # + # User has specified a number of VFs that should apply to all + # cards with support for VFs. + return _get_capped_numvfs(sriov_numvfs) + elif self._sriov_config_mode == self.sriov_config_mode.explicit: + # explicit-mode + # + # User has given a list of interface names and associated number of + # VFs + if device.interface_name not in self._sriov_config_devices: + log('SR-IOV configured in explicit mode: No device:numvfs ' + 'pair for device {}, skipping.' + .format(device.interface_name), + level=DEBUG) + return + return _get_capped_numvfs( + self._sriov_config_devices[device.interface_name]) + else: + raise RuntimeError('This should not be reached') + + def __init__(self, numvfs_key=None, device_mappings_key=None): + """Initialize map from PCI devices and configuration options. + + :param numvfs_key: Config key for numvfs (default: 'sriov-numvfs') + :type numvfs_key: Optional[str] + :param device_mappings_key: Config key for device mappings + (default: 'sriov-device-mappings') + :type device_mappings_key: Optional[str] + :raises: RuntimeError + """ + numvfs_key = numvfs_key or 'sriov-numvfs' + device_mappings_key = device_mappings_key or 'sriov-device-mappings' + + devices = pci.PCINetDevices() + charm_config = config() + sriov_numvfs = charm_config.get(numvfs_key) or '' + sriov_device_mappings = charm_config.get(device_mappings_key) or '' + + # create list of devices from sriov_device_mappings config option + self._sriov_mapped_devices = [ + pair.split(':', 1)[1] + for pair in sriov_device_mappings.split() + ] + + # create map of device:numvfs from sriov_numvfs config option + self._sriov_config_devices = { + ifname: numvfs for ifname, numvfs in ( + pair.split(':', 1) for pair in sriov_numvfs.split() + if ':' in sriov_numvfs) + } + + # determine configuration mode from contents of sriov_numvfs + if sriov_numvfs == 'auto': + self._sriov_config_mode = self.sriov_config_mode.auto + elif sriov_numvfs.isdigit(): + self._sriov_config_mode = self.sriov_config_mode.blanket + elif ':' in sriov_numvfs: + self._sriov_config_mode = self.sriov_config_mode.explicit + else: + raise RuntimeError('Unable to determine mode of SR-IOV ' + 'configuration.') + + self._map = { + device.pci_address: self.PCIDeviceNumVFs( + device, self._determine_numvfs(device, sriov_numvfs)) + for device in devices.pci_devices + if device.sriov and + self._determine_numvfs(device, sriov_numvfs) is not None + } + + def __call__(self): + """Provide backward compatible SR-IOV context. + + :returns: Map interface name: min(configured, max) virtual functions. 
+ Example: + { + 'eth0': 16, + 'eth1': 32, + 'eth2': 64, + } + :rtype: Dict[str,int] + """ + return { + pcidnvfs.device.interface_name: pcidnvfs.numvfs + for _, pcidnvfs in self._map.items() + } + + @property + def get_map(self): + """Provide map of configured SR-IOV capable PCI devices. + + :returns: Map PCI-address: (PCIDevice, min(configured, max) VFs. + Example: + { + '0000:81:00.0': self.PCIDeviceNumVFs(, 32), + '0000:81:00.1': self.PCIDeviceNumVFs(, 32), + } + :rtype: Dict[str, self.PCIDeviceNumVFs] + """ + return self._map + + +class CephBlueStoreCompressionContext(OSContextGenerator): + """Ceph BlueStore compression options.""" + + # Tuple with Tuples that map configuration option name to CephBrokerRq op + # property name + options = ( + ('bluestore-compression-algorithm', + 'compression-algorithm'), + ('bluestore-compression-mode', + 'compression-mode'), + ('bluestore-compression-required-ratio', + 'compression-required-ratio'), + ('bluestore-compression-min-blob-size', + 'compression-min-blob-size'), + ('bluestore-compression-min-blob-size-hdd', + 'compression-min-blob-size-hdd'), + ('bluestore-compression-min-blob-size-ssd', + 'compression-min-blob-size-ssd'), + ('bluestore-compression-max-blob-size', + 'compression-max-blob-size'), + ('bluestore-compression-max-blob-size-hdd', + 'compression-max-blob-size-hdd'), + ('bluestore-compression-max-blob-size-ssd', + 'compression-max-blob-size-ssd'), + ) + + def __init__(self): + """Initialize context by loading values from charm config. + + We keep two maps, one suitable for use with CephBrokerRq's and one + suitable for template generation. + """ + charm_config = config() + + # CephBrokerRq op map + self.op = {} + # Context exposed for template generation + self.ctxt = {} + for config_key, op_key in self.options: + value = charm_config.get(config_key) + self.ctxt.update({config_key.replace('-', '_'): value}) + self.op.update({op_key: value}) + + def __call__(self): + """Get context. + + :returns: Context + :rtype: Dict[str,any] + """ + return self.ctxt + + def get_op(self): + """Get values for use in CephBrokerRq op. + + :returns: Context values with CephBrokerRq op property name as key. + :rtype: Dict[str,any] + """ + return self.op + + def get_kwargs(self): + """Get values for use as keyword arguments. + + :returns: Context values with key suitable for use as kwargs to + CephBrokerRq add_op_create_*_pool methods. + :rtype: Dict[str,any] + """ + return { + k.replace('-', '_'): v + for k, v in self.op.items() + } + + def validate(self): + """Validate options. + + :raises: AssertionError + """ + # We slip in a dummy name on class instantiation to allow validation of + # the other options. It will not affect further use. + # + # NOTE: once we retire Python 3.5 we can fold this into a in-line + # dictionary comprehension in the call to the initializer. + dummy_op = {'name': 'dummy-name'} + dummy_op.update(self.op) + pool = ch_ceph.BasePool('dummy-service', op=dummy_op) + pool.validate() diff --git a/ceph-osd/hooks/charmhelpers/contrib/openstack/deferred_events.py b/ceph-osd/hooks/charmhelpers/contrib/openstack/deferred_events.py new file mode 100644 index 00000000..4c46e41a --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/openstack/deferred_events.py @@ -0,0 +1,418 @@ +# Copyright 2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module for managing deferred service events. + +This module is used to manage deferred service events from both charm actions +and package actions. +""" + +import datetime +import glob +import yaml +import os +import time +import uuid + +import charmhelpers.contrib.openstack.policy_rcd as policy_rcd +import charmhelpers.core.hookenv as hookenv +import charmhelpers.core.host as host +import charmhelpers.core.unitdata as unitdata + +import subprocess + + +# Deferred events generated from the charm are stored along side those +# generated from packaging. +DEFERRED_EVENTS_DIR = policy_rcd.POLICY_DEFERRED_EVENTS_DIR + + +class ServiceEvent(): + + def __init__(self, timestamp, service, reason, action, + policy_requestor_name=None, policy_requestor_type=None): + self.timestamp = timestamp + self.service = service + self.reason = reason + self.action = action + if policy_requestor_name: + self.policy_requestor_name = policy_requestor_name + else: + self.policy_requestor_name = hookenv.service_name() + if policy_requestor_type: + self.policy_requestor_type = policy_requestor_type + else: + self.policy_requestor_type = 'charm' + + def __eq__(self, other): + for attr in vars(self): + if getattr(self, attr) != getattr(other, attr): + return False + return True + + def matching_request(self, other): + for attr in ['service', 'action', 'reason']: + if getattr(self, attr) != getattr(other, attr): + return False + return True + + @classmethod + def from_dict(cls, data): + return cls( + data['timestamp'], + data['service'], + data['reason'], + data['action'], + data.get('policy_requestor_name'), + data.get('policy_requestor_type')) + + +def deferred_events_files(): + """Deferred event files + + Deferred event files that were generated by service_name() policy. + + :returns: Deferred event files + :rtype: List[str] + """ + return glob.glob('{}/*.deferred'.format(DEFERRED_EVENTS_DIR)) + + +def read_event_file(file_name): + """Read a file and return the corresponding objects. + + :param file_name: Name of file to read. + :type file_name: str + :returns: ServiceEvent from file. + :rtype: ServiceEvent + """ + with open(file_name, 'r') as f: + contents = yaml.safe_load(f) + event = ServiceEvent( + contents['timestamp'], + contents['service'], + contents['reason'], + contents['action'], + policy_requestor_name=contents.get('policy_requestor_name'), + policy_requestor_type=contents.get('policy_requestor_type')) + return event + + +def deferred_events(): + """Get list of deferred events. + + List of deferred events. Events are represented by dicts of the form: + + { + action: restart, + policy_requestor_name: neutron-openvswitch, + policy_requestor_type: charm, + reason: 'Pkg update', + service: openvswitch-switch, + time: 1614328743} + + :returns: List of deferred events. 
+ :rtype: List[ServiceEvent] + """ + events = [] + for defer_file in deferred_events_files(): + event = read_event_file(defer_file) + if event.policy_requestor_name == hookenv.service_name(): + events.append((defer_file, event)) + return events + + +def duplicate_event_files(event): + """Get list of event files that have equivalent deferred events. + + :param event: Event to compare + :type event: ServiceEvent + :returns: List of event files + :rtype: List[str] + """ + duplicates = [] + for event_file, existing_event in deferred_events(): + if event.matching_request(existing_event): + duplicates.append(event_file) + return duplicates + + +def get_event_record_file(policy_requestor_type, policy_requestor_name): + """Generate filename for storing a new event. + + :param policy_requestor_type: System that blocked event + :type policy_requestor_type: str + :param policy_requestor_name: Name of application that blocked event + :type policy_requestor_name: str + :returns: File name + :rtype: str + """ + file_name = '{}/{}-{}-{}.deferred'.format( + DEFERRED_EVENTS_DIR, + policy_requestor_type, + policy_requestor_name, + uuid.uuid1()) + return file_name + + +def save_event(event): + """Write deferred events to backend. + + :param event: Event to save + :type event: ServiceEvent + """ + requestor_name = hookenv.service_name() + requestor_type = 'charm' + init_policy_log_dir() + if duplicate_event_files(event): + hookenv.log( + "Not writing new event, existing event found. {} {} {}".format( + event.service, + event.action, + event.reason), + level="DEBUG") + else: + record_file = get_event_record_file( + policy_requestor_type=requestor_type, + policy_requestor_name=requestor_name) + + with open(record_file, 'w') as f: + data = { + 'timestamp': event.timestamp, + 'service': event.service, + 'action': event.action, + 'reason': event.reason, + 'policy_requestor_type': requestor_type, + 'policy_requestor_name': requestor_name} + yaml.dump(data, f) + + +def clear_deferred_events(svcs, action): + """Remove any outstanding deferred events. + + Remove a deferred event if its service is in the services list and its + action matches. + + :param svcs: List of services to remove. + :type svcs: List[str] + :param action: Action to remove + :type action: str + """ + # XXX This function is not currently processing the action. It needs to + # match the action and also take account of try-restart and the + # equivalnce of stop-start and restart. + for defer_file in deferred_events_files(): + deferred_event = read_event_file(defer_file) + if deferred_event.service in svcs: + os.remove(defer_file) + + +def init_policy_log_dir(): + """Ensure directory to store events exists.""" + if not os.path.exists(DEFERRED_EVENTS_DIR): + os.mkdir(DEFERRED_EVENTS_DIR) + + +def get_deferred_events(): + """Return a list of deferred events requested by the charm and packages. + + :returns: List of deferred events + :rtype: List[ServiceEvent] + """ + events = [] + for _, event in deferred_events(): + events.append(event) + return events + + +def get_deferred_restarts(): + """List of deferred restart events requested by the charm and packages. + + :returns: List of deferred restarts + :rtype: List[ServiceEvent] + """ + return [e for e in get_deferred_events() if e.action == 'restart'] + + +def clear_deferred_restarts(services): + """Clear deferred restart events targeted at `services`. + + :param services: Services with deferred actions to clear. 
+    :type services: List[str]
+    """
+    clear_deferred_events(services, 'restart')
+
+
+def process_svc_restart(service):
+    """Respond to a service restart having occurred.
+
+    :param service: Service that the action was performed against.
+    :type service: str
+    """
+    clear_deferred_restarts([service])
+
+
+def is_restart_permitted():
+    """Check whether restarts are permitted.
+
+    :returns: Whether restarts are permitted
+    :rtype: bool
+    """
+    if hookenv.config('enable-auto-restarts') is None:
+        return True
+    return hookenv.config('enable-auto-restarts')
+
+
+def check_and_record_restart_request(service, changed_files):
+    """Check if restarts are permitted; if they are not, log the request.
+
+    :param service: Service to be restarted
+    :type service: str
+    :param changed_files: Files that have changed to trigger restarts.
+    :type changed_files: List[str]
+    :returns: Whether restarts are permitted
+    :rtype: bool
+    """
+    changed_files = sorted(list(set(changed_files)))
+    permitted = is_restart_permitted()
+    if not permitted:
+        save_event(ServiceEvent(
+            timestamp=round(time.time()),
+            service=service,
+            reason='File(s) changed: {}'.format(
+                ', '.join(changed_files)),
+            action='restart'))
+    return permitted
+
+
+def deferrable_svc_restart(service, reason=None):
+    """Restart the service if permitted; if not, defer the restart.
+
+    :param service: Service to be restarted
+    :type service: str
+    :param reason: Reason for restart
+    :type reason: Union[str, None]
+    """
+    if is_restart_permitted():
+        host.service_restart(service)
+    else:
+        save_event(ServiceEvent(
+            timestamp=round(time.time()),
+            service=service,
+            reason=reason,
+            action='restart'))
+
+
+def configure_deferred_restarts(services):
+    """Set up deferred restarts.
+
+    :param services: Services to block restarts of.
+    :type services: List[str]
+    """
+    policy_rcd.install_policy_rcd()
+    if is_restart_permitted():
+        policy_rcd.remove_policy_file()
+    else:
+        blocked_actions = ['stop', 'restart', 'try-restart']
+        for svc in services:
+            policy_rcd.add_policy_block(svc, blocked_actions)
+
+
+def get_service_start_time(service):
+    """Find point in time when the systemd unit transitioned to active state.
+
+    :param service: Service to check the start timestamp of.
+    :type service: str
+    :returns: Time the unit entered the active state, or None if unknown.
+    :rtype: Optional[datetime.datetime]
+    """
+    start_time = None
+    out = subprocess.check_output(
+        [
+            'systemctl',
+            'show',
+            service,
+            '--property=ActiveEnterTimestamp'])
+    str_time = out.decode().rstrip().replace('ActiveEnterTimestamp=', '')
+    if str_time:
+        start_time = datetime.datetime.strptime(
+            str_time,
+            '%a %Y-%m-%d %H:%M:%S %Z')
+    return start_time
+
+
+def check_restart_timestamps():
+    """Check deferred restarts against systemd unit start times.
+
+    Check if a service has a deferred event and clear it if the service has
+    been restarted since the restart was requested.
+    """
+    for event in get_deferred_restarts():
+        start_time = get_service_start_time(event.service)
+        deferred_restart_time = datetime.datetime.fromtimestamp(
+            event.timestamp)
+        if start_time and start_time < deferred_restart_time:
+            hookenv.log(
+                ("Restart still required, {} was started at {}, restart was "
+                 "requested after that at {}").format(
+                    event.service,
+                    start_time,
+                    deferred_restart_time),
+                level='DEBUG')
+        else:
+            clear_deferred_restarts([event.service])
+
+
+def set_deferred_hook(hookname):
+    """Record that a hook has been deferred.
+
+    :param hookname: Name of hook that was deferred.
+ :type hookname: str + """ + with unitdata.HookData()() as t: + kv = t[0] + deferred_hooks = kv.get('deferred-hooks', []) + if hookname not in deferred_hooks: + deferred_hooks.append(hookname) + kv.set('deferred-hooks', sorted(list(set(deferred_hooks)))) + + +def get_deferred_hooks(): + """Get a list of deferred hooks. + + :returns: List of hook names. + :rtype: List[str] + """ + with unitdata.HookData()() as t: + kv = t[0] + return kv.get('deferred-hooks', []) + + +def clear_deferred_hooks(): + """Clear any deferred hooks.""" + with unitdata.HookData()() as t: + kv = t[0] + kv.set('deferred-hooks', []) + + +def clear_deferred_hook(hookname): + """Clear a specific deferred hooks. + + :param hookname: Name of hook to remove. + :type hookname: str + """ + with unitdata.HookData()() as t: + kv = t[0] + deferred_hooks = kv.get('deferred-hooks', []) + if hookname in deferred_hooks: + deferred_hooks.remove(hookname) + kv.set('deferred-hooks', deferred_hooks) diff --git a/ceph-osd/hooks/charmhelpers/contrib/openstack/exceptions.py b/ceph-osd/hooks/charmhelpers/contrib/openstack/exceptions.py new file mode 100644 index 00000000..b2330637 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/openstack/exceptions.py @@ -0,0 +1,26 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class OSContextError(Exception): + """Raised when an error occurs during context generation. + + This exception is principally used in contrib.openstack.context + """ + pass + + +class ServiceActionError(Exception): + """Raised when a service action (stop/start/ etc) failed.""" + pass diff --git a/ceph-osd/hooks/charmhelpers/contrib/openstack/files/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/openstack/files/__init__.py new file mode 100644 index 00000000..9df5f746 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/openstack/files/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# dummy __init__.py to fool syncer into thinking this is a syncable python +# module diff --git a/ceph-osd/hooks/charmhelpers/contrib/openstack/files/check_deferred_restarts.py b/ceph-osd/hooks/charmhelpers/contrib/openstack/files/check_deferred_restarts.py new file mode 100755 index 00000000..5f392b3c --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/openstack/files/check_deferred_restarts.py @@ -0,0 +1,128 @@ +#!/usr/bin/python3 + +# Copyright 2014-2022 Canonical Limited. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Checks for services with deferred restarts. + +This Nagios check will parse /var/lib/policy-rd.d/ +to find any restarts that are currently deferred. +""" + +import argparse +import glob +import sys +import yaml + + +DEFERRED_EVENTS_DIR = '/var/lib/policy-rc.d' + + +def get_deferred_events(): + """Return a list of deferred events dicts from policy-rc.d files. + + Events are read from DEFERRED_EVENTS_DIR and are of the form: + { + action: restart, + policy_requestor_name: rabbitmq-server, + policy_requestor_type: charm, + reason: 'Pkg update', + service: rabbitmq-server, + time: 1614328743 + } + + :raises OSError: Raised in case of a system error while reading a policy file + :raises yaml.YAMLError: Raised if parsing a policy file fails + + :returns: List of deferred event dictionaries + :rtype: list + """ + deferred_events_files = glob.glob( + '{}/*.deferred'.format(DEFERRED_EVENTS_DIR)) + + deferred_events = [] + for event_file in deferred_events_files: + with open(event_file, 'r') as f: + event = yaml.safe_load(f) + deferred_events.append(event) + + return deferred_events + + +def get_deferred_restart_services(application=None): + """Returns a list of services with deferred restarts. + + :param str application: Name of the application that blocked the service restart. + If application is None, all services with deferred restarts + are returned. Services which are blocked by a non-charm + requestor are always returned. + + :raises OSError: Raised in case of a system error while reading a policy file + :raises yaml.YAMLError: Raised if parsing a policy file fails + + :returns: List of services with deferred restarts belonging to application. 
+    :rtype: list
+    """
+
+    deferred_restart_events = filter(
+        lambda e: e['action'] == 'restart', get_deferred_events())
+
+    deferred_restart_services = set()
+    for restart_event in deferred_restart_events:
+        if application:
+            if (
+                restart_event['policy_requestor_type'] != 'charm' or
+                restart_event['policy_requestor_type'] == 'charm' and
+                restart_event['policy_requestor_name'] == application
+            ):
+                deferred_restart_services.add(restart_event['service'])
+        else:
+            deferred_restart_services.add(restart_event['service'])
+
+    return list(deferred_restart_services)
+
+
+def main():
+    """Check for services with deferred restarts."""
+    parser = argparse.ArgumentParser(
+        description='Check for services with deferred restarts')
+    parser.add_argument(
+        '--application', help='Check services belonging to this application only')
+
+    args = parser.parse_args()
+
+    services = set(get_deferred_restart_services(args.application))
+
+    if len(services) == 0:
+        print('OK: No deferred service restarts.')
+        sys.exit(0)
+    else:
+        print(
+            'CRITICAL: Restarts are deferred for services: {}.'.format(', '.join(services)))
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    try:
+        main()
+    except OSError as e:
+        print('CRITICAL: A system error occurred: {} ({})'.format(e.errno, e.strerror))
+        sys.exit(1)
+    except yaml.YAMLError as e:
+        print('CRITICAL: Failed to parse a policy file: {}'.format(str(e)))
+        sys.exit(1)
+    except Exception as e:
+        print('CRITICAL: An unknown error occurred: {}'.format(str(e)))
+        sys.exit(1)
diff --git a/ceph-osd/hooks/charmhelpers/contrib/openstack/files/policy_rc_d_script.py b/ceph-osd/hooks/charmhelpers/contrib/openstack/files/policy_rc_d_script.py
new file mode 100755
index 00000000..431e972b
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/openstack/files/policy_rc_d_script.py
@@ -0,0 +1,196 @@
+#!/usr/bin/env python3
+
+"""This script is an implementation of policy-rc.d
+
+For further information on policy-rc.d see *1
+
+*1 https://people.debian.org/~hmh/invokerc.d-policyrc.d-specification.txt
+"""
+import collections
+import glob
+import os
+import logging
+import sys
+import time
+import uuid
+import yaml
+
+
+SystemPolicy = collections.namedtuple(
+    'SystemPolicy',
+    [
+        'policy_requestor_name',
+        'policy_requestor_type',
+        'service',
+        'blocked_actions'])
+
+DEFAULT_POLICY_CONFIG_DIR = '/etc/policy-rc.d'
+DEFAULT_POLICY_LOG_DIR = '/var/lib/policy-rc.d'
+
+
+def read_policy_file(policy_file):
+    """Return system policies from given file.
+
+    :param policy_file: Path of the policy file to read.
+    :type policy_file: str
+    :returns: Policies
+    :rtype: List[SystemPolicy]
+    """
+    policies = []
+    if os.path.exists(policy_file):
+        with open(policy_file, 'r') as f:
+            policy = yaml.safe_load(f)
+            for service, actions in policy['blocked_actions'].items():
+                service = service.replace('.service', '')
+                policies.append(SystemPolicy(
+                    policy_requestor_name=policy['policy_requestor_name'],
+                    policy_requestor_type=policy['policy_requestor_type'],
+                    service=service,
+                    blocked_actions=actions))
+    return policies
+
+
+def get_policies(policy_config_dir):
+    """Return all system policies in policy_config_dir.
+
+    :param policy_config_dir: Directory containing policy files.
+    :type policy_config_dir: str
+    :returns: Policies
+    :rtype: List[SystemPolicy]
+    """
+    _policy = []
+    for f in glob.glob('{}/*.policy'.format(policy_config_dir)):
+        _policy.extend(read_policy_file(f))
+    return _policy
+
+
+def record_blocked_action(service, action, blocking_policies, policy_log_dir):
+    """Record that an action was requested but denied.
+
+    :param service: Service that was blocked
+    :type service: str
+    :param action: Action that was blocked.
+    :type action: str
+    :param blocking_policies: Policies that blocked the action on the service.
+    :type blocking_policies: List[SystemPolicy]
+    :param policy_log_dir: Directory to place the blocking action record.
+    :type policy_log_dir: str
+    """
+    if not os.path.exists(policy_log_dir):
+        os.mkdir(policy_log_dir)
+    seconds = round(time.time())
+    for policy in blocking_policies:
+        file_name = '{}/{}-{}-{}.deferred'.format(
+            policy_log_dir,
+            policy.policy_requestor_type,
+            policy.policy_requestor_name,
+            uuid.uuid1())
+        with open(file_name, 'w') as f:
+            data = {
+                'timestamp': seconds,
+                'service': service,
+                'action': action,
+                'reason': 'Package update',
+                'policy_requestor_type': policy.policy_requestor_type,
+                'policy_requestor_name': policy.policy_requestor_name}
+            yaml.dump(data, f)
+
+
+def get_blocking_policies(service, action, policy_config_dir):
+    """Get the policies that block the requested action on the service.
+
+    :param service: Service that action is requested against.
+    :type service: str
+    :param action: Action that is requested.
+    :type action: str
+    :param policy_config_dir: Directory that stores policy files.
+    :type policy_config_dir: str
+    :returns: Policies
+    :rtype: List[SystemPolicy]
+    """
+    service = service.replace('.service', '')
+    blocking_policies = [
+        policy
+        for policy in get_policies(policy_config_dir)
+        if policy.service == service and action in policy.blocked_actions]
+    return blocking_policies
+
+
+def process_action_request(service, action, policy_config_dir, policy_log_dir):
+    """Check if the requested action against the service is permitted.
+
+    :param service: Service that action is requested against.
+    :type service: str
+    :param action: Action that is requested.
+    :type action: str
+    :param policy_config_dir: Directory that stores policy files.
+    :type policy_config_dir: str
+    :param policy_log_dir: Directory that stores blocked action records.
+    :type policy_log_dir: str
+    :returns: Tuple of whether the action is permitted and explanation.
+ :rtype: (boolean, str) + """ + blocking_policies = get_blocking_policies( + service, + action, + policy_config_dir) + if blocking_policies: + policy_msg = [ + '{} {}'.format(p.policy_requestor_type, p.policy_requestor_name) + for p in sorted(blocking_policies)] + message = '{} of {} blocked by {}'.format( + action, + service, + ', '.join(policy_msg)) + record_blocked_action( + service, + action, + blocking_policies, + policy_log_dir) + action_permitted = False + else: + message = "Permitting {} {}".format(service, action) + action_permitted = True + return action_permitted, message + + +def main(): + logging.basicConfig( + filename='/var/log/policy-rc.d.log', + level=logging.DEBUG, + format='%(asctime)s %(message)s') + + service = sys.argv[1] + action = sys.argv[2] + + permitted, message = process_action_request( + service, + action, + DEFAULT_POLICY_CONFIG_DIR, + DEFAULT_POLICY_LOG_DIR) + logging.info(message) + + # https://people.debian.org/~hmh/invokerc.d-policyrc.d-specification.txt + # Exit status codes: + # 0 - action allowed + # 1 - unknown action (therefore, undefined policy) + # 100 - unknown initscript id + # 101 - action forbidden by policy + # 102 - subsystem error + # 103 - syntax error + # 104 - [reserved] + # 105 - behaviour uncertain, policy undefined. + # 106 - action not allowed. Use the returned fallback actions + # (which are implied to be "allowed") instead. + + if permitted: + return 0 + else: + return 101 + + +if __name__ == "__main__": + rc = main() + sys.exit(rc) diff --git a/ceph-osd/hooks/charmhelpers/contrib/openstack/ha/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/openstack/ha/__init__.py new file mode 100644 index 00000000..9b088de8 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/openstack/ha/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-osd/hooks/charmhelpers/contrib/openstack/ha/utils.py b/ceph-osd/hooks/charmhelpers/contrib/openstack/ha/utils.py new file mode 100644 index 00000000..b4912c42 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/openstack/ha/utils.py @@ -0,0 +1,377 @@ +# Copyright 2014-2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# Copyright 2016 Canonical Ltd. +# +# Authors: +# Openstack Charmers < +# + +""" +Helpers for high availability. 
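+
+A minimal usage sketch (illustrative; ``generate_ha_relation_data`` and
+the ``json_`` prefixed keys are defined later in this module)::
+
+    from charmhelpers.contrib.openstack.ha.utils import (
+        generate_ha_relation_data,
+    )
+
+    # Returns JSON encoded settings for the 'ha' relation, e.g.
+    # {'json_resources': '{"res_nova_haproxy":"lsb:haproxy"}', ...}
+    data = generate_ha_relation_data('nova')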
+""" + +import hashlib +import json +import os + +import re + +from charmhelpers.core.hookenv import ( + expected_related_units, + log, + relation_set, + charm_name, + config, + status_set, + DEBUG, + application_name, +) + +from charmhelpers.core.host import ( + lsb_release +) + +from charmhelpers.contrib.openstack.ip import ( + resolve_address, + is_ipv6, +) + +from charmhelpers.contrib.network.ip import ( + get_iface_for_address, + get_netmask_for_address, +) + +from charmhelpers.contrib.hahelpers.cluster import ( + get_hacluster_config +) + +JSON_ENCODE_OPTIONS = dict( + sort_keys=True, + allow_nan=False, + indent=None, + separators=(',', ':'), +) + +VIP_GROUP_NAME = 'grp_{service}_vips' +DNSHA_GROUP_NAME = 'grp_{service}_hostnames' +HAPROXY_DASHBOARD_RESOURCE = "haproxy-dashboard" + + +class DNSHAException(Exception): + """Raised when an error occurs setting up DNS HA + """ + + pass + + +def update_dns_ha_resource_params(resources, resource_params, + relation_id=None, + crm_ocf='ocf:maas:dns'): + """ Configure DNS-HA resources based on provided configuration and + update resource dictionaries for the HA relation. + + @param resources: Pointer to dictionary of resources. + Usually instantiated in ha_joined(). + @param resource_params: Pointer to dictionary of resource parameters. + Usually instantiated in ha_joined() + @param relation_id: Relation ID of the ha relation + @param crm_ocf: Corosync Open Cluster Framework resource agent to use for + DNS HA + """ + _relation_data = {'resources': {}, 'resource_params': {}} + update_hacluster_dns_ha(charm_name(), + _relation_data, + crm_ocf) + resources.update(_relation_data['resources']) + resource_params.update(_relation_data['resource_params']) + relation_set(relation_id=relation_id, groups=_relation_data['groups']) + + +def assert_charm_supports_dns_ha(): + """Validate prerequisites for DNS HA + The MAAS client is only available on Xenial or greater + + :raises DNSHAException: if release is < 16.04 + """ + if lsb_release().get('DISTRIB_RELEASE') < '16.04': + msg = ('DNS HA is only supported on 16.04 and greater ' + 'versions of Ubuntu.') + status_set('blocked', msg) + raise DNSHAException(msg) + return True + + +def expect_ha(): + """ Determine if the unit expects to be in HA + + Check juju goal-state if ha relation is expected, check for VIP or dns-ha + settings which indicate the unit should expect to be related to hacluster. + + @returns boolean + """ + ha_related_units = [] + try: + ha_related_units = list(expected_related_units(reltype='ha')) + except (NotImplementedError, KeyError): + pass + return len(ha_related_units) > 0 or config('vip') or config('dns-ha') + + +def generate_ha_relation_data(service, + extra_settings=None, + haproxy_enabled=True): + """ Generate relation data for ha relation + + Based on configuration options and unit interfaces, generate a json + encoded dict of relation data items for the hacluster relation, + providing configuration for DNS HA or VIP's + haproxy clone sets. 
+ + Example of supplying additional settings:: + + COLO_CONSOLEAUTH = 'inf: res_nova_consoleauth grp_nova_vips' + AGENT_CONSOLEAUTH = 'ocf:openstack:nova-consoleauth' + AGENT_CA_PARAMS = 'op monitor interval="5s"' + + ha_console_settings = { + 'colocations': {'vip_consoleauth': COLO_CONSOLEAUTH}, + 'init_services': {'res_nova_consoleauth': 'nova-consoleauth'}, + 'resources': {'res_nova_consoleauth': AGENT_CONSOLEAUTH}, + 'resource_params': {'res_nova_consoleauth': AGENT_CA_PARAMS}) + generate_ha_relation_data('nova', extra_settings=ha_console_settings) + + + @param service: Name of the service being configured + @param extra_settings: Dict of additional resource data + @returns dict: json encoded data for use with relation_set + """ + _relation_data = {'resources': {}, 'resource_params': {}} + + if haproxy_enabled: + _meta = 'meta migration-threshold="INFINITY" failure-timeout="5s"' + _haproxy_res = 'res_{}_haproxy'.format(service) + _relation_data['resources'] = {_haproxy_res: 'lsb:haproxy'} + _relation_data['resource_params'] = { + _haproxy_res: '{} op monitor interval="5s"'.format(_meta) + } + _relation_data['init_services'] = {_haproxy_res: 'haproxy'} + _relation_data['clones'] = { + 'cl_{}_haproxy'.format(service): _haproxy_res + } + + if extra_settings: + for k, v in extra_settings.items(): + if _relation_data.get(k): + _relation_data[k].update(v) + else: + _relation_data[k] = v + + if config('dns-ha'): + update_hacluster_dns_ha(service, _relation_data) + else: + update_hacluster_vip(service, _relation_data) + + return { + 'json_{}'.format(k): json.dumps(v, **JSON_ENCODE_OPTIONS) + for k, v in _relation_data.items() if v + } + + +def update_hacluster_dns_ha(service, relation_data, + crm_ocf='ocf:maas:dns'): + """ Configure DNS-HA resources based on provided configuration + + @param service: Name of the service being configured + @param relation_data: Pointer to dictionary of relation data. + @param crm_ocf: Corosync Open Cluster Framework resource agent to use for + DNS HA + """ + # Validate the charm environment for DNS HA + assert_charm_supports_dns_ha() + + settings = ['os-admin-hostname', 'os-internal-hostname', + 'os-public-hostname', 'os-access-hostname'] + + # Check which DNS settings are set and update dictionaries + hostname_group = [] + for setting in settings: + hostname = config(setting) + if hostname is None: + log('DNS HA: Hostname setting {} is None. Ignoring.' + ''.format(setting), + DEBUG) + continue + m = re.search('os-(.+?)-hostname', setting) + if m: + endpoint_type = m.group(1) + # resolve_address's ADDRESS_MAP uses 'int' not 'internal' + if endpoint_type == 'internal': + endpoint_type = 'int' + else: + msg = ('Unexpected DNS hostname setting: {}. ' + 'Cannot determine endpoint_type name' + ''.format(setting)) + status_set('blocked', msg) + raise DNSHAException(msg) + + hostname_key = 'res_{}_{}_hostname'.format(service, endpoint_type) + if hostname_key in hostname_group: + log('DNS HA: Resource {}: {} already exists in ' + 'hostname group - skipping'.format(hostname_key, hostname), + DEBUG) + continue + + hostname_group.append(hostname_key) + relation_data['resources'][hostname_key] = crm_ocf + relation_data['resource_params'][hostname_key] = ( + 'params fqdn="{}" ip_address="{}"' + .format(hostname, resolve_address(endpoint_type=endpoint_type, + override=False))) + + if len(hostname_group) >= 1: + log('DNS HA: Hostname group is set with {} as members. 
' + 'Informing the ha relation'.format(' '.join(hostname_group)), + DEBUG) + relation_data['groups'] = { + DNSHA_GROUP_NAME.format(service=service): ' '.join(hostname_group) + } + else: + msg = 'DNS HA: Hostname group has no members.' + status_set('blocked', msg) + raise DNSHAException(msg) + + +def get_vip_settings(vip): + """Calculate which nic is on the correct network for the given vip. + + If nic or netmask discovery fail then fallback to using charm supplied + config. If fallback is used this is indicated via the fallback variable. + + @param vip: VIP to lookup nic and cidr for. + @returns (str, str, bool): eg (iface, netmask, fallback) + """ + iface = get_iface_for_address(vip) + netmask = get_netmask_for_address(vip) + fallback = False + if iface is None: + iface = config('vip_iface') + fallback = True + if netmask is None: + netmask = config('vip_cidr') + fallback = True + return iface, netmask, fallback + + +def update_hacluster_vip(service, relation_data): + """ Configure VIP resources based on provided configuration + + @param service: Name of the service being configured + @param relation_data: Pointer to dictionary of relation data. + """ + cluster_config = get_hacluster_config() + vip_group = [] + vips_to_delete = [] + for vip in cluster_config['vip'].split(): + if is_ipv6(vip): + res_vip = 'ocf:heartbeat:IPv6addr' + vip_params = 'ipv6addr' + else: + res_vip = 'ocf:heartbeat:IPaddr2' + vip_params = 'ip' + + iface, netmask, fallback = get_vip_settings(vip) + + vip_monitoring = 'op monitor timeout="20s" interval="10s" depth="0"' + if iface is not None: + # NOTE(jamespage): Delete old VIP resources + # Old style naming encoding iface in name + # does not work well in environments where + # interface/subnet wiring is not consistent + vip_key = 'res_{}_{}_vip'.format(service, iface) + if vip_key in vips_to_delete: + vip_key = '{}_{}'.format(vip_key, vip_params) + vips_to_delete.append(vip_key) + + vip_key = 'res_{}_{}_vip'.format( + service, + hashlib.sha1(vip.encode('UTF-8')).hexdigest()[:7]) + + relation_data['resources'][vip_key] = res_vip + # NOTE(jamespage): + # Use option provided vip params if these where used + # instead of auto-detected values + if fallback: + relation_data['resource_params'][vip_key] = ( + 'params {ip}="{vip}" cidr_netmask="{netmask}" ' + 'nic="{iface}" {vip_monitoring}'.format( + ip=vip_params, + vip=vip, + iface=iface, + netmask=netmask, + vip_monitoring=vip_monitoring)) + else: + # NOTE(jamespage): + # let heartbeat figure out which interface and + # netmask to configure, which works nicely + # when network interface naming is not + # consistent across units. + relation_data['resource_params'][vip_key] = ( + 'params {ip}="{vip}" {vip_monitoring}'.format( + ip=vip_params, + vip=vip, + vip_monitoring=vip_monitoring)) + + vip_group.append(vip_key) + + if vips_to_delete: + try: + relation_data['delete_resources'].extend(vips_to_delete) + except KeyError: + relation_data['delete_resources'] = vips_to_delete + + if len(vip_group) >= 1: + key = VIP_GROUP_NAME.format(service=service) + try: + relation_data['groups'][key] = ' '.join(vip_group) + except KeyError: + relation_data['groups'] = { + key: ' '.join(vip_group) + } + + +def render_grafana_dashboard(prometheus_app_name, haproxy_dashboard): + """Load grafana dashboard json model and insert prometheus datasource. 
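+
+    The template is rendered with Jinja2 using ``<< `` and `` >>`` as
+    variable delimiters (see ``jinja_env_args`` below), so a dashboard
+    template would reference the datasource as, for example
+    (illustrative)::
+
+        "datasource": "<< datasource >>"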
+ + :param prometheus_app_name: name of the 'prometheus' application that will + be used as datasource in grafana dashboard + :type prometheus_app_name: str + :param haproxy_dashboard: path to haproxy dashboard + :type haproxy_dashboard: str + :return: Grafana dashboard json model as a str. + :rtype: str + """ + from charmhelpers.contrib.templating import jinja + + dashboard_template = os.path.basename(haproxy_dashboard) + dashboard_template_dir = os.path.dirname(haproxy_dashboard) + app_name = application_name() + datasource = "{} - Juju generated source".format(prometheus_app_name) + return jinja.render(dashboard_template, + {"datasource": datasource, + "app_name": app_name, + "prometheus_app_name": prometheus_app_name}, + template_dir=dashboard_template_dir, + jinja_env_args={"variable_start_string": "<< ", + "variable_end_string": " >>"}) diff --git a/ceph-osd/hooks/charmhelpers/contrib/openstack/ip.py b/ceph-osd/hooks/charmhelpers/contrib/openstack/ip.py new file mode 100644 index 00000000..2afad369 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/openstack/ip.py @@ -0,0 +1,260 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from charmhelpers.core.hookenv import ( + NoNetworkBinding, + config, + unit_get, + service_name, + network_get_primary_address, +) +from charmhelpers.contrib.network.ip import ( + get_address_in_network, + is_address_in_network, + is_ipv6, + get_ipv6_addr, + resolve_network_cidr, + get_iface_for_address +) +from charmhelpers.contrib.hahelpers.cluster import is_clustered + +PUBLIC = 'public' +INTERNAL = 'int' +ADMIN = 'admin' +ACCESS = 'access' + +# TODO: reconcile 'int' vs 'internal' binding names +ADDRESS_MAP = { + PUBLIC: { + 'binding': 'public', + 'config': 'os-public-network', + 'fallback': 'public-address', + 'override': 'os-public-hostname', + }, + INTERNAL: { + 'binding': 'internal', + 'config': 'os-internal-network', + 'fallback': 'private-address', + 'override': 'os-internal-hostname', + }, + ADMIN: { + 'binding': 'admin', + 'config': 'os-admin-network', + 'fallback': 'private-address', + 'override': 'os-admin-hostname', + }, + ACCESS: { + 'binding': 'access', + 'config': 'access-network', + 'fallback': 'private-address', + 'override': 'os-access-hostname', + }, + # Note (thedac) bridge to begin the reconciliation between 'int' vs + # 'internal' binding names + 'internal': { + 'binding': 'internal', + 'config': 'os-internal-network', + 'fallback': 'private-address', + 'override': 'os-internal-hostname', + }, +} + + +def canonical_url(configs, endpoint_type=PUBLIC): + """Returns the correct HTTP URL to this host given the state of HTTPS + configuration, hacluster and charm configuration. + + :param configs: OSTemplateRenderer config templating object to inspect + for a complete https context. + :param endpoint_type: str endpoint type to resolve. + :param returns: str base URL for services on the current service unit. 
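+
+    Example (illustrative values)::
+
+        >>> canonical_url(configs, PUBLIC)
+        'https://10.5.100.1'
+
+    IPv6 addresses are bracketed, e.g. ``https://[2001:db8::1]``.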
+ """ + scheme = _get_scheme(configs) + + address = resolve_address(endpoint_type) + if is_ipv6(address): + address = "[{}]".format(address) + + return '%s://%s' % (scheme, address) + + +def _get_scheme(configs): + """Returns the scheme to use for the url (either http or https) + depending upon whether https is in the configs value. + + :param configs: OSTemplateRenderer config templating object to inspect + for a complete https context. + :returns: either 'http' or 'https' depending on whether https is + configured within the configs context. + """ + scheme = 'http' + if configs and 'https' in configs.complete_contexts(): + scheme = 'https' + return scheme + + +def _get_address_override(endpoint_type=PUBLIC): + """Returns any address overrides that the user has defined based on the + endpoint type. + + Note: this function allows for the service name to be inserted into the + address if the user specifies {service_name}.somehost.org. + + :param endpoint_type: the type of endpoint to retrieve the override + value for. + :returns: any endpoint address or hostname that the user has overridden + or None if an override is not present. + """ + override_key = ADDRESS_MAP[endpoint_type]['override'] + addr_override = config(override_key) + if not addr_override: + return None + else: + return addr_override.format(service_name=service_name()) + + +def local_address(unit_get_fallback='public-address'): + """Return a network address for this unit. + + Attempt to retrieve a 'default' IP address for this unit + from network-get. If this is running with an old version of Juju then + fallback to unit_get. + + Note on juju < 2.9 the binding to juju-info may not exist, so fall back to + the unit-get. + + :param unit_get_fallback: Either 'public-address' or 'private-address'. + Only used with old versions of Juju. + :type unit_get_fallback: str + :returns: IP Address + :rtype: str + """ + try: + return network_get_primary_address('juju-info') + except (NotImplementedError, NoNetworkBinding): + return unit_get(unit_get_fallback) + + +def get_invalid_vips(): + """Check if any of the provided vips are invalid. + A vip is invalid if it doesn't belong to the subnet in any interface. + If all vips are valid, this returns an empty list. + + :returns: A list of strings, where each string is an invalid vip address. + :rtype: list + """ + + clustered = is_clustered() + vips = config('vip') + if vips: + vips = vips.split() + invalid_vips = [] + + if clustered and vips: + for vip in vips: + iface_for_vip = get_iface_for_address(vip) + if iface_for_vip is None: + invalid_vips.append(vip) + + return invalid_vips + + +def resolve_address(endpoint_type=PUBLIC, override=True): + """Return unit address depending on net config. + + If unit is clustered with vip(s) and has net splits defined, return vip on + correct network. If clustered with no nets defined, return primary vip. + + If not clustered, return unit address ensuring address is on configured net + split if one is configured, or a Juju 2.0 extra-binding has been used. 
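+
+    Resolution order (summary of the logic below): an explicit hostname
+    override, then a matching VIP when clustered, then a network split
+    or binding derived address, then the unit's fallback address.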
+ + :param endpoint_type: Network endpoing type + :param override: Accept hostname overrides or not + """ + resolved_address = None + if override: + resolved_address = _get_address_override(endpoint_type) + if resolved_address: + return resolved_address + + vips = config('vip') + if vips: + vips = vips.split() + + net_type = ADDRESS_MAP[endpoint_type]['config'] + net_addr = config(net_type) + net_fallback = ADDRESS_MAP[endpoint_type]['fallback'] + binding = ADDRESS_MAP[endpoint_type]['binding'] + clustered = is_clustered() + + if clustered and vips: + if net_addr: + for vip in vips: + if is_address_in_network(net_addr, vip): + resolved_address = vip + break + else: + # NOTE: endeavour to check vips against network space + # bindings + try: + bound_cidr = resolve_network_cidr( + network_get_primary_address(binding) + ) + for vip in vips: + if is_address_in_network(bound_cidr, vip): + resolved_address = vip + break + except (NotImplementedError, NoNetworkBinding): + # If no net-splits configured and no support for extra + # bindings/network spaces so we expect a single vip + resolved_address = vips[0] + else: + if config('prefer-ipv6'): + fallback_addr = get_ipv6_addr(exc_list=vips)[0] + else: + fallback_addr = local_address(unit_get_fallback=net_fallback) + + if net_addr: + resolved_address = get_address_in_network(net_addr, fallback_addr) + else: + # NOTE: only try to use extra bindings if legacy network + # configuration is not in use + try: + resolved_address = network_get_primary_address(binding) + except (NotImplementedError, NoNetworkBinding): + resolved_address = fallback_addr + + if resolved_address is None: + raise ValueError("Unable to resolve a suitable IP address based on " + "charm state and configuration. (net_type=%s, " + "clustered=%s)" % (net_type, clustered)) + + return resolved_address + + +def get_vip_in_network(network): + matching_vip = None + vips = config('vip') + if vips: + for vip in vips.split(): + if is_address_in_network(network, vip): + matching_vip = vip + return matching_vip + + +def get_default_api_bindings(): + _default_bindings = [] + for binding in [INTERNAL, ADMIN, PUBLIC]: + _default_bindings.append(ADDRESS_MAP[binding]['binding']) + return _default_bindings diff --git a/ceph-osd/hooks/charmhelpers/contrib/openstack/keystone.py b/ceph-osd/hooks/charmhelpers/contrib/openstack/keystone.py new file mode 100644 index 00000000..5775aa44 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/openstack/keystone.py @@ -0,0 +1,170 @@ +# +# Copyright 2017 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
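+
+# Example of the endpoint helpers defined below (illustrative)::
+#
+#     >>> format_endpoint('https', '10.5.0.20', 5000, 3)
+#     'https://10.5.0.20:5000/v3/'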
+
+from charmhelpers.fetch import apt_install
+from charmhelpers.contrib.openstack.context import IdentityServiceContext
+from charmhelpers.core.hookenv import (
+    log,
+    ERROR,
+)
+
+
+def get_api_suffix(api_version):
+    """Return the formatted api suffix for the given version
+    @param api_version: version of the keystone endpoint
+    @returns the api suffix formatted according to the given api
+    version
+    """
+    return 'v2.0' if api_version in (2, "2", "2.0") else 'v3'
+
+
+def format_endpoint(schema, addr, port, api_version):
+    """Return a formatted keystone endpoint
+    @param schema: http or https
+    @param addr: ipv4/ipv6 host of the keystone service
+    @param port: port of the keystone service
+    @param api_version: 2 or 3
+    @returns a fully formatted keystone endpoint
+    """
+    return '{}://{}:{}/{}/'.format(schema, addr, port,
+                                   get_api_suffix(api_version))
+
+
+def get_keystone_manager(endpoint, api_version, **kwargs):
+    """Return a keystonemanager for the correct API version
+
+    @param endpoint: the keystone endpoint to point client at
+    @param api_version: version of the keystone api the client should use
+    @param kwargs: token or username/tenant/password information
+    @returns keystonemanager class used for interrogating keystone
+    """
+    if api_version == 2:
+        return KeystoneManager2(endpoint, **kwargs)
+    if api_version == 3:
+        return KeystoneManager3(endpoint, **kwargs)
+    raise ValueError('No manager found for api version {}'.format(api_version))
+
+
+def get_keystone_manager_from_identity_service_context():
+    """Return a keystonemanager generated from an instance of
+    charmhelpers.contrib.openstack.context.IdentityServiceContext
+    @returns keystonemanager instance
+    """
+    context = IdentityServiceContext()()
+    if not context:
+        msg = "Identity service context cannot be generated"
+        log(msg, level=ERROR)
+        raise ValueError(msg)
+
+    endpoint = format_endpoint(context['service_protocol'],
+                               context['service_host'],
+                               context['service_port'],
+                               context['api_version'])
+
+    if context['api_version'] in (2, "2.0"):
+        api_version = 2
+    else:
+        api_version = 3
+
+    return get_keystone_manager(endpoint, api_version,
+                                username=context['admin_user'],
+                                password=context['admin_password'],
+                                tenant_name=context['admin_tenant_name'])
+
+
+class KeystoneManager(object):
+
+    def resolve_service_id(self, service_name=None, service_type=None):
+        """Find the service_id of a given service"""
+        services = [s._info for s in self.api.services.list()]
+
+        service_name = service_name.lower()
+        for s in services:
+            name = s['name'].lower()
+            if service_type and service_name:
+                if (service_name == name and service_type == s['type']):
+                    return s['id']
+            elif service_name and service_name == name:
+                return s['id']
+            elif service_type and service_type == s['type']:
+                return s['id']
+        return None
+
+    def service_exists(self, service_name=None, service_type=None):
+        """Determine if the given service exists on the service list"""
+        return self.resolve_service_id(service_name, service_type) is not None
+
+
+class KeystoneManager2(KeystoneManager):
+
+    def __init__(self, endpoint, **kwargs):
+        try:
+            from keystoneclient.v2_0 import client
+            from keystoneclient.auth.identity import v2
+            from keystoneclient import session
+        except ImportError:
+            apt_install(["python3-keystoneclient"], fatal=True)
+
+            from keystoneclient.v2_0 import client
+            from keystoneclient.auth.identity import v2
+            from keystoneclient import session
+
+        self.api_version = 2
+
+        token = kwargs.get("token", None)
+        if token:
+            api = 
client.Client(endpoint=endpoint, token=token) + else: + auth = v2.Password(username=kwargs.get("username"), + password=kwargs.get("password"), + tenant_name=kwargs.get("tenant_name"), + auth_url=endpoint) + sess = session.Session(auth=auth) + api = client.Client(session=sess) + + self.api = api + + +class KeystoneManager3(KeystoneManager): + + def __init__(self, endpoint, **kwargs): + try: + from keystoneclient.v3 import client + from keystoneclient.auth import token_endpoint + from keystoneclient import session + from keystoneclient.auth.identity import v3 + except ImportError: + apt_install(["python3-keystoneclient"], fatal=True) + + from keystoneclient.v3 import client + from keystoneclient.auth import token_endpoint + from keystoneclient import session + from keystoneclient.auth.identity import v3 + + self.api_version = 3 + + token = kwargs.get("token", None) + if token: + auth = token_endpoint.Token(endpoint=endpoint, + token=token) + sess = session.Session(auth=auth) + else: + auth = v3.Password(auth_url=endpoint, + user_id=kwargs.get("username"), + password=kwargs.get("password"), + project_id=kwargs.get("tenant_name")) + sess = session.Session(auth=auth) + + self.api = client.Client(session=sess) diff --git a/ceph-osd/hooks/charmhelpers/contrib/openstack/neutron.py b/ceph-osd/hooks/charmhelpers/contrib/openstack/neutron.py new file mode 100644 index 00000000..47772467 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/openstack/neutron.py @@ -0,0 +1,351 @@ +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Various utilities for dealing with Neutron and the renaming from Quantum. + +from subprocess import check_output + +from charmhelpers.core.hookenv import ( + config, + log, + ERROR, +) + +from charmhelpers.contrib.openstack.utils import ( + os_release, + CompareOpenStackReleases, +) + + +def headers_package(): + """Ensures correct linux-headers for running kernel are installed, + for building DKMS package""" + kver = check_output(['uname', '-r']).decode('UTF-8').strip() + return 'linux-headers-%s' % kver + + +QUANTUM_CONF_DIR = '/etc/quantum' + + +def kernel_version(): + """ Retrieve the current major kernel version as a tuple e.g. (3, 13) """ + kver = check_output(['uname', '-r']).decode('UTF-8').strip() + kver = kver.split('.') + return (int(kver[0]), int(kver[1])) + + +def determine_dkms_package(): + """ Determine which DKMS package should be used based on kernel version """ + # NOTE: 3.13 kernels have support for GRE and VXLAN native + if kernel_version() >= (3, 13): + return [] + else: + return [headers_package(), 'openvswitch-datapath-dkms'] + + +# legacy + + +def quantum_plugins(): + return { + 'ovs': { + 'config': '/etc/quantum/plugins/openvswitch/' + 'ovs_quantum_plugin.ini', + 'driver': 'quantum.plugins.openvswitch.ovs_quantum_plugin.' 
+ 'OVSQuantumPluginV2', + 'contexts': [], + 'services': ['quantum-plugin-openvswitch-agent'], + 'packages': [determine_dkms_package(), + ['quantum-plugin-openvswitch-agent']], + 'server_packages': ['quantum-server', + 'quantum-plugin-openvswitch'], + 'server_services': ['quantum-server'] + }, + 'nvp': { + 'config': '/etc/quantum/plugins/nicira/nvp.ini', + 'driver': 'quantum.plugins.nicira.nicira_nvp_plugin.' + 'QuantumPlugin.NvpPluginV2', + 'contexts': [], + 'services': [], + 'packages': [], + 'server_packages': ['quantum-server', + 'quantum-plugin-nicira'], + 'server_services': ['quantum-server'] + } + } + + +NEUTRON_CONF_DIR = '/etc/neutron' + + +def neutron_plugins(): + release = os_release('nova-common') + plugins = { + 'ovs': { + 'config': '/etc/neutron/plugins/openvswitch/' + 'ovs_neutron_plugin.ini', + 'driver': 'neutron.plugins.openvswitch.ovs_neutron_plugin.' + 'OVSNeutronPluginV2', + 'contexts': [], + 'services': ['neutron-plugin-openvswitch-agent'], + 'packages': [determine_dkms_package(), + ['neutron-plugin-openvswitch-agent']], + 'server_packages': ['neutron-server', + 'neutron-plugin-openvswitch'], + 'server_services': ['neutron-server'] + }, + 'nvp': { + 'config': '/etc/neutron/plugins/nicira/nvp.ini', + 'driver': 'neutron.plugins.nicira.nicira_nvp_plugin.' + 'NeutronPlugin.NvpPluginV2', + 'contexts': [], + 'services': [], + 'packages': [], + 'server_packages': ['neutron-server', + 'neutron-plugin-nicira'], + 'server_services': ['neutron-server'] + }, + 'nsx': { + 'config': '/etc/neutron/plugins/vmware/nsx.ini', + 'driver': 'vmware', + 'contexts': [], + 'services': [], + 'packages': [], + 'server_packages': ['neutron-server', + 'neutron-plugin-vmware'], + 'server_services': ['neutron-server'] + }, + 'n1kv': { + 'config': '/etc/neutron/plugins/cisco/cisco_plugins.ini', + 'driver': 'neutron.plugins.cisco.network_plugin.PluginV2', + 'contexts': [], + 'services': [], + 'packages': [determine_dkms_package(), + ['neutron-plugin-cisco']], + 'server_packages': ['neutron-server', + 'neutron-plugin-cisco'], + 'server_services': ['neutron-server'] + }, + 'Calico': { + 'config': '/etc/neutron/plugins/ml2/ml2_conf.ini', + 'driver': 'neutron.plugins.ml2.plugin.Ml2Plugin', + 'contexts': [], + 'services': ['calico-felix', + 'bird', + 'neutron-dhcp-agent', + 'nova-api-metadata', + 'etcd'], + 'packages': [determine_dkms_package(), + ['calico-compute', + 'bird', + 'neutron-dhcp-agent', + 'nova-api-metadata', + 'etcd']], + 'server_packages': ['neutron-server', 'calico-control', 'etcd'], + 'server_services': ['neutron-server', 'etcd'] + }, + 'vsp': { + 'config': '/etc/neutron/plugins/nuage/nuage_plugin.ini', + 'driver': 'neutron.plugins.nuage.plugin.NuagePlugin', + 'contexts': [], + 'services': [], + 'packages': [], + 'server_packages': ['neutron-server', 'neutron-plugin-nuage'], + 'server_services': ['neutron-server'] + }, + 'plumgrid': { + 'config': '/etc/neutron/plugins/plumgrid/plumgrid.ini', + 'driver': ('neutron.plugins.plumgrid.plumgrid_plugin' + '.plumgrid_plugin.NeutronPluginPLUMgridV2'), + 'contexts': [], + 'services': [], + 'packages': ['plumgrid-lxc', + 'iovisor-dkms'], + 'server_packages': ['neutron-server', + 'neutron-plugin-plumgrid'], + 'server_services': ['neutron-server'] + }, + 'midonet': { + 'config': '/etc/neutron/plugins/midonet/midonet.ini', + 'driver': 'midonet.neutron.plugin.MidonetPluginV2', + 'contexts': [], + 'services': [], + 'packages': [determine_dkms_package()], + 'server_packages': ['neutron-server', + 'python-neutron-plugin-midonet'], + 'server_services': 
['neutron-server'] + } + } + if CompareOpenStackReleases(release) >= 'icehouse': + # NOTE: patch in ml2 plugin for icehouse onwards + plugins['ovs']['config'] = '/etc/neutron/plugins/ml2/ml2_conf.ini' + plugins['ovs']['driver'] = 'neutron.plugins.ml2.plugin.Ml2Plugin' + plugins['ovs']['server_packages'] = ['neutron-server', + 'neutron-plugin-ml2'] + # NOTE: patch in vmware renames nvp->nsx for icehouse onwards + plugins['nvp'] = plugins['nsx'] + if CompareOpenStackReleases(release) >= 'kilo': + plugins['midonet']['driver'] = ( + 'neutron.plugins.midonet.plugin.MidonetPluginV2') + if CompareOpenStackReleases(release) >= 'liberty': + plugins['midonet']['driver'] = ( + 'midonet.neutron.plugin_v1.MidonetPluginV2') + plugins['midonet']['server_packages'].remove( + 'python-neutron-plugin-midonet') + plugins['midonet']['server_packages'].append( + 'python-networking-midonet') + plugins['plumgrid']['driver'] = ( + 'networking_plumgrid.neutron.plugins' + '.plugin.NeutronPluginPLUMgridV2') + plugins['plumgrid']['server_packages'].remove( + 'neutron-plugin-plumgrid') + if CompareOpenStackReleases(release) >= 'mitaka': + plugins['nsx']['server_packages'].remove('neutron-plugin-vmware') + plugins['nsx']['server_packages'].append('python-vmware-nsx') + plugins['nsx']['config'] = '/etc/neutron/nsx.ini' + plugins['vsp']['driver'] = ( + 'nuage_neutron.plugins.nuage.plugin.NuagePlugin') + if CompareOpenStackReleases(release) >= 'newton': + plugins['vsp']['config'] = '/etc/neutron/plugins/ml2/ml2_conf.ini' + plugins['vsp']['driver'] = 'neutron.plugins.ml2.plugin.Ml2Plugin' + plugins['vsp']['server_packages'] = ['neutron-server', + 'neutron-plugin-ml2'] + return plugins + + +def neutron_plugin_attribute(plugin, attr, net_manager=None): + manager = net_manager or network_manager() + if manager == 'quantum': + plugins = quantum_plugins() + elif manager == 'neutron': + plugins = neutron_plugins() + else: + log("Network manager '%s' does not support plugins." % (manager), + level=ERROR) + raise Exception + + try: + _plugin = plugins[plugin] + except KeyError: + log('Unrecognised plugin for %s: %s' % (manager, plugin), level=ERROR) + raise Exception + + try: + return _plugin[attr] + except KeyError: + return None + + +def network_manager(): + ''' + Deals with the renaming of Quantum to Neutron in H and any situations + that require compatibility (eg, deploying H with network-manager=quantum, + upgrading from G). + ''' + release = os_release('nova-common') + manager = config('network-manager').lower() + + if manager not in ['quantum', 'neutron']: + return manager + + if release in ['essex']: + # E does not support neutron + log('Neutron networking not supported in Essex.', level=ERROR) + raise Exception + elif release in ['folsom', 'grizzly']: + # neutron is named quantum in F and G + return 'quantum' + else: + # ensure accurate naming for all releases post-H + return 'neutron' + + +def parse_mappings(mappings, key_rvalue=False): + """By default mappings are lvalue keyed. + + If key_rvalue is True, the mapping will be reversed to allow multiple + configs for the same lvalue. + """ + parsed = {} + if mappings: + mappings = mappings.split() + for m in mappings: + p = m.partition(':') + + if key_rvalue: + key_index = 2 + val_index = 0 + # if there is no rvalue skip to next + if not p[1]: + continue + else: + key_index = 0 + val_index = 2 + + key = p[key_index].strip() + parsed[key] = p[val_index].strip() + + return parsed + + +def parse_bridge_mappings(mappings): + """Parse bridge mappings. 
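+
+    For example (illustrative)::
+
+        >>> parse_bridge_mappings('physnet1:br-ex physnet2:br-vlan')
+        {'physnet1': 'br-ex', 'physnet2': 'br-vlan'}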
+ + Mappings must be a space-delimited list of provider:bridge mappings. + + Returns dict of the form {provider:bridge}. + """ + return parse_mappings(mappings) + + +def parse_data_port_mappings(mappings, default_bridge='br-data'): + """Parse data port mappings. + + Mappings must be a space-delimited list of bridge:port. + + Returns dict of the form {port:bridge} where ports may be mac addresses or + interface names. + """ + + # NOTE(dosaboy): we use rvalue for key to allow multiple values to be + # proposed for since it may be a mac address which will differ + # across units this allowing first-known-good to be chosen. + _mappings = parse_mappings(mappings, key_rvalue=True) + if not _mappings or list(_mappings.values()) == ['']: + if not mappings: + return {} + + # For backwards-compatibility we need to support port-only provided in + # config. + _mappings = {mappings.split()[0]: default_bridge} + + ports = _mappings.keys() + if len(set(ports)) != len(ports): + raise Exception("It is not allowed to have the same port configured " + "on more than one bridge") + + return _mappings + + +def parse_vlan_range_mappings(mappings): + """Parse vlan range mappings. + + Mappings must be a space-delimited list of provider:start:end mappings. + + The start:end range is optional and may be omitted. + + Returns dict of the form {provider: (start, end)}. + """ + _mappings = parse_mappings(mappings) + return {p: tuple(r.split(':')) for p, r in _mappings.items()} diff --git a/ceph-osd/hooks/charmhelpers/contrib/openstack/policy_rcd.py b/ceph-osd/hooks/charmhelpers/contrib/openstack/policy_rcd.py new file mode 100644 index 00000000..ecffbc68 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/openstack/policy_rcd.py @@ -0,0 +1,173 @@ +# Copyright 2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module for managing policy-rc.d script and associated files. + +This module manages the installation of /usr/sbin/policy-rc.d, the +policy files and the event files. When a package update occurs the +packaging system calls: + +policy-rc.d [options] + +The return code of the script determines if the packaging system +will perform that action on the given service. The policy-rc.d +implementation installed by this module checks if an action is +permitted by checking policy files placed in /etc/policy-rc.d. +If a policy file exists which denies the requested action then +this is recorded in an event file which is placed in +/var/lib/policy-rc.d. +""" + +import os +import shutil +import tempfile +import yaml + +import charmhelpers.contrib.openstack.files as os_files +import charmhelpers.contrib.openstack.alternatives as alternatives +import charmhelpers.core.hookenv as hookenv +import charmhelpers.core.host as host + +POLICY_HEADER = """# Managed by juju\n""" +POLICY_DEFERRED_EVENTS_DIR = '/var/lib/policy-rc.d' +POLICY_CONFIG_DIR = '/etc/policy-rc.d' + + +def get_policy_file_name(): + """Get the name of the policy file for this application. 
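+
+    For a unit of, say, a ``neutron-openvswitch`` application this is
+    ``/etc/policy-rc.d/charm-neutron-openvswitch.policy``.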
+ + :returns: Policy file name + :rtype: str + """ + application_name = hookenv.service_name() + return '{}/charm-{}.policy'.format(POLICY_CONFIG_DIR, application_name) + + +def read_default_policy_file(): + """Return the policy file. + + A policy is in the form: + blocked_actions: + neutron-dhcp-agent: [restart, stop, try-restart] + neutron-l3-agent: [restart, stop, try-restart] + neutron-metadata-agent: [restart, stop, try-restart] + neutron-openvswitch-agent: [restart, stop, try-restart] + openvswitch-switch: [restart, stop, try-restart] + ovs-vswitchd: [restart, stop, try-restart] + ovs-vswitchd-dpdk: [restart, stop, try-restart] + ovsdb-server: [restart, stop, try-restart] + policy_requestor_name: neutron-openvswitch + policy_requestor_type: charm + + :returns: Policy + :rtype: Dict[str, Union[str, Dict[str, List[str]]] + """ + policy = {} + policy_file = get_policy_file_name() + if os.path.exists(policy_file): + with open(policy_file, 'r') as f: + policy = yaml.safe_load(f) + return policy + + +def write_policy_file(policy_file, policy): + """Write policy to disk. + + :param policy_file: Name of policy file + :type policy_file: str + :param policy: Policy + :type policy: Dict[str, Union[str, Dict[str, List[str]]]] + """ + with tempfile.NamedTemporaryFile('w', delete=False) as f: + f.write(POLICY_HEADER) + yaml.dump(policy, f) + tmp_file_name = f.name + shutil.move(tmp_file_name, policy_file) + + +def remove_policy_file(): + """Remove policy file.""" + try: + os.remove(get_policy_file_name()) + except FileNotFoundError: + pass + + +def install_policy_rcd(): + """Install policy-rc.d components.""" + source_file_dir = os.path.dirname(os.path.abspath(os_files.__file__)) + policy_rcd_exec = "/var/lib/charm/{}/policy-rc.d".format( + hookenv.service_name()) + host.mkdir(os.path.dirname(policy_rcd_exec)) + shutil.copy2( + '{}/policy_rc_d_script.py'.format(source_file_dir), + policy_rcd_exec) + # policy-rc.d must be installed via the alternatives system: + # https://people.debian.org/~hmh/invokerc.d-policyrc.d-specification.txt + if not os.path.exists('/usr/sbin/policy-rc.d'): + alternatives.install_alternative( + 'policy-rc.d', + '/usr/sbin/policy-rc.d', + policy_rcd_exec) + host.mkdir(POLICY_CONFIG_DIR) + + +def get_default_policy(): + """Return the default policy structure. + + :returns: Policy + :rtype: Dict[str, Union[str, Dict[str, List[str]]] + """ + policy = { + 'policy_requestor_name': hookenv.service_name(), + 'policy_requestor_type': 'charm', + 'blocked_actions': {}} + return policy + + +def add_policy_block(service, blocked_actions): + """Update a policy file with new list of actions. + + :param service: Service name + :type service: str + :param blocked_actions: Action to block + :type blocked_actions: List[str] + """ + policy = read_default_policy_file() or get_default_policy() + policy_file = get_policy_file_name() + if policy['blocked_actions'].get(service): + policy['blocked_actions'][service].extend(blocked_actions) + else: + policy['blocked_actions'][service] = blocked_actions + policy['blocked_actions'][service] = sorted( + list(set(policy['blocked_actions'][service]))) + write_policy_file(policy_file, policy) + + +def remove_policy_block(service, unblocked_actions): + """Remove list of actions from policy file. 
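+
+    For example (illustrative)::
+
+        remove_policy_block('ovs-vswitchd', ['restart', 'stop'])
+
+    would allow those two actions for the ``ovs-vswitchd`` service again.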
+ + :param service: Service name + :type service: str + :param unblocked_actions: Action to unblock + :type unblocked_actions: List[str] + """ + policy_file = get_policy_file_name() + policy = read_default_policy_file() + for action in unblocked_actions: + try: + policy['blocked_actions'][service].remove(action) + except (KeyError, ValueError): + continue + write_policy_file(policy_file, policy) diff --git a/ceph-osd/hooks/charmhelpers/contrib/openstack/policyd.py b/ceph-osd/hooks/charmhelpers/contrib/openstack/policyd.py new file mode 100644 index 00000000..767943c2 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/openstack/policyd.py @@ -0,0 +1,763 @@ +# Copyright 2019-2021 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import collections +import contextlib +import os +import shutil +import yaml +import zipfile + +import charmhelpers +import charmhelpers.core.hookenv as hookenv +import charmhelpers.core.host as ch_host + +# Features provided by this module: + +""" +Policy.d helper functions +========================= + +The functions in this module are designed, as a set, to provide an easy-to-use +set of hooks for classic charms to add in /etc//policy.d/ +directory override YAML files. + +(For charms.openstack charms, a mixin class is provided for this +functionality). + +In order to "hook" this functionality into a (classic) charm, two functions are +provided: + + maybe_do_policyd_overrides(openstack_release, + service, + blacklist_paths=none, + blacklist_keys=none, + template_function=none, + restart_handler=none) + + maybe_do_policyd_overrides_on_config_changed(openstack_release, + service, + blacklist_paths=None, + blacklist_keys=None, + template_function=None, + restart_handler=None + +(See the docstrings for details on the parameters) + +The functions should be called from the install and upgrade hooks in the charm. +The `maybe_do_policyd_overrides_on_config_changed` function is designed to be +called on the config-changed hook, in that it does an additional check to +ensure that an already overridden policy.d in an upgrade or install hooks isn't +repeated. + +In order the *enable* this functionality, the charm's install, config_changed, +and upgrade_charm hooks need to be modified, and a new config option (see +below) needs to be added. The README for the charm should also be updated. + +Examples from the keystone charm are: + +@hooks.hook('install.real') +@harden() +def install(): + ... + # call the policy overrides handler which will install any policy overrides + maybe_do_policyd_overrides(os_release('keystone'), 'keystone') + + +@hooks.hook('config-changed') +@restart_on_change(restart_map(), restart_functions=restart_function_map()) +@harden() +def config_changed(): + ... + # call the policy overrides handler which will install any policy overrides + maybe_do_policyd_overrides_on_config_changed(os_release('keystone'), + 'keystone') + +@hooks.hook('upgrade-charm') +@restart_on_change(restart_map(), stopstart=True) +@harden() +def upgrade_charm(): + ... 
+ # call the policy overrides handler which will install any policy overrides + maybe_do_policyd_overrides(os_release('keystone'), 'keystone') + +Status Line +=========== + +The workload status code in charm-helpers has been modified to detect if +policy.d override code has been incorporated into the charm by checking for the +new config variable (in the config.yaml). If it has been, then the workload +status line will automatically show "PO:" at the beginning of the workload +status for that unit/service if the config option is set. If the policy +override is broken, the "PO (broken):" will be shown. No changes to the charm +(apart from those already mentioned) are needed to enable this functionality. +(charms.openstack charms also get this functionality, but please see that +library for further details). +""" + +# The config.yaml for the charm should contain the following for the config +# option: + +""" + use-policyd-override: + type: boolean + default: False + description: | + If True then use the resource file named 'policyd-override' to install + override YAML files in the service's policy.d directory. The resource + file should be a ZIP file containing at least one yaml file with a .yaml + or .yml extension. If False then remove the overrides. +""" + +# The metadata.yaml for the charm should contain the following: +""" +resources: + policyd-override: + type: file + filename: policyd-override.zip + description: The policy.d overrides file +""" + +# The README for the charm should contain the following: +""" +Policy Overrides +---------------- + +This feature allows for policy overrides using the `policy.d` directory. This +is an **advanced** feature and the policies that the OpenStack service supports +should be clearly and unambiguously understood before trying to override, or +add to, the default policies that the service uses. The charm also has some +policy defaults. They should also be understood before being overridden. + +> **Caution**: It is possible to break the system (for tenants and other + services) if policies are incorrectly applied to the service. + +Policy overrides are YAML files that contain rules that will add to, or +override, existing policy rules in the service. The `policy.d` directory is +a place to put the YAML override files. This charm owns the +`/etc/keystone/policy.d` directory, and as such, any manual changes to it will +be overwritten on charm upgrades. + +Overrides are provided to the charm using a Juju resource called +`policyd-override`. The resource is a ZIP file. This file, say +`overrides.zip`, is attached to the charm by: + + + juju attach-resource policyd-override=overrides.zip + +The policy override is enabled in the charm using: + + juju config use-policyd-override=true + +When `use-policyd-override` is `True` the status line of the charm will be +prefixed with `PO:` indicating that policies have been overridden. If the +installation of the policy override YAML files failed for any reason then the +status line will be prefixed with `PO (broken):`. The log file for the charm +will indicate the reason. No policy override files are installed if the `PO +(broken):` is shown. The status line indicates that the overrides are broken, +not that the policy for the service has failed. The policy will be the defaults +for the charm and service. + +Policy overrides on one service may affect the functionality of another +service. 
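For example (hypothetically), tightening a Keystone policy can cause the service accounts that other charms rely on to start failing authorization.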
Therefore, it may be necessary to provide policy overrides for +multiple service charms to achieve a consistent set of policies across the +OpenStack system. The charms for the other services that may need overrides +should be checked to ensure that they support overrides before proceeding. +""" + +POLICYD_VALID_EXTS = ['.yaml', '.yml', '.j2', '.tmpl', '.tpl'] +POLICYD_TEMPLATE_EXTS = ['.j2', '.tmpl', '.tpl'] +POLICYD_RESOURCE_NAME = "policyd-override" +POLICYD_CONFIG_NAME = "use-policyd-override" +POLICYD_SUCCESS_FILENAME = "policyd-override-success" +POLICYD_LOG_LEVEL_DEFAULT = hookenv.INFO +POLICYD_ALWAYS_BLACKLISTED_KEYS = ("admin_required", "cloud_admin") + + +class BadPolicyZipFile(Exception): + + def __init__(self, log_message): + self.log_message = log_message + + def __str__(self): + return self.log_message + + +class BadPolicyYamlFile(Exception): + + def __init__(self, log_message): + self.log_message = log_message + + def __str__(self): + return self.log_message + + +def is_policyd_override_valid_on_this_release(openstack_release): + """Check that the charm is running on at least Ubuntu Xenial, and at + least the queens release. + + :param openstack_release: the release codename that is installed. + :type openstack_release: str + :returns: True if okay + :rtype: bool + """ + # NOTE(ajkavanagh) circular import! This is because the status message + # generation code in utils has to call into this module, but this function + # needs the CompareOpenStackReleases() function. The only way to solve + # this is either to put ALL of this module into utils, or refactor one or + # other of the CompareOpenStackReleases or status message generation code + # into a 3rd module. + import charmhelpers.contrib.openstack.utils as ch_utils + return ch_utils.CompareOpenStackReleases(openstack_release) >= 'queens' + + +def maybe_do_policyd_overrides(openstack_release, + service, + blacklist_paths=None, + blacklist_keys=None, + template_function=None, + restart_handler=None, + user=None, + group=None, + config_changed=False): + """If the config option is set, get the resource file and process it to + enable the policy.d overrides for the service passed. + + The param `openstack_release` is required as the policyd overrides feature + is only supported on openstack_release "queens" or later, and on ubuntu + "xenial" or later. Prior to these versions, this feature is a NOP. + + The optional template_function is a function that accepts a string and has + an opportunity to modify the loaded file prior to it being read by + yaml.safe_load(). This allows the charm to perform "templating" using + charm derived data. + + The param blacklist_paths are paths (that are in the service's policy.d + directory that should not be touched). + + The param blacklist_keys are keys that must not appear in the yaml file. + If they do, then the whole policy.d file fails. + + The yaml file extracted from the resource_file (which is a zipped file) has + its file path reconstructed. This, also, must not match any path in the + black list. + + The param restart_handler is an optional Callable that is called to perform + the service restart if the policy.d file is changed. This should normally + be None as oslo.policy automatically picks up changes in the policy.d + directory. However, for any services where this is buggy then a + restart_handler can be used to force the policy.d files to be read. 
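+
+    For example (a sketch, not a requirement of this module)::
+
+        import functools
+        from charmhelpers.core.host import service_restart
+
+        maybe_do_policyd_overrides(
+            os_release('keystone'), 'keystone',
+            restart_handler=functools.partial(service_restart, 'keystone'))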
+ + If the config_changed param is True, then the handling is slightly + different: It will only perform the policyd overrides if the config is True + and the success file doesn't exist. Otherwise, it does nothing as the + resource file has already been processed. + + :param openstack_release: The openstack release that is installed. + :type openstack_release: str + :param service: the service name to construct the policy.d directory for. + :type service: str + :param blacklist_paths: optional list of paths to leave alone + :type blacklist_paths: Union[None, List[str]] + :param blacklist_keys: optional list of keys that mustn't appear in the + yaml file's + :type blacklist_keys: Union[None, List[str]] + :param template_function: Optional function that can modify the string + prior to being processed as a Yaml document. + :type template_function: Union[None, Callable[[str], str]] + :param restart_handler: The function to call if the service should be + restarted. + :type restart_handler: Union[None, Callable[]] + :param user: The user to create/write files/directories as + :type user: Union[None, str] + :param group: the group to create/write files/directories as + :type group: Union[None, str] + :param config_changed: Set to True for config_changed hook. + :type config_changed: bool + """ + _user = service if user is None else user + _group = service if group is None else group + if not is_policyd_override_valid_on_this_release(openstack_release): + return + hookenv.log("Running maybe_do_policyd_overrides", + level=POLICYD_LOG_LEVEL_DEFAULT) + config = hookenv.config() + try: + if not config.get(POLICYD_CONFIG_NAME, False): + clean_policyd_dir_for(service, + blacklist_paths, + user=_user, + group=_group) + if (os.path.isfile(_policy_success_file()) and + restart_handler is not None and + callable(restart_handler)): + restart_handler() + remove_policy_success_file() + return + except Exception as e: + hookenv.log("... ERROR: Exception is: {}".format(str(e)), + level=POLICYD_CONFIG_NAME) + import traceback + hookenv.log(traceback.format_exc(), level=POLICYD_LOG_LEVEL_DEFAULT) + return + # if the policyd overrides have been performed when doing config_changed + # just return + if config_changed and is_policy_success_file_set(): + hookenv.log("... already setup, so skipping.", + level=POLICYD_LOG_LEVEL_DEFAULT) + return + # from now on it should succeed; if it doesn't then status line will show + # broken. + resource_filename = get_policy_resource_filename() + restart = process_policy_resource_file( + resource_filename, service, blacklist_paths, blacklist_keys, + template_function) + if restart and restart_handler is not None and callable(restart_handler): + restart_handler() + + +@charmhelpers.deprecate("Use maybe_do_policyd_overrides instead") +def maybe_do_policyd_overrides_on_config_changed(*args, **kwargs): + """This function is designed to be called from the config changed hook. + + DEPRECATED: please use maybe_do_policyd_overrides() with the param + `config_changed` as `True`. + + See maybe_do_policyd_overrides() for more details on the params. + """ + if 'config_changed' not in kwargs.keys(): + kwargs['config_changed'] = True + return maybe_do_policyd_overrides(*args, **kwargs) + + +def get_policy_resource_filename(): + """Function to extract the policy resource filename + + :returns: The filename of the resource, if set, otherwise, if an error + occurs, then None is returned. 
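+        (For example, the ``policyd-override`` resource may simply not
+        have been attached to the charm.)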
+ :rtype: Union[str, None] + """ + try: + return hookenv.resource_get(POLICYD_RESOURCE_NAME) + except Exception: + return None + + +@contextlib.contextmanager +def open_and_filter_yaml_files(filepath, has_subdirs=False): + """Validate that the filepath provided is a zip file and contains at least + one (.yaml|.yml) file, and that the files are not duplicated when the zip + file is flattened. Note that the yaml files are not checked. This is the + first stage in validating the policy zipfile; individual yaml files are not + checked for validity or black listed keys. + + If the has_subdirs param is True, then the files are flattened to the first + directory, and the files in the root are ignored. + + An example of use is: + + with open_and_filter_yaml_files(some_path) as zfp, g: + for zipinfo in g: + # do something with zipinfo ... + + :param filepath: a filepath object that can be opened by zipfile + :type filepath: Union[AnyStr, os.PathLike[AntStr]] + :param has_subdirs: Keep first level of subdirectories in yaml file. + :type has_subdirs: bool + :returns: (zfp handle, + a generator of the (name, filename, ZipInfo object) tuples) as a + tuple. + :rtype: ContextManager[(zipfile.ZipFile, + Generator[(name, str, str, zipfile.ZipInfo)])] + :raises: zipfile.BadZipFile + :raises: BadPolicyZipFile if duplicated yaml or missing + :raises: IOError if the filepath is not found + """ + with zipfile.ZipFile(filepath, 'r') as zfp: + # first pass through; check for duplicates and at least one yaml file. + names = collections.defaultdict(int) + yamlfiles = _yamlfiles(zfp, has_subdirs) + for name, _, _, _ in yamlfiles: + names[name] += 1 + # There must be at least 1 yaml file. + if len(names.keys()) == 0: + raise BadPolicyZipFile("contains no yaml files with {} extensions." + .format(", ".join(POLICYD_VALID_EXTS))) + # There must be no duplicates + duplicates = [n for n, c in names.items() if c > 1] + if duplicates: + raise BadPolicyZipFile("{} have duplicates in the zip file." + .format(", ".join(duplicates))) + # Finally, let's yield the generator + yield (zfp, yamlfiles) + + +def _yamlfiles(zipfile, has_subdirs=False): + """Helper to get a yaml file (according to POLICYD_VALID_EXTS extensions) + and the infolist item from a zipfile. + + If the `has_subdirs` param is True, the the only yaml files that have a + directory component are read, and then first part of the directory + component is kept, along with the filename in the name. e.g. an entry with + a filename of: + + compute/someotherdir/override.yaml + + is returned as: + + compute/override, yaml, override.yaml, + + This is to help with the special, additional, processing that the dashboard + charm requires. + + :param zipfile: the zipfile to read zipinfo items from + :type zipfile: zipfile.ZipFile + :param has_subdirs: Keep first level of subdirectories in yaml file. + :type has_subdirs: bool + :returns: generator of (name, ext, filename, info item) for each + self-identified yaml file. 
+ :rtype: List[(str, str, str, zipfile.ZipInfo)] + """ + files = [] + for infolist_item in zipfile.infolist(): + try: + if infolist_item.is_dir(): + continue + except AttributeError: + # fallback to "old" way to determine dir entry for pre-py36 + if infolist_item.filename.endswith('/'): + continue + _dir, name_ext = os.path.split(infolist_item.filename) + name, ext = os.path.splitext(name_ext) + if has_subdirs and _dir != "": + name = os.path.join(_dir.split(os.path.sep)[0], name) + ext = ext.lower() + if ext and ext in POLICYD_VALID_EXTS: + files.append((name, ext, name_ext, infolist_item)) + return files + + +def read_and_validate_yaml(stream_or_doc, blacklist_keys=None): + """Read, validate and return the (first) yaml document from the stream. + + The doc is read, and checked for a yaml file. The the top-level keys are + checked against the blacklist_keys provided. If there are problems then an + Exception is raised. Otherwise the yaml document is returned as a Python + object that can be dumped back as a yaml file on the system. + + The yaml file must only consist of a str:str mapping, and if not then the + yaml file is rejected. + + :param stream_or_doc: the file object to read the yaml from + :type stream_or_doc: Union[AnyStr, IO[AnyStr]] + :param blacklist_keys: Any keys, which if in the yaml file, should cause + and error. + :type blacklisted_keys: Union[None, List[str]] + :returns: the yaml file as a python document + :rtype: Dict[str, str] + :raises: yaml.YAMLError if there is a problem with the document + :raises: BadPolicyYamlFile if file doesn't look right or there are + blacklisted keys in the file. + """ + blacklist_keys = blacklist_keys or [] + blacklist_keys.append(POLICYD_ALWAYS_BLACKLISTED_KEYS) + doc = yaml.safe_load(stream_or_doc) + if not isinstance(doc, dict): + raise BadPolicyYamlFile("doesn't look like a policy file?") + keys = set(doc.keys()) + blacklisted_keys_present = keys.intersection(blacklist_keys) + if blacklisted_keys_present: + raise BadPolicyYamlFile("blacklisted keys {} present." + .format(", ".join(blacklisted_keys_present))) + if not all(isinstance(k, str) for k in keys): + raise BadPolicyYamlFile("keys in yaml aren't all strings?") + # check that the dictionary looks like a mapping of str to str + if not all(isinstance(v, str) for v in doc.values()): + raise BadPolicyYamlFile("values in yaml aren't all strings?") + return doc + + +def policyd_dir_for(service): + """Return the policy directory for the named service. + + :param service: str + :returns: the policy.d override directory. + :rtype: os.PathLike[str] + """ + return os.path.join("/", "etc", service, "policy.d") + + +def clean_policyd_dir_for(service, keep_paths=None, user=None, group=None): + """Clean out the policyd directory except for items that should be kept. + + The keep_paths, if used, should be set to the full path of the files that + should be kept in the policyd directory for the service. Note that the + service name is passed in, and then the policyd_dir_for() function is used. + This is so that a coding error doesn't result in a sudden deletion of the + charm (say). + + :param service: the service name to use to construct the policy.d dir. + :type service: str + :param keep_paths: optional list of paths to not delete. 
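+        These should be full paths, for example
+        ``/etc/keystone/policy.d/my-base-policy.yaml``.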
+ :type keep_paths: Union[None, List[str]] + :param user: The user to create/write files/directories as + :type user: Union[None, str] + :param group: the group to create/write files/directories as + :type group: Union[None, str] + """ + _user = service if user is None else user + _group = service if group is None else group + keep_paths = keep_paths or [] + path = policyd_dir_for(service) + hookenv.log("Cleaning path: {}".format(path), level=hookenv.DEBUG) + if not os.path.exists(path): + ch_host.mkdir(path, owner=_user, group=_group, perms=0o775) + for direntry in os.scandir(path): + # see if the path should be kept. + if direntry.path in keep_paths: + continue + # we remove any directories; it's ours and there shouldn't be any + if direntry.is_dir(): + shutil.rmtree(direntry.path) + else: + os.remove(direntry.path) + + +def maybe_create_directory_for(path, user, group): + """For the filename 'path', ensure that the directory for that path exists. + + Note that if the directory already exists then the permissions are NOT + changed. + + :param path: the filename including the path to it. + :type path: str + :param user: the user to create the directory as + :param group: the group to create the directory as + """ + _dir, _ = os.path.split(path) + if not os.path.exists(_dir): + ch_host.mkdir(_dir, owner=user, group=group, perms=0o775) + + +def path_for_policy_file(service, name): + """Return the full path for a policy.d file that will be written to the + service's policy.d directory. + + It is constructed using policyd_dir_for(), the name and the ".yaml" + extension. + + For horizon, for example, it's a bit more complicated. The name param is + actually "override_service_dir/a_name", where target_service needs to be + one the allowed horizon override services. This translation and check is + done in the _yamlfiles() function. + + :param service: the service name + :type service: str + :param name: the name for the policy override + :type name: str + :returns: the full path name for the file + :rtype: os.PathLike[str] + """ + return os.path.join(policyd_dir_for(service), name + ".yaml") + + +def _policy_success_file(): + """Return the file name for a successful drop of policy.d overrides + + :returns: the path name for the file. + :rtype: str + """ + return os.path.join(hookenv.charm_dir(), POLICYD_SUCCESS_FILENAME) + + +def remove_policy_success_file(): + """Remove the file that indicates successful policyd override.""" + try: + os.remove(_policy_success_file()) + except Exception: + pass + + +def set_policy_success_file(): + """Set the file that indicates successful policyd override.""" + open(_policy_success_file(), "w").close() + + +def is_policy_success_file_set(): + """Returns True if the policy success file has been set. + + This indicates that policies are overridden and working properly. + + :returns: True if the policy file is set + :rtype: bool + """ + return os.path.isfile(_policy_success_file()) + + +def policyd_status_message_prefix(): + """Return the prefix str for the status line. + + "PO:" indicating that the policy overrides are in place, or "PO (broken):" + if the policy is supposed to be working but there is no success file. 
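+
+    A unit's workload status might then read, for example,
+    ``PO: Unit is ready``.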
+ + :returns: the prefix + :rtype: str + """ + if is_policy_success_file_set(): + return "PO:" + return "PO (broken):" + + +def process_policy_resource_file(resource_file, + service, + blacklist_paths=None, + blacklist_keys=None, + template_function=None, + preserve_topdir=False, + preprocess_filename=None, + user=None, + group=None): + """Process the resource file (which should contain at least one yaml file) + and write those files to the service's policy.d directory. + + The optional template_function is a function that accepts a python + string and has an opportunity to modify the document + prior to it being read by the yaml.safe_load() function and written to + disk. Note that this function does *not* say how the templating is done - + this is up to the charm to implement its chosen method. + + The param blacklist_paths are paths (that are in the service's policy.d + directory that should not be touched). + + The param blacklist_keys are keys that must not appear in the yaml file. + If they do, then the whole policy.d file fails. + + The yaml file extracted from the resource_file (which is a zipped file) has + its file path reconstructed. This, also, must not match any path in the + black list. + + The yaml filename can be modified in two ways. If the `preserve_topdir` + param is True, then files will be flattened to the top dir. This allows + for creating sets of files that can be grouped into a single level tree + structure. + + Secondly, if the `preprocess_filename` param is not None and callable() + then the name is passed to that function for preprocessing before being + converted to the end location. This is to allow munging of the filename + prior to being tested for a blacklist path. + + If any error occurs, then the policy.d directory is cleared, the error is + written to the log, and the status line will eventually show as failed. + + :param resource_file: The zipped file to open and extract yaml files form. + :type resource_file: Union[AnyStr, os.PathLike[AnyStr]] + :param service: the service name to construct the policy.d directory for. + :type service: str + :param blacklist_paths: optional list of paths to leave alone + :type blacklist_paths: Union[None, List[str]] + :param blacklist_keys: optional list of keys that mustn't appear in the + yaml file's + :type blacklist_keys: Union[None, List[str]] + :param template_function: Optional function that can modify the yaml + document. + :type template_function: Union[None, Callable[[AnyStr], AnyStr]] + :param preserve_topdir: Keep the toplevel subdir + :type preserve_topdir: bool + :param preprocess_filename: Optional function to use to process filenames + extracted from the resource file. + :type preprocess_filename: Union[None, Callable[[AnyStr]. AnyStr]] + :param user: The user to create/write files/directories as + :type user: Union[None, str] + :param group: the group to create/write files/directories as + :type group: Union[None, str] + :returns: True if the processing was successful, False if not. 
+ :rtype: boolean + """ + hookenv.log("Running process_policy_resource_file", level=hookenv.DEBUG) + blacklist_paths = blacklist_paths or [] + completed = False + _preprocess = None + if preprocess_filename is not None and callable(preprocess_filename): + _preprocess = preprocess_filename + _user = service if user is None else user + _group = service if group is None else group + try: + with open_and_filter_yaml_files( + resource_file, preserve_topdir) as (zfp, gen): + # first clear out the policy.d directory and clear success + remove_policy_success_file() + clean_policyd_dir_for(service, + blacklist_paths, + user=_user, + group=_group) + for name, ext, filename, zipinfo in gen: + # See if the name should be preprocessed. + if _preprocess is not None: + name = _preprocess(name) + # construct a name for the output file. + yaml_filename = path_for_policy_file(service, name) + if yaml_filename in blacklist_paths: + raise BadPolicyZipFile("policy.d name {} is blacklisted" + .format(yaml_filename)) + with zfp.open(zipinfo) as fp: + doc = fp.read() + # if template_function is not None, then offer the document + # to the template function + if ext in POLICYD_TEMPLATE_EXTS: + if (template_function is None or not + callable(template_function)): + raise BadPolicyZipFile( + "Template {} but no template_function is " + "available".format(filename)) + doc = template_function(doc) + yaml_doc = read_and_validate_yaml(doc, blacklist_keys) + # we may have to create the directory + maybe_create_directory_for(yaml_filename, _user, _group) + ch_host.write_file(yaml_filename, + yaml.dump(yaml_doc).encode('utf-8'), + _user, + _group) + # Every thing worked, so we mark up a success. + completed = True + except (zipfile.BadZipFile, BadPolicyZipFile, BadPolicyYamlFile) as e: + hookenv.log("Processing {} failed: {}".format(resource_file, str(e)), + level=POLICYD_LOG_LEVEL_DEFAULT) + except IOError as e: + # technically this shouldn't happen; it would be a programming error as + # the filename comes from Juju and thus, should exist. + hookenv.log( + "File {} failed with IOError. This really shouldn't happen" + " -- error: {}".format(resource_file, str(e)), + level=POLICYD_LOG_LEVEL_DEFAULT) + except Exception as e: + import traceback + hookenv.log("General Exception({}) during policyd processing" + .format(str(e)), + level=POLICYD_LOG_LEVEL_DEFAULT) + hookenv.log(traceback.format_exc()) + finally: + if not completed: + hookenv.log("Processing {} failed: cleaning policy.d directory" + .format(resource_file), + level=POLICYD_LOG_LEVEL_DEFAULT) + clean_policyd_dir_for(service, + blacklist_paths, + user=_user, + group=_group) + else: + # touch the success filename + hookenv.log("policy.d overrides installed.", + level=POLICYD_LOG_LEVEL_DEFAULT) + set_policy_success_file() + return completed diff --git a/ceph-osd/hooks/charmhelpers/contrib/openstack/ssh_migrations.py b/ceph-osd/hooks/charmhelpers/contrib/openstack/ssh_migrations.py new file mode 100644 index 00000000..0512e3a5 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/openstack/ssh_migrations.py @@ -0,0 +1,412 @@ +# Copyright 2018 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import subprocess + +from charmhelpers.core.hookenv import ( + ERROR, + log, + relation_get, +) +from charmhelpers.contrib.network.ip import ( + is_ipv6, + ns_query, +) +from charmhelpers.contrib.openstack.utils import ( + get_hostname, + get_host_ip, + is_ip, +) + +NOVA_SSH_DIR = '/etc/nova/compute_ssh/' + + +def ssh_directory_for_unit(application_name, user=None): + """Return the directory used to store ssh assets for the application. + + :param application_name: Name of application eg nova-compute-something + :type application_name: str + :param user: The user that the ssh asserts are for. + :type user: str + :returns: Fully qualified directory path. + :rtype: str + """ + if user: + application_name = "{}_{}".format(application_name, user) + _dir = os.path.join(NOVA_SSH_DIR, application_name) + for d in [NOVA_SSH_DIR, _dir]: + if not os.path.isdir(d): + os.mkdir(d) + for f in ['authorized_keys', 'known_hosts']: + f = os.path.join(_dir, f) + if not os.path.isfile(f): + open(f, 'w').close() + return _dir + + +def known_hosts(application_name, user=None): + """Return the known hosts file for the application. + + :param application_name: Name of application eg nova-compute-something + :type application_name: str + :param user: The user that the ssh asserts are for. + :type user: str + :returns: Fully qualified path to file. + :rtype: str + """ + return os.path.join( + ssh_directory_for_unit(application_name, user), + 'known_hosts') + + +def authorized_keys(application_name, user=None): + """Return the authorized keys file for the application. + + :param application_name: Name of application eg nova-compute-something + :type application_name: str + :param user: The user that the ssh asserts are for. + :type user: str + :returns: Fully qualified path to file. + :rtype: str + """ + return os.path.join( + ssh_directory_for_unit(application_name, user), + 'authorized_keys') + + +def ssh_known_host_key(host, application_name, user=None): + """Return the first entry in known_hosts for host. + + :param host: hostname to lookup in file. + :type host: str + :param application_name: Name of application eg nova-compute-something + :type application_name: str + :param user: The user that the ssh asserts are for. + :type user: str + :returns: Host key + :rtype: str or None + """ + cmd = [ + 'ssh-keygen', + '-f', known_hosts(application_name, user), + '-H', + '-F', + host] + try: + # The first line of output is like '# Host xx found: line 1 type RSA', + # which should be excluded. + output = subprocess.check_output(cmd) + except subprocess.CalledProcessError as e: + # RC of 1 seems to be legitimate for most ssh-keygen -F calls. + if e.returncode == 1: + output = e.output + else: + raise + output = output.strip() + + if output: + # Bug #1500589 cmd has 0 rc on precise if entry not present + lines = output.split('\n') + if len(lines) >= 1: + return lines[0] + + return None + + +def remove_known_host(host, application_name, user=None): + """Remove the entry in known_hosts for host. + + :param host: hostname to lookup in file. 
+ :type host: str + :param application_name: Name of application eg nova-compute-something + :type application_name: str + :param user: The user that the ssh asserts are for. + :type user: str + """ + log('Removing SSH known host entry for compute host at %s' % host) + cmd = ['ssh-keygen', '-f', known_hosts(application_name, user), '-R', host] + subprocess.check_call(cmd) + + +def is_same_key(key_1, key_2): + """Extract the key from two host entries and compare them. + + :param key_1: Host key + :type key_1: str + :param key_2: Host key + :type key_2: str + """ + # The key format get will be like '|1|2rUumCavEXWVaVyB5uMl6m85pZo=|Cp' + # 'EL6l7VTY37T/fg/ihhNb/GPgs= ssh-rsa AAAAB', we only need to compare + # the part start with 'ssh-rsa' followed with '= ', because the hash + # value in the beginning will change each time. + k_1 = key_1.split('= ')[1] + k_2 = key_2.split('= ')[1] + return k_1 == k_2 + + +def add_known_host(host, application_name, user=None): + """Add the given host key to the known hosts file. + + :param host: host name + :type host: str + :param application_name: Name of application eg nova-compute-something + :type application_name: str + :param user: The user that the ssh asserts are for. + :type user: str + """ + cmd = ['ssh-keyscan', '-H', '-t', 'rsa', host] + try: + remote_key = subprocess.check_output(cmd).strip() + except Exception as e: + log('Could not obtain SSH host key from %s' % host, level=ERROR) + raise e + + current_key = ssh_known_host_key(host, application_name, user) + if current_key and remote_key: + if is_same_key(remote_key, current_key): + log('Known host key for compute host %s up to date.' % host) + return + else: + remove_known_host(host, application_name, user) + + log('Adding SSH host key to known hosts for compute node at %s.' % host) + with open(known_hosts(application_name, user), 'a') as out: + out.write("{}\n".format(remote_key)) + + +def ssh_authorized_key_exists(public_key, application_name, user=None): + """Check if given key is in the authorized_key file. + + :param public_key: Public key. + :type public_key: str + :param application_name: Name of application eg nova-compute-something + :type application_name: str + :param user: The user that the ssh asserts are for. + :type user: str + :returns: Whether given key is in the authorized_key file. + :rtype: boolean + """ + with open(authorized_keys(application_name, user)) as keys: + return ('%s' % public_key) in keys.read() + + +def add_authorized_key(public_key, application_name, user=None): + """Add given key to the authorized_key file. + + :param public_key: Public key. + :type public_key: str + :param application_name: Name of application eg nova-compute-something + :type application_name: str + :param user: The user that the ssh asserts are for. + :type user: str + """ + with open(authorized_keys(application_name, user), 'a') as keys: + keys.write("{}\n".format(public_key)) + + +def ssh_compute_add_host_and_key(public_key, hostname, private_address, + application_name, user=None): + """Add a compute nodes ssh details to local cache. + + Collect various hostname variations and add the corresponding host keys to + the local known hosts file. Finally, add the supplied public key to the + authorized_key file. + + :param public_key: Public key. + :type public_key: str + :param hostname: Hostname to collect host keys from. 
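+        This may be a short name or an FQDN; additional name and IP
+        variations are derived from ``private_address``.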
+ :type hostname: str + :param private_address:aCorresponding private address for hostname + :type private_address: str + :param application_name: Name of application eg nova-compute-something + :type application_name: str + :param user: The user that the ssh asserts are for. + :type user: str + """ + # If remote compute node hands us a hostname, ensure we have a + # known hosts entry for its IP, hostname and FQDN. + hosts = [private_address] + + if not is_ipv6(private_address): + if hostname: + hosts.append(hostname) + + if is_ip(private_address): + hn = get_hostname(private_address) + if hn: + hosts.append(hn) + short = hn.split('.')[0] + if ns_query(short): + hosts.append(short) + else: + hosts.append(get_host_ip(private_address)) + short = private_address.split('.')[0] + if ns_query(short): + hosts.append(short) + + for host in list(set(hosts)): + add_known_host(host, application_name, user) + + if not ssh_authorized_key_exists(public_key, application_name, user): + log('Saving SSH authorized key for compute host at %s.' % + private_address) + add_authorized_key(public_key, application_name, user) + + +def ssh_compute_add(public_key, application_name, rid=None, unit=None, + user=None): + """Add a compute nodes ssh details to local cache. + + Collect various hostname variations and add the corresponding host keys to + the local known hosts file. Finally, add the supplied public key to the + authorized_key file. + + :param public_key: Public key. + :type public_key: str + :param application_name: Name of application eg nova-compute-something + :type application_name: str + :param rid: Relation id of the relation between this charm and the app. If + none is supplied it is assumed its the relation relating to + the current hook context. + :type rid: str + :param unit: Unit to add ssh asserts for if none is supplied it is assumed + its the unit relating to the current hook context. + :type unit: str + :param user: The user that the ssh asserts are for. + :type user: str + """ + relation_data = relation_get(rid=rid, unit=unit) + ssh_compute_add_host_and_key( + public_key, + relation_data.get('hostname'), + relation_data.get('private-address'), + application_name, + user=user) + + +def ssh_known_hosts_lines(application_name, user=None): + """Return contents of known_hosts file for given application. + + :param application_name: Name of application eg nova-compute-something + :type application_name: str + :param user: The user that the ssh asserts are for. + :type user: str + """ + known_hosts_list = [] + with open(known_hosts(application_name, user)) as hosts: + for hosts_line in hosts: + if hosts_line.rstrip(): + known_hosts_list.append(hosts_line.rstrip()) + return known_hosts_list + + +def ssh_authorized_keys_lines(application_name, user=None): + """Return contents of authorized_keys file for given application. + + :param application_name: Name of application eg nova-compute-something + :type application_name: str + :param user: The user that the ssh asserts are for. + :type user: str + """ + authorized_keys_list = [] + + with open(authorized_keys(application_name, user)) as keys: + for authkey_line in keys: + if authkey_line.rstrip(): + authorized_keys_list.append(authkey_line.rstrip()) + return authorized_keys_list + + +def ssh_compute_remove(public_key, application_name, user=None): + """Remove given public key from authorized_keys file. + + :param public_key: Public key. 
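+        This should match a full key line as stored in the
+        authorized_keys file.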
+ :type public_key: str + :param application_name: Name of application eg nova-compute-something + :type application_name: str + :param user: The user that the ssh asserts are for. + :type user: str + """ + if not (os.path.isfile(authorized_keys(application_name, user)) or + os.path.isfile(known_hosts(application_name, user))): + return + + keys = ssh_authorized_keys_lines(application_name, user=None) + keys = [k.strip() for k in keys] + + if public_key not in keys: + return + + [keys.remove(key) for key in keys if key == public_key] + + with open(authorized_keys(application_name, user), 'w') as _keys: + keys = '\n'.join(keys) + if not keys.endswith('\n'): + keys += '\n' + _keys.write(keys) + + +def get_ssh_settings(application_name, user=None): + """Retrieve the known host entries and public keys for application + + Retrieve the known host entries and public keys for application for all + units of the given application related to this application for the + app + user combination. + + :param application_name: Name of application eg nova-compute-something + :type application_name: str + :param user: The user that the ssh asserts are for. + :type user: str + :returns: Public keys + host keys for all units for app + user combination. + :rtype: dict + """ + settings = {} + keys = {} + prefix = '' + if user: + prefix = '{}_'.format(user) + + for i, line in enumerate(ssh_known_hosts_lines( + application_name=application_name, user=user)): + settings['{}known_hosts_{}'.format(prefix, i)] = line + if settings: + settings['{}known_hosts_max_index'.format(prefix)] = len( + settings.keys()) + + for i, line in enumerate(ssh_authorized_keys_lines( + application_name=application_name, user=user)): + keys['{}authorized_keys_{}'.format(prefix, i)] = line + if keys: + keys['{}authorized_keys_max_index'.format(prefix)] = len(keys.keys()) + settings.update(keys) + return settings + + +def get_all_user_ssh_settings(application_name): + """Retrieve the known host entries and public keys for application + + Retrieve the known host entries and public keys for application for all + units of the given application related to this application for root user + and nova user. + + :param application_name: Name of application eg nova-compute-something + :type application_name: str + :returns: Public keys + host keys for all units for app + user combination. + :rtype: dict + """ + settings = get_ssh_settings(application_name) + settings.update(get_ssh_settings(application_name, user='nova')) + return settings diff --git a/ceph-osd/hooks/charmhelpers/contrib/openstack/templates/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/openstack/templates/__init__.py new file mode 100644 index 00000000..9df5f746 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/openstack/templates/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+# dummy __init__.py to fool syncer into thinking this is a syncable python
+# module
diff --git a/ceph-osd/hooks/charmhelpers/contrib/openstack/templates/section-ceph-bluestore-compression b/ceph-osd/hooks/charmhelpers/contrib/openstack/templates/section-ceph-bluestore-compression
new file mode 100644
index 00000000..a6430100
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/openstack/templates/section-ceph-bluestore-compression
@@ -0,0 +1,28 @@
+{# section header omitted as options can belong to multiple sections #}
+{% if bluestore_compression_algorithm -%}
+bluestore compression algorithm = {{ bluestore_compression_algorithm }}
+{% endif -%}
+{% if bluestore_compression_mode -%}
+bluestore compression mode = {{ bluestore_compression_mode }}
+{% endif -%}
+{% if bluestore_compression_required_ratio -%}
+bluestore compression required ratio = {{ bluestore_compression_required_ratio }}
+{% endif -%}
+{% if bluestore_compression_min_blob_size -%}
+bluestore compression min blob size = {{ bluestore_compression_min_blob_size }}
+{% endif -%}
+{% if bluestore_compression_min_blob_size_hdd -%}
+bluestore compression min blob size hdd = {{ bluestore_compression_min_blob_size_hdd }}
+{% endif -%}
+{% if bluestore_compression_min_blob_size_ssd -%}
+bluestore compression min blob size ssd = {{ bluestore_compression_min_blob_size_ssd }}
+{% endif -%}
+{% if bluestore_compression_max_blob_size -%}
+bluestore compression max blob size = {{ bluestore_compression_max_blob_size }}
+{% endif -%}
+{% if bluestore_compression_max_blob_size_hdd -%}
+bluestore compression max blob size hdd = {{ bluestore_compression_max_blob_size_hdd }}
+{% endif -%}
+{% if bluestore_compression_max_blob_size_ssd -%}
+bluestore compression max blob size ssd = {{ bluestore_compression_max_blob_size_ssd }}
+{% endif -%}
diff --git a/ceph-osd/hooks/charmhelpers/contrib/openstack/templating.py b/ceph-osd/hooks/charmhelpers/contrib/openstack/templating.py
new file mode 100644
index 00000000..3b7c6a9f
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/openstack/templating.py
@@ -0,0 +1,370 @@
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+from charmhelpers.fetch import apt_install, apt_update
+from charmhelpers.core.hookenv import (
+    log,
+    ERROR,
+    INFO,
+    TRACE
+)
+from charmhelpers.contrib.openstack.utils import OPENSTACK_CODENAMES
+
+try:
+    from jinja2 import FileSystemLoader, ChoiceLoader, Environment, exceptions
+except ImportError:
+    apt_update(fatal=True)
+    apt_install('python3-jinja2', fatal=True)
+    from jinja2 import FileSystemLoader, ChoiceLoader, Environment, exceptions
+
+
+class OSConfigException(Exception):
+    pass
+
+
+def get_loader(templates_dir, os_release):
+    """
+    Create a jinja2.ChoiceLoader containing template dirs up to
+    and including os_release. If a release's template directory is
+    missing from templates_dir, it is omitted from the loader.
+    templates_dir is added to the bottom of the search list as a base
+    loading dir.
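To make that search order concrete before the implementation below: for a hypothetical /tmp/templates tree and os_release='havana', the returned ChoiceLoader consults directories newest-matching-release first, with the base directory and the helper-shipped templates last::

    loader = get_loader('/tmp/templates', 'havana')
    # Search order of the resulting ChoiceLoader:
    #   /tmp/templates/havana/            (matches os_release)
    #   /tmp/templates/grizzly/           (older release dir, if it exists)
    #   /tmp/templates/                   (base templates_dir)
    #   .../contrib/openstack/templates/  (shipped with this helper)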
+
+    A charm may also ship a templates dir with this module
+    and it will be appended to the bottom of the search list, eg::
+
+        hooks/charmhelpers/contrib/openstack/templates
+
+    :param templates_dir (str): Base template directory containing release
+        sub-directories.
+    :param os_release (str): OpenStack release codename to construct template
+        loader.
+    :returns: jinja2.ChoiceLoader constructed with a list of
+        jinja2.FilesystemLoaders, ordered in descending
+        order by OpenStack release.
+    """
+    tmpl_dirs = [(rel, os.path.join(templates_dir, rel))
+                 for rel in OPENSTACK_CODENAMES.values()]
+
+    if not os.path.isdir(templates_dir):
+        log('Templates directory not found @ %s.' % templates_dir,
+            level=ERROR)
+        raise OSConfigException
+
+    # the bottom contains templates_dir and possibly a common templates dir
+    # shipped with the helper.
+    loaders = [FileSystemLoader(templates_dir)]
+    helper_templates = os.path.join(os.path.dirname(__file__), 'templates')
+    if os.path.isdir(helper_templates):
+        loaders.append(FileSystemLoader(helper_templates))
+
+    for rel, tmpl_dir in tmpl_dirs:
+        if os.path.isdir(tmpl_dir):
+            loaders.insert(0, FileSystemLoader(tmpl_dir))
+        if rel == os_release:
+            break
+    # demote this log to the lowest level; we don't really need to see these
+    # logs in production even when debugging.
+    log('Creating choice loader with dirs: %s' %
+        [l.searchpath for l in loaders], level=TRACE)
+    return ChoiceLoader(loaders)
+
+
+class OSConfigTemplate(object):
+    """
+    Associates a config file template with a list of context generators.
+    Responsible for constructing a template context based on those generators.
+    """
+
+    def __init__(self, config_file, contexts, config_template=None):
+        self.config_file = config_file
+
+        if hasattr(contexts, '__call__'):
+            self.contexts = [contexts]
+        else:
+            self.contexts = contexts
+
+        self._complete_contexts = []
+
+        self.config_template = config_template
+
+    def context(self):
+        ctxt = {}
+        for context in self.contexts:
+            _ctxt = context()
+            if _ctxt:
+                ctxt.update(_ctxt)
+                # track interfaces for every complete context.
+                [self._complete_contexts.append(interface)
+                 for interface in context.interfaces
+                 if interface not in self._complete_contexts]
+        return ctxt
+
+    def complete_contexts(self):
+        '''
+        Return a list of interfaces that have satisfied contexts.
+        '''
+        if self._complete_contexts:
+            return self._complete_contexts
+        self.context()
+        return self._complete_contexts
+
+    @property
+    def is_string_template(self):
+        """:returns: Boolean if this instance is a template initialised with a string"""
+        return self.config_template is not None
+
+
+class OSConfigRenderer(object):
+    """
+    This class provides a common templating system to be used by OpenStack
+    charms. It is intended to help charms share common code and templates,
+    and ease the burden of managing config templates across multiple OpenStack
+    releases.
+
+    Basic usage::
+
+        # import some common context generators from charmhelpers
+        from charmhelpers.contrib.openstack import context
+
+        # Create a renderer object for a specific OS release.
+        configs = OSConfigRenderer(templates_dir='/tmp/templates',
+                                   openstack_release='grizzly')
+        # register some config files with context generators.
+        configs.register(config_file='/etc/nova/nova.conf',
+                         contexts=[context.SharedDBContext(),
+                                   context.AMQPContext()])
+        configs.register(config_file='/etc/nova/api-paste.ini',
+                         contexts=[context.IdentityServiceContext()])
+        configs.register(config_file='/etc/haproxy/haproxy.conf',
+                         contexts=[context.HAProxyContext()])
+        configs.register(config_file='/etc/keystone/policy.d/extra.cfg',
+                         contexts=[context.ExtraPolicyContext(),
+                                   context.KeystoneContext()],
+                         config_template=hookenv.config('extra-policy'))
+        # write out a single config
+        configs.write('/etc/nova/nova.conf')
+        # write out all registered configs
+        configs.write_all()
+
+    **OpenStack Releases and template loading**
+
+    When the object is instantiated, it is associated with a specific OS
+    release. This dictates how the template loader will be constructed.
+
+    The constructed loader attempts to load the template from several places
+    in the following order:
+    - from the most recent OS release-specific template dir (if one exists)
+    - the base templates_dir
+    - a template directory shipped in the charm with this helper file.
+
+    For the example above, '/tmp/templates' contains the following structure::
+
+        /tmp/templates/nova.conf
+        /tmp/templates/api-paste.ini
+        /tmp/templates/grizzly/api-paste.ini
+        /tmp/templates/havana/api-paste.ini
+
+    Since it was registered with the grizzly release, it first searches
+    the grizzly directory for nova.conf, then the templates dir.
+
+    When writing api-paste.ini, it will find the template in the grizzly
+    directory.
+
+    If the object were created with folsom, it would fall back to the
+    base templates dir for its api-paste.ini template.
+
+    This system should help manage changes in config files through
+    openstack releases, allowing charms to fall back to the most recently
+    updated config template for a given release.
+
+    The haproxy.conf, since it is not shipped in the templates dir, will
+    be loaded from the module directory's template directory, eg
+    $CHARM/hooks/charmhelpers/contrib/openstack/templates. This allows
+    us to ship common templates (haproxy, apache) with the helpers.
+
+    **Context generators**
+
+    Context generators are used to generate template contexts during hook
+    execution. Doing so may require inspecting service relations, charm
+    config, etc. When registered, a config file is associated with a list
+    of generators. When a template is rendered and written, all context
+    generators are called in a chain to generate the context dictionary
+    passed to the jinja2 template. See context.py for more info.
+    """
+    def __init__(self, templates_dir, openstack_release):
+        if not os.path.isdir(templates_dir):
+            log('Could not locate templates dir %s' % templates_dir,
+                level=ERROR)
+            raise OSConfigException
+
+        self.templates_dir = templates_dir
+        self.openstack_release = openstack_release
+        self.templates = {}
+        self._tmpl_env = None
+
+        if None in [Environment, ChoiceLoader, FileSystemLoader]:
+            # if this code is running, the object is created pre-install hook.
+            # jinja2 shouldn't get touched until the module is reloaded on next
+            # hook execution, with proper jinja2 bits successfully imported.
+            apt_install('python3-jinja2')
+
+    def register(self, config_file, contexts, config_template=None):
+        """
+        Register a config file with a list of context generators to be called
+        during rendering.
+        config_template can be used to load a template from a string instead of
+        using template loaders and template files.
+        :param config_file (str): a path where a config file will be rendered
+        :param contexts (list): a list of context dictionaries with kv pairs
+        :param config_template (str): an optional template string to use
+        """
+        self.templates[config_file] = OSConfigTemplate(
+            config_file=config_file,
+            contexts=contexts,
+            config_template=config_template
+        )
+        log('Registered config file: {}'.format(config_file),
+            level=INFO)
+
+    def _get_tmpl_env(self):
+        if not self._tmpl_env:
+            loader = get_loader(self.templates_dir, self.openstack_release)
+            self._tmpl_env = Environment(loader=loader)
+
+    def _get_template(self, template):
+        self._get_tmpl_env()
+        template = self._tmpl_env.get_template(template)
+        log('Loaded template from {}'.format(template.filename),
+            level=INFO)
+        return template
+
+    def _get_template_from_string(self, ostmpl):
+        '''
+        Get a jinja2 template object from a string.
+        :param ostmpl: OSConfigTemplate to use as a data source.
+        '''
+        self._get_tmpl_env()
+        template = self._tmpl_env.from_string(ostmpl.config_template)
+        log('Loaded a template from a string for {}'.format(
+            ostmpl.config_file),
+            level=INFO)
+        return template
+
+    def render(self, config_file):
+        if config_file not in self.templates:
+            log('Config not registered: {}'.format(config_file), level=ERROR)
+            raise OSConfigException
+
+        ostmpl = self.templates[config_file]
+        ctxt = ostmpl.context()
+
+        if ostmpl.is_string_template:
+            template = self._get_template_from_string(ostmpl)
+            log('Rendering from a string template: '
+                '{}'.format(config_file),
+                level=INFO)
+        else:
+            _tmpl = os.path.basename(config_file)
+            try:
+                template = self._get_template(_tmpl)
+            except exceptions.TemplateNotFound:
+                # if no template is found with basename, try looking
+                # for it using a munged full path, eg:
+                #   /etc/apache2/apache2.conf -> etc_apache2_apache2.conf
+                _tmpl = '_'.join(config_file.split('/')[1:])
+                try:
+                    template = self._get_template(_tmpl)
+                except exceptions.TemplateNotFound as e:
+                    log('Could not load template from {} by {} or {}.'
+                        ''.format(
+                            self.templates_dir,
+                            os.path.basename(config_file),
+                            _tmpl
+                        ),
+                        level=ERROR)
+                    raise e
+
+            log('Rendering from template: {}'.format(config_file),
+                level=INFO)
+        return template.render(ctxt)
+
+    def write(self, config_file):
+        """
+        Write a single config file; raises if the config file is not
+        registered.
+        """
+        if config_file not in self.templates:
+            log('Config not registered: %s' % config_file, level=ERROR)
+            raise OSConfigException
+
+        _out = self.render(config_file).encode('UTF-8')
+
+        with open(config_file, 'wb') as out:
+            out.write(_out)
+
+        log('Wrote template %s.' % config_file, level=INFO)
+
+    def write_all(self):
+        """
+        Write out all registered config files.
+        """
+        for k in self.templates.keys():
+            self.write(k)
+
+    def set_release(self, openstack_release):
+        """
+        Resets the template environment and generates a new template loader
+        based on the new openstack release.
+        """
+        self._tmpl_env = None
+        self.openstack_release = openstack_release
+        self._get_tmpl_env()
+
+    def complete_contexts(self):
+        '''
+        Returns a list of context interfaces that yield a complete context.
+        '''
+        interfaces = []
+        for i in self.templates.values():
+            interfaces.extend(i.complete_contexts())
+        return interfaces
+
+    def get_incomplete_context_data(self, interfaces):
+        '''
+        Return dictionary of relation status of interfaces and any missing
+        required context data.
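The munged fallback name used by render() above is just the config path with the leading slash dropped and the remaining separators joined by underscores; a standalone sketch::

    def munged_template_name(config_file):
        # '/etc/apache2/apache2.conf' -> 'etc_apache2_apache2.conf'
        return '_'.join(config_file.split('/')[1:])

    assert munged_template_name('/etc/apache2/apache2.conf') == \
        'etc_apache2_apache2.conf'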
Example: + {'amqp': {'missing_data': ['rabbitmq_password'], 'related': True}, + 'zeromq-configuration': {'related': False}} + ''' + incomplete_context_data = {} + + for i in self.templates.values(): + for context in i.contexts: + for interface in interfaces: + related = False + if interface in context.interfaces: + related = context.get_related() + missing_data = context.missing_data + if missing_data: + incomplete_context_data[interface] = {'missing_data': missing_data} + if related: + if incomplete_context_data.get(interface): + incomplete_context_data[interface].update({'related': True}) + else: + incomplete_context_data[interface] = {'related': True} + else: + incomplete_context_data[interface] = {'related': False} + return incomplete_context_data diff --git a/ceph-osd/hooks/charmhelpers/contrib/openstack/utils.py b/ceph-osd/hooks/charmhelpers/contrib/openstack/utils.py new file mode 100644 index 00000000..82c28d8e --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/openstack/utils.py @@ -0,0 +1,2695 @@ +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Common python helper functions used for OpenStack charms. +from collections import OrderedDict, namedtuple +from functools import partial, wraps + +import subprocess +import json +import operator +import os +import sys +import re +import itertools +import functools + +import traceback +import uuid +import yaml + +from charmhelpers import deprecate + +from charmhelpers.contrib.network import ip + +from charmhelpers.core import decorators, unitdata + +import charmhelpers.contrib.openstack.deferred_events as deferred_events + +from charmhelpers.core.hookenv import ( + WORKLOAD_STATES, + action_fail, + action_get, + action_set, + config, + expected_peer_units, + expected_related_units, + log as juju_log, + charm_dir, + INFO, + ERROR, + metadata, + related_units, + relation_get, + relation_id, + relation_ids, + relation_set, + service_name as ch_service_name, + status_set, + hook_name, + application_version_set, + cached, + leader_set, + leader_get, + local_unit, +) + +from charmhelpers.core.strutils import ( + BasicStringComparator, + bool_from_string, +) + +from charmhelpers.contrib.storage.linux.lvm import ( + deactivate_lvm_volume_group, + is_lvm_physical_volume, + remove_lvm_physical_volume, +) + +from charmhelpers.contrib.network.ip import ( + get_ipv6_addr, + is_ipv6, + port_has_listener, +) + +from charmhelpers.core.host import ( + lsb_release, + mounts, + umount, + service_running, + service_pause, + service_resume, + service_stop, + service_start, + restart_on_change_helper, +) + +from charmhelpers.fetch import ( + apt_cache, + apt_install, + import_key as fetch_import_key, + add_source as fetch_add_source, + SourceConfigError, + GPGKeyError, + get_upstream_version, + filter_installed_packages, + filter_missing_packages, + ubuntu_apt_pkg as apt, + OPENSTACK_RELEASES, + UBUNTU_OPENSTACK_RELEASE, +) + +from charmhelpers.fetch.snap import ( + snap_install, + snap_refresh, + 
valid_snap_channel, +) + +from charmhelpers.contrib.storage.linux.utils import is_block_device, zap_disk +from charmhelpers.contrib.storage.linux.loopback import ensure_loopback_device +from charmhelpers.contrib.openstack.exceptions import OSContextError, ServiceActionError +from charmhelpers.contrib.openstack.policyd import ( + policyd_status_message_prefix, + POLICYD_CONFIG_NAME, +) + +from charmhelpers.contrib.openstack.ha.utils import ( + expect_ha, +) + +CLOUD_ARCHIVE_URL = "http://ubuntu-cloud.archive.canonical.com/ubuntu" +CLOUD_ARCHIVE_KEY_ID = '5EDB1B62EC4926EA' + +DISTRO_PROPOSED = ('deb http://archive.ubuntu.com/ubuntu/ %s-proposed ' + 'restricted main multiverse universe') + +OPENSTACK_CODENAMES = OrderedDict([ + # NOTE(lourot): 'yyyy.i' isn't actually mapping with any real version + # number. This just means the i-th version of the year yyyy. + ('2011.2', 'diablo'), + ('2012.1', 'essex'), + ('2012.2', 'folsom'), + ('2013.1', 'grizzly'), + ('2013.2', 'havana'), + ('2014.1', 'icehouse'), + ('2014.2', 'juno'), + ('2015.1', 'kilo'), + ('2015.2', 'liberty'), + ('2016.1', 'mitaka'), + ('2016.2', 'newton'), + ('2017.1', 'ocata'), + ('2017.2', 'pike'), + ('2018.1', 'queens'), + ('2018.2', 'rocky'), + ('2019.1', 'stein'), + ('2019.2', 'train'), + ('2020.1', 'ussuri'), + ('2020.2', 'victoria'), + ('2021.1', 'wallaby'), + ('2021.2', 'xena'), + ('2022.1', 'yoga'), + ('2022.2', 'zed'), + ('2023.1', 'antelope'), + ('2023.2', 'bobcat'), + ('2024.1', 'caracal'), +]) + +# The ugly duckling - must list releases oldest to newest +SWIFT_CODENAMES = OrderedDict([ + ('diablo', + ['1.4.3']), + ('essex', + ['1.4.8']), + ('folsom', + ['1.7.4']), + ('grizzly', + ['1.7.6', '1.7.7', '1.8.0']), + ('havana', + ['1.9.0', '1.9.1', '1.10.0']), + ('icehouse', + ['1.11.0', '1.12.0', '1.13.0', '1.13.1']), + ('juno', + ['2.0.0', '2.1.0', '2.2.0']), + ('kilo', + ['2.2.1', '2.2.2']), + ('liberty', + ['2.3.0', '2.4.0', '2.5.0']), + ('mitaka', + ['2.5.0', '2.6.0', '2.7.0']), + ('newton', + ['2.8.0', '2.9.0', '2.10.0']), + ('ocata', + ['2.11.0', '2.12.0', '2.13.0']), + ('pike', + ['2.13.0', '2.15.0']), + ('queens', + ['2.16.0', '2.17.0']), + ('rocky', + ['2.18.0', '2.19.0']), + ('stein', + ['2.20.0', '2.21.0']), + ('train', + ['2.22.0', '2.23.0']), + ('ussuri', + ['2.24.0', '2.25.0']), + ('victoria', + ['2.25.0', '2.26.0']), +]) + +# >= Liberty version->codename mapping +PACKAGE_CODENAMES = { + 'nova-common': OrderedDict([ + ('12', 'liberty'), + ('13', 'mitaka'), + ('14', 'newton'), + ('15', 'ocata'), + ('16', 'pike'), + ('17', 'queens'), + ('18', 'rocky'), + ('19', 'stein'), + ('20', 'train'), + ('21', 'ussuri'), + ('22', 'victoria'), + ]), + 'neutron-common': OrderedDict([ + ('7', 'liberty'), + ('8', 'mitaka'), + ('9', 'newton'), + ('10', 'ocata'), + ('11', 'pike'), + ('12', 'queens'), + ('13', 'rocky'), + ('14', 'stein'), + ('15', 'train'), + ('16', 'ussuri'), + ('17', 'victoria'), + ]), + 'cinder-common': OrderedDict([ + ('7', 'liberty'), + ('8', 'mitaka'), + ('9', 'newton'), + ('10', 'ocata'), + ('11', 'pike'), + ('12', 'queens'), + ('13', 'rocky'), + ('14', 'stein'), + ('15', 'train'), + ('16', 'ussuri'), + ('17', 'victoria'), + ]), + 'keystone': OrderedDict([ + ('8', 'liberty'), + ('9', 'mitaka'), + ('10', 'newton'), + ('11', 'ocata'), + ('12', 'pike'), + ('13', 'queens'), + ('14', 'rocky'), + ('15', 'stein'), + ('16', 'train'), + ('17', 'ussuri'), + ('18', 'victoria'), + ]), + 'horizon-common': OrderedDict([ + ('8', 'liberty'), + ('9', 'mitaka'), + ('10', 'newton'), + ('11', 'ocata'), + ('12', 'pike'), + 
('13', 'queens'), + ('14', 'rocky'), + ('15', 'stein'), + ('16', 'train'), + ('18', 'ussuri'), # Note this was actually 17.0 - 18.3 + ('19', 'victoria'), # Note this is really 18.6 + ]), + 'ceilometer-common': OrderedDict([ + ('5', 'liberty'), + ('6', 'mitaka'), + ('7', 'newton'), + ('8', 'ocata'), + ('9', 'pike'), + ('10', 'queens'), + ('11', 'rocky'), + ('12', 'stein'), + ('13', 'train'), + ('14', 'ussuri'), + ('15', 'victoria'), + ]), + 'heat-common': OrderedDict([ + ('5', 'liberty'), + ('6', 'mitaka'), + ('7', 'newton'), + ('8', 'ocata'), + ('9', 'pike'), + ('10', 'queens'), + ('11', 'rocky'), + ('12', 'stein'), + ('13', 'train'), + ('14', 'ussuri'), + ('15', 'victoria'), + ]), + 'glance-common': OrderedDict([ + ('11', 'liberty'), + ('12', 'mitaka'), + ('13', 'newton'), + ('14', 'ocata'), + ('15', 'pike'), + ('16', 'queens'), + ('17', 'rocky'), + ('18', 'stein'), + ('19', 'train'), + ('20', 'ussuri'), + ('21', 'victoria'), + ]), + 'openstack-dashboard': OrderedDict([ + ('8', 'liberty'), + ('9', 'mitaka'), + ('10', 'newton'), + ('11', 'ocata'), + ('12', 'pike'), + ('13', 'queens'), + ('14', 'rocky'), + ('15', 'stein'), + ('16', 'train'), + ('18', 'ussuri'), + ('19', 'victoria'), + ]), +} + +DEFAULT_LOOPBACK_SIZE = '5G' + +DB_SERIES_UPGRADING_KEY = 'cluster-series-upgrading' + +DB_MAINTENANCE_KEYS = [DB_SERIES_UPGRADING_KEY] + + +class CompareOpenStackReleases(BasicStringComparator): + """Provide comparisons of OpenStack releases. + + Use in the form of + + if CompareOpenStackReleases(release) > 'mitaka': + # do something with mitaka + """ + _list = OPENSTACK_RELEASES + + +def error_out(msg): + juju_log("FATAL ERROR: %s" % msg, level='ERROR') + sys.exit(1) + + +def get_installed_semantic_versioned_packages(): + '''Get a list of installed packages which have OpenStack semantic versioning + + :returns List of installed packages + :rtype: [pkg1, pkg2, ...] 
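Since Liberty the projects use semantic versioning, so only the major version is needed to pick a codename from PACKAGE_CODENAMES above, and CompareOpenStackReleases orders the codenames themselves; for example::

    PACKAGE_CODENAMES['nova-common']['22']            # -> 'victoria'
    CompareOpenStackReleases('victoria') > 'mitaka'   # -> True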
+ ''' + return filter_missing_packages(PACKAGE_CODENAMES.keys()) + + +def get_os_codename_install_source(src): + '''Derive OpenStack release codename from a given installation source.''' + ubuntu_rel = lsb_release()['DISTRIB_CODENAME'] + rel = '' + if src is None: + return rel + if src in OPENSTACK_RELEASES: + return src + if src in ['distro', 'distro-proposed', 'proposed']: + try: + rel = UBUNTU_OPENSTACK_RELEASE[ubuntu_rel] + except KeyError: + e = 'Could not derive openstack release for '\ + 'this Ubuntu release: %s' % ubuntu_rel + error_out(e) + return rel + + if src.startswith('cloud:'): + ca_rel = src.split(':')[1] + ca_rel = ca_rel.split('-')[1].split('/')[0] + return ca_rel + + # Best guess match based on deb string provided + if (src.startswith('deb') or + src.startswith('ppa') or + src.startswith('snap')): + for v in OPENSTACK_CODENAMES.values(): + if v in src: + return v + + +def get_os_version_install_source(src): + codename = get_os_codename_install_source(src) + return get_os_version_codename(codename) + + +def get_os_codename_version(vers): + '''Determine OpenStack codename from version number.''' + try: + return OPENSTACK_CODENAMES[vers] + except KeyError: + e = 'Could not determine OpenStack codename for version %s' % vers + error_out(e) + + +def get_os_version_codename(codename, version_map=OPENSTACK_CODENAMES, + raise_exception=False): + '''Determine OpenStack version number from codename.''' + for k, v in version_map.items(): + if v == codename: + return k + e = 'Could not derive OpenStack version for '\ + 'codename: %s' % codename + if raise_exception: + raise ValueError(str(e)) + error_out(e) + + +def get_swift_codename(version): + '''Determine OpenStack codename that corresponds to swift version.''' + codenames = [k for k, v in SWIFT_CODENAMES.items() if version in v] + + if len(codenames) > 1: + # If more than one release codename contains this version we determine + # the actual codename based on the highest available install source. + for codename in reversed(codenames): + releases = UBUNTU_OPENSTACK_RELEASE + release = [k for k, v in releases.items() if codename in v] + ret = (subprocess + .check_output(['apt-cache', 'policy', 'swift']) + .decode('UTF-8')) + if codename in ret or release[0] in ret: + return codename + elif len(codenames) == 1: + return codenames[0] + + # NOTE: fallback - attempt to match with just major.minor version + match = re.match(r'^(\d+)\.(\d+)', version) + if match: + major_minor_version = match.group(0) + for codename, versions in SWIFT_CODENAMES.items(): + for release_version in versions: + if release_version.startswith(major_minor_version): + return codename + + return None + + +def get_os_codename_package(package, fatal=True): + """Derive OpenStack release codename from an installed package. + + Initially, see if the openstack-release pkg is available (by trying to + install it) and use it instead. + + If it isn't then it falls back to the existing method of checking the + version of the package passed and then resolving the version from that + using lookup tables. + + Note: if possible, charms should use get_installed_os_version() to + determine the version of the "openstack-release" pkg. + + :param package: the package to test for version information. + :type package: str + :param fatal: If True (default), then die via error_out() + :type fatal: bool + :returns: the OpenStack release codename (e.g. 
ussuri)
+    :rtype: str
+    """
+
+    codename = get_installed_os_version()
+    if codename:
+        return codename
+
+    if snap_install_requested():
+        cmd = ['snap', 'list', package]
+        try:
+            out = subprocess.check_output(cmd).decode('UTF-8')
+        except subprocess.CalledProcessError:
+            return None
+        lines = out.split('\n')
+        for line in lines:
+            if package in line:
+                # Second item in list is Version
+                return line.split()[1]
+
+    cache = apt_cache()
+
+    try:
+        pkg = cache[package]
+    except Exception:
+        if not fatal:
+            return None
+        # the package is unknown to the current apt cache.
+        e = 'Could not determine version of package with no installation '\
+            'candidate: %s' % package
+        error_out(e)
+
+    if not pkg.current_ver:
+        if not fatal:
+            return None
+        # package is known, but no version is currently installed.
+        e = 'Could not determine version of uninstalled package: %s' % package
+        error_out(e)
+
+    vers = apt.upstream_version(pkg.current_ver.ver_str)
+    if 'swift' in pkg.name:
+        # Fully x.y.z match for swift versions
+        match = re.match(r'^(\d+)\.(\d+)\.(\d+)', vers)
+    else:
+        # x.y match only for 20XX.X
+        # and ignore patch level for other packages
+        match = re.match(r'^(\d+)\.(\d+)', vers)
+
+    if match:
+        vers = match.group(0)
+
+    # Generate a major version number for newer semantic
+    # versions of openstack projects
+    major_vers = vers.split('.')[0]
+    # >= Liberty independent project versions
+    if (package in PACKAGE_CODENAMES and
+            major_vers in PACKAGE_CODENAMES[package]):
+        return PACKAGE_CODENAMES[package][major_vers]
+    else:
+        # < Liberty co-ordinated project versions
+        try:
+            if 'swift' in pkg.name:
+                return get_swift_codename(vers)
+            else:
+                return OPENSTACK_CODENAMES[vers]
+        except KeyError:
+            if not fatal:
+                return None
+            e = 'Could not determine OpenStack codename for version %s' % vers
+            error_out(e)
+
+
+def get_os_version_package(pkg, fatal=True):
+    '''Derive OpenStack version number from an installed package.'''
+    codename = get_os_codename_package(pkg, fatal=fatal)
+
+    if not codename:
+        return None
+
+    if 'swift' in pkg:
+        vers_map = SWIFT_CODENAMES
+        for cname, version in vers_map.items():
+            if cname == codename:
+                return version[-1]
+    else:
+        vers_map = OPENSTACK_CODENAMES
+        for version, cname in vers_map.items():
+            if cname == codename:
+                return version
+
+
+def get_installed_os_version():
+    """Determine the OpenStack release code name from openstack-release pkg.
+
+    This uses the "openstack-release" pkg (if it exists) to return the
+    OpenStack release codename (e.g. ussuri, mitaka, ocata, etc.)
+
+    Note, it caches the result so that it is only done once per hook.
+
+    :returns: the OpenStack release codename, if available
+    :rtype: Optional[str]
+    """
+    @cached
+    def _do_install():
+        apt_install(filter_installed_packages(['openstack-release']),
+                    fatal=False, quiet=True)
+
+    _do_install()
+    return openstack_release().get('OPENSTACK_CODENAME')
+
+
+def openstack_release():
+    """Return /etc/openstack-release in a dict."""
+    d = {}
+    try:
+        with open('/etc/openstack-release', 'r') as lsb:
+            for l in lsb:
+                s = l.split('=')
+                if len(s) != 2:
+                    continue
+                d[s[0].strip()] = s[1].strip()
+    except FileNotFoundError:
+        pass
+    return d
+
+
+# Module local cache variable for the os_release.
+_os_rel = None
+
+
+def reset_os_release():
+    '''Unset the cached os_release version'''
+    global _os_rel
+    _os_rel = None
+
+
+def os_release(package, base=None, reset_cache=False, source_key=None):
+    """Returns OpenStack release codename from a cached global.
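openstack_release() above is a minimal 'KEY=value' parser over /etc/openstack-release; the same logic in isolation (the file contents are illustrative)::

    def parse_release_info(lines):
        info = {}
        for line in lines:
            parts = line.split('=')
            if len(parts) == 2:
                info[parts[0].strip()] = parts[1].strip()
        return info

    print(parse_release_info(['OPENSTACK_CODENAME=ussuri\n']))
    # -> {'OPENSTACK_CODENAME': 'ussuri'}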
+
+    If reset_cache then unset the cached os_release version and return the
+    freshly determined version.
+
+    If the codename cannot be determined from either an installed package or
+    the installation source, the earliest release supported by the charm
+    should be returned.
+
+    :param package: Name of package to determine release from
+    :type package: str
+    :param base: Fallback codename if endeavours to determine it from the
+        package fail
+    :type base: Optional[str]
+    :param reset_cache: Reset any cached codename value
+    :type reset_cache: bool
+    :param source_key: Name of source configuration option
+                       (default: 'openstack-origin')
+    :type source_key: Optional[str]
+    :returns: OpenStack release codename
+    :rtype: str
+    """
+    source_key = source_key or 'openstack-origin'
+    if not base:
+        base = UBUNTU_OPENSTACK_RELEASE[lsb_release()['DISTRIB_CODENAME']]
+    global _os_rel
+    if reset_cache:
+        reset_os_release()
+    if _os_rel:
+        return _os_rel
+    _os_rel = (
+        get_os_codename_package(package, fatal=False) or
+        get_os_codename_install_source(config(source_key)) or
+        base)
+    return _os_rel
+
+
+@deprecate("moved to charmhelpers.fetch.import_key()", "2017-07", log=juju_log)
+def import_key(keyid):
+    """Import a key, either ASCII armored, or a GPG key id.
+
+    @param keyid: the key in ASCII armor format, or a GPG key id.
+    @raises SystemExit() via sys.exit() on failure.
+    """
+    try:
+        return fetch_import_key(keyid)
+    except GPGKeyError as e:
+        error_out("Could not import key: {}".format(str(e)))
+
+
+def get_source_and_pgp_key(source_and_key):
+    """Look for a pgp key ID or ascii-armor key in the given input.
+
+    :param source_and_key: String, "source_spec|keyid" where '|keyid' is
+        optional.
+    :returns (source_spec, key_id OR None) as a tuple. Returns None for key_id
+        if there was no '|' in the source_and_key string.
+    """
+    try:
+        source, key = source_and_key.split('|', 2)
+        return source, key or None
+    except ValueError:
+        return source_and_key, None
+
+
+@deprecate("use charmhelpers.fetch.add_source() instead.",
+           "2017-07", log=juju_log)
+def configure_installation_source(source_plus_key):
+    """Configure an installation source.
+
+    The functionality is provided by charmhelpers.fetch.add_source()
+    The difference between the two functions is that add_source() signature
+    requires the key to be passed directly, whereas this function passes an
+    optional key by appending '|<key>' to the end of the source specification
+    'source'.
+
+    Another difference from add_source() is that the function calls sys.exit(1)
+    if the configuration fails, whereas add_source() raises
+    SourceConfigError(). Another difference is that add_source() silently
+    fails (with a juju_log command) if there is no matching source to
+    configure, whereas this function fails with a sys.exit(1)
+
+    :param source_plus_key: String of the form 'source|key' -- see above for
+        details.
+
+    Note that the behaviour on error is to log the error to the juju log and
+    then call sys.exit(1).
+    """
+    if source_plus_key.startswith('snap'):
+        # Do nothing for snap installs
+        return
+    # extract the key if there is one, denoted by a '|' in the source
+    source, key = get_source_and_pgp_key(source_plus_key)
+
+    # handle the ordinary sources via add_source
+    try:
+        fetch_add_source(source, key, fail_invalid=True)
+    except SourceConfigError as se:
+        error_out(str(se))
+
+
+def config_value_changed(option):
+    """
+    Determine if config value changed since last call to this function.
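The '|' convention handled by get_source_and_pgp_key() above packs an optional key into one config string; for example (the key id is illustrative)::

    get_source_and_pgp_key('cloud:focal-victoria|5EDB1B62EC4926EA')
    # -> ('cloud:focal-victoria', '5EDB1B62EC4926EA')
    get_source_and_pgp_key('distro')
    # -> ('distro', None)
    get_source_and_pgp_key('cloud:focal-victoria|')
    # -> ('cloud:focal-victoria', None)   # empty key collapses to None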
+ """ + hook_data = unitdata.HookData() + with hook_data(): + db = unitdata.kv() + current = config(option) + saved = db.get(option) + db.set(option, current) + if saved is None: + return False + return current != saved + + +def get_endpoint_key(service_name, relation_id, unit_name): + """Return the key used to refer to an ep changed notification from a unit. + + :param service_name: Service name eg nova, neutron, placement etc + :type service_name: str + :param relation_id: The id of the relation the unit is on. + :type relation_id: str + :param unit_name: The name of the unit publishing the notification. + :type unit_name: str + :returns: The key used to refer to an ep changed notification from a unit + :rtype: str + """ + return '{}-{}-{}'.format( + service_name, + relation_id.replace(':', '_'), + unit_name.replace('/', '_')) + + +def get_endpoint_notifications(service_names, rel_name='identity-service'): + """Return all notifications for the given services. + + :param service_names: List of service name. + :type service_name: List + :param rel_name: Name of the relation to query + :type rel_name: str + :returns: A dict containing the source of the notification and its nonce. + :rtype: Dict[str, str] + """ + notifications = {} + for rid in relation_ids(rel_name): + for unit in related_units(relid=rid): + ep_changed_json = relation_get( + rid=rid, + unit=unit, + attribute='ep_changed') + if ep_changed_json: + ep_changed = json.loads(ep_changed_json) + for service in service_names: + if ep_changed.get(service): + key = get_endpoint_key(service, rid, unit) + notifications[key] = ep_changed[service] + return notifications + + +def endpoint_changed(service_name, rel_name='identity-service'): + """Whether a new notification has been received for an endpoint. + + :param service_name: Service name eg nova, neutron, placement etc + :type service_name: str + :param rel_name: Name of the relation to query + :type rel_name: str + :returns: Whether endpoint has changed + :rtype: bool + """ + changed = False + with unitdata.HookData()() as t: + db = t[0] + notifications = get_endpoint_notifications( + [service_name], + rel_name=rel_name) + for key, nonce in notifications.items(): + if db.get(key) != nonce: + juju_log(('New endpoint change notification found: ' + '{}={}').format(key, nonce), + 'INFO') + changed = True + break + return changed + + +def save_endpoint_changed_triggers(service_names, rel_name='identity-service'): + """Save the endpoint triggers in db so it can be tracked if they changed. + + :param service_names: List of service name. + :type service_name: List + :param rel_name: Name of the relation to query + :type rel_name: str + """ + with unitdata.HookData()() as t: + db = t[0] + notifications = get_endpoint_notifications( + service_names, + rel_name=rel_name) + for key, nonce in notifications.items(): + db.set(key, nonce) + + +def save_script_rc(script_path="scripts/scriptrc", **env_vars): + """ + Write an rc file in the charm-delivered directory containing + exported environment variables provided by env_vars. Any charm scripts run + outside the juju hook environment can source this scriptrc to obtain + updated config information necessary to perform health checks or + service changes. 
+ """ + juju_rc_path = "%s/%s" % (charm_dir(), script_path) + if not os.path.exists(os.path.dirname(juju_rc_path)): + os.mkdir(os.path.dirname(juju_rc_path)) + with open(juju_rc_path, 'wt') as rc_script: + rc_script.write("#!/bin/bash\n") + for u, p in env_vars.items(): + if u != "script_path": + rc_script.write('export %s=%s\n' % (u, p)) + + +def openstack_upgrade_available(package): + """ + Determines if an OpenStack upgrade is available from installation + source, based on version of installed package. + + :param package: str: Name of installed package. + + :returns: bool: : Returns True if configured installation source offers + a newer version of package. + """ + + src = config('openstack-origin') + cur_vers = get_os_version_package(package) + if not cur_vers: + # The package has not been installed yet do not attempt upgrade + return False + try: + avail_vers = get_os_version_install_source(src) + except Exception: + avail_vers = cur_vers + apt.init() + return apt.version_compare(avail_vers, cur_vers) >= 1 + + +def ensure_block_device(block_device): + ''' + Confirm block_device, create as loopback if necessary. + + :param block_device: str: Full path of block device to ensure. + + :returns: str: Full path of ensured block device. + ''' + _none = ['None', 'none', None] + if (block_device in _none): + error_out('prepare_storage(): Missing required input: block_device=%s.' + % block_device) + + if block_device.startswith('/dev/'): + bdev = block_device + elif block_device.startswith('/'): + _bd = block_device.split('|') + if len(_bd) == 2: + bdev, size = _bd + else: + bdev = block_device + size = DEFAULT_LOOPBACK_SIZE + bdev = ensure_loopback_device(bdev, size) + else: + bdev = '/dev/%s' % block_device + + if not is_block_device(bdev): + error_out('Failed to locate valid block device at %s' % bdev) + + return bdev + + +def clean_storage(block_device): + ''' + Ensures a block device is clean. That is: + - unmounted + - any lvm volume groups are deactivated + - any lvm physical device signatures removed + - partition table wiped + + :param block_device: str: Full path to block device to clean. + ''' + for mp, d in mounts(): + if d == block_device: + juju_log('clean_storage(): %s is mounted @ %s, unmounting.' 
% + (d, mp), level=INFO) + umount(mp, persist=True) + + if is_lvm_physical_volume(block_device): + deactivate_lvm_volume_group(block_device) + remove_lvm_physical_volume(block_device) + else: + zap_disk(block_device) + + +is_ip = ip.is_ip +ns_query = ip.ns_query +get_host_ip = ip.get_host_ip +get_hostname = ip.get_hostname + + +def get_matchmaker_map(mm_file='/etc/oslo/matchmaker_ring.json'): + mm_map = {} + if os.path.isfile(mm_file): + with open(mm_file, 'r') as f: + mm_map = json.load(f) + return mm_map + + +def sync_db_with_multi_ipv6_addresses(database, database_user, + relation_prefix=None): + hosts = get_ipv6_addr(dynamic_only=False) + + if config('vip'): + vips = config('vip').split() + for vip in vips: + if vip and is_ipv6(vip): + hosts.append(vip) + + kwargs = {'database': database, + 'username': database_user, + 'hostname': json.dumps(hosts)} + + if relation_prefix: + for key in list(kwargs.keys()): + kwargs["%s_%s" % (relation_prefix, key)] = kwargs[key] + del kwargs[key] + + for rid in relation_ids('shared-db'): + relation_set(relation_id=rid, **kwargs) + + +def os_requires_version(ostack_release, pkg): + """ + Decorator for hook to specify minimum supported release + """ + def wrap(f): + @wraps(f) + def wrapped_f(*args): + if CompareOpenStackReleases(os_release(pkg)) < ostack_release: + raise Exception("This hook is not supported on releases" + " before %s" % ostack_release) + f(*args) + return wrapped_f + return wrap + + +def os_workload_status(configs, required_interfaces, charm_func=None): + """ + Decorator to set workload status based on complete contexts + """ + def wrap(f): + @wraps(f) + def wrapped_f(*args, **kwargs): + # Run the original function first + f(*args, **kwargs) + # Set workload status now that contexts have been + # acted on + set_os_workload_status(configs, required_interfaces, charm_func) + return wrapped_f + return wrap + + +def set_os_workload_status(configs, required_interfaces, charm_func=None, + services=None, ports=None): + """Set the state of the workload status for the charm. + + This calls _determine_os_workload_status() to get the new state, message + and sets the status using status_set() + + @param configs: a templating.OSConfigRenderer() object + @param required_interfaces: {generic: [specific, specific2, ...]} + @param charm_func: a callable function that returns state, message. The + signature is charm_func(configs) -> (state, message) + @param services: list of strings OR dictionary specifying services/ports + @param ports: OPTIONAL list of port numbers. + @returns state, message: the new workload status, user message + """ + state, message = _determine_os_workload_status( + configs, required_interfaces, charm_func, services, ports) + status_set(state, message) + + +def _determine_os_workload_status( + configs, required_interfaces, charm_func=None, + services=None, ports=None): + """Determine the state of the workload status for the charm. + + This function returns the new workload status for the charm based + on the state of the interfaces, the paused state and whether the + services are actually running and any specified ports are open. + + This checks: + + 1. if the unit should be paused, that it is actually paused. If so the + state is 'maintenance' + message, else 'broken'. + 2. that the interfaces/relations are complete. If they are not then + it sets the state to either 'broken' or 'waiting' and an appropriate + message. + 3. If all the relation data is set, then it checks that the actual + services really are running. 
If not it sets the state to 'broken'.
+
+    If everything is okay then the state returns 'active'.
+
+    @param configs: a templating.OSConfigRenderer() object
+    @param required_interfaces: {generic: [specific, specific2, ...]}
+    @param charm_func: a callable function that returns state, message. The
+                       signature is charm_func(configs) -> (state, message)
+    @param services: list of strings OR dictionary specifying services/ports
+    @param ports: OPTIONAL list of port numbers.
+    @returns state, message: the new workload status, user message
+    """
+    state, message = _ows_check_if_paused(services, ports)
+
+    if state is None:
+        state, message = _ows_check_generic_interfaces(
+            configs, required_interfaces)
+
+    if state != 'maintenance' and charm_func:
+        # _ows_check_charm_func() may modify the state, message
+        state, message = _ows_check_charm_func(
+            state, message, lambda: charm_func(configs))
+
+    if state is None:
+        state, message = ows_check_services_running(services, ports)
+
+    if state is None:
+        state = 'active'
+        message = "Unit is ready"
+        juju_log(message, 'INFO')
+
+    try:
+        if config(POLICYD_CONFIG_NAME):
+            message = "{} {}".format(policyd_status_message_prefix(), message)
+        # Get deferred restarts events that have been triggered by a policy
+        # written by this charm.
+        deferred_restarts = list(set(
+            [e.service
+             for e in deferred_events.get_deferred_restarts()
+             if e.policy_requestor_name == ch_service_name()]))
+        if deferred_restarts:
+            svc_msg = "Services queued for restart: {}".format(
+                ', '.join(sorted(deferred_restarts)))
+            message = "{}. {}".format(message, svc_msg)
+        deferred_hooks = deferred_events.get_deferred_hooks()
+        if deferred_hooks:
+            svc_msg = "Hooks skipped due to disabled auto restarts: {}".format(
+                ', '.join(sorted(deferred_hooks)))
+            message = "{}. {}".format(message, svc_msg)
+
+    except Exception:
+        pass
+
+    return state, message
+
+
+def _ows_check_if_paused(services=None, ports=None):
+    """Check if the unit is supposed to be paused, and if so check that the
+    services/ports (if passed) are actually stopped/not being listened to.
+
+    If the unit isn't supposed to be paused, just return None, None
+
+    If the unit is performing a series upgrade, return a message indicating
+    this.
+
+    @param services: OPTIONAL services spec or list of service names.
+    @param ports: OPTIONAL list of port numbers.
+    @returns state, message or None, None
+    """
+    if is_unit_upgrading_set():
+        state, message = check_actually_paused(services=services,
+                                               ports=ports)
+        if state is None:
+            # we're paused okay, so set blocked and return
+            state = "blocked"
+            message = ("Ready for do-release-upgrade and reboot. "
+                       "Set complete when finished.")
+        return state, message
+
+    if is_unit_paused_set():
+        state, message = check_actually_paused(services=services,
+                                               ports=ports)
+        if state is None:
+            # we're paused okay, so set maintenance and return
+            state = "maintenance"
+            message = "Paused. Use 'resume' action to resume normal service."
+        return state, message
+    return None, None
+
+
+def _ows_check_generic_interfaces(configs, required_interfaces):
+    """Check the complete contexts to determine the workload status.
+
+    - Checks for missing or incomplete contexts
+    - juju log details of missing required data.
+    - determines the correct workload status
+    - creates an appropriate message for status_set(...)
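_ows_check_if_paused() above therefore yields one of three outcomes; summarised as data (a sketch mirroring the code)::

    PAUSE_OUTCOMES = {
        'series-upgrading': ('blocked',
                             'Ready for do-release-upgrade and reboot. '
                             'Set complete when finished.'),
        'paused': ('maintenance',
                   "Paused. Use 'resume' action to resume normal service."),
        'neither': (None, None),   # fall through to the remaining checks
    }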
+
+    if there are no problems then the function returns None, None
+
+    @param configs: a templating.OSConfigRenderer() object
+    @params required_interfaces: {generic_interface: [specific_interface], }
+    @returns state, message or None, None
+    """
+    incomplete_rel_data = incomplete_relation_data(configs,
+                                                   required_interfaces)
+    state = None
+    message = None
+    missing_relations = set()
+    incomplete_relations = set()
+
+    for generic_interface, relations_states in incomplete_rel_data.items():
+        related_interface = None
+        missing_data = {}
+        # Related or not?
+        for interface, relation_state in relations_states.items():
+            if relation_state.get('related'):
+                related_interface = interface
+                missing_data = relation_state.get('missing_data')
+                break
+        # No relation ID for the generic_interface?
+        if not related_interface:
+            juju_log("{} relation is missing and must be related for "
+                     "functionality. ".format(generic_interface), 'WARN')
+            state = 'blocked'
+            missing_relations.add(generic_interface)
+        else:
+            # Relation ID exists but no related unit
+            if not missing_data:
+                # Edge case - relation ID exists but the unit is departing
+                _hook_name = hook_name()
+                if (('departed' in _hook_name or 'broken' in _hook_name) and
+                        related_interface in _hook_name):
+                    state = 'blocked'
+                    missing_relations.add(generic_interface)
+                    juju_log("{} relation's interface, {}, "
+                             "relationship is departed or broken "
+                             "and is required for functionality."
+                             "".format(generic_interface, related_interface),
+                             "WARN")
+                # Normal case relation ID exists but no related unit
+                # (joining)
+                else:
+                    juju_log("{} relation's interface, {}, is related but has"
+                             " no units in the relation."
+                             "".format(generic_interface, related_interface),
+                             "INFO")
+            # Related unit exists and data missing on the relation
+            else:
+                juju_log("{} relation's interface, {}, is related awaiting "
+                         "the following data from the relationship: {}. "
+                         "".format(generic_interface, related_interface,
+                                   ", ".join(missing_data)), "INFO")
+            if state != 'blocked':
+                state = 'waiting'
+            if generic_interface not in missing_relations:
+                incomplete_relations.add(generic_interface)
+
+    if missing_relations:
+        message = "Missing relations: {}".format(", ".join(missing_relations))
+        if incomplete_relations:
+            message += "; incomplete relations: {}" \
+                       "".format(", ".join(incomplete_relations))
+        state = 'blocked'
+    elif incomplete_relations:
+        message = "Incomplete relations: {}" \
+                  "".format(", ".join(incomplete_relations))
+        state = 'waiting'
+
+    return state, message
+
+
+def _ows_check_charm_func(state, message, charm_func_with_configs):
+    """Run a custom check function for the charm to see if it wants to
+    change the state. This is only run if not in 'maintenance' and
+    tests to see if the new state is more important than the previous
+    one determined by the interfaces/relations check.
+
+    @param state: the previously determined state so far.
+    @param message: the user orientated message so far.
+    @param charm_func: a callable function that returns state, message
+    @returns state, message strings.
+ """ + if charm_func_with_configs: + charm_state, charm_message = charm_func_with_configs() + if (charm_state != 'active' and + charm_state != 'unknown' and + charm_state is not None): + state = workload_state_compare(state, charm_state) + if message: + charm_message = charm_message.replace("Incomplete relations: ", + "") + message = "{}, {}".format(message, charm_message) + else: + message = charm_message + return state, message + + +@deprecate("use ows_check_services_running() instead", "2022-05", log=juju_log) +def _ows_check_services_running(services, ports): + return ows_check_services_running(services, ports) + + +def ows_check_services_running(services, ports, ssl_check_info=None): + """Check that the services that should be running are actually running + and that any ports specified are being listened to. + + @param services: list of strings OR dictionary specifying services/ports + @param ports: list of ports + @param ssl_check_info: SSLPortCheckInfo object. If provided, port checks + will be done using an SSL connection. + @returns state, message: strings or None, None + """ + messages = [] + state = None + if services is not None: + services = _extract_services_list_helper(services) + services_running, running = _check_running_services(services) + if not all(running): + messages.append( + "Services not running that should be: {}" + .format(", ".join(_filter_tuples(services_running, False)))) + state = 'blocked' + # also verify that the ports that should be open are open + # NB, that ServiceManager objects only OPTIONALLY have ports + map_not_open, ports_open = ( + _check_listening_on_services_ports(services, ssl_check_info)) + if not all(ports_open): + # find which service has missing ports. They are in service + # order which makes it a bit easier. + message_parts = {service: ", ".join([str(v) for v in open_ports]) + for service, open_ports in map_not_open.items()} + message = ", ".join( + ["{}: [{}]".format(s, sp) for s, sp in message_parts.items()]) + messages.append( + "Services with ports not open that should be: {}" + .format(message)) + state = 'blocked' + + if ports is not None: + # and we can also check ports which we don't know the service for + ports_open, ports_open_bools = \ + _check_listening_on_ports_list(ports, ssl_check_info) + if not all(ports_open_bools): + messages.append( + "Ports which should be open, but are not: {}" + .format(", ".join([str(p) for p, v in ports_open + if not v]))) + state = 'blocked' + + if state is not None: + message = "; ".join(messages) + return state, message + + return None, None + + +def _extract_services_list_helper(services): + """Extract a OrderedDict of {service: [ports]} of the supplied services + for use by the other functions. + + The services object can either be: + - None : no services were passed (an empty dict is returned) + - a list of strings + - A dictionary (optionally OrderedDict) {service_name: {'service': ..}} + - An array of [{'service': service_name, ...}, ...] + + @param services: see above + @returns OrderedDict(service: [ports], ...) + """ + if services is None: + return {} + if isinstance(services, dict): + services = services.values() + # either extract the list of services from the dictionary, or if + # it is a simple string, use that. i.e. works with mixed lists. 
+ _s = OrderedDict() + for s in services: + if isinstance(s, dict) and 'service' in s: + _s[s['service']] = s.get('ports', []) + if isinstance(s, str): + _s[s] = [] + return _s + + +def _check_running_services(services): + """Check that the services dict provided is actually running and provide + a list of (service, boolean) tuples for each service. + + Returns both a zipped list of (service, boolean) and a list of booleans + in the same order as the services. + + @param services: OrderedDict of strings: [ports], one for each service to + check. + @returns [(service, boolean), ...], : results for checks + [boolean] : just the result of the service checks + """ + services_running = [service_running(s) for s in services] + return list(zip(services, services_running)), services_running + + +def _check_listening_on_services_ports(services, test=False, + ssl_check_info=None): + """Check that the unit is actually listening (has the port open) on the + ports that the service specifies are open. If test is True then the + function returns the services with ports that are open rather than + closed. + + Returns an OrderedDict of service: ports and a list of booleans + + @param services: OrderedDict(service: [port, ...], ...) + @param test: default=False, if False, test for closed, otherwise open. + @param ssl_check_info: SSLPortCheckInfo object. If provided, port checks + will be done using an SSL connection. + @returns OrderedDict(service: [port-not-open, ...]...), [boolean] + """ + test = not (not (test)) # ensure test is True or False + all_ports = list(itertools.chain(*services.values())) + ports_states = [port_has_listener('0.0.0.0', p, ssl_check_info) + for p in all_ports] + map_ports = OrderedDict() + matched_ports = [p for p, opened in zip(all_ports, ports_states) + if opened == test] # essentially opened xor test + for service, ports in services.items(): + set_ports = set(ports).intersection(matched_ports) + if set_ports: + map_ports[service] = set_ports + return map_ports, ports_states + + +def _check_listening_on_ports_list(ports, ssl_check_info=None): + """Check that the ports list given are being listened to + + Returns a list of ports being listened to and a list of the + booleans. + + @param ssl_check_info: SSLPortCheckInfo object. If provided, port checks + will be done using an SSL connection. + @param ports: LIST of port numbers. + @returns [(port_num, boolean), ...], [boolean] + """ + ports_open = [port_has_listener('0.0.0.0', p, ssl_check_info) + for p in ports] + return zip(ports, ports_open), ports_open + + +def _filter_tuples(services_states, state): + """Return a simple list from a list of tuples according to the condition + + @param services_states: LIST of (string, boolean): service and running + state. + @param state: Boolean to match the tuple against. + @returns [LIST of strings] that matched the tuple RHS. 
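_extract_services_list_helper() above flattens every accepted spelling into one OrderedDict; for example::

    _extract_services_list_helper(['apache2'])
    # -> OrderedDict([('apache2', [])])         # bare names carry no ports
    _extract_services_list_helper([{'service': 'apache2', 'ports': [80, 443]}])
    # -> OrderedDict([('apache2', [80, 443])])
    _extract_services_list_helper({'web': {'service': 'apache2',
                                           'ports': [80, 443]}})
    # -> OrderedDict([('apache2', [80, 443])])  # dict values are unwrapped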
+ """ + return [s for s, b in services_states if b == state] + + +def workload_state_compare(current_workload_state, workload_state): + """ Return highest priority of two states""" + hierarchy = {'unknown': -1, + 'active': 0, + 'maintenance': 1, + 'waiting': 2, + 'blocked': 3, + } + + if hierarchy.get(workload_state) is None: + workload_state = 'unknown' + if hierarchy.get(current_workload_state) is None: + current_workload_state = 'unknown' + + # Set workload_state based on hierarchy of statuses + if hierarchy.get(current_workload_state) > hierarchy.get(workload_state): + return current_workload_state + else: + return workload_state + + +def incomplete_relation_data(configs, required_interfaces): + """Check complete contexts against required_interfaces + Return dictionary of incomplete relation data. + + configs is an OSConfigRenderer object with configs registered + + required_interfaces is a dictionary of required general interfaces + with dictionary values of possible specific interfaces. + Example: + required_interfaces = {'database': ['shared-db', 'pgsql-db']} + + The interface is said to be satisfied if anyone of the interfaces in the + list has a complete context. + + Return dictionary of incomplete or missing required contexts with relation + status of interfaces and any missing data points. Example: + {'message': + {'amqp': {'missing_data': ['rabbitmq_password'], 'related': True}, + 'zeromq-configuration': {'related': False}}, + 'identity': + {'identity-service': {'related': False}}, + 'database': + {'pgsql-db': {'related': False}, + 'shared-db': {'related': True}}} + """ + complete_ctxts = configs.complete_contexts() + incomplete_relations = [ + svc_type + for svc_type, interfaces in required_interfaces.items() + if not set(interfaces).intersection(complete_ctxts)] + return { + i: configs.get_incomplete_context_data(required_interfaces[i]) + for i in incomplete_relations} + + +def do_action_openstack_upgrade(package, upgrade_callback, configs): + """Perform action-managed OpenStack upgrade. + + Upgrades packages to the configured openstack-origin version and sets + the corresponding action status as a result. + + For backwards compatibility a config flag (action-managed-upgrade) must + be set for this code to run, otherwise a full service level upgrade will + fire on config-changed. + + @param package: package name for determining if openstack upgrade available + @param upgrade_callback: function callback to charm's upgrade function + @param configs: templating object derived from OSConfigRenderer class + + @return: True if upgrade successful; False if upgrade failed or skipped + """ + ret = False + + if openstack_upgrade_available(package): + if config('action-managed-upgrade'): + juju_log('Upgrading OpenStack release') + + try: + upgrade_callback(configs=configs) + action_set({'outcome': 'success, upgrade completed'}) + ret = True + except Exception: + action_set({'outcome': 'upgrade failed, see traceback'}) + action_set({'traceback': traceback.format_exc()}) + action_fail('upgrade callback resulted in an ' + 'unexpected error') + else: + action_set({'outcome': 'action-managed-upgrade config is ' + 'False, skipped upgrade'}) + else: + action_set({'outcome': 'no upgrade available'}) + + return ret + + +def do_action_package_upgrade(package, upgrade_callback, configs): + """Perform package upgrade within the current OpenStack release. + + Upgrades packages only if there is not an openstack upgrade available, + and sets the corresponding action status as a result. 
+
+    @param package: package name for determining if an openstack upgrade is
+                    available
+    @param upgrade_callback: function callback to charm's upgrade function
+    @param configs: templating object derived from OSConfigRenderer class
+
+    @return: True if upgrade successful; False if upgrade failed or skipped
+    """
+    ret = False
+
+    if not openstack_upgrade_available(package):
+        juju_log('Upgrading packages')
+
+        try:
+            upgrade_callback(configs=configs)
+            action_set({'outcome': 'success, upgrade completed'})
+            ret = True
+        except Exception:
+            action_set({'outcome': 'upgrade failed, see traceback'})
+            action_set({'traceback': traceback.format_exc()})
+            action_fail('upgrade callback resulted in an '
+                        'unexpected error')
+    else:
+        action_set({'outcome': 'upgrade skipped because an openstack upgrade '
+                               'is available'})
+
+    return ret
+
+
+def remote_restart(rel_name, remote_service=None):
+    """Request a restart of services on remote units via relation data.
+
+    Sets a fresh uuid as a 'restart-trigger' value on each relation of the
+    given name so that related charms observing the key can react to it.
+    """
+    trigger = {
+        'restart-trigger': str(uuid.uuid4()),
+    }
+    if remote_service:
+        trigger['remote-service'] = remote_service
+    for rid in relation_ids(rel_name):
+        # This subordinate can be related to two separate services using
+        # different subordinate relations so only issue the restart if
+        # the principal is connected down the relation we think it is
+        if related_units(relid=rid):
+            relation_set(relation_id=rid,
+                         relation_settings=trigger,
+                         )
+
+
+def check_actually_paused(services=None, ports=None):
+    """Check that services listed in the services object and ports
+    are actually closed (not listened to), to verify that the unit is
+    properly paused.
+
+    @param services: See _extract_services_list_helper
+    @returns (status, message): status string (None if okay) and a message
+             suitable for status_set
+    """
+    state = None
+    message = None
+    messages = []
+    if services is not None:
+        services = _extract_services_list_helper(services)
+        services_running, services_states = _check_running_services(services)
+        if any(services_states):
+            # there shouldn't be any running so this is a problem
+            messages.append("these services running: {}"
+                            .format(", ".join(
+                                _filter_tuples(services_running, True))))
+            state = "blocked"
+        ports_open, ports_open_bools = (
+            _check_listening_on_services_ports(services, True))
+        if any(ports_open_bools):
+            message_parts = {service: ", ".join([str(v) for v in open_ports])
+                             for service, open_ports in ports_open.items()}
+            message = ", ".join(
+                ["{}: [{}]".format(s, sp) for s, sp in message_parts.items()])
+            messages.append(
+                "these service:ports are open: {}".format(message))
+            state = 'blocked'
+    if ports is not None:
+        ports_open, bools = _check_listening_on_ports_list(ports)
+        if any(bools):
+            messages.append(
+                "these ports which should be closed, but are open: {}"
+                .format(", ".join([str(p) for p, v in ports_open if v])))
+            state = 'blocked'
+    if messages:
+        message = ("Services should be paused but {}"
+                   .format(", ".join(messages)))
+    return state, message
+
+
+def set_unit_paused():
+    """Set the unit to a paused state in the local kv() store.
+    This does NOT actually pause the unit
+    """
+    with unitdata.HookData()() as t:
+        kv = t[0]
+        kv.set('unit-paused', True)
+
+
+def clear_unit_paused():
+    """Clear the unit from a paused state in the local kv() store
+    This does NOT actually restart any services - it only clears the
+    local state.
+    """
+    with unitdata.HookData()() as t:
+        kv = t[0]
+        kv.set('unit-paused', False)
+
+
+def is_unit_paused_set():
+    """Return the state of the kv().get('unit-paused').
+    This does NOT verify that the unit really is paused.
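+
+    A typical guard in a hook might look like (sketch):
+
+        if is_unit_paused_set():
+            return  # skip work while the unit is administratively paused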
+
+    To help with units that don't have HookData() (testing), return False
+    if the lookup raises an exception.
+    """
+    try:
+        with unitdata.HookData()() as t:
+            kv = t[0]
+            # transform something truth-y into a Boolean.
+            return bool(kv.get('unit-paused'))
+    except Exception:
+        return False
+
+
+def is_hook_allowed(hookname, check_deferred_restarts=True):
+    """Check if hook can run.
+
+    :param hookname: Name of hook to check.
+    :type hookname: str
+    :param check_deferred_restarts: Whether to check deferred restarts.
+    :type check_deferred_restarts: bool
+    :returns: Whether the hook is permitted, and the reasons if it is not.
+    :rtype: (bool, str)
+    """
+    permitted = True
+    reasons = []
+    if is_unit_paused_set():
+        reasons.append(
+            "Unit is paused or upgrading. Skipping {}".format(hookname))
+        permitted = False
+
+    if check_deferred_restarts:
+        if deferred_events.is_restart_permitted():
+            permitted = True
+            deferred_events.clear_deferred_hook(hookname)
+        else:
+            if not config().changed('enable-auto-restarts'):
+                deferred_events.set_deferred_hook(hookname)
+            reasons.append("auto restarts are disabled")
+            permitted = False
+    return permitted, " and ".join(reasons)
+
+
+def manage_payload_services(action, services=None, charm_func=None):
+    """Run an action against all services.
+
+    An optional charm_func() can be called. It should raise an Exception to
+    indicate that the function failed. If it was successful it should return
+    None or an optional message.
+
+    The signature for charm_func is:
+    charm_func() -> message: str
+
+    charm_func() is executed after any services are stopped, if supplied.
+
+    The services object can either be:
+      - None : no services were passed (an empty dict is returned)
+      - a list of strings
+      - A dictionary (optionally OrderedDict) {service_name: {'service': ..}}
+      - An array of [{'service': service_name, ...}, ...]
+
+    :param action: Action to run: pause, resume, start or stop.
+    :type action: str
+    :param services: See above
+    :type services: See above
+    :param charm_func: function to run for custom charm pausing.
+    :type charm_func: f()
+    :returns: Status boolean and list of messages
+    :rtype: (bool, [])
+    :raises: RuntimeError
+    """
+    actions = {
+        'pause': service_pause,
+        'resume': service_resume,
+        'start': service_start,
+        'stop': service_stop}
+    action = action.lower()
+    if action not in actions.keys():
+        raise RuntimeError(
+            "action: {} must be one of: {}".format(action,
+                                                   ', '.join(actions.keys())))
+    services = _extract_services_list_helper(services)
+    messages = []
+    success = True
+    if services:
+        for service in services.keys():
+            rc = actions[action](service)
+            if not rc:
+                success = False
+                messages.append("{} didn't {} cleanly.".format(service,
+                                                               action))
+    if charm_func:
+        try:
+            message = charm_func()
+            if message:
+                messages.append(message)
+        except Exception as e:
+            success = False
+            messages.append(str(e))
+    return success, messages
+
+
+def make_wait_for_ports_barrier(ports, retry_count=5):
+    """Make a function to wait for port shutdowns.
+
+    Create a function which closes over the provided ports. The function will
+    retry probing ports until they are closed or the retry count has been
+    reached.
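+
+    Illustrative usage (port numbers assumed):
+
+        barrier = make_wait_for_ports_barrier([8080, 8443])
+        barrier()  # retries until the ports are closed or retries run out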
+    """
+    @decorators.retry_on_predicate(retry_count, operator.not_, base_delay=0.1)
+    def retry_port_check():
+        _, ports_states = _check_listening_on_ports_list(ports)
+        juju_log("Probe ports {}, result: {}".format(ports, ports_states),
+                 level="DEBUG")
+        return any(ports_states)
+    return retry_port_check
+
+
+def pause_unit(assess_status_func, services=None, ports=None,
+               charm_func=None):
+    """Pause a unit by stopping the services and setting 'unit-paused'
+    in the local kv() store.
+
+    Also checks that the services have stopped and ports are no longer
+    being listened to.
+
+    An optional charm_func() can be called that can either raise an
+    Exception or return a non-None message to indicate that the unit
+    didn't pause cleanly.
+
+    The signature for charm_func is:
+    charm_func() -> message: string
+
+    charm_func() is executed after any services are stopped, if supplied.
+
+    The services object can either be:
+      - None : no services were passed (an empty dict is returned)
+      - a list of strings
+      - A dictionary (optionally OrderedDict) {service_name: {'service': ..}}
+      - An array of [{'service': service_name, ...}, ...]
+
+    @param assess_status_func: (f() -> message: string | None) or None
+    @param services: OPTIONAL see above
+    @param ports: OPTIONAL list of ports
+    @param charm_func: function to run for custom charm pausing.
+    @returns None
+    @raises Exception(message) on an error for action_fail().
+    """
+    _, messages = manage_payload_services(
+        'pause',
+        services=services,
+        charm_func=charm_func)
+    set_unit_paused()
+
+    if assess_status_func:
+        message = assess_status_func()
+        if message:
+            messages.append(message)
+    if messages and not is_unit_upgrading_set():
+        raise Exception("Couldn't pause: {}".format("; ".join(messages)))
+
+
+def resume_unit(assess_status_func, services=None, ports=None,
+                charm_func=None):
+    """Resume a unit by starting the services and clearing 'unit-paused'
+    in the local kv() store.
+
+    Also checks that the services have started and ports are being listened
+    to.
+
+    An optional charm_func() can be called that can either raise an
+    Exception or return a non-None message to indicate that the unit
+    didn't resume cleanly.
+
+    The signature for charm_func is:
+    charm_func() -> message: string
+
+    charm_func() is executed after any services are started, if supplied.
+
+    The services object can either be:
+      - None : no services were passed (an empty dict is returned)
+      - a list of strings
+      - A dictionary (optionally OrderedDict) {service_name: {'service': ..}}
+      - An array of [{'service': service_name, ...}, ...]
+
+    @param assess_status_func: (f() -> message: string | None) or None
+    @param services: OPTIONAL see above
+    @param ports: OPTIONAL list of ports
+    @param charm_func: function to run for custom charm resuming.
+    @returns None
+    @raises Exception(message) on an error for action_fail().
+    """
+    _, messages = manage_payload_services(
+        'resume',
+        services=services,
+        charm_func=charm_func)
+    clear_unit_paused()
+    if assess_status_func:
+        message = assess_status_func()
+        if message:
+            messages.append(message)
+    if messages:
+        raise Exception("Couldn't resume: {}".format("; ".join(messages)))
+
+
+def restart_services_action(services=None, when_all_stopped_func=None,
+                            deferred_only=None):
+    """Manage a service restart request via charm action.
+
+    :param services: Services to be restarted
+    :type services: List[str]
+    :param when_all_stopped_func: Function to call when all services are
+                                  stopped.
+    :type when_all_stopped_func: Callable[[], None]
+    :param deferred_only: Only restart services which have a deferred restart
+                          event.
+    :type deferred_only: bool
+    """
+    if services and deferred_only:
+        raise ValueError(
+            "services and deferred_only are mutually exclusive")
+    if deferred_only:
+        services = list(set(
+            [a.service for a in deferred_events.get_deferred_restarts()]))
+    _, messages = manage_payload_services(
+        'stop',
+        services=services,
+        charm_func=when_all_stopped_func)
+    if messages:
+        raise ServiceActionError(
+            "Error processing service stop request: {}".format(
+                "; ".join(messages)))
+    _, messages = manage_payload_services(
+        'start',
+        services=services)
+    if messages:
+        raise ServiceActionError(
+            "Error processing service start request: {}".format(
+                "; ".join(messages)))
+    deferred_events.clear_deferred_restarts(services)
+
+
+def make_assess_status_func(*args, **kwargs):
+    """Creates an assess_status_func() suitable for handing to pause_unit()
+    and resume_unit().
+
+    This uses the _determine_os_workload_status(...) function to determine
+    what the workload_status should be for the unit. If the unit is
+    not in maintenance or active states, then the message is returned to
+    the caller. This is so an action that doesn't result in either a
+    complete pause or complete resume can signal failure with an
+    action_fail()
+    """
+    def _assess_status_func():
+        state, message = _determine_os_workload_status(*args, **kwargs)
+        status_set(state, message)
+        if state not in ['maintenance', 'active']:
+            return message
+        return None
+
+    return _assess_status_func
+
+
+def pausable_restart_on_change(restart_map, stopstart=False,
+                               restart_functions=None,
+                               can_restart_now_f=None,
+                               post_svc_restart_f=None,
+                               pre_restarts_wait_f=None):
+    """A restart_on_change decorator that checks to see if the unit is
+    paused. If it is paused then the decorated function doesn't fire.
+
+    This is provided as a helper, as the @restart_on_change(...) decorator
+    is in core.host, yet the openstack specific helpers are in this file
+    (contrib.openstack.utils). Thus, this needs to be an optional feature
+    for openstack charms (or charms that wish to use the openstack
+    pause/resume type features).
+
+    It is used as follows:
+
+        from contrib.openstack.utils import (
+            pausable_restart_on_change as restart_on_change)
+
+        @restart_on_change(restart_map, stopstart=True)
+        def some_hook(...):
+            pass
+
+    see core.host.restart_on_change() for more details.
+
+    Note restart_map can be a callable, in which case, restart_map is only
+    evaluated at runtime. This means that it is lazy and the underlying
+    function won't be called if the decorated function is never called. Note,
+    retains backwards compatibility for passing a non-callable dictionary.
+
+    :param restart_map: Optionally a callable which returns the restart map,
+        or the restart map itself: {conf_file: [services]}
+    :type restart_map: Union[Callable[[], Dict[str, List[str]]],
+                             Dict[str, List[str]]]
+    :param stopstart: whether to stop, start or restart a service
+    :type stopstart: bool
+    :param restart_functions: nonstandard functions to use to restart services
+                              {svc: func, ...}
+    :type restart_functions: Dict[str, Callable[[str], None]]
+    :param can_restart_now_f: A function used to check if the restart is
+                              permitted.
+    :type can_restart_now_f: Callable[[str, List[str]], bool]
+    :param post_svc_restart_f: A function run after a service has
+                               restarted.
+    :type post_svc_restart_f: Callable[[str], None]
+    :param pre_restarts_wait_f: A function called before any restarts.
+    :type pre_restarts_wait_f: Callable[[], None]
+    :returns: decorator to use a restart_on_change with pausability
+    :rtype: decorator
+    """
+    def wrap(f):
+        __restart_map_cache = None
+
+        @functools.wraps(f)
+        def wrapped_f(*args, **kwargs):
+            nonlocal __restart_map_cache
+            if is_unit_paused_set():
+                return f(*args, **kwargs)
+            if __restart_map_cache is None:
+                __restart_map_cache = restart_map() \
+                    if callable(restart_map) else restart_map
+            # otherwise, normal restart_on_change functionality
+            return restart_on_change_helper(
+                (lambda: f(*args, **kwargs)),
+                __restart_map_cache,
+                stopstart,
+                restart_functions,
+                can_restart_now_f,
+                post_svc_restart_f,
+                pre_restarts_wait_f)
+        return wrapped_f
+    return wrap
+
+
+def ordered(orderme):
+    """Converts the provided dictionary into a collections.OrderedDict.
+
+    The items in the returned OrderedDict will be inserted based on the
+    natural sort order of the keys. Nested dictionaries will also be sorted
+    in order to ensure fully predictable ordering.
+
+    :param orderme: the dict to order
+    :return: collections.OrderedDict
+    :raises: ValueError: if `orderme` isn't a dict instance.
+    """
+    if not isinstance(orderme, dict):
+        raise ValueError('argument must be a dict type')
+
+    result = OrderedDict()
+    for k, v in sorted(orderme.items(), key=lambda x: x[0]):
+        if isinstance(v, dict):
+            result[k] = ordered(v)
+        else:
+            result[k] = v
+
+    return result
+
+
+def config_flags_parser(config_flags):
+    """Parses config flags string into dict.
+
+    This parsing method supports a few different formats for the config
+    flag values to be parsed:
+
+      1. A string in the simple format of key=value pairs, with the
+         possibility of specifying multiple key value pairs within the same
+         string. For example, a string in the format of
+         'key1=value1, key2=value2' will return a dict of:
+
+             {'key1': 'value1', 'key2': 'value2'}.
+
+      2. A string in the above format, but supporting a comma-delimited list
+         of values for the same key. For example, a string in the format of
+         'key1=value1, key2=value3,value4,value5' will return a dict of:
+
+             {'key1': 'value1', 'key2': 'value3,value4,value5'}
+
+      3. A string containing a colon character (:) prior to an equal
+         character (=) will be treated as yaml and parsed as such. This can
+         be used to specify more complex key value pairs. For example,
+         a string in the format of 'key1: subkey1=value1, subkey2=value2'
+         will return a dict of:
+
+             {'key1': 'subkey1=value1, subkey2=value2'}
+
+    The provided config_flags string may be a comma-separated list of values,
+    each of which may itself contain comma-separated values.
+    """
+    # If we find a colon before an equals sign then treat it as yaml.
+    # Note: limit it to finding the colon first since this indicates
+    # assignment for inline yaml.
+    colon = config_flags.find(':')
+    equals = config_flags.find('=')
+    if colon > 0:
+        if colon < equals or equals < 0:
+            return ordered(yaml.safe_load(config_flags))
+
+    if config_flags.find('==') >= 0:
+        juju_log("config_flags is not in expected format (key=value)",
+                 level=ERROR)
+        raise OSContextError
+
+    # strip the following from each value.
+    post_strippers = ' ,'
+    # we strip any leading/trailing '=' or ' ' from the string then
+    # split on '='.
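+    # Illustrative walk-through (assumed input): 'k1=v1, k2=v2' splits on
+    # '=' into ['k1', 'v1, k2', 'v2']; for each middle element, the text
+    # after its last comma becomes the next key and the text before it is
+    # the previous key's value.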
+ split = config_flags.strip(' =').split('=') + limit = len(split) + flags = OrderedDict() + for i in range(0, limit - 1): + current = split[i] + next = split[i + 1] + vindex = next.rfind(',') + if (i == limit - 2) or (vindex < 0): + value = next + else: + value = next[:vindex] + + if i == 0: + key = current + else: + # if this not the first entry, expect an embedded key. + index = current.rfind(',') + if index < 0: + juju_log("Invalid config value(s) at index %s" % (i), + level=ERROR) + raise OSContextError + key = current[index + 1:] + + # Add to collection. + flags[key.strip(post_strippers)] = value.rstrip(post_strippers) + + return flags + + +def os_application_version_set(package): + '''Set version of application for Juju 2.0 and later''' + application_version = get_upstream_version(package) + # NOTE(jamespage) if not able to figure out package version, fallback to + # openstack codename version detection. + if not application_version: + application_version_set(os_release(package)) + else: + application_version_set(application_version) + + +def os_application_status_set(check_function): + """Run the supplied function and set the application status accordingly. + + :param check_function: Function to run to get app states and messages. + :type check_function: function + """ + state, message = check_function() + status_set(state, message, application=True) + + +def enable_memcache(source=None, release=None, package=None): + """Determine if memcache should be enabled on the local unit + + @param release: release of OpenStack currently deployed + @param package: package to derive OpenStack version deployed + @returns boolean Whether memcache should be enabled + """ + _release = None + if release: + _release = release + else: + _release = os_release(package) + if not _release: + _release = get_os_codename_install_source(source) + + return CompareOpenStackReleases(_release) >= 'mitaka' + + +def token_cache_pkgs(source=None, release=None): + """Determine additional packages needed for token caching + + @param source: source string for charm + @param release: release of OpenStack currently deployed + @returns List of package to enable token caching + """ + packages = [] + if enable_memcache(source=source, release=release): + packages.extend(['memcached', 'python-memcache']) + return packages + + +def update_json_file(filename, items): + """Updates the json `filename` with a given dict. + :param filename: path to json file (e.g. /etc/glance/policy.json) + :param items: dict of items to update + """ + if not items: + return + + with open(filename) as fd: + policy = json.load(fd) + + # Compare before and after and if nothing has changed don't write the file + # since that could cause unnecessary service restarts. + before = json.dumps(policy, indent=4, sort_keys=True) + policy.update(items) + after = json.dumps(policy, indent=4, sort_keys=True) + if before == after: + return + + with open(filename, "w") as fd: + fd.write(after) + + +@cached +def snap_install_requested(): + """ Determine if installing from snaps + + If openstack-origin is of the form snap:track/channel[/branch] + and channel is in SNAPS_CHANNELS return True. 
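+
+    Illustrative values (hypothetical): an openstack-origin of
+    'snap:ocata/stable' returns True, while 'distro' or a cloud-archive
+    pocket returns False.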
+    """
+    origin = config('openstack-origin') or ""
+    if not origin.startswith('snap:'):
+        return False
+
+    _src = origin[5:]
+    if '/' in _src:
+        channel = _src.split('/')[1]
+    else:
+        # Handle snap:track with no channel
+        channel = 'stable'
+    return valid_snap_channel(channel)
+
+
+def get_snaps_install_info_from_origin(snaps, src, mode='classic'):
+    """Generate a dictionary of snap install information from origin
+
+    @param snaps: List of snaps
+    @param src: String of openstack-origin or source of the form
+                snap:track/channel
+    @param mode: String classic, devmode or jailmode
+    @returns: Dictionary of snaps with channels and modes
+    """
+
+    if not src.startswith('snap:'):
+        juju_log("Snap source is not a snap origin", 'WARN')
+        return {}
+
+    _src = src[5:]
+    channel = '--channel={}'.format(_src)
+
+    return {snap: {'channel': channel, 'mode': mode}
+            for snap in snaps}
+
+
+def install_os_snaps(snaps, refresh=False):
+    """Install OpenStack snaps from channel and with mode
+
+    @param snaps: Dictionary of snaps with channels and modes of the form:
+                  {'snap_name': {'channel': 'snap_channel',
+                                 'mode': 'snap_mode'}}
+                  Where channel is a snapstore channel and mode is --classic,
+                  --devmode or --jailmode.
+    @param refresh: If True, refresh already-installed snaps instead of
+                    installing them.
+    """
+
+    def _ensure_flag(flag):
+        if flag.startswith('--'):
+            return flag
+        return '--{}'.format(flag)
+
+    if refresh:
+        for snap in snaps.keys():
+            snap_refresh(snap,
+                         _ensure_flag(snaps[snap]['channel']),
+                         _ensure_flag(snaps[snap]['mode']))
+    else:
+        for snap in snaps.keys():
+            snap_install(snap,
+                         _ensure_flag(snaps[snap]['channel']),
+                         _ensure_flag(snaps[snap]['mode']))
+
+
+def set_unit_upgrading():
+    """Set the unit to an upgrading state in the local kv() store.
+    """
+    with unitdata.HookData()() as t:
+        kv = t[0]
+        kv.set('unit-upgrading', True)
+
+
+def clear_unit_upgrading():
+    """Clear the unit from an upgrading state in the local kv() store
+    """
+    with unitdata.HookData()() as t:
+        kv = t[0]
+        kv.set('unit-upgrading', False)
+
+
+def is_unit_upgrading_set():
+    """Return the state of the kv().get('unit-upgrading').
+
+    To help with units that don't have HookData() (testing), return False
+    if the lookup raises an exception.
+    """
+    try:
+        with unitdata.HookData()() as t:
+            kv = t[0]
+            # transform something truth-y into a Boolean.
+            return bool(kv.get('unit-upgrading'))
+    except Exception:
+        return False
+
+
+def series_upgrade_prepare(pause_unit_helper=None, configs=None):
+    """Run common series upgrade prepare tasks.
+
+    :param pause_unit_helper: function: Function to pause unit
+    :param configs: OSConfigRenderer object: Configurations
+    :returns None:
+    """
+    set_unit_upgrading()
+    if pause_unit_helper and configs:
+        if not is_unit_paused_set():
+            pause_unit_helper(configs)
+
+
+def series_upgrade_complete(resume_unit_helper=None, configs=None):
+    """Run common series upgrade complete tasks.
+
+    :param resume_unit_helper: function: Function to resume unit
+    :param configs: OSConfigRenderer object: Configurations
+    :returns None:
+    """
+    clear_unit_paused()
+    clear_unit_upgrading()
+    if configs:
+        configs.write_all()
+        if resume_unit_helper:
+            resume_unit_helper(configs)
+
+
+def is_db_initialised():
+    """Check leader storage to see if database has been initialised.
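+
+    Illustrative leader-side flow (migrate_database is a hypothetical
+    charm helper; is_leader comes from charmhelpers.core.hookenv):
+
+        if is_leader() and not is_db_initialised():
+            migrate_database()
+            set_db_initialised()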
+ + :returns: Whether DB has been initialised + :rtype: bool + """ + db_initialised = None + if leader_get('db-initialised') is None: + juju_log( + 'db-initialised key missing, assuming db is not initialised', + 'DEBUG') + db_initialised = False + else: + db_initialised = bool_from_string(leader_get('db-initialised')) + juju_log('Database initialised: {}'.format(db_initialised), 'DEBUG') + return db_initialised + + +def set_db_initialised(): + """Add flag to leader storage to indicate database has been initialised. + """ + juju_log('Setting db-initialised to True', 'DEBUG') + leader_set({'db-initialised': True}) + + +def is_db_maintenance_mode(relid=None): + """Check relation data from notifications of db in maintenance mode. + + :returns: Whether db has notified it is in maintenance mode. + :rtype: bool + """ + juju_log('Checking for maintenance notifications', 'DEBUG') + if relid: + r_ids = [relid] + else: + r_ids = relation_ids('shared-db') + rids_units = [(r, u) for r in r_ids for u in related_units(r)] + notifications = [] + for r_id, unit in rids_units: + settings = relation_get(unit=unit, rid=r_id) + for key, value in settings.items(): + if value and key in DB_MAINTENANCE_KEYS: + juju_log( + 'Unit: {}, Key: {}, Value: {}'.format(unit, key, value), + 'DEBUG') + try: + notifications.append(bool_from_string(value)) + except ValueError: + juju_log( + 'Could not discern bool from {}'.format(value), + 'WARN') + pass + return True in notifications + + +@cached +def container_scoped_relations(): + """Get all the container scoped relations + + :returns: List of relation names + :rtype: List + """ + md = metadata() + relations = [] + for relation_type in ('provides', 'requires', 'peers'): + for relation in md.get(relation_type, []): + if md[relation_type][relation].get('scope') == 'container': + relations.append(relation) + return relations + + +def container_scoped_relation_get(attribute=None): + """Get relation data from all container scoped relations. + + :param attribute: Name of attribute to get + :type attribute: Optional[str] + :returns: Iterator with relation data + :rtype: Iterator[Optional[any]] + """ + for endpoint_name in container_scoped_relations(): + for rid in relation_ids(endpoint_name): + for unit in related_units(rid): + yield relation_get( + attribute=attribute, + unit=unit, + rid=rid) + + +def is_db_ready(use_current_context=False, rel_name=None): + """Check remote database is ready to be used. + + Database relations are expected to provide a list of 'allowed' units to + confirm that the database is ready for use by those units. + + If db relation has provided this information and local unit is a member, + returns True otherwise False. + + :param use_current_context: Whether to limit checks to current hook + context. + :type use_current_context: bool + :param rel_name: Name of relation to check + :type rel_name: string + :returns: Whether remote db is ready. + :rtype: bool + :raises: Exception + """ + key = 'allowed_units' + + rel_name = rel_name or 'shared-db' + this_unit = local_unit() + + if use_current_context: + if relation_id() in relation_ids(rel_name): + rids_units = [(None, None)] + else: + raise Exception("use_current_context=True but not in {} " + "rel hook contexts (currently in {})." 
+                            .format(rel_name, relation_id()))
+    else:
+        rids_units = [(r_id, u)
+                      for r_id in relation_ids(rel_name)
+                      for u in related_units(r_id)]
+
+    for rid, unit in rids_units:
+        allowed_units = relation_get(rid=rid, unit=unit, attribute=key)
+        if allowed_units and this_unit in allowed_units.split():
+            juju_log("This unit ({}) is in allowed unit list from {}".format(
+                this_unit,
+                unit), 'DEBUG')
+            return True
+
+    juju_log("This unit was not found in any allowed unit list")
+    return False
+
+
+def is_expected_scale(peer_relation_name='cluster'):
+    """Query juju goal-state to determine whether our peer- and dependency-
+    relations are at the expected scale.
+
+    Useful for deferring per unit per relation housekeeping work until we are
+    ready to complete it successfully and without unnecessary repetition.
+
+    Always returns True if the version of juju used does not support
+    goal-state.
+
+    :param peer_relation_name: Name of peer relation
+    :type peer_relation_name: string
+    :returns: True or False
+    :rtype: bool
+    """
+    def _get_relation_id(rel_type):
+        return next((rid for rid in relation_ids(reltype=rel_type)), None)
+
+    Relation = namedtuple('Relation', 'rel_type rel_id')
+    peer_rid = _get_relation_id(peer_relation_name)
+    # Units with no peers should still have a peer relation.
+    if not peer_rid:
+        juju_log('Not at expected scale, no peer relation found', 'DEBUG')
+        return False
+    expected_relations = [
+        Relation(rel_type='shared-db', rel_id=_get_relation_id('shared-db'))]
+    if expect_ha():
+        expected_relations.append(
+            Relation(
+                rel_type='ha',
+                rel_id=_get_relation_id('ha')))
+    juju_log(
+        'Checking scale of {} relations'.format(
+            ','.join([r.rel_type for r in expected_relations])),
+        'DEBUG')
+    try:
+        if (len(related_units(relid=peer_rid)) <
+                len(list(expected_peer_units()))):
+            return False
+        for rel in expected_relations:
+            if not rel.rel_id:
+                juju_log(
+                    'Expected to find {} relation, but it is missing'.format(
+                        rel.rel_type),
+                    'DEBUG')
+                return False
+            # Goal state returns every unit even for container scoped
+            # relations but the charm only ever has a relation with
+            # the local unit.
+            if rel.rel_type in container_scoped_relations():
+                expected_count = 1
+            else:
+                expected_count = len(
+                    list(expected_related_units(reltype=rel.rel_type)))
+            if len(related_units(relid=rel.rel_id)) < expected_count:
+                juju_log(
+                    ('Not at expected scale, not enough units on {} '
+                     'relation'.format(rel.rel_type)),
+                    'DEBUG')
+                return False
+    except NotImplementedError:
+        return True
+    juju_log('All checks have passed, unit is at expected scale', 'DEBUG')
+    return True
+
+
+def get_peer_key(unit_name):
+    """Get the peer key for this unit.
+
+    The peer key is the key a unit uses to publish its status down the peer
+    relation
+
+    :param unit_name: Name of unit
+    :type unit_name: string
+    :returns: Peer key for given unit
+    :rtype: string
+    """
+    return 'unit-state-{}'.format(unit_name.replace('/', '-'))
+
+
+UNIT_READY = 'READY'
+UNIT_NOTREADY = 'NOTREADY'
+UNIT_UNKNOWN = 'UNKNOWN'
+UNIT_STATES = [UNIT_READY, UNIT_NOTREADY, UNIT_UNKNOWN]
+
+
+def inform_peers_unit_state(state, relation_name='cluster'):
+    """Inform peers of the state of this unit.
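+
+    Illustrative call from a readiness handler (relation name left at the
+    'cluster' default):
+
+        inform_peers_unit_state(UNIT_READY)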
+ + :param state: State of unit to publish + :type state: string + :param relation_name: Name of relation to publish state on + :type relation_name: string + """ + if state not in UNIT_STATES: + raise ValueError( + "Setting invalid state {} for unit".format(state)) + this_unit = local_unit() + for r_id in relation_ids(relation_name): + juju_log('Telling peer behind relation {} that {} is {}'.format( + r_id, this_unit, state), 'DEBUG') + relation_set(relation_id=r_id, + relation_settings={ + get_peer_key(this_unit): state}) + + +def get_peers_unit_state(relation_name='cluster'): + """Get the state of all peers. + + :param relation_name: Name of relation to check peers on. + :type relation_name: string + :returns: Unit states keyed on unit name. + :rtype: dict + :raises: ValueError + """ + r_ids = relation_ids(relation_name) + rids_units = [(r, u) for r in r_ids for u in related_units(r)] + unit_states = {} + for r_id, unit in rids_units: + settings = relation_get(unit=unit, rid=r_id) + unit_states[unit] = settings.get(get_peer_key(unit), UNIT_UNKNOWN) + if unit_states[unit] not in UNIT_STATES: + raise ValueError( + "Unit in unknown state {}".format(unit_states[unit])) + return unit_states + + +def are_peers_ready(relation_name='cluster'): + """Check if all peers are ready. + + :param relation_name: Name of relation to check peers on. + :type relation_name: string + :returns: Whether all units are ready. + :rtype: bool + """ + unit_states = get_peers_unit_state(relation_name).values() + juju_log('{} peers are in the following states: {}'.format( + relation_name, unit_states), 'DEBUG') + return all(state == UNIT_READY for state in unit_states) + + +def inform_peers_if_ready(check_unit_ready_func, relation_name='cluster'): + """Inform peers if this unit is ready. + + The check function should return a tuple (state, message). A state + of 'READY' indicates the unit is READY. + + :param check_unit_ready_func: Function to run to check readiness + :type check_unit_ready_func: function + :param relation_name: Name of relation to check peers on. + :type relation_name: string + """ + unit_ready, msg = check_unit_ready_func() + if unit_ready: + state = UNIT_READY + else: + state = UNIT_NOTREADY + juju_log('Telling peers this unit is: {}'.format(state), 'DEBUG') + inform_peers_unit_state(state, relation_name) + + +def check_api_unit_ready(check_db_ready=True): + """Check if this unit is ready. + + :param check_db_ready: Include checks of database readiness. + :type check_db_ready: bool + :returns: Whether unit state is ready and status message + :rtype: (bool, str) + """ + unit_state, msg = get_api_unit_status(check_db_ready=check_db_ready) + return unit_state == WORKLOAD_STATES.ACTIVE, msg + + +def get_api_unit_status(check_db_ready=True): + """Return a workload status and message for this unit. + + :param check_db_ready: Include checks of database readiness. + :type check_db_ready: bool + :returns: Workload state and message + :rtype: (bool, str) + """ + unit_state = WORKLOAD_STATES.ACTIVE + msg = 'Unit is ready' + if is_db_maintenance_mode(): + unit_state = WORKLOAD_STATES.MAINTENANCE + msg = 'Database in maintenance mode.' + elif is_unit_paused_set(): + unit_state = WORKLOAD_STATES.BLOCKED + msg = 'Unit paused.' 
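+    # Note: these checks are evaluated in priority order; the first
+    # condition that matches determines the reported workload state.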
+    elif check_db_ready and not is_db_ready():
+        unit_state = WORKLOAD_STATES.WAITING
+        msg = 'Allowed_units list provided but this unit not present'
+    elif not is_db_initialised():
+        unit_state = WORKLOAD_STATES.WAITING
+        msg = 'Database not initialised'
+    elif not is_expected_scale():
+        unit_state = WORKLOAD_STATES.WAITING
+        msg = 'Charm and its dependencies not yet at expected scale'
+    juju_log(msg, 'DEBUG')
+    return unit_state, msg
+
+
+def check_api_application_ready():
+    """Check if this application is ready.
+
+    :returns: Whether application state is ready and status message
+    :rtype: (bool, str)
+    """
+    app_state, msg = get_api_application_status()
+    return app_state == WORKLOAD_STATES.ACTIVE, msg
+
+
+def get_api_application_status():
+    """Return a workload status and message for this application.
+
+    :returns: Workload state and message
+    :rtype: (WORKLOAD_STATES, str)
+    """
+    app_state, msg = get_api_unit_status()
+    if app_state == WORKLOAD_STATES.ACTIVE:
+        if are_peers_ready():
+            msg = 'Application Ready'
+        else:
+            app_state = WORKLOAD_STATES.WAITING
+            msg = 'Some units are not ready'
+    juju_log(msg, 'DEBUG')
+    return app_state, msg
+
+
+def sequence_status_check_functions(*functions):
+    """Sequence the functions passed so that they all get a chance to run as
+    the charm status check functions.
+
+    :param *functions: a list of functions that return (state, message)
+    :type *functions: List[Callable[[OSConfigRenderer], (str, str)]]
+    :returns: the Callable that takes configs and returns (state, message)
+    :rtype: Callable[[OSConfigRenderer], (str, str)]
+    """
+    def _inner_sequenced_functions(configs):
+        state, message = 'unknown', ''
+        for f in functions:
+            new_state, new_message = f(configs)
+            state = workload_state_compare(state, new_state)
+            if message:
+                message = "{}, {}".format(message, new_message)
+            else:
+                message = new_message
+        return state, message
+
+    return _inner_sequenced_functions
+
+
+SubordinatePackages = namedtuple('SubordinatePackages', ['install', 'purge'])
+
+
+def get_subordinate_release_packages(os_release, package_type='deb'):
+    """Iterate over subordinate relations and get package information.
+
+    :param os_release: OpenStack release to look for
+    :type os_release: str
+    :param package_type: Package type (one of 'deb' or 'snap')
+    :type package_type: str
+    :returns: Packages to install and packages to purge or None
+    :rtype: SubordinatePackages[set,set]
+    """
+    install = set()
+    purge = set()
+
+    for rdata in container_scoped_relation_get('releases-packages-map'):
+        rp_map = json.loads(rdata or '{}')
+        # The map provided by subordinate has OpenStack release name as key.
+        # Find package information from subordinate matching requested release
+        # or the most recent release prior to requested release by sorting the
+        # keys in reverse order. This follows established patterns in our
+        # charms for templates and reactive charm implementations, i.e. as
+        # long as nothing has changed, the definitions for the prior OpenStack
+        # release are still valid.
+        for release in sorted(rp_map.keys(), reverse=True):
+            if (CompareOpenStackReleases(release) <= os_release and
+                    package_type in rp_map[release]):
+                for name, container in (
+                        ('install', install),
+                        ('purge', purge)):
+                    for pkg in rp_map[release][package_type].get(name, []):
+                        container.add(pkg)
+                break
+    return SubordinatePackages(install, purge)
+
+
+def get_subordinate_services():
+    """Iterate over subordinate relations and get service information.
+
+    In a similar fashion as with get_subordinate_release_packages(),
+    principal charms can retrieve a list of services advertised by their
+    subordinate charms. This is useful to know about subordinate services
+    when pausing, resuming or upgrading a principal unit.
+
+    :returns: Name of all services advertised by all subordinates
+    :rtype: Set[str]
+    """
+    services = set()
+    for rdata in container_scoped_relation_get('services'):
+        services |= set(json.loads(rdata or '[]'))
+    return services
+
+
+os_restart_on_change = partial(
+    pausable_restart_on_change,
+    can_restart_now_f=deferred_events.check_and_record_restart_request,
+    post_svc_restart_f=deferred_events.process_svc_restart)
+
+
+def restart_services_action_helper(all_services):
+    """Helper to run the restart-services action.
+
+    NOTE: all_services is all services that could be restarted but
+    depending on the action arguments it may be a subset of
+    these that are actually restarted.
+
+    :param all_services: All services that could be restarted
+    :type all_services: List[str]
+    """
+    deferred_only = action_get("deferred-only")
+    services = action_get("services")
+    if services:
+        services = services.split()
+    else:
+        services = all_services
+    if deferred_only:
+        restart_services_action(deferred_only=True)
+    else:
+        restart_services_action(services=services)
+
+
+def show_deferred_events_action_helper():
+    """Helper to run the show-deferred-restarts action."""
+    restarts = []
+    for event in deferred_events.get_deferred_events():
+        restarts.append('{} {} {}'.format(
+            str(event.timestamp),
+            event.service.ljust(40),
+            event.reason))
+    restarts.sort()
+    output = {
+        'restarts': restarts,
+        'hooks': deferred_events.get_deferred_hooks()}
+    action_set({'output': "{}".format(
+        yaml.dump(output, default_flow_style=False))})
diff --git a/ceph-osd/hooks/charmhelpers/contrib/openstack/vaultlocker.py b/ceph-osd/hooks/charmhelpers/contrib/openstack/vaultlocker.py
new file mode 100644
index 00000000..002bc579
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/openstack/vaultlocker.py
@@ -0,0 +1,184 @@
+# Copyright 2018-2021 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import os
+
+import charmhelpers.contrib.openstack.alternatives as alternatives
+import charmhelpers.contrib.openstack.context as context
+
+import charmhelpers.core.hookenv as hookenv
+import charmhelpers.core.host as host
+import charmhelpers.core.templating as templating
+import charmhelpers.core.unitdata as unitdata
+
+VAULTLOCKER_BACKEND = 'charm-vaultlocker'
+
+
+class VaultKVContext(context.OSContextGenerator):
+    """Vault KV context for interaction with vault-kv interfaces"""
+    interfaces = ['secrets-storage']
+
+    def __init__(self, secret_backend=None):
+        super(VaultKVContext, self).__init__()
+        self.secret_backend = (
+            secret_backend or 'charm-{}'.format(hookenv.service_name())
+        )
+
+    def __call__(self):
+        try:
+            import hvac
+        except ImportError:
+            # BUG: #1862085 - if the relation is made to vault, but the
+            # 'encrypt' option is not set, then the charm errors with an
+            # import warning. This catches that, logs a warning, and returns
+            # with an empty context.
+            hookenv.log("VaultKVContext: trying to use the hvac python "
+                        "module but it's not available. Is the "
+                        "secrets-storage relation made, but the encrypt "
+                        "option not set?",
+                        level=hookenv.WARNING)
+            # return an empty context on hvac import error
+            return {}
+        ctxt = {}
+        # NOTE(hopem): see https://bugs.launchpad.net/charm-helpers/+bug/1849323
+        db = unitdata.kv()
+        # currently known-good secret-id
+        secret_id = db.get('secret-id')
+
+        for relation_id in hookenv.relation_ids(self.interfaces[0]):
+            for unit in hookenv.related_units(relation_id):
+                data = hookenv.relation_get(unit=unit,
+                                            rid=relation_id)
+                vault_url = data.get('vault_url')
+                role_id = data.get('{}_role_id'.format(hookenv.local_unit()))
+                token = data.get('{}_token'.format(hookenv.local_unit()))
+
+                if all([vault_url, role_id, token]):
+                    token = json.loads(token)
+                    vault_url = json.loads(vault_url)
+
+                    # Tokens may change when secret_id's are being
+                    # reissued - if so use token to get new secret_id
+                    token_success = False
+                    try:
+                        secret_id = retrieve_secret_id(
+                            url=vault_url,
+                            token=token
+                        )
+                        token_success = True
+                    except hvac.exceptions.InvalidRequest:
+                        # Try next
+                        pass
+
+                    if token_success:
+                        db.set('secret-id', secret_id)
+                        db.flush()
+
+                        ctxt['vault_url'] = vault_url
+                        ctxt['role_id'] = json.loads(role_id)
+                        ctxt['secret_id'] = secret_id
+                        ctxt['secret_backend'] = self.secret_backend
+                        vault_ca = data.get('vault_ca')
+                        if vault_ca:
+                            ctxt['vault_ca'] = json.loads(vault_ca)
+
+                        self.complete = True
+                        break
+                    else:
+                        if secret_id:
+                            ctxt['vault_url'] = vault_url
+                            ctxt['role_id'] = json.loads(role_id)
+                            ctxt['secret_id'] = secret_id
+                            ctxt['secret_backend'] = self.secret_backend
+                            vault_ca = data.get('vault_ca')
+                            if vault_ca:
+                                ctxt['vault_ca'] = json.loads(vault_ca)
+
+            if self.complete:
+                break
+
+        if ctxt:
+            self.complete = True
+
+        return ctxt
+
+
+def write_vaultlocker_conf(context, priority=100):
+    """Write vaultlocker configuration to disk and install alternative
+
+    :param context: Dict of data from vault-kv relation
+    :ptype context: dict
+    :param priority: Priority of alternative configuration
+    :ptype priority: int"""
+    charm_vl_path = "/var/lib/charm/{}/vaultlocker.conf".format(
+        hookenv.service_name()
+    )
+    host.mkdir(os.path.dirname(charm_vl_path), perms=0o700)
+    templating.render(source='vaultlocker.conf.j2',
+                      target=charm_vl_path,
+                      context=context, perms=0o600)
+    alternatives.install_alternative('vaultlocker.conf',
+                                     '/etc/vaultlocker/vaultlocker.conf',
+                                     charm_vl_path, priority)
+
+
+def vault_relation_complete(backend=None):
+    """Determine whether vault relation is complete
+
+    :param backend: Name of secrets backend requested
+    :ptype backend: string
+    :returns: whether the relation to vault is complete
+    :rtype: bool"""
+    try:
+        import hvac
+    except ImportError:
+        return False
+    try:
+        vault_kv = VaultKVContext(secret_backend=backend or VAULTLOCKER_BACKEND)
+        vault_kv()
+        return vault_kv.complete
+    except hvac.exceptions.InvalidRequest:
+        return False
+
+
+# TODO: contribute a high level unwrap method to hvac that works
+def retrieve_secret_id(url, token):
+    """Retrieve a response-wrapped secret_id from Vault
+
+    :param url: URL to Vault Server
+    :ptype url: str
+    :param token: One shot Token to use
+    :ptype token: str
+    :returns: secret_id to use for Vault Access
+    :rtype: str"""
+    import hvac
+    try:
+        # hvac 0.10.1 changed default adapter to JSONAdapter
+        client = hvac.Client(url=url, token=token,
+                             adapter=hvac.adapters.Request)
+    except AttributeError:
+        # hvac < 0.6.2 doesn't have adapter but uses the same response
+        # interface
+        client = hvac.Client(url=url, token=token)
+    else:
+        # hvac < 0.9.2 assumes adapter is an instance, so doesn't instantiate
+        if not isinstance(client.adapter, hvac.adapters.Request):
+            client.adapter = hvac.adapters.Request(base_uri=url, token=token)
+    try:
+        # hvac == 1.0.0 has an API to unwrap with the user token
+        response = client.sys.unwrap()
+    except AttributeError:
+        # fallback to hvac < 1.0.0
+        response = client._post('/v1/sys/wrapping/unwrap')
+    if response.status_code == 200:
+        data = response.json()
+        return data['data']['secret_id']
diff --git a/ceph-osd/hooks/charmhelpers/contrib/python.py b/ceph-osd/hooks/charmhelpers/contrib/python.py
new file mode 100644
index 00000000..fcded680
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/python.py
@@ -0,0 +1,19 @@
+# Copyright 2014-2019 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# deprecated aliases for backwards compatibility
+from charmhelpers.fetch.python import debug  # noqa
+from charmhelpers.fetch.python import packages  # noqa
+from charmhelpers.fetch.python import rpdb  # noqa
+from charmhelpers.fetch.python import version  # noqa
diff --git a/ceph-osd/hooks/charmhelpers/contrib/storage/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/storage/__init__.py
new file mode 100644
index 00000000..d7567b86
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/storage/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-osd/hooks/charmhelpers/contrib/storage/linux/__init__.py b/ceph-osd/hooks/charmhelpers/contrib/storage/linux/__init__.py new file mode 100644 index 00000000..d7567b86 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/storage/linux/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-osd/hooks/charmhelpers/contrib/storage/linux/bcache.py b/ceph-osd/hooks/charmhelpers/contrib/storage/linux/bcache.py new file mode 100644 index 00000000..605991e1 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/storage/linux/bcache.py @@ -0,0 +1,74 @@ +# Copyright 2017 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import json + +from charmhelpers.core.hookenv import log + +stats_intervals = ['stats_day', 'stats_five_minute', + 'stats_hour', 'stats_total'] + +SYSFS = '/sys' + + +class Bcache(object): + """Bcache behaviour + """ + + def __init__(self, cachepath): + self.cachepath = cachepath + + @classmethod + def fromdevice(cls, devname): + return cls('{}/block/{}/bcache'.format(SYSFS, devname)) + + def __str__(self): + return self.cachepath + + def get_stats(self, interval): + """Get cache stats + """ + intervaldir = 'stats_{}'.format(interval) + path = "{}/{}".format(self.cachepath, intervaldir) + out = dict() + for elem in os.listdir(path): + out[elem] = open('{}/{}'.format(path, elem)).read().strip() + return out + + +def get_bcache_fs(): + """Return all cache sets + """ + cachesetroot = "{}/fs/bcache".format(SYSFS) + try: + dirs = os.listdir(cachesetroot) + except OSError: + log("No bcache fs found") + return [] + cacheset = set([Bcache('{}/{}'.format(cachesetroot, d)) for d in dirs if not d.startswith('register')]) + return cacheset + + +def get_stats_action(cachespec, interval): + """Action for getting bcache statistics for a given cachespec. + Cachespec can either be a device name, eg. 
'sdb', which will retrieve + cache stats for the given device, or 'global', which will retrieve stats + for all cachesets + """ + if cachespec == 'global': + caches = get_bcache_fs() + else: + caches = [Bcache.fromdevice(cachespec)] + res = dict((c.cachepath, c.get_stats(interval)) for c in caches) + return json.dumps(res, indent=4, separators=(',', ': ')) diff --git a/ceph-osd/hooks/charmhelpers/contrib/storage/linux/ceph.py b/ceph-osd/hooks/charmhelpers/contrib/storage/linux/ceph.py new file mode 100644 index 00000000..6ec67cba --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/storage/linux/ceph.py @@ -0,0 +1,2401 @@ +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This file is sourced from lp:openstack-charm-helpers +# +# Authors: +# James Page +# Adam Gandelman +# + +import collections +import errno +import hashlib +import math + +import os +import shutil +import json +import time + +from subprocess import ( + check_call, + check_output, + CalledProcessError, +) +from charmhelpers import deprecate +from charmhelpers.core.hookenv import ( + application_name, + config, + service_name, + local_unit, + relation_get, + relation_ids, + relation_set, + related_units, + log, + DEBUG, + INFO, + WARNING, + ERROR, +) +from charmhelpers.core.host import ( + mount, + mounts, + service_start, + service_stop, + service_running, + umount, + cmp_pkgrevno, +) +from charmhelpers.fetch import ( + apt_install, +) +from charmhelpers.core.unitdata import kv + +from charmhelpers.core.kernel import modprobe +from charmhelpers.contrib.openstack.utils import config_flags_parser + +KEYRING = '/etc/ceph/ceph.client.{}.keyring' +KEYFILE = '/etc/ceph/ceph.client.{}.key' + +CEPH_CONF = """[global] +auth supported = {auth} +keyring = {keyring} +mon host = {mon_hosts} +log to syslog = {use_syslog} +err to syslog = {use_syslog} +clog to syslog = {use_syslog} +""" + +# The number of placement groups per OSD to target for placement group +# calculations. This number is chosen as 100 due to the ceph PG Calc +# documentation recommending to choose 100 for clusters which are not +# expected to increase in the foreseeable future. Since the majority of the +# calculations are done on deployment, target the case of non-expanding +# clusters as the default. +DEFAULT_PGS_PER_OSD_TARGET = 100 +DEFAULT_POOL_WEIGHT = 10.0 +LEGACY_PG_COUNT = 200 +DEFAULT_MINIMUM_PGS = 2 +AUTOSCALER_DEFAULT_PGS = 32 + + +class OsdPostUpgradeError(Exception): + """Error class for OSD post-upgrade operations.""" + pass + + +class OSDSettingConflict(Exception): + """Error class for conflicting osd setting requests.""" + pass + + +class OSDSettingNotAllowed(Exception): + """Error class for a disallowed setting.""" + pass + + +OSD_SETTING_EXCEPTIONS = (OSDSettingConflict, OSDSettingNotAllowed) + +OSD_SETTING_WHITELIST = [ + 'osd heartbeat grace', + 'osd heartbeat interval', +] + + +def _order_dict_by_key(rdict): + """Convert a dictionary into an OrderedDict sorted by key. 
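+
+    Example (illustrative):
+
+        >>> _order_dict_by_key({'b': 2, 'a': 1})
+        OrderedDict([('a', 1), ('b', 2)])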
+ + :param rdict: Dictionary to be ordered. + :type rdict: dict + :returns: Ordered Dictionary. + :rtype: collections.OrderedDict + """ + return collections.OrderedDict(sorted(rdict.items(), key=lambda k: k[0])) + + +def get_osd_settings(relation_name): + """Consolidate requested osd settings from all clients. + + Consolidate requested osd settings from all clients. Check that the + requested setting is on the whitelist and it does not conflict with + any other requested settings. + + :returns: Dictionary of settings + :rtype: dict + + :raises: OSDSettingNotAllowed + :raises: OSDSettingConflict + """ + rel_ids = relation_ids(relation_name) + osd_settings = {} + for relid in rel_ids: + for unit in related_units(relid): + unit_settings = relation_get('osd-settings', unit, relid) or '{}' + unit_settings = json.loads(unit_settings) + for key, value in unit_settings.items(): + if key not in OSD_SETTING_WHITELIST: + msg = 'Illegal settings "{}"'.format(key) + raise OSDSettingNotAllowed(msg) + if key in osd_settings: + if osd_settings[key] != unit_settings[key]: + msg = 'Conflicting settings for "{}"'.format(key) + raise OSDSettingConflict(msg) + else: + osd_settings[key] = value + return _order_dict_by_key(osd_settings) + + +def send_application_name(relid=None, app_name=None): + """Send the application name down the relation. + + :param relid: Relation id to set application name in. + :type relid: str + :param app_name: Application name to send in the relation. + :type app_name: str + """ + if app_name is None: + app_name = application_name() + relation_set( + relation_id=relid, + relation_settings={'application-name': app_name}) + + +def send_osd_settings(): + """Pass on requested OSD settings to osd units.""" + try: + settings = get_osd_settings('client') + except OSD_SETTING_EXCEPTIONS as e: + # There is a problem with the settings, not passing them on. Update + # status will notify the user. + log(e, level=ERROR) + return + data = { + 'osd-settings': json.dumps(settings, sort_keys=True)} + for relid in relation_ids('osd'): + relation_set(relation_id=relid, + relation_settings=data) + + +def validator(value, valid_type, valid_range=None): + """Helper function for type validation. + + Used to validate these: + https://docs.ceph.com/docs/master/rados/operations/pools/#set-pool-values + https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#inline-compression + + Example input: + validator(value=1, + valid_type=int, + valid_range=[0, 2]) + + This says I'm testing value=1. It must be an int inclusive in [0,2] + + :param value: The value to validate. + :type value: any + :param valid_type: The type that value should be. + :type valid_type: any + :param valid_range: A range of values that value can assume. + :type valid_range: Optional[Union[List,Tuple]] + :raises: AssertionError, ValueError + """ + assert isinstance(value, valid_type), ( + "{} is not a {}".format(value, valid_type)) + if valid_range is not None: + assert isinstance( + valid_range, list) or isinstance(valid_range, tuple), ( + "valid_range must be of type List or Tuple, " + "was given {} of type {}" + .format(valid_range, type(valid_range))) + # If we're dealing with strings + if isinstance(value, str): + assert value in valid_range, ( + "{} is not in the list {}".format(value, valid_range)) + # Integer, float should have a min and max + else: + if len(valid_range) != 2: + raise ValueError( + "Invalid valid_range list of {} for {}. 
" + "List must be [min,max]".format(valid_range, value)) + assert value >= valid_range[0], ( + "{} is less than minimum allowed value of {}" + .format(value, valid_range[0])) + assert value <= valid_range[1], ( + "{} is greater than maximum allowed value of {}" + .format(value, valid_range[1])) + + +class PoolCreationError(Exception): + """A custom exception to inform the caller that a pool creation failed. + + Provides an error message + """ + + def __init__(self, message): + super(PoolCreationError, self).__init__(message) + + +class BasePool(object): + """An object oriented approach to Ceph pool creation. + + This base class is inherited by ReplicatedPool and ErasurePool. Do not call + create() on this base class as it will raise an exception. + + Instantiate a child class and call create(). + """ + # Dictionary that maps pool operation properties to Tuples with valid type + # and valid range + op_validation_map = { + 'compression-algorithm': (str, ('lz4', 'snappy', 'zlib', 'zstd')), + 'compression-mode': (str, ('none', 'passive', 'aggressive', 'force')), + 'compression-required-ratio': (float, None), + 'compression-min-blob-size': (int, None), + 'compression-min-blob-size-hdd': (int, None), + 'compression-min-blob-size-ssd': (int, None), + 'compression-max-blob-size': (int, None), + 'compression-max-blob-size-hdd': (int, None), + 'compression-max-blob-size-ssd': (int, None), + 'rbd-mirroring-mode': (str, ('image', 'pool')) + } + + def __init__(self, service, name=None, percent_data=None, app_name=None, + op=None): + """Initialize BasePool object. + + Pool information is either initialized from individual keyword + arguments or from a individual CephBrokerRq operation Dict. + + :param service: The Ceph user name to run commands under. + :type service: str + :param name: Name of pool to operate on. + :type name: str + :param percent_data: The expected pool size in relation to all + available resources in the Ceph cluster. Will be + used to set the ``target_size_ratio`` pool + property. (default: 10.0) + :type percent_data: Optional[float] + :param app_name: Ceph application name, usually one of: + ('cephfs', 'rbd', 'rgw') (default: 'unknown') + :type app_name: Optional[str] + :param op: Broker request Op to compile pool data from. + :type op: Optional[Dict[str,any]] + :raises: KeyError + """ + # NOTE: Do not perform initialization steps that require live data from + # a running cluster here. The *Pool classes may be used for validation. + self.service = service + self.op = op or {} + + if op: + # When initializing from op the `name` attribute is required and we + # will fail with KeyError if it is not provided. + self.name = op['name'] + self.percent_data = op.get('weight') + self.app_name = op.get('app-name') + else: + self.name = name + self.percent_data = percent_data + self.app_name = app_name + + # Set defaults for these if they are not provided + self.percent_data = self.percent_data or 10.0 + self.app_name = self.app_name or 'unknown' + + def validate(self): + """Check that value of supplied operation parameters are valid. + + :raises: ValueError + """ + for op_key, op_value in self.op.items(): + if op_key in self.op_validation_map and op_value is not None: + valid_type, valid_range = self.op_validation_map[op_key] + try: + validator(op_value, valid_type, valid_range) + except (AssertionError, ValueError) as e: + # Normalize on ValueError, also add information about which + # variable we had an issue with. 
+ raise ValueError("'{}': {}".format(op_key, str(e))) + + def _create(self): + """Perform the pool creation, method MUST be overridden by child class. + """ + raise NotImplementedError + + def _post_create(self): + """Perform common post pool creation tasks. + + Note that pool properties subject to change during the lifetime of a + pool / deployment should go into the ``update`` method. + + Do not add calls for a specific pool type here, those should go into + one of the pool specific classes. + """ + nautilus_or_later = cmp_pkgrevno('ceph-common', '14.2.0') >= 0 + if nautilus_or_later: + # Ensure we set the expected pool ratio + update_pool( + client=self.service, + pool=self.name, + settings={ + 'target_size_ratio': str( + self.percent_data / 100.0), + }) + try: + set_app_name_for_pool(client=self.service, + pool=self.name, + name=self.app_name) + except CalledProcessError: + log('Could not set app name for pool {}' + .format(self.name), + level=WARNING) + if 'pg_autoscaler' in enabled_manager_modules(): + try: + enable_pg_autoscale(self.service, self.name) + except CalledProcessError as e: + log('Could not configure auto scaling for pool {}: {}' + .format(self.name, e), + level=WARNING) + + def create(self): + """Create pool and perform any post pool creation tasks. + + To allow for sharing of common code among pool specific classes the + processing has been broken out into the private methods ``_create`` + and ``_post_create``. + + Do not add any pool type specific handling here, that should go into + one of the pool specific classes. + """ + if not pool_exists(self.service, self.name): + self.validate() + self._create() + self._post_create() + self.update() + + def set_quota(self): + """Set a quota if requested. + + :raises: CalledProcessError + """ + max_bytes = self.op.get('max-bytes') + max_objects = self.op.get('max-objects') + if max_bytes or max_objects: + set_pool_quota(service=self.service, pool_name=self.name, + max_bytes=max_bytes, max_objects=max_objects) + + def set_compression(self): + """Set compression properties if requested. + + :raises: CalledProcessError + """ + compression_properties = { + key.replace('-', '_'): value + for key, value in self.op.items() + if key in ( + 'compression-algorithm', + 'compression-mode', + 'compression-required-ratio', + 'compression-min-blob-size', + 'compression-min-blob-size-hdd', + 'compression-min-blob-size-ssd', + 'compression-max-blob-size', + 'compression-max-blob-size-hdd', + 'compression-max-blob-size-ssd') and value} + if compression_properties: + update_pool(self.service, self.name, compression_properties) + + def update(self): + """Update properties for an already existing pool. + + Do not add calls for a specific pool type here, those should go into + one of the pool specific classes. + """ + self.validate() + self.set_quota() + self.set_compression() + + def add_cache_tier(self, cache_pool, mode): + """Adds a new cache tier to an existing pool. + + :param cache_pool: The cache tier pool name to add. + :type cache_pool: str + :param mode: The caching mode to use for this pool. 
+ valid range = ["readonly", "writeback"] + :type mode: str + """ + # Check the input types and values + validator(value=cache_pool, valid_type=str) + validator( + value=mode, valid_type=str, + valid_range=["readonly", "writeback"]) + + check_call([ + 'ceph', '--id', self.service, + 'osd', 'tier', 'add', self.name, cache_pool, + ]) + check_call([ + 'ceph', '--id', self.service, + 'osd', 'tier', 'cache-mode', cache_pool, mode, + ]) + check_call([ + 'ceph', '--id', self.service, + 'osd', 'tier', 'set-overlay', self.name, cache_pool, + ]) + check_call([ + 'ceph', '--id', self.service, + 'osd', 'pool', 'set', cache_pool, 'hit_set_type', 'bloom', + ]) + + def remove_cache_tier(self, cache_pool): + """Removes a cache tier from Ceph. + + Flushes all dirty objects from writeback pools and waits for that to + complete. + + :param cache_pool: The cache tier pool name to remove. + :type cache_pool: str + """ + # read-only is easy, writeback is much harder + mode = get_cache_mode(self.service, cache_pool) + if mode == 'readonly': + check_call([ + 'ceph', '--id', self.service, + 'osd', 'tier', 'cache-mode', cache_pool, 'none' + ]) + check_call([ + 'ceph', '--id', self.service, + 'osd', 'tier', 'remove', self.name, cache_pool, + ]) + + elif mode == 'writeback': + pool_forward_cmd = ['ceph', '--id', self.service, 'osd', 'tier', + 'cache-mode', cache_pool, 'forward'] + if cmp_pkgrevno('ceph-common', '10.1') >= 0: + # Jewel added a mandatory flag + pool_forward_cmd.append('--yes-i-really-mean-it') + + check_call(pool_forward_cmd) + # Flush the cache and wait for it to return + check_call([ + 'rados', '--id', self.service, + '-p', cache_pool, 'cache-flush-evict-all']) + check_call([ + 'ceph', '--id', self.service, + 'osd', 'tier', 'remove-overlay', self.name]) + check_call([ + 'ceph', '--id', self.service, + 'osd', 'tier', 'remove', self.name, cache_pool]) + + def get_pgs(self, pool_size, percent_data=DEFAULT_POOL_WEIGHT, + device_class=None): + """Return the number of placement groups to use when creating the pool. + + Returns the number of placement groups which should be specified when + creating the pool. This is based upon the calculation guidelines + provided by the Ceph Placement Group Calculator (located online at + http://ceph.com/pgcalc/). + + The number of placement groups are calculated using the following: + + (Target PGs per OSD) * (OSD #) * (%Data) + ---------------------------------------- + (Pool size) + + Per the upstream guidelines, the OSD # should really be considered + based on the number of OSDs which are eligible to be selected by the + pool. Since the pool creation doesn't specify any of CRUSH set rules, + the default rule will be dependent upon the type of pool being + created (replicated or erasure). + + This code makes no attempt to determine the number of OSDs which can be + selected for the specific rule, rather it is left to the user to tune + in the form of 'expected-osd-count' config option. + + :param pool_size: pool_size is either the number of replicas for + replicated pools or the K+M sum for erasure coded pools + :type pool_size: int + :param percent_data: the percentage of data that is expected to + be contained in the pool for the specific OSD set. Default value + is to assume 10% of the data is for this pool, which is a + relatively low % of the data but allows for the pg_num to be + increased. 
NOTE: the default is primarily to handle the scenario + where related charms requiring pools has not been upgraded to + include an update to indicate their relative usage of the pools. + :type percent_data: float + :param device_class: class of storage to use for basis of pgs + calculation; ceph supports nvme, ssd and hdd by default based + on presence of devices of each type in the deployment. + :type device_class: str + :returns: The number of pgs to use. + :rtype: int + """ + + # Note: This calculation follows the approach that is provided + # by the Ceph PG Calculator located at http://ceph.com/pgcalc/. + validator(value=pool_size, valid_type=int) + + # Ensure that percent data is set to something - even with a default + # it can be set to None, which would wreak havoc below. + if percent_data is None: + percent_data = DEFAULT_POOL_WEIGHT + + # If the expected-osd-count is specified, then use the max between + # the expected-osd-count and the actual osd_count + osd_list = get_osds(self.service, device_class) + expected = config('expected-osd-count') or 0 + + if osd_list: + if device_class: + osd_count = len(osd_list) + else: + osd_count = max(expected, len(osd_list)) + + # Log a message to provide some insight if the calculations claim + # to be off because someone is setting the expected count and + # there are more OSDs in reality. Try to make a proper guess + # based upon the cluster itself. + if not device_class and expected and osd_count != expected: + log("Found more OSDs than provided expected count. " + "Using the actual count instead", INFO) + elif expected: + # Use the expected-osd-count in older ceph versions to allow for + # a more accurate pg calculations + osd_count = expected + else: + # NOTE(james-page): Default to 200 for older ceph versions + # which don't support OSD query from cli + return LEGACY_PG_COUNT + + percent_data /= 100.0 + target_pgs_per_osd = config( + 'pgs-per-osd') or DEFAULT_PGS_PER_OSD_TARGET + num_pg = (target_pgs_per_osd * osd_count * percent_data) // pool_size + + # NOTE: ensure a sane minimum number of PGS otherwise we don't get any + # reasonable data distribution in minimal OSD configurations + if num_pg < DEFAULT_MINIMUM_PGS: + num_pg = DEFAULT_MINIMUM_PGS + + # The CRUSH algorithm has a slight optimization for placement groups + # with powers of 2 so find the nearest power of 2. If the nearest + # power of 2 is more than 25% below the original value, the next + # highest value is used. To do this, find the nearest power of 2 such + # that 2^n <= num_pg, check to see if its within the 25% tolerance. + exponent = math.floor(math.log(num_pg, 2)) + nearest = 2 ** exponent + if (num_pg - nearest) > (num_pg * 0.25): + # Choose the next highest power of 2 since the nearest is more + # than 25% below the original value. + return int(nearest * 2) + else: + return int(nearest) + + +class Pool(BasePool): + """Compatibility shim for any descendents external to this library.""" + + @deprecate( + 'The ``Pool`` baseclass has been replaced by ``BasePool`` class.') + def __init__(self, service, name): + super(Pool, self).__init__(service, name=name) + + def create(self): + pass + + +class ReplicatedPool(BasePool): + def __init__(self, service, name=None, pg_num=None, replicas=None, + percent_data=None, app_name=None, op=None, + profile_name='replicated_rule'): + """Initialize ReplicatedPool object. + + Pool information is either initialized from individual keyword + arguments or from a individual CephBrokerRq operation Dict. 
+ + Please refer to the docstring of the ``BasePool`` class for + documentation of the common parameters. + + :param pg_num: Express wish for number of Placement Groups (this value + is subject to validation against a running cluster prior + to use to avoid creating a pool with too many PGs) + :type pg_num: int + :param replicas: Number of copies there should be of each object added + to this replicated pool. + :type replicas: int + :raises: KeyError + :param profile_name: Crush Profile to use + :type profile_name: Optional[str] + """ + # NOTE: Do not perform initialization steps that require live data from + # a running cluster here. The *Pool classes may be used for validation. + + # The common parameters are handled in our parents initializer + super(ReplicatedPool, self).__init__( + service=service, name=name, percent_data=percent_data, + app_name=app_name, op=op) + + if op: + # When initializing from op `replicas` is a required attribute, and + # we will fail with KeyError if it is not provided. + self.replicas = op['replicas'] + self.pg_num = op.get('pg_num') + self.profile_name = op.get('crush-profile') or profile_name + else: + self.replicas = replicas or 2 + self.pg_num = pg_num + self.profile_name = profile_name or 'replicated_rule' + + def _create(self): + # Validate if crush profile exists + if self.profile_name is None: + msg = ("Failed to discover crush profile named " + "{}".format(self.profile_name)) + log(msg, level=ERROR) + raise PoolCreationError(msg) + + # Do extra validation on pg_num with data from live cluster + if self.pg_num: + # Since the number of placement groups were specified, ensure + # that there aren't too many created. + max_pgs = self.get_pgs(self.replicas, 100.0) + self.pg_num = min(self.pg_num, max_pgs) + else: + self.pg_num = self.get_pgs(self.replicas, self.percent_data) + + nautilus_or_later = cmp_pkgrevno('ceph-common', '14.2.0') >= 0 + # Create it + if nautilus_or_later: + cmd = [ + 'ceph', '--id', self.service, 'osd', 'pool', 'create', + '--pg-num-min={}'.format( + min(AUTOSCALER_DEFAULT_PGS, self.pg_num) + ), + self.name, str(self.pg_num), self.profile_name + ] + else: + cmd = [ + 'ceph', '--id', self.service, 'osd', 'pool', 'create', + self.name, str(self.pg_num), self.profile_name + ] + check_call(cmd) + + def _post_create(self): + # Set the pool replica size + update_pool(client=self.service, + pool=self.name, + settings={'size': str(self.replicas)}) + # Perform other common post pool creation tasks + super(ReplicatedPool, self)._post_create() + + +class ErasurePool(BasePool): + """Default jerasure erasure coded pool.""" + + def __init__(self, service, name=None, erasure_code_profile=None, + percent_data=None, app_name=None, op=None, + allow_ec_overwrites=False): + """Initialize ErasurePool object. + + Pool information is either initialized from individual keyword + arguments or from a individual CephBrokerRq operation Dict. + + Please refer to the docstring of the ``BasePool`` class for + documentation of the common parameters. + + :param erasure_code_profile: EC Profile to use (default: 'default') + :type erasure_code_profile: Optional[str] + """ + # NOTE: Do not perform initialization steps that require live data from + # a running cluster here. The *Pool classes may be used for validation. 
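+        # Illustrative usage sketch; 'admin', 'mypool' and 'myecprofile'
+        # are hypothetical, and the EC profile must already exist in the
+        # cluster (see create_erasure_profile() below):
+        #
+        #     pool = ErasurePool('admin', name='mypool',
+        #                        erasure_code_profile='myecprofile')
+        #     pool.create()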
+
+        # The common parameters are handled in our parent's initializer
+        super(ErasurePool, self).__init__(
+            service=service, name=name, percent_data=percent_data,
+            app_name=app_name, op=op)
+
+        if op:
+            # Note that the different default when initializing from op stems
+            # from different handling of this in the `charms.ceph` library.
+            self.erasure_code_profile = op.get('erasure-profile',
+                                               'default-canonical')
+            self.allow_ec_overwrites = op.get('allow-ec-overwrites')
+        else:
+            # We keep the class default when initialized from keyword arguments
+            # to not break the API for any other consumers.
+            self.erasure_code_profile = erasure_code_profile or 'default'
+            self.allow_ec_overwrites = allow_ec_overwrites
+
+    def _create(self):
+        # Try to find the erasure profile information in order to properly
+        # size the number of placement groups. The size of an erasure
+        # coded placement group is calculated as k+m.
+        erasure_profile = get_erasure_profile(self.service,
+                                              self.erasure_code_profile)
+
+        # Check for errors
+        if erasure_profile is None:
+            msg = ("Failed to discover erasure profile named "
+                   "{}".format(self.erasure_code_profile))
+            log(msg, level=ERROR)
+            raise PoolCreationError(msg)
+        if 'k' not in erasure_profile or 'm' not in erasure_profile:
+            # Error
+            msg = ("Unable to find k (data chunks) or m (coding chunks) "
+                   "in erasure profile {}".format(erasure_profile))
+            log(msg, level=ERROR)
+            raise PoolCreationError(msg)
+
+        k = int(erasure_profile['k'])
+        m = int(erasure_profile['m'])
+        pgs = self.get_pgs(k + m, self.percent_data)
+        nautilus_or_later = cmp_pkgrevno('ceph-common', '14.2.0') >= 0
+        # Create it
+        if nautilus_or_later:
+            cmd = [
+                'ceph', '--id', self.service, 'osd', 'pool', 'create',
+                '--pg-num-min={}'.format(
+                    min(AUTOSCALER_DEFAULT_PGS, pgs)
+                ),
+                self.name, str(pgs), str(pgs),
+                'erasure', self.erasure_code_profile
+            ]
+        else:
+            cmd = [
+                'ceph', '--id', self.service, 'osd', 'pool', 'create',
+                self.name, str(pgs), str(pgs),
+                'erasure', self.erasure_code_profile
+            ]
+        check_call(cmd)
+
+    def _post_create(self):
+        super(ErasurePool, self)._post_create()
+        if self.allow_ec_overwrites:
+            update_pool(self.service, self.name,
+                        {'allow_ec_overwrites': 'true'})
+
+
+def enabled_manager_modules():
+    """Return a list of enabled manager modules.
+
+    :rtype: List[str]
+    """
+    cmd = ['ceph', 'mgr', 'module', 'ls']
+    quincy_or_later = cmp_pkgrevno('ceph-common', '17.1.0') >= 0
+    if quincy_or_later:
+        cmd.append('--format=json')
+    try:
+        modules = check_output(cmd).decode('utf-8')
+    except CalledProcessError as e:
+        log("Failed to list ceph modules: {}".format(e), WARNING)
+        return []
+    modules = json.loads(modules)
+    return modules['enabled_modules']
+
+
+def enable_pg_autoscale(service, pool_name):
+    """Enable Ceph's PG autoscaler for the specified pool.
+
+    :param service: The Ceph user name to run the command under
+    :type service: str
+    :param pool_name: The name of the pool to enable autoscaling on
+    :type pool_name: str
+    :raises: CalledProcessError if the command fails
+    """
+    check_call([
+        'ceph', '--id', service,
+        'osd', 'pool', 'set', pool_name, 'pg_autoscale_mode', 'on'])
+
+
+def get_mon_map(service):
+    """Return the current monitor map.
+
+    :param service: The Ceph user name to run the command under
+    :type service: str
+    :returns: Dictionary with monitor map data
+    :rtype: Dict[str,any]
+    :raises: ValueError if the monmap fails to parse, CalledProcessError if our
+        ceph command fails.
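+
+    Example (illustrative; the 'admin' user is an assumption, and the key
+    layout shown is the one consumed by ``hash_monitor_names`` below)::
+
+        mon_map = get_mon_map('admin')
+        names = [mon['name'] for mon in mon_map['monmap']['mons']]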
+ """ + try: + octopus_or_later = cmp_pkgrevno('ceph-common', '15.0.0') >= 0 + mon_status_cmd = 'quorum_status' if octopus_or_later else 'mon_status' + mon_status = (check_output(['ceph', '--id', service, mon_status_cmd, + '--format=json'])).decode('utf-8') + try: + return json.loads(mon_status) + except ValueError as v: + log("Unable to parse mon_status json: {}. Error: {}" + .format(mon_status, str(v))) + raise + except CalledProcessError as e: + log("mon_status command failed with message: {}" + .format(str(e))) + raise + + +def hash_monitor_names(service): + """Get a sorted list of monitor hashes in ascending order. + + Uses the get_mon_map() function to get information about the monitor + cluster. Hash the name of each monitor. + + :param service: The Ceph user name to run the command under. + :type service: str + :returns: a sorted list of monitor hashes in an ascending order. + :rtype : List[str] + :raises: CalledProcessError, ValueError + """ + try: + hash_list = [] + monitor_list = get_mon_map(service=service) + if monitor_list['monmap']['mons']: + for mon in monitor_list['monmap']['mons']: + hash_list.append( + hashlib.sha224(mon['name'].encode('utf-8')).hexdigest()) + return sorted(hash_list) + else: + return None + except (ValueError, CalledProcessError): + raise + + +def monitor_key_delete(service, key): + """Delete a key and value pair from the monitor cluster. + + Deletes a key value pair on the monitor cluster. + + :param service: The Ceph user name to run the command under + :type service: str + :param key: The key to delete. + :type key: str + :raises: CalledProcessError + """ + try: + check_output( + ['ceph', '--id', service, + 'config-key', 'del', str(key)]) + except CalledProcessError as e: + log("Monitor config-key put failed with message: {}" + .format(e.output)) + raise + + +def monitor_key_set(service, key, value): + """Set a key value pair on the monitor cluster. + + :param service: The Ceph user name to run the command under. + :type service str + :param key: The key to set. + :type key: str + :param value: The value to set. This will be coerced into a string. + :type value: str + :raises: CalledProcessError + """ + try: + check_output( + ['ceph', '--id', service, + 'config-key', 'put', str(key), str(value)]) + except CalledProcessError as e: + log("Monitor config-key put failed with message: {}" + .format(e.output)) + raise + + +def monitor_key_get(service, key): + """Get the value of an existing key in the monitor cluster. + + :param service: The Ceph user name to run the command under + :type service: str + :param key: The key to search for. + :type key: str + :return: Returns the value of that key or None if not found. + :rtype: Optional[str] + """ + try: + output = check_output( + ['ceph', '--id', service, + 'config-key', 'get', str(key)]).decode('UTF-8') + return output + except CalledProcessError as e: + log("Monitor config-key get failed with message: {}" + .format(e.output)) + return None + + +def monitor_key_exists(service, key): + """Search for existence of key in the monitor cluster. + + :param service: The Ceph user name to run the command under. + :type service: str + :param key: The key to search for. + :type key: str + :return: Returns True if the key exists, False if not. + :rtype: bool + :raises: CalledProcessError if an unknown error occurs. 
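+
+    Example round trip using the sibling helpers above (sketch; assumes a
+    client named 'admin' with access to the mons)::
+
+        monitor_key_set('admin', 'last-update', '12345')
+        if monitor_key_exists('admin', 'last-update'):
+            value = monitor_key_get('admin', 'last-update')
+            monitor_key_delete('admin', 'last-update')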
+ """ + try: + check_call( + ['ceph', '--id', service, + 'config-key', 'exists', str(key)]) + # I can return true here regardless because Ceph returns + # ENOENT if the key wasn't found + return True + except CalledProcessError as e: + if e.returncode == errno.ENOENT: + return False + else: + log("Unknown error from ceph config-get exists: {} {}" + .format(e.returncode, e.output)) + raise + + +def get_erasure_profile(service, name): + """Get an existing erasure code profile if it exists. + + :param service: The Ceph user name to run the command under. + :type service: str + :param name: Name of profile. + :type name: str + :returns: Dictionary with profile data. + :rtype: Optional[Dict[str]] + """ + try: + out = check_output(['ceph', '--id', service, + 'osd', 'erasure-code-profile', 'get', + name, '--format=json']).decode('utf-8') + return json.loads(out) + except (CalledProcessError, OSError, ValueError): + return None + + +def pool_set(service, pool_name, key, value): + """Sets a value for a RADOS pool in ceph. + + :param service: The Ceph user name to run the command under. + :type service: str + :param pool_name: Name of pool to set property on. + :type pool_name: str + :param key: Property key. + :type key: str + :param value: Value, will be coerced into str and shifted to lowercase. + :type value: str + :raises: CalledProcessError + """ + cmd = [ + 'ceph', '--id', service, + 'osd', 'pool', 'set', pool_name, key, str(value).lower()] + check_call(cmd) + + +def snapshot_pool(service, pool_name, snapshot_name): + """Snapshots a RADOS pool in Ceph. + + :param service: The Ceph user name to run the command under. + :type service: str + :param pool_name: Name of pool to snapshot. + :type pool_name: str + :param snapshot_name: Name of snapshot to create. + :type snapshot_name: str + :raises: CalledProcessError + """ + cmd = [ + 'ceph', '--id', service, + 'osd', 'pool', 'mksnap', pool_name, snapshot_name] + check_call(cmd) + + +def remove_pool_snapshot(service, pool_name, snapshot_name): + """Remove a snapshot from a RADOS pool in Ceph. + + :param service: The Ceph user name to run the command under. + :type service: str + :param pool_name: Name of pool to remove snapshot from. + :type pool_name: str + :param snapshot_name: Name of snapshot to remove. + :type snapshot_name: str + :raises: CalledProcessError + """ + cmd = [ + 'ceph', '--id', service, + 'osd', 'pool', 'rmsnap', pool_name, snapshot_name] + check_call(cmd) + + +def set_pool_quota(service, pool_name, max_bytes=None, max_objects=None): + """Set byte quota on a RADOS pool in Ceph. + + :param service: The Ceph user name to run the command under + :type service: str + :param pool_name: Name of pool + :type pool_name: str + :param max_bytes: Maximum bytes quota to apply + :type max_bytes: int + :param max_objects: Maximum objects quota to apply + :type max_objects: int + :raises: subprocess.CalledProcessError + """ + cmd = [ + 'ceph', '--id', service, + 'osd', 'pool', 'set-quota', pool_name] + if max_bytes: + cmd = cmd + ['max_bytes', str(max_bytes)] + if max_objects: + cmd = cmd + ['max_objects', str(max_objects)] + check_call(cmd) + + +def remove_pool_quota(service, pool_name): + """Remove byte quota on a RADOS pool in Ceph. + + :param service: The Ceph user name to run the command under. + :type service: str + :param pool_name: Name of pool to remove quota from. 
+    :type pool_name: str
+    :raises: CalledProcessError
+    """
+    cmd = [
+        'ceph', '--id', service,
+        'osd', 'pool', 'set-quota', pool_name, 'max_bytes', '0']
+    check_call(cmd)
+
+
+def remove_erasure_profile(service, profile_name):
+    """Remove erasure code profile.
+
+    :param service: The Ceph user name to run the command under
+    :type service: str
+    :param profile_name: Name of profile to remove.
+    :type profile_name: str
+    :raises: CalledProcessError
+    """
+    cmd = [
+        'ceph', '--id', service,
+        'osd', 'erasure-code-profile', 'rm', profile_name]
+    check_call(cmd)
+
+
+def create_erasure_profile(service, profile_name,
+                           erasure_plugin_name='jerasure',
+                           failure_domain=None,
+                           data_chunks=2, coding_chunks=1,
+                           locality=None, durability_estimator=None,
+                           helper_chunks=None,
+                           scalar_mds=None,
+                           crush_locality=None,
+                           device_class=None,
+                           erasure_plugin_technique=None):
+    """Create a new erasure code profile if one does not already exist for it.
+
+    Profiles are considered immutable so will not be updated if the named
+    profile already exists.
+
+    Please refer to [0] for more details.
+
+    0: http://docs.ceph.com/docs/master/rados/operations/erasure-code-profile/
+
+    :param service: The Ceph user name to run the command under.
+    :type service: str
+    :param profile_name: Name of profile.
+    :type profile_name: str
+    :param erasure_plugin_name: Erasure code plugin.
+    :type erasure_plugin_name: str
+    :param failure_domain: Failure domain, one of:
+                           ('chassis', 'datacenter', 'host', 'osd', 'pdu',
+                            'pod', 'rack', 'region', 'room', 'root', 'row').
+    :type failure_domain: str
+    :param data_chunks: Number of data chunks.
+    :type data_chunks: int
+    :param coding_chunks: Number of coding chunks.
+    :type coding_chunks: int
+    :param locality: Locality.
+    :type locality: int
+    :param durability_estimator: Durability estimator.
+    :type durability_estimator: int
+    :param helper_chunks: Number of helper chunks (clay plugin).
+    :type helper_chunks: int
+    :param device_class: Restrict placement to devices of specific class.
+    :type device_class: str
+    :param scalar_mds: one of ['isa', 'jerasure', 'shec']
+    :type scalar_mds: str
+    :param crush_locality: LRC locality failure domain, one of:
+                           ('chassis', 'datacenter', 'host', 'osd', 'pdu', 'pod',
+                            'rack', 'region', 'room', 'root', 'row') or unset.
+    :type crush_locality: str
+    :param erasure_plugin_technique: Coding technique for EC plugin
+    :type erasure_plugin_technique: str
+    :return: None.
Can raise CalledProcessError, ValueError or AssertionError + """ + if erasure_profile_exists(service, profile_name): + log('EC profile {} exists, skipping update'.format(profile_name), + level=WARNING) + return + + plugin_techniques = { + 'jerasure': [ + 'reed_sol_van', + 'reed_sol_r6_op', + 'cauchy_orig', + 'cauchy_good', + 'liberation', + 'blaum_roth', + 'liber8tion' + ], + 'lrc': [], + 'isa': [ + 'reed_sol_van', + 'cauchy', + ], + 'shec': [ + 'single', + 'multiple' + ], + 'clay': [], + } + failure_domains = [ + 'chassis', 'datacenter', + 'host', 'osd', + 'pdu', 'pod', + 'rack', 'region', + 'room', 'root', + 'row', + ] + device_classes = [ + 'ssd', + 'hdd', + 'nvme' + ] + + validator(erasure_plugin_name, str, list(plugin_techniques.keys())) + + cmd = [ + 'ceph', '--id', service, + 'osd', 'erasure-code-profile', 'set', profile_name, + 'plugin={}'.format(erasure_plugin_name), + 'k={}'.format(str(data_chunks)), + 'm={}'.format(str(coding_chunks)), + ] + + if erasure_plugin_technique: + validator(erasure_plugin_technique, str, + plugin_techniques[erasure_plugin_name]) + cmd.append('technique={}'.format(erasure_plugin_technique)) + + luminous_or_later = cmp_pkgrevno('ceph-common', '12.0.0') >= 0 + + # Set failure domain from options if not provided in args + if not failure_domain and config('customize-failure-domain'): + # Defaults to 'host' so just need to deal with + # setting 'rack' if feature is enabled + failure_domain = 'rack' + + if failure_domain: + validator(failure_domain, str, failure_domains) + # failure_domain changed in luminous + if luminous_or_later: + cmd.append('crush-failure-domain={}'.format(failure_domain)) + else: + cmd.append('ruleset-failure-domain={}'.format(failure_domain)) + + # device class new in luminous + if luminous_or_later and device_class: + validator(device_class, str, device_classes) + cmd.append('crush-device-class={}'.format(device_class)) + else: + log('Skipping device class configuration (ceph < 12.0.0)', + level=DEBUG) + + # Add plugin specific information + if erasure_plugin_name == 'lrc': + # LRC mandatory configuration + if locality: + cmd.append('l={}'.format(str(locality))) + else: + raise ValueError("locality must be provided for lrc plugin") + # LRC optional configuration + if crush_locality: + validator(crush_locality, str, failure_domains) + cmd.append('crush-locality={}'.format(crush_locality)) + + if erasure_plugin_name == 'shec': + # SHEC optional configuration + if durability_estimator: + cmd.append('c={}'.format((durability_estimator))) + + if erasure_plugin_name == 'clay': + # CLAY optional configuration + if helper_chunks: + cmd.append('d={}'.format(str(helper_chunks))) + if scalar_mds: + cmd.append('scalar-mds={}'.format(scalar_mds)) + + check_call(cmd) + + +def rename_pool(service, old_name, new_name): + """Rename a Ceph pool from old_name to new_name. + + :param service: The Ceph user name to run the command under. + :type service: str + :param old_name: Name of pool subject to rename. + :type old_name: str + :param new_name: Name to rename pool to. + :type new_name: str + """ + validator(value=old_name, valid_type=str) + validator(value=new_name, valid_type=str) + + cmd = [ + 'ceph', '--id', service, + 'osd', 'pool', 'rename', old_name, new_name] + check_call(cmd) + + +def erasure_profile_exists(service, name): + """Check to see if an Erasure code profile already exists. + + :param service: The Ceph user name to run the command under + :type service: str + :param name: Name of profile to look for. 
+ :type name: str + :returns: True if it exists, False otherwise. + :rtype: bool + """ + validator(value=name, valid_type=str) + try: + check_call(['ceph', '--id', service, + 'osd', 'erasure-code-profile', 'get', + name]) + return True + except CalledProcessError: + return False + + +def get_cache_mode(service, pool_name): + """Find the current caching mode of the pool_name given. + + :param service: The Ceph user name to run the command under + :type service: str + :param pool_name: Name of pool. + :type pool_name: str + :returns: Current cache mode. + :rtype: Optional[int] + """ + validator(value=service, valid_type=str) + validator(value=pool_name, valid_type=str) + out = check_output(['ceph', '--id', service, + 'osd', 'dump', '--format=json']).decode('utf-8') + try: + osd_json = json.loads(out) + for pool in osd_json['pools']: + if pool['pool_name'] == pool_name: + return pool['cache_mode'] + return None + except ValueError: + raise + + +def pool_exists(service, name): + """Check to see if a RADOS pool already exists.""" + try: + out = check_output( + ['rados', '--id', service, 'lspools']).decode('utf-8') + except CalledProcessError: + return False + + return name in out.split() + + +def get_osds(service, device_class=None): + """Return a list of all Ceph Object Storage Daemons currently in the + cluster (optionally filtered by storage device class). + + :param device_class: Class of storage device for OSD's + :type device_class: str + """ + luminous_or_later = cmp_pkgrevno('ceph-common', '12.0.0') >= 0 + if luminous_or_later and device_class: + out = check_output(['ceph', '--id', service, + 'osd', 'crush', 'class', + 'ls-osd', device_class, + '--format=json']).decode('utf-8') + else: + out = check_output(['ceph', '--id', service, + 'osd', 'ls', + '--format=json']).decode('utf-8') + return json.loads(out) + + +def install(): + """Basic Ceph client installation.""" + ceph_dir = "/etc/ceph" + if not os.path.exists(ceph_dir): + os.mkdir(ceph_dir) + + apt_install('ceph-common', fatal=True) + + +def rbd_exists(service, pool, rbd_img): + """Check to see if a RADOS block device exists.""" + try: + out = check_output(['rbd', 'list', '--id', + service, '--pool', pool]).decode('utf-8') + except CalledProcessError: + return False + + return rbd_img in out + + +def create_rbd_image(service, pool, image, sizemb): + """Create a new RADOS block device.""" + cmd = ['rbd', 'create', image, '--size', str(sizemb), '--id', service, + '--pool', pool] + check_call(cmd) + + +def update_pool(client, pool, settings): + """Update pool properties. + + :param client: Client/User-name to authenticate with. + :type client: str + :param pool: Name of pool to operate on + :type pool: str + :param settings: Dictionary with key/value pairs to set. 
+ :type settings: Dict[str, str] + :raises: CalledProcessError + """ + cmd = ['ceph', '--id', client, 'osd', 'pool', 'set', pool] + for k, v in settings.items(): + check_call(cmd + [k, v]) + + +def set_app_name_for_pool(client, pool, name): + """Calls `osd pool application enable` for the specified pool name + + :param client: Name of the ceph client to use + :type client: str + :param pool: Pool to set app name for + :type pool: str + :param name: app name for the specified pool + :type name: str + + :raises: CalledProcessError if ceph call fails + """ + if cmp_pkgrevno('ceph-common', '12.0.0') >= 0: + cmd = ['ceph', '--id', client, 'osd', 'pool', + 'application', 'enable', pool, name] + check_call(cmd) + + +def create_pool(service, name, replicas=3, pg_num=None): + """Create a new RADOS pool.""" + if pool_exists(service, name): + log("Ceph pool {} already exists, skipping creation".format(name), + level=WARNING) + return + + if not pg_num: + # Calculate the number of placement groups based + # on upstream recommended best practices. + osds = get_osds(service) + if osds: + pg_num = (len(osds) * 100 // replicas) + else: + # NOTE(james-page): Default to 200 for older ceph versions + # which don't support OSD query from cli + pg_num = 200 + + cmd = ['ceph', '--id', service, 'osd', 'pool', 'create', name, str(pg_num)] + check_call(cmd) + + update_pool(service, name, settings={'size': str(replicas)}) + + +def delete_pool(service, name): + """Delete a RADOS pool from ceph.""" + cmd = ['ceph', '--id', service, 'osd', 'pool', 'delete', name, + '--yes-i-really-really-mean-it'] + check_call(cmd) + + +def _keyfile_path(service): + return KEYFILE.format(service) + + +def _keyring_path(service): + return KEYRING.format(service) + + +def add_key(service, key): + """Add a key to a keyring. + + Creates the keyring if it doesn't already exist. + + Logs and returns if the key is already in the keyring. + """ + keyring = _keyring_path(service) + if os.path.exists(keyring): + with open(keyring, 'r') as ring: + if key in ring.read(): + log('Ceph keyring exists at %s and has not changed.' % keyring, + level=DEBUG) + return + log('Updating existing keyring %s.' % keyring, level=DEBUG) + + cmd = ['ceph-authtool', keyring, '--create-keyring', + '--name=client.{}'.format(service), '--add-key={}'.format(key)] + check_call(cmd) + log('Created new ceph keyring at %s.' % keyring, level=DEBUG) + + +def create_keyring(service, key): + """Deprecated. Please use the more accurately named 'add_key'""" + return add_key(service, key) + + +def delete_keyring(service): + """Delete an existing Ceph keyring.""" + keyring = _keyring_path(service) + if not os.path.exists(keyring): + log('Keyring does not exist at %s' % keyring, level=WARNING) + return + + os.remove(keyring) + log('Deleted ring at %s.' % keyring, level=INFO) + + +def create_key_file(service, key): + """Create a file containing key.""" + keyfile = _keyfile_path(service) + if os.path.exists(keyfile): + log('Keyfile exists at %s.' % keyfile, level=WARNING) + return + + with open(keyfile, 'w') as fd: + fd.write(key) + + log('Created new keyfile at %s.' 
% keyfile, level=INFO) + + +def get_ceph_nodes(relation='ceph'): + """Query named relation to determine current nodes.""" + hosts = [] + for r_id in relation_ids(relation): + for unit in related_units(r_id): + hosts.append(relation_get('private-address', unit=unit, rid=r_id)) + + return hosts + + +def configure(service, key, auth, use_syslog): + """Perform basic configuration of Ceph.""" + add_key(service, key) + create_key_file(service, key) + hosts = get_ceph_nodes() + with open('/etc/ceph/ceph.conf', 'w') as ceph_conf: + ceph_conf.write(CEPH_CONF.format(auth=auth, + keyring=_keyring_path(service), + mon_hosts=",".join(map(str, hosts)), + use_syslog=use_syslog)) + modprobe('rbd') + + +def image_mapped(name): + """Determine whether a RADOS block device is mapped locally.""" + try: + out = check_output(['rbd', 'showmapped']).decode('utf-8') + except CalledProcessError: + return False + + return name in out + + +def map_block_storage(service, pool, image): + """Map a RADOS block device for local use.""" + cmd = [ + 'rbd', + 'map', + '{}/{}'.format(pool, image), + '--user', + service, + '--secret', + _keyfile_path(service), + ] + check_call(cmd) + + +def filesystem_mounted(fs): + """Determine whether a filesystem is already mounted.""" + return fs in [f for f, m in mounts()] + + +def make_filesystem(blk_device, fstype='ext4', timeout=10): + """Make a new filesystem on the specified block device.""" + count = 0 + e_noent = errno.ENOENT + while not os.path.exists(blk_device): + if count >= timeout: + log('Gave up waiting on block device %s' % blk_device, + level=ERROR) + raise IOError(e_noent, os.strerror(e_noent), blk_device) + + log('Waiting for block device %s to appear' % blk_device, + level=DEBUG) + count += 1 + time.sleep(1) + else: + log('Formatting block device %s as filesystem %s.' % + (blk_device, fstype), level=INFO) + check_call(['mkfs', '-t', fstype, blk_device]) + + +def place_data_on_block_device(blk_device, data_src_dst): + """Migrate data in data_src_dst to blk_device and then remount.""" + # mount block device into /mnt + mount(blk_device, '/mnt') + # copy data to /mnt + copy_files(data_src_dst, '/mnt') + # umount block device + umount('/mnt') + # Grab user/group ID's from original source + _dir = os.stat(data_src_dst) + uid = _dir.st_uid + gid = _dir.st_gid + # re-mount where the data should originally be + # TODO: persist is currently a NO-OP in core.host + mount(blk_device, data_src_dst, persist=True) + # ensure original ownership of new mount. + os.chown(data_src_dst, uid, gid) + + +def copy_files(src, dst, symlinks=False, ignore=None): + """Copy files from src to dst.""" + for item in os.listdir(src): + s = os.path.join(src, item) + d = os.path.join(dst, item) + if os.path.isdir(s): + shutil.copytree(s, d, symlinks, ignore) + else: + shutil.copy2(s, d) + + +def ensure_ceph_storage(service, pool, rbd_img, sizemb, mount_point, + blk_device, fstype, system_services=[], + replicas=3): + """NOTE: This function must only be called from a single service unit for + the same rbd_img otherwise data loss will occur. + + Ensures given pool and RBD image exists, is mapped to a block device, + and the device is formatted and mounted at the given mount_point. + + If formatting a device for the first time, data existing at mount_point + will be migrated to the RBD device before being re-mounted. + + All services listed in system_services will be stopped prior to data + migration and restarted when complete. + """ + # Ensure pool, RBD image, RBD mappings are in place. 
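+    # Illustrative invocation (all names are hypothetical):
+    #
+    #     ensure_ceph_storage('mysql', 'mysql-pool', 'mysql-img', 1024,
+    #                         '/var/lib/mysql', '/dev/rbd1', 'ext4',
+    #                         system_services=['mysql'], replicas=3)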
+ if not pool_exists(service, pool): + log('Creating new pool {}.'.format(pool), level=INFO) + create_pool(service, pool, replicas=replicas) + + if not rbd_exists(service, pool, rbd_img): + log('Creating RBD image ({}).'.format(rbd_img), level=INFO) + create_rbd_image(service, pool, rbd_img, sizemb) + + if not image_mapped(rbd_img): + log('Mapping RBD Image {} as a Block Device.'.format(rbd_img), + level=INFO) + map_block_storage(service, pool, rbd_img) + + # make file system + # TODO: What happens if for whatever reason this is run again and + # the data is already in the rbd device and/or is mounted?? + # When it is mounted already, it will fail to make the fs + # XXX: This is really sketchy! Need to at least add an fstab entry + # otherwise this hook will blow away existing data if its executed + # after a reboot. + if not filesystem_mounted(mount_point): + make_filesystem(blk_device, fstype) + + for svc in system_services: + if service_running(svc): + log('Stopping services {} prior to migrating data.' + .format(svc), level=DEBUG) + service_stop(svc) + + place_data_on_block_device(blk_device, mount_point) + + for svc in system_services: + log('Starting service {} after migrating data.' + .format(svc), level=DEBUG) + service_start(svc) + + +def ensure_ceph_keyring(service, user=None, group=None, + relation='ceph', key=None): + """Ensures a ceph keyring is created for a named service and optionally + ensures user and group ownership. + + @returns boolean: Flag to indicate whether a key was successfully written + to disk based on either relation data or a supplied key + """ + if not key: + for rid in relation_ids(relation): + for unit in related_units(rid): + key = relation_get('key', rid=rid, unit=unit) + if key: + break + + if not key: + return False + + add_key(service=service, key=key) + keyring = _keyring_path(service) + if user and group: + check_call(['chown', '%s.%s' % (user, group), keyring]) + + return True + + +class CephBrokerRq(object): + """Ceph broker request. + + Multiple operations can be added to a request and sent to the Ceph broker + to be executed. + + Request is json-encoded for sending over the wire. + + The API is versioned and defaults to version 1. + """ + + # The below hash is the result of running + # `hashlib.sha1('[]'.encode()).hexdigest()` + EMPTY_LIST_SHA = '97d170e1550eee4afc0af065b78cda302a97674c' + + def __init__(self, api_version=1, request_id=None, raw_request_data=None): + """Initialize CephBrokerRq object. + + Builds a new empty request or rebuilds a request from on-wire JSON + data. + + :param api_version: API version for request (default: 1). + :type api_version: Optional[int] + :param request_id: Unique identifier for request. The identifier will + be updated as ops are added or removed from the + broker request. This ensures that Ceph will + correctly process requests where operations are + added after the initial request is processed. + (default: sha1 of operations) + :type request_id: Optional[str] + :param raw_request_data: JSON-encoded string to build request from. 
+ :type raw_request_data: Optional[str] + :raises: KeyError + """ + if raw_request_data: + request_data = json.loads(raw_request_data) + self.api_version = request_data['api-version'] + self.set_ops(request_data['ops']) + self.request_id = request_data['request-id'] + else: + self.api_version = api_version + if request_id: + self.request_id = request_id + else: + self.request_id = CephBrokerRq.EMPTY_LIST_SHA + self.ops = [] + + def _hash_ops(self): + """Return the sha1 of the requested Broker ops.""" + return hashlib.sha1(json.dumps(self.ops, sort_keys=True).encode()).hexdigest() + + def add_op(self, op): + """Add an op if it is not already in the list. + + :param op: Operation to add. + :type op: dict + """ + if op not in self.ops: + self.ops.append(op) + self.request_id = self._hash_ops() + + def add_op_request_access_to_group(self, name, namespace=None, + permission=None, key_name=None, + object_prefix_permissions=None): + """ + Adds the requested permissions to the current service's Ceph key, + allowing the key to access only the specified pools or + object prefixes. object_prefix_permissions should be a dictionary + keyed on the permission with the corresponding value being a list + of prefixes to apply that permission to. + { + 'rwx': ['prefix1', 'prefix2'], + 'class-read': ['prefix3']} + """ + self.add_op({ + 'op': 'add-permissions-to-key', 'group': name, + 'namespace': namespace, + 'name': key_name or service_name(), + 'group-permission': permission, + 'object-prefix-permissions': object_prefix_permissions}) + + def add_op_create_pool(self, name, replica_count=3, pg_num=None, + weight=None, group=None, namespace=None, + app_name=None, max_bytes=None, max_objects=None): + """DEPRECATED: Use ``add_op_create_replicated_pool()`` or + ``add_op_create_erasure_pool()`` instead. + """ + return self.add_op_create_replicated_pool( + name, replica_count=replica_count, pg_num=pg_num, weight=weight, + group=group, namespace=namespace, app_name=app_name, + max_bytes=max_bytes, max_objects=max_objects) + + # Use function parameters and docstring to define types in a compatible + # manner. + # + # NOTE: Our caller should always use a kwarg Dict when calling us so + # no need to maintain fixed order/position for parameters. Please keep them + # sorted by name when adding new ones. + def _partial_build_common_op_create(self, + app_name=None, + compression_algorithm=None, + compression_mode=None, + compression_required_ratio=None, + compression_min_blob_size=None, + compression_min_blob_size_hdd=None, + compression_min_blob_size_ssd=None, + compression_max_blob_size=None, + compression_max_blob_size_hdd=None, + compression_max_blob_size_ssd=None, + group=None, + max_bytes=None, + max_objects=None, + namespace=None, + rbd_mirroring_mode='pool', + weight=None): + """Build common part of a create pool operation. + + :param app_name: Tag pool with application name. Note that there is + certain protocols emerging upstream with regard to + meaningful application names to use. + Examples are 'rbd' and 'rgw'. + :type app_name: Optional[str] + :param compression_algorithm: Compressor to use, one of: + ('lz4', 'snappy', 'zlib', 'zstd') + :type compression_algorithm: Optional[str] + :param compression_mode: When to compress data, one of: + ('none', 'passive', 'aggressive', 'force') + :type compression_mode: Optional[str] + :param compression_required_ratio: Minimum compression ratio for data + chunk, if the requested ratio is not + achieved the compressed version will + be thrown away and the original + stored. 
+ :type compression_required_ratio: Optional[float] + :param compression_min_blob_size: Chunks smaller than this are never + compressed (unit: bytes). + :type compression_min_blob_size: Optional[int] + :param compression_min_blob_size_hdd: Chunks smaller than this are not + compressed when destined to + rotational media (unit: bytes). + :type compression_min_blob_size_hdd: Optional[int] + :param compression_min_blob_size_ssd: Chunks smaller than this are not + compressed when destined to flash + media (unit: bytes). + :type compression_min_blob_size_ssd: Optional[int] + :param compression_max_blob_size: Chunks larger than this are broken + into N * compression_max_blob_size + chunks before being compressed + (unit: bytes). + :type compression_max_blob_size: Optional[int] + :param compression_max_blob_size_hdd: Chunks larger than this are + broken into + N * compression_max_blob_size_hdd + chunks before being compressed + when destined for rotational + media (unit: bytes) + :type compression_max_blob_size_hdd: Optional[int] + :param compression_max_blob_size_ssd: Chunks larger than this are + broken into + N * compression_max_blob_size_ssd + chunks before being compressed + when destined for flash media + (unit: bytes). + :type compression_max_blob_size_ssd: Optional[int] + :param group: Group to add pool to + :type group: Optional[str] + :param max_bytes: Maximum bytes quota to apply + :type max_bytes: Optional[int] + :param max_objects: Maximum objects quota to apply + :type max_objects: Optional[int] + :param namespace: Group namespace + :type namespace: Optional[str] + :param rbd_mirroring_mode: Pool mirroring mode used when Ceph RBD + mirroring is enabled. + :type rbd_mirroring_mode: Optional[str] + :param weight: The percentage of data that is expected to be contained + in the pool from the total available space on the OSDs. + Used to calculate number of Placement Groups to create + for pool. + :type weight: Optional[float] + :returns: Dictionary with kwarg name as key. + :rtype: Dict[str,any] + :raises: AssertionError + """ + return { + 'app-name': app_name, + 'compression-algorithm': compression_algorithm, + 'compression-mode': compression_mode, + 'compression-required-ratio': compression_required_ratio, + 'compression-min-blob-size': compression_min_blob_size, + 'compression-min-blob-size-hdd': compression_min_blob_size_hdd, + 'compression-min-blob-size-ssd': compression_min_blob_size_ssd, + 'compression-max-blob-size': compression_max_blob_size, + 'compression-max-blob-size-hdd': compression_max_blob_size_hdd, + 'compression-max-blob-size-ssd': compression_max_blob_size_ssd, + 'group': group, + 'max-bytes': max_bytes, + 'max-objects': max_objects, + 'group-namespace': namespace, + 'rbd-mirroring-mode': rbd_mirroring_mode, + 'weight': weight, + } + + def add_op_create_replicated_pool(self, name, replica_count=3, pg_num=None, + crush_profile=None, **kwargs): + """Adds an operation to create a replicated pool. + + Refer to docstring for ``_partial_build_common_op_create`` for + documentation of keyword arguments. + + :param name: Name of pool to create + :type name: str + :param replica_count: Number of copies Ceph should keep of your data. + :type replica_count: int + :param pg_num: Request specific number of Placement Groups to create + for pool. + :type pg_num: int + :raises: AssertionError if provided data is of invalid type/range + :param crush_profile: Name of crush profile to use. If not set the + ceph-mon unit handling the broker request will + set its default value. 
+ :type crush_profile: Optional[str] + """ + if pg_num and kwargs.get('weight'): + raise ValueError('pg_num and weight are mutually exclusive') + + op = { + 'op': 'create-pool', + 'name': name, + 'replicas': replica_count, + 'pg_num': pg_num, + 'crush-profile': crush_profile + } + op.update(self._partial_build_common_op_create(**kwargs)) + + # Initialize Pool-object to validate type and range of ops. + pool = ReplicatedPool('dummy-service', op=op) + pool.validate() + + self.add_op(op) + + def add_op_create_erasure_pool(self, name, erasure_profile=None, + allow_ec_overwrites=False, **kwargs): + """Adds an operation to create a erasure coded pool. + + Refer to docstring for ``_partial_build_common_op_create`` for + documentation of keyword arguments. + + :param name: Name of pool to create + :type name: str + :param erasure_profile: Name of erasure code profile to use. If not + set the ceph-mon unit handling the broker + request will set its default value. + :type erasure_profile: str + :param allow_ec_overwrites: allow EC pools to be overridden + :type allow_ec_overwrites: bool + :raises: AssertionError if provided data is of invalid type/range + """ + op = { + 'op': 'create-pool', + 'name': name, + 'pool-type': 'erasure', + 'erasure-profile': erasure_profile, + 'allow-ec-overwrites': allow_ec_overwrites, + } + op.update(self._partial_build_common_op_create(**kwargs)) + + # Initialize Pool-object to validate type and range of ops. + pool = ErasurePool('dummy-service', op) + pool.validate() + + self.add_op(op) + + def add_op_create_erasure_profile(self, name, + erasure_type='jerasure', + erasure_technique=None, + k=None, m=None, + failure_domain=None, + lrc_locality=None, + shec_durability_estimator=None, + clay_helper_chunks=None, + device_class=None, + clay_scalar_mds=None, + lrc_crush_locality=None): + """Adds an operation to create a erasure coding profile. + + :param name: Name of profile to create + :type name: str + :param erasure_type: Which of the erasure coding plugins should be used + :type erasure_type: string + :param erasure_technique: EC plugin technique to use + :type erasure_technique: string + :param k: Number of data chunks + :type k: int + :param m: Number of coding chunks + :type m: int + :param lrc_locality: Group the coding and data chunks into sets of size locality + (lrc plugin) + :type lrc_locality: int + :param durability_estimator: The number of parity chunks each of which includes + a data chunk in its calculation range (shec plugin) + :type durability_estimator: int + :param helper_chunks: The number of helper chunks to use for recovery operations + (clay plugin) + :type: helper_chunks: int + :param failure_domain: Type of failure domain from Ceph bucket types + to be used + :type failure_domain: string + :param device_class: Device class to use for profile (ssd, hdd) + :type device_class: string + :param clay_scalar_mds: Plugin to use for CLAY layered construction + (jerasure|isa|shec) + :type clay_scaler_mds: string + :param lrc_crush_locality: Type of crush bucket in which set of chunks + defined by lrc_locality will be stored. 
+ :type lrc_crush_locality: string + """ + self.add_op({'op': 'create-erasure-profile', + 'name': name, + 'k': k, + 'm': m, + 'l': lrc_locality, + 'c': shec_durability_estimator, + 'd': clay_helper_chunks, + 'erasure-type': erasure_type, + 'erasure-technique': erasure_technique, + 'failure-domain': failure_domain, + 'device-class': device_class, + 'scalar-mds': clay_scalar_mds, + 'crush-locality': lrc_crush_locality}) + + def set_ops(self, ops): + """Set request ops to provided value. + + Useful for injecting ops that come from a previous request + to allow comparisons to ensure validity. + """ + self.ops = ops + self.request_id = self._hash_ops() + + @property + def request(self): + return json.dumps({'api-version': self.api_version, 'ops': self.ops, + 'request-id': self.request_id}) + + def _ops_equal(self, other): + keys_to_compare = [ + 'replicas', 'name', 'op', 'pg_num', 'group-permission', + 'object-prefix-permissions', + ] + keys_to_compare += list(self._partial_build_common_op_create().keys()) + if len(self.ops) == len(other.ops): + for req_no in range(0, len(self.ops)): + for key in keys_to_compare: + if self.ops[req_no].get(key) != other.ops[req_no].get(key): + return False + else: + return False + return True + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return False + if self.api_version == other.api_version and \ + self._ops_equal(other): + return True + else: + return False + + def __ne__(self, other): + return not self.__eq__(other) + + +class CephBrokerRsp(object): + """Ceph broker response. + + Response is json-decoded and contents provided as methods/properties. + + The API is versioned and defaults to version 1. + """ + + def __init__(self, encoded_rsp): + self.api_version = None + self.rsp = json.loads(encoded_rsp) + + @property + def request_id(self): + return self.rsp.get('request-id') + + @property + def exit_code(self): + return self.rsp.get('exit-code') + + @property + def exit_msg(self): + return self.rsp.get('stderr') + + +# Ceph Broker Conversation: +# If a charm needs an action to be taken by ceph it can create a CephBrokerRq +# and send that request to ceph via the ceph relation. The CephBrokerRq has a +# unique id so that the client can identity which CephBrokerRsp is associated +# with the request. Ceph will also respond to each client unit individually +# creating a response key per client unit eg glance/0 will get a CephBrokerRsp +# via key broker-rsp-glance-0 +# +# To use this the charm can just do something like: +# +# from charmhelpers.contrib.storage.linux.ceph import ( +# send_request_if_needed, +# is_request_complete, +# CephBrokerRq, +# ) +# +# @hooks.hook('ceph-relation-changed') +# def ceph_changed(): +# rq = CephBrokerRq() +# rq.add_op_create_pool(name='poolname', replica_count=3) +# +# if is_request_complete(rq): +# +# else: +# send_request_if_needed(get_ceph_request()) +# +# CephBrokerRq and CephBrokerRsp are serialized into JSON. 
Below is an example
+# of glance having sent a request to ceph which ceph has successfully processed
+#  'ceph:8': {
+#      'ceph/0': {
+#          'auth': 'cephx',
+#          'broker-rsp-glance-0': '{"request-id": "0bc7dc54", "exit-code": 0}',
+#          'broker_rsp': '{"request-id": "0da543b8", "exit-code": 0}',
+#          'ceph-public-address': '10.5.44.103',
+#          'key': 'AQCLDttVuHXINhAAvI144CB09dYchhHyTUY9BQ==',
+#          'private-address': '10.5.44.103',
+#      },
+#      'glance/0': {
+#          'broker_req': ('{"api-version": 1, "request-id": "0bc7dc54", '
+#                         '"ops": [{"replicas": 3, "name": "glance", '
+#                         '"op": "create-pool"}]}'),
+#          'private-address': '10.5.44.109',
+#      },
+#  }
+
+def get_previous_request(rid):
+    """Return the last ceph broker request sent on a given relation
+
+    :param rid: Relation id to query for request
+    :type rid: str
+    :returns: CephBrokerRq object or None if relation data not found.
+    :rtype: Optional[CephBrokerRq]
+    """
+    broker_req = relation_get(attribute='broker_req', rid=rid,
+                              unit=local_unit())
+    if broker_req:
+        return CephBrokerRq(raw_request_data=broker_req)
+
+
+def get_request_states(request, relation='ceph'):
+    """Return a dict of requests per relation id with their corresponding
+       completion state.
+
+    This allows a charm, which has a request for ceph, to see whether there is
+    an equivalent request already being processed and if so what state that
+    request is in.
+
+    @param request: A CephBrokerRq object
+    """
+    requests = {}
+    for rid in relation_ids(relation):
+        previous_request = get_previous_request(rid)
+        if request == previous_request:
+            sent = True
+            complete = is_request_complete_for_rid(previous_request, rid)
+        else:
+            sent = False
+            complete = False
+
+        requests[rid] = {
+            'sent': sent,
+            'complete': complete,
+        }
+
+    return requests
+
+
+def is_request_sent(request, relation='ceph'):
+    """Check to see if a functionally equivalent request has already been sent
+
+    Returns True if a similar request has been sent
+
+    @param request: A CephBrokerRq object
+    """
+    states = get_request_states(request, relation=relation)
+    for rid in states.keys():
+        if not states[rid]['sent']:
+            return False
+
+    return True
+
+
+def is_request_complete(request, relation='ceph'):
+    """Check to see if a functionally equivalent request has already been
+       completed
+
+    Returns True if a similar request has been completed
+
+    @param request: A CephBrokerRq object
+    """
+    states = get_request_states(request, relation=relation)
+    for rid in states.keys():
+        if not states[rid]['complete']:
+            return False
+
+    return True
+
+
+def is_request_complete_for_rid(request, rid):
+    """Check if a given request has been completed on the given relation
+
+    @param request: A CephBrokerRq object
+    @param rid: Relation ID
+    """
+    broker_key = get_broker_rsp_key()
+    for unit in related_units(rid):
+        rdata = relation_get(rid=rid, unit=unit)
+        if rdata.get(broker_key):
+            rsp = CephBrokerRsp(rdata.get(broker_key))
+            if rsp.request_id == request.request_id:
+                if not rsp.exit_code:
+                    return True
+        else:
+            # The remote unit sent no reply targeted at this unit so either the
+            # remote ceph cluster does not support unit targeted replies or it
+            # has not processed our request yet.
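+            # E.g. the 'broker_rsp' value in the module-level example above
+            # carries a request-id; a cluster publishing that also publishes
+            # the unit-specific broker-rsp-<unit> key handled in the branch
+            # above, so only an id-less legacy reply is trusted here.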
+            if rdata.get('broker_rsp'):
+                request_data = json.loads(rdata['broker_rsp'])
+                if request_data.get('request-id'):
+                    log('Ignoring legacy broker_rsp without unit key as remote '
+                        'service supports unit specific replies', level=DEBUG)
+                else:
+                    log('Using legacy broker_rsp as remote service does not '
+                        'support unit specific replies', level=DEBUG)
+                    rsp = CephBrokerRsp(rdata['broker_rsp'])
+                    if not rsp.exit_code:
+                        return True
+
+    return False
+
+
+def get_broker_rsp_key():
+    """Return broker response key for this unit.
+
+    This is the key that ceph is going to use to pass request status
+    information back to this unit.
+    """
+    return 'broker-rsp-' + local_unit().replace('/', '-')
+
+
+def send_request_if_needed(request, relation='ceph'):
+    """Send broker request if an equivalent request has not already been sent.
+
+    @param request: A CephBrokerRq object
+    """
+    if is_request_sent(request, relation=relation):
+        log('Request already sent but not complete, not sending new request',
+            level=DEBUG)
+    else:
+        for rid in relation_ids(relation):
+            log('Sending request {}'.format(request.request_id), level=DEBUG)
+            relation_set(relation_id=rid, broker_req=request.request)
+            relation_set(relation_id=rid, relation_settings={'unit-name': local_unit()})
+
+
+def has_broker_rsp(rid=None, unit=None):
+    """Return True if the broker_rsp key is 'truthy' (i.e. set to something) in the relation data.
+
+    :param rid: The relation to check (default of None means current relation)
+    :type rid: Union[str, None]
+    :param unit: The remote unit to check (default of None means current unit)
+    :type unit: Union[str, None]
+    :returns: True if broker key exists and is set to something 'truthy'
+    :rtype: bool
+    """
+    rdata = relation_get(rid=rid, unit=unit) or {}
+    broker_rsp = rdata.get(get_broker_rsp_key())
+    return True if broker_rsp else False
+
+
+def is_broker_action_done(action, rid=None, unit=None):
+    """Check whether broker action has completed yet.
+
+    @param action: name of action to be performed
+    @returns True if action complete otherwise False
+    """
+    rdata = relation_get(rid=rid, unit=unit) or {}
+    broker_rsp = rdata.get(get_broker_rsp_key())
+    if not broker_rsp:
+        return False
+
+    rsp = CephBrokerRsp(broker_rsp)
+    unit_name = local_unit().partition('/')[2]
+    key = "unit_{}_ceph_broker_action.{}".format(unit_name, action)
+    kvstore = kv()
+    val = kvstore.get(key=key)
+    if val and val == rsp.request_id:
+        return True
+
+    return False
+
+
+def mark_broker_action_done(action, rid=None, unit=None):
+    """Mark action as having been completed.
+
+    @param action: name of action to be performed
+    @returns None
+    """
+    rdata = relation_get(rid=rid, unit=unit) or {}
+    broker_rsp = rdata.get(get_broker_rsp_key())
+    if not broker_rsp:
+        return
+
+    rsp = CephBrokerRsp(broker_rsp)
+    unit_name = local_unit().partition('/')[2]
+    key = "unit_{}_ceph_broker_action.{}".format(unit_name, action)
+    kvstore = kv()
+    kvstore.set(key=key, value=rsp.request_id)
+    kvstore.flush()
+
+
+class CephConfContext(object):
+    """Ceph config (ceph.conf) context.
+
+    Supports user-provided Ceph configuration settings. Users can provide a
+    dictionary as the value for the config-flags charm option containing
+    Ceph configuration settings keyed by their section in ceph.conf.
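+
+    For example (values are illustrative only), setting config-flags to::
+
+        {'global': {'debug osd': '1/5'}, 'osd': {'osd max backfills': 1}}
+
+    yields a context containing those sections, subject to the
+    permitted_sections filter applied below.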
+    """
+    def __init__(self, permitted_sections=None):
+        self.permitted_sections = permitted_sections or []
+
+    def __call__(self):
+        conf = config('config-flags')
+        if not conf:
+            return {}
+
+        conf = config_flags_parser(conf)
+        if not isinstance(conf, dict):
+            log("Provided config-flags is not a dictionary - ignoring",
+                level=WARNING)
+            return {}
+
+        permitted = self.permitted_sections
+        if permitted:
+            diff = set(conf.keys()).difference(set(permitted))
+            if diff:
+                log("Config-flags contains invalid keys '%s' - they will be "
+                    "ignored" % (', '.join(diff)), level=WARNING)
+
+        ceph_conf = {}
+        for key in conf:
+            if permitted and key not in permitted:
+                log("Ignoring key '%s'" % key, level=WARNING)
+                continue
+
+            ceph_conf[key] = conf[key]
+        return ceph_conf
+
+
+class CephOSDConfContext(CephConfContext):
+    """Ceph config (ceph.conf) context.
+
+    Consolidates settings from config-flags via CephConfContext with
+    settings provided by the mons. The config-flag values are preserved in
+    conf['osd'], settings from the mons which do not clash with config-flag
+    settings are in conf['osd_from_client'] and finally settings which do
+    clash are in conf['osd_from_client_conflict']. Rather than silently drop
+    the conflicting settings they are provided in the context so they can be
+    rendered commented out to give some visibility to the admin.
+    """
+
+    def __init__(self, permitted_sections=None):
+        super(CephOSDConfContext, self).__init__(
+            permitted_sections=permitted_sections)
+        try:
+            self.settings_from_mons = get_osd_settings('mon')
+        except OSDSettingConflict:
+            log(
+                "OSD settings from mons are inconsistent, ignoring them",
+                level=WARNING)
+            self.settings_from_mons = {}
+
+    def filter_osd_from_mon_settings(self):
+        """Filter settings from client relation against config-flags.
+
+        :returns: A tuple of (
+            config-flag values,
+            client settings which do not conflict with config-flag values,
+            client settings which conflict with config-flag values)
+        :rtype: (OrderedDict, OrderedDict, OrderedDict)
+        """
+        ceph_conf = super(CephOSDConfContext, self).__call__()
+        conflicting_entries = {}
+        clear_entries = {}
+        for key, value in self.settings_from_mons.items():
+            if key in ceph_conf.get('osd', {}):
+                if ceph_conf['osd'][key] != value:
+                    conflicting_entries[key] = value
+            else:
+                clear_entries[key] = value
+        clear_entries = _order_dict_by_key(clear_entries)
+        conflicting_entries = _order_dict_by_key(conflicting_entries)
+        return ceph_conf, clear_entries, conflicting_entries
+
+    def __call__(self):
+        """Construct OSD config context.
+
+        Standard context with two additional special keys.
+            osd_from_client_conflict: client settings which conflict with
+                config-flag values
+            osd_from_client: settings which do not conflict with config-flag
+                values
+
+        :returns: OSD config context dict.
+        :rtype: dict
+        """
+        conf, osd_clear, osd_conflict = self.filter_osd_from_mon_settings()
+        conf['osd_from_client_conflict'] = osd_conflict
+        conf['osd_from_client'] = osd_clear
+        return conf
diff --git a/ceph-osd/hooks/charmhelpers/contrib/storage/linux/loopback.py b/ceph-osd/hooks/charmhelpers/contrib/storage/linux/loopback.py
new file mode 100644
index 00000000..04daea29
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/storage/linux/loopback.py
@@ -0,0 +1,88 @@
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import re
+from subprocess import (
+    check_call,
+    check_output,
+)
+
+
+##################################################
+# loopback device helpers.
+##################################################
+def loopback_devices():
+    '''
+    Parse through 'losetup -a' output to determine currently mapped
+    loopback devices. Output is expected to look like:
+
+        /dev/loop0: [0807]:961814 (/tmp/my.img)
+
+    or:
+
+        /dev/loop0: [0807]:961814 (/tmp/my.img (deleted))
+
+    :returns: dict: a dict mapping {loopback_dev: backing_file}
+    '''
+    loopbacks = {}
+    cmd = ['losetup', '-a']
+    output = check_output(cmd).decode('utf-8')
+    devs = [d.strip().split(' ', 2) for d in output.splitlines() if d != '']
+    for dev, _, f in devs:
+        loopbacks[dev.replace(':', '')] = re.search(r'\((.+)\)', f).groups()[0]
+    return loopbacks
+
+
+def create_loopback(file_path):
+    '''
+    Create a loopback device for a given backing file.
+
+    :returns: str: Full path to new loopback device (eg, /dev/loop0)
+    '''
+    file_path = os.path.abspath(file_path)
+    check_call(['losetup', '--find', file_path])
+    for d, f in loopback_devices().items():
+        if f == file_path:
+            return d
+
+
+def ensure_loopback_device(path, size):
+    '''
+    Ensure a loopback device exists for a given backing file path and size.
+    If a loopback device is not already mapped to the file, a new one will
+    be created.
+
+    TODO: Confirm size of found loopback device.
+
+    :returns: str: Full path to the ensured loopback device (eg, /dev/loop0)
+    '''
+    for d, f in loopback_devices().items():
+        if f == path:
+            return d
+
+    if not os.path.exists(path):
+        cmd = ['truncate', '--size', size, path]
+        check_call(cmd)
+
+    return create_loopback(path)
+
+
+def is_mapped_loopback_device(device):
+    """
+    Checks if a given device name is an existing/mapped loopback device.
+
+    :param device: str: Full path to the device (eg, /dev/loop1).
+    :returns: str: Path to the backing file if it is a loopback device,
+        empty string otherwise
+    """
+    return loopback_devices().get(device, "")
diff --git a/ceph-osd/hooks/charmhelpers/contrib/storage/linux/lvm.py b/ceph-osd/hooks/charmhelpers/contrib/storage/linux/lvm.py
new file mode 100644
index 00000000..0d294c79
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/contrib/storage/linux/lvm.py
@@ -0,0 +1,178 @@
+# Copyright 2014-2021 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import functools
+from subprocess import (
+    CalledProcessError,
+    check_call,
+    check_output,
+)
+
+
+##################################################
+# LVM helpers.
+################################################## +def deactivate_lvm_volume_group(block_device): + ''' + Deactivate any volume group associated with an LVM physical volume. + + :param block_device: str: Full path to LVM physical volume + ''' + vg = list_lvm_volume_group(block_device) + if vg: + cmd = ['vgchange', '-an', vg] + check_call(cmd) + + +def is_lvm_physical_volume(block_device): + ''' + Determine whether a block device is initialized as an LVM PV. + + :param block_device: str: Full path of block device to inspect. + + :returns: boolean: True if block device is a PV, False if not. + ''' + try: + check_output(['pvdisplay', block_device]) + return True + except CalledProcessError: + return False + + +def remove_lvm_physical_volume(block_device): + ''' + Remove LVM PV signatures from a given block device. + + :param block_device: str: Full path of block device to scrub. + ''' + check_call(['pvremove', '-ff', '--yes', block_device]) + + +def list_lvm_volume_group(block_device): + ''' + List LVM volume group associated with a given block device. + + Assumes block device is a valid LVM PV. + + :param block_device: str: Full path of block device to inspect. + + :returns: str: Name of volume group associated with block device or None + ''' + vg = None + pvd = check_output(['pvdisplay', block_device]).splitlines() + for lvm in pvd: + lvm = lvm.decode('UTF-8') + if lvm.strip().startswith('VG Name'): + vg = ' '.join(lvm.strip().split()[2:]) + return vg + + +def create_lvm_physical_volume(block_device): + ''' + Initialize a block device as an LVM physical volume. + + :param block_device: str: Full path of block device to initialize. + + ''' + check_call(['pvcreate', block_device]) + + +def create_lvm_volume_group(volume_group, block_device): + ''' + Create an LVM volume group backed by a given block device. + + Assumes block device has already been initialized as an LVM PV. + + :param volume_group: str: Name of volume group to create. + :block_device: str: Full path of PV-initialized block device. + ''' + check_call(['vgcreate', volume_group, block_device]) + + +def list_logical_volumes(select_criteria=None, path_mode=False): + ''' + List logical volumes + + :param select_criteria: str: Limit list to those volumes matching this + criteria (see 'lvs -S help' for more details) + :param path_mode: bool: return logical volume name in 'vg/lv' format, this + format is required for some commands like lvextend + :returns: [str]: List of logical volumes + ''' + lv_diplay_attr = 'lv_name' + if path_mode: + # Parsing output logic relies on the column order + lv_diplay_attr = 'vg_name,' + lv_diplay_attr + cmd = ['lvs', '--options', lv_diplay_attr, '--noheadings'] + if select_criteria: + cmd.extend(['--select', select_criteria]) + lvs = [] + for lv in check_output(cmd).decode('UTF-8').splitlines(): + if not lv: + continue + if path_mode: + lvs.append('/'.join(lv.strip().split())) + else: + lvs.append(lv.strip()) + return lvs + + +list_thin_logical_volume_pools = functools.partial( + list_logical_volumes, + select_criteria='lv_attr =~ ^t') + +list_thin_logical_volumes = functools.partial( + list_logical_volumes, + select_criteria='lv_attr =~ ^V') + + +def extend_logical_volume_by_device(lv_name, block_device): + ''' + Extends the size of logical volume lv_name by the amount of free space on + physical volume block_device. 
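+
+    For example (names are illustrative), if /dev/vdb1 is a PV in the vg0
+    volume group, ``extend_logical_volume_by_device('vg0/lv0', '/dev/vdb1')``
+    grows lv0 by the free extents that /dev/vdb1 contributes to vg0.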
+ + :param lv_name: str: name of logical volume to be extended (vg/lv format) + :param block_device: str: name of block_device to be allocated to lv_name + ''' + cmd = ['lvextend', lv_name, block_device] + check_call(cmd) + + +def create_logical_volume(lv_name, volume_group, size=None): + ''' + Create a new logical volume in an existing volume group + + :param lv_name: str: name of logical volume to be created. + :param volume_group: str: Name of volume group to use for the new volume. + :param size: str: Size of logical volume to create (100% if not supplied) + :raises subprocess.CalledProcessError: in the event that the lvcreate fails. + ''' + if size: + check_call([ + 'lvcreate', + '--yes', + '-L', + '{}'.format(size), + '-n', lv_name, volume_group + ]) + # create the lv with all the space available, this is needed because the + # system call is different for LVM + else: + check_call([ + 'lvcreate', + '--yes', + '-l', + '100%FREE', + '-n', lv_name, volume_group + ]) diff --git a/ceph-osd/hooks/charmhelpers/contrib/storage/linux/utils.py b/ceph-osd/hooks/charmhelpers/contrib/storage/linux/utils.py new file mode 100644 index 00000000..4d05b121 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/contrib/storage/linux/utils.py @@ -0,0 +1,143 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import re +from stat import S_ISBLK + +from subprocess import ( + CalledProcessError, + check_call, + check_output, + call +) + +from charmhelpers.core.hookenv import ( + log, + WARNING, + INFO +) + + +def _luks_uuid(dev): + """ + Check to see if dev is a LUKS encrypted volume, returning the UUID + of volume if it is. + + :param: dev: path to block device to check. + :returns: str. UUID of LUKS device or None if not a LUKS device + """ + try: + cmd = ['cryptsetup', 'luksUUID', dev] + return check_output(cmd).decode('UTF-8').strip() + except CalledProcessError: + return None + + +def is_luks_device(dev): + """ + Determine if dev is a LUKS-formatted block device. + + :param: dev: A full path to a block device to check for LUKS header + presence + :returns: boolean: indicates whether a device is used based on LUKS header. + """ + return True if _luks_uuid(dev) else False + + +def is_mapped_luks_device(dev): + """ + Determine if dev is a mapped LUKS device + :param: dev: A full path to a block device to be checked + :returns: boolean: indicates whether a device is mapped + """ + _, dirs, _ = next(os.walk( + '/sys/class/block/{}/holders/' + .format(os.path.basename(os.path.realpath(dev)))) + ) + is_held = len(dirs) > 0 + return is_held and is_luks_device(dev) + + +def is_block_device(path): + ''' + Confirm device at path is a valid block device node. + + :returns: boolean: True if path is a block device, False if not. + ''' + if not os.path.exists(path): + return False + return S_ISBLK(os.stat(path).st_mode) + + +def zap_disk(block_device): + ''' + Clear a block device of partition table. 
Relies on sgdisk, which is
+    installed as part of the 'gdisk' package in Ubuntu.
+
+    :param block_device: str: Full path of block device to clean.
+    '''
+    # https://github.com/ceph/ceph/commit/fdd7f8d83afa25c4e09aaedd90ab93f3b64a677b
+    # sometimes sgdisk exits non-zero; this is OK, dd will clean up
+    call(['sgdisk', '--zap-all', '--', block_device])
+    call(['sgdisk', '--clear', '--mbrtogpt', '--', block_device])
+    dev_end = check_output(['blockdev', '--getsz',
+                            block_device]).decode('UTF-8')
+    gpt_end = int(dev_end.split()[0]) - 100
+    check_call(['dd', 'if=/dev/zero', 'of=%s' % (block_device),
+                'bs=1M', 'count=1'])
+    check_call(['dd', 'if=/dev/zero', 'of=%s' % (block_device),
+                'bs=512', 'count=100', 'seek=%s' % (gpt_end)])
+
+
+def is_device_mounted(device):
+    '''Given a device path, return True if that device is mounted, and False
+    if it isn't.
+
+    :param device: str: Full path of the device to check.
+    :returns: boolean: True if the path represents a mounted device, False if
+        it doesn't.
+    '''
+    try:
+        out = check_output(['lsblk', '-P', device]).decode('UTF-8')
+    except Exception:
+        return False
+    return bool(re.search(r'MOUNTPOINT=".+"', out))
+
+
+def mkfs_xfs(device, force=False, inode_size=None):
+    """Format device with XFS filesystem.
+
+    By default this should fail if the device already has a filesystem on it.
+
+    :param device: Full path to device to format
+    :ptype device: str
+    :param force: Force operation
+    :ptype: force: boolean
+    :param inode_size: XFS inode size in bytes; if set to 0 or None,
+        the value used will be the XFS system default
+    :ptype inode_size: int
+    """
+    cmd = ['mkfs.xfs']
+    if force:
+        cmd.append("-f")
+
+    if inode_size:
+        if inode_size >= 256 and inode_size <= 2048:
+            cmd += ['-i', "size={}".format(inode_size)]
+        else:
+            log("Config value xfs-inode-size={} is invalid. Using system default.".format(inode_size), level=WARNING)
+    else:
+        log("Using XFS filesystem with system default inode size.", level=INFO)
+
+    cmd += [device]
+    check_call(cmd)
diff --git a/ceph-osd/hooks/charmhelpers/core/__init__.py b/ceph-osd/hooks/charmhelpers/core/__init__.py
new file mode 100644
index 00000000..d7567b86
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/core/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/ceph-osd/hooks/charmhelpers/core/decorators.py b/ceph-osd/hooks/charmhelpers/core/decorators.py
new file mode 100644
index 00000000..e7e95d17
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/core/decorators.py
@@ -0,0 +1,93 @@
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# Copyright 2014 Canonical Ltd. +# +# Authors: +# Edward Hope-Morley +# + +import time + +from charmhelpers.core.hookenv import ( + log, + INFO, +) + + +def retry_on_exception(num_retries, base_delay=0, exc_type=Exception): + """If the decorated function raises exception exc_type, allow num_retries + retry attempts before raise the exception. + """ + def _retry_on_exception_inner_1(f): + def _retry_on_exception_inner_2(*args, **kwargs): + retries = num_retries + multiplier = 1 + while True: + try: + return f(*args, **kwargs) + except exc_type: + if not retries: + raise + + delay = base_delay * multiplier + multiplier += 1 + log("Retrying '%s' %d more times (delay=%s)" % + (f.__name__, retries, delay), level=INFO) + retries -= 1 + if delay: + time.sleep(delay) + + return _retry_on_exception_inner_2 + + return _retry_on_exception_inner_1 + + +def retry_on_predicate(num_retries, predicate_fun, base_delay=0): + """Retry based on return value + + The return value of the decorated function is passed to the given predicate_fun. If the + result of the predicate is False, retry the decorated function up to num_retries times + + An exponential backoff up to base_delay^num_retries seconds can be introduced by setting + base_delay to a nonzero value. The default is to run with a zero (i.e. no) delay + + :param num_retries: Max. number of retries to perform + :type num_retries: int + :param predicate_fun: Predicate function to determine if a retry is necessary + :type predicate_fun: callable + :param base_delay: Starting value in seconds for exponential delay, defaults to 0 (no delay) + :type base_delay: float + """ + def _retry_on_pred_inner_1(f): + def _retry_on_pred_inner_2(*args, **kwargs): + retries = num_retries + multiplier = 1 + delay = base_delay + while True: + result = f(*args, **kwargs) + if predicate_fun(result) or retries <= 0: + return result + delay *= multiplier + multiplier += 1 + log("Result {}, retrying '{}' {} more times (delay={})".format( + result, f.__name__, retries, delay), level=INFO) + retries -= 1 + if delay: + time.sleep(delay) + + return _retry_on_pred_inner_2 + + return _retry_on_pred_inner_1 diff --git a/ceph-osd/hooks/charmhelpers/core/files.py b/ceph-osd/hooks/charmhelpers/core/files.py new file mode 100644 index 00000000..fdd82b75 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/core/files.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+__author__ = 'Jorge Niedbalski '
+
+import os
+import subprocess
+
+
+def sed(filename, before, after, flags='g'):
+    """
+    Search for and replace the given pattern in filename.
+
+    :param filename: relative or absolute file path.
+    :param before: expression to be replaced (see 'man sed')
+    :param after: expression to replace with (see 'man sed')
+    :param flags: sed-compatible regex flags; for example, to make
+        the search and replace case insensitive, specify ``flags="i"``.
+        The ``g`` flag is always specified regardless, so you do not
+        need to remember to include it when overriding this parameter.
+    :returns: If the sed command exit code was zero then return,
+        otherwise raise CalledProcessError.
+    """
+    expression = r's/{0}/{1}/{2}'.format(before,
+                                         after, flags)
+
+    return subprocess.check_call(["sed", "-i", "-r", "-e",
+                                  expression,
+                                  os.path.expanduser(filename)])
diff --git a/ceph-osd/hooks/charmhelpers/core/fstab.py b/ceph-osd/hooks/charmhelpers/core/fstab.py
new file mode 100644
index 00000000..d9fa9152
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/core/fstab.py
@@ -0,0 +1,132 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import io
+import os
+
+__author__ = 'Jorge Niedbalski R.
' + + +class Fstab(io.FileIO): + """This class extends file in order to implement a file reader/writer + for file `/etc/fstab` + """ + + class Entry(object): + """Entry class represents a non-comment line on the `/etc/fstab` file + """ + def __init__(self, device, mountpoint, filesystem, + options, d=0, p=0): + self.device = device + self.mountpoint = mountpoint + self.filesystem = filesystem + + if not options: + options = "defaults" + + self.options = options + self.d = int(d) + self.p = int(p) + + def __eq__(self, o): + return str(self) == str(o) + + def __str__(self): + return "{} {} {} {} {} {}".format(self.device, + self.mountpoint, + self.filesystem, + self.options, + self.d, + self.p) + + DEFAULT_PATH = os.path.join(os.path.sep, 'etc', 'fstab') + + def __init__(self, path=None): + if path: + self._path = path + else: + self._path = self.DEFAULT_PATH + super(Fstab, self).__init__(self._path, 'rb+') + + def _hydrate_entry(self, line): + # NOTE: use split with no arguments to split on any + # whitespace including tabs + return Fstab.Entry(*filter( + lambda x: x not in ('', None), + line.strip("\n").split())) + + @property + def entries(self): + self.seek(0) + for line in self.readlines(): + line = line.decode('us-ascii') + try: + if line.strip() and not line.strip().startswith("#"): + yield self._hydrate_entry(line) + except ValueError: + pass + + def get_entry_by_attr(self, attr, value): + for entry in self.entries: + e_attr = getattr(entry, attr) + if e_attr == value: + return entry + return None + + def add_entry(self, entry): + if self.get_entry_by_attr('device', entry.device): + return False + + self.write((str(entry) + '\n').encode('us-ascii')) + self.truncate() + return entry + + def remove_entry(self, entry): + self.seek(0) + + lines = [l.decode('us-ascii') for l in self.readlines()] + + found = False + for index, line in enumerate(lines): + if line.strip() and not line.strip().startswith("#"): + if self._hydrate_entry(line) == entry: + found = True + break + + if not found: + return False + + lines.remove(line) + + self.seek(0) + self.write(''.join(lines).encode('us-ascii')) + self.truncate() + return True + + @classmethod + def remove_by_mountpoint(cls, mountpoint, path=None): + fstab = cls(path=path) + entry = fstab.get_entry_by_attr('mountpoint', mountpoint) + if entry: + return fstab.remove_entry(entry) + return False + + @classmethod + def add(cls, device, mountpoint, filesystem, options=None, path=None): + return cls(path=path).add_entry(Fstab.Entry(device, + mountpoint, filesystem, + options=options)) diff --git a/ceph-osd/hooks/charmhelpers/core/hookenv.py b/ceph-osd/hooks/charmhelpers/core/hookenv.py new file mode 100644 index 00000000..370c3e8f --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/core/hookenv.py @@ -0,0 +1,1636 @@ +# Copyright 2013-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"Interactions with the Juju environment" +# +# Authors: +# Charm Helpers Developers + +import copy +from distutils.version import LooseVersion +from enum import Enum +from functools import wraps +from collections import namedtuple, UserDict +import glob +import os +import json +import yaml +import re +import subprocess +import sys +import errno +import tempfile +from subprocess import CalledProcessError + +from charmhelpers import deprecate + + +CRITICAL = "CRITICAL" +ERROR = "ERROR" +WARNING = "WARNING" +INFO = "INFO" +DEBUG = "DEBUG" +TRACE = "TRACE" +MARKER = object() +SH_MAX_ARG = 131071 + + +RANGE_WARNING = ('Passing NO_PROXY string that includes a cidr. ' + 'This may not be compatible with software you are ' + 'running in your shell.') + + +class WORKLOAD_STATES(Enum): + ACTIVE = 'active' + BLOCKED = 'blocked' + MAINTENANCE = 'maintenance' + WAITING = 'waiting' + + +cache = {} + + +def cached(func): + """Cache return values for multiple executions of func + args + + For example:: + + @cached + def unit_get(attribute): + pass + + unit_get('test') + + will cache the result of unit_get + 'test' for future calls. + """ + @wraps(func) + def wrapper(*args, **kwargs): + global cache + key = json.dumps((func, args, kwargs), sort_keys=True, default=str) + try: + return cache[key] + except KeyError: + pass # Drop out of the exception handler scope. + res = func(*args, **kwargs) + cache[key] = res + return res + wrapper._wrapped = func + return wrapper + + +def flush(key): + """Flushes any entries from function cache where the + key is found in the function+args """ + flush_list = [] + for item in cache: + if key in item: + flush_list.append(item) + for item in flush_list: + del cache[item] + + +def log(message, level=None): + """Write a message to the juju log""" + command = ['juju-log'] + if level: + command += ['-l', level] + if not isinstance(message, str): + message = repr(message) + command += [message[:SH_MAX_ARG]] + # Missing juju-log should not cause failures in unit tests + # Send log output to stderr + try: + subprocess.call(command) + except OSError as e: + if e.errno == errno.ENOENT: + if level: + message = "{}: {}".format(level, message) + message = "juju-log: {}".format(message) + print(message, file=sys.stderr) + else: + raise + + +def function_log(message): + """Write a function progress message""" + command = ['function-log'] + if not isinstance(message, str): + message = repr(message) + command += [message[:SH_MAX_ARG]] + # Missing function-log should not cause failures in unit tests + # Send function_log output to stderr + try: + subprocess.call(command) + except OSError as e: + if e.errno == errno.ENOENT: + message = "function-log: {}".format(message) + print(message, file=sys.stderr) + else: + raise + + +class Serializable(UserDict): + """Wrapper, an object that can be serialized to yaml or json""" + + def __init__(self, obj): + # wrap the object + UserDict.__init__(self) + self.data = obj + + def __getattr__(self, attr): + # See if this object has attribute. + if attr in ("json", "yaml", "data"): + return self.__dict__[attr] + # Check for attribute in wrapped object. + got = getattr(self.data, attr, MARKER) + if got is not MARKER: + return got + # Proxy to the wrapped object via dict interface. + try: + return self.data[attr] + except KeyError: + raise AttributeError(attr) + + def __getstate__(self): + # Pickle as a standard dictionary. + return self.data + + def __setstate__(self, state): + # Unpickle into our wrapper. 
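+        # (invoked by the pickle machinery when restoring the object)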
+ self.data = state + + def json(self): + """Serialize the object to json""" + return json.dumps(self.data) + + def yaml(self): + """Serialize the object to yaml""" + return yaml.dump(self.data) + + +def execution_environment(): + """A convenient bundling of the current execution context""" + context = {} + context['conf'] = config() + if relation_id(): + context['reltype'] = relation_type() + context['relid'] = relation_id() + context['rel'] = relation_get() + context['unit'] = local_unit() + context['rels'] = relations() + context['env'] = os.environ + return context + + +def in_relation_hook(): + """Determine whether we're running in a relation hook""" + return 'JUJU_RELATION' in os.environ + + +def relation_type(): + """The scope for the current relation hook""" + return os.environ.get('JUJU_RELATION', None) + + +@cached +def relation_id(relation_name=None, service_or_unit=None): + """The relation ID for the current or a specified relation""" + if not relation_name and not service_or_unit: + return os.environ.get('JUJU_RELATION_ID', None) + elif relation_name and service_or_unit: + service_name = service_or_unit.split('/')[0] + for relid in relation_ids(relation_name): + remote_service = remote_service_name(relid) + if remote_service == service_name: + return relid + else: + raise ValueError('Must specify neither or both of relation_name and service_or_unit') + + +def departing_unit(): + """The departing unit for the current relation hook. + + Available since juju 2.8. + + :returns: the departing unit, or None if the information isn't available. + :rtype: Optional[str] + """ + return os.environ.get('JUJU_DEPARTING_UNIT', None) + + +def local_unit(): + """Local unit ID""" + return os.environ['JUJU_UNIT_NAME'] + + +def remote_unit(): + """The remote unit for the current relation hook""" + return os.environ.get('JUJU_REMOTE_UNIT', None) + + +def application_name(): + """ + The name of the deployed application this unit belongs to. + """ + return local_unit().split('/')[0] + + +def service_name(): + """ + .. deprecated:: 0.19.1 + Alias for :func:`application_name`. + """ + return application_name() + + +def model_name(): + """ + Name of the model that this unit is deployed in. + """ + return os.environ['JUJU_MODEL_NAME'] + + +def model_uuid(): + """ + UUID of the model that this unit is deployed in. + """ + return os.environ['JUJU_MODEL_UUID'] + + +def principal_unit(): + """Returns the principal unit of this unit, otherwise None""" + # Juju 2.2 and above provides JUJU_PRINCIPAL_UNIT + principal_unit = os.environ.get('JUJU_PRINCIPAL_UNIT', None) + # If it's empty, then this unit is the principal + if principal_unit == '': + return os.environ['JUJU_UNIT_NAME'] + elif principal_unit is not None: + return principal_unit + # For Juju 2.1 and below, let's try work out the principle unit by + # the various charms' metadata.yaml. 
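+    # A subordinate charm declares 'subordinate: true' in its metadata.yaml,
+    # so the first co-located unit whose metadata lacks that flag is taken
+    # to be the principal.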
+ for reltype in relation_types(): + for rid in relation_ids(reltype): + for unit in related_units(rid): + md = _metadata_unit(unit) + if not md: + continue + subordinate = md.pop('subordinate', None) + if not subordinate: + return unit + return None + + +@cached +def remote_service_name(relid=None): + """The remote service name for a given relation-id (or the current relation)""" + if relid is None: + unit = remote_unit() + else: + units = related_units(relid) + unit = units[0] if units else None + return unit.split('/')[0] if unit else None + + +def hook_name(): + """The name of the currently executing hook""" + return os.environ.get('JUJU_HOOK_NAME', os.path.basename(sys.argv[0])) + + +class Config(dict): + """A dictionary representation of the charm's config.yaml, with some + extra features: + + - See which values in the dictionary have changed since the previous hook. + - For values that have changed, see what the previous value was. + - Store arbitrary data for use in a later hook. + + NOTE: Do not instantiate this object directly - instead call + ``hookenv.config()``, which will return an instance of :class:`Config`. + + Example usage:: + + >>> # inside a hook + >>> from charmhelpers.core import hookenv + >>> config = hookenv.config() + >>> config['foo'] + 'bar' + >>> # store a new key/value for later use + >>> config['mykey'] = 'myval' + + + >>> # user runs `juju set mycharm foo=baz` + >>> # now we're inside subsequent config-changed hook + >>> config = hookenv.config() + >>> config['foo'] + 'baz' + >>> # test to see if this val has changed since last hook + >>> config.changed('foo') + True + >>> # what was the previous value? + >>> config.previous('foo') + 'bar' + >>> # keys/values that we add are preserved across hooks + >>> config['mykey'] + 'myval' + + """ + CONFIG_FILE_NAME = '.juju-persistent-config' + + def __init__(self, *args, **kw): + super(Config, self).__init__(*args, **kw) + self.implicit_save = True + self._prev_dict = None + self.path = os.path.join(charm_dir(), Config.CONFIG_FILE_NAME) + if os.path.exists(self.path) and os.stat(self.path).st_size: + self.load_previous() + atexit(self._implicit_save) + + def load_previous(self, path=None): + """Load previous copy of config from disk. + + In normal usage you don't need to call this method directly - it + is called automatically at object initialization. + + :param path: + + File path from which to load the previous config. If `None`, + config is loaded from the default location. If `path` is + specified, subsequent `save()` calls will write to the same + path. + + """ + self.path = path or self.path + with open(self.path) as f: + try: + self._prev_dict = json.load(f) + except ValueError as e: + log('Found but was unable to parse previous config data, ' + 'ignoring which will report all values as changed - {}' + .format(str(e)), level=ERROR) + return + for k, v in copy.deepcopy(self._prev_dict).items(): + if k not in self: + self[k] = v + + def changed(self, key): + """Return True if the current value for this key is different from + the previous value. + + """ + if self._prev_dict is None: + return True + return self.previous(key) != self.get(key) + + def previous(self, key): + """Return previous value for this key, or None if there + is no previous value. + + """ + if self._prev_dict: + return self._prev_dict.get(key) + return None + + def save(self): + """Save this config to disk. 
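+
+        The config is serialized as JSON and the file mode is set to 0600,
+        as config values may include sensitive data.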
+ + If the charm is using the :mod:`Services Framework ` + or :meth:'@hook ' decorator, this + is called automatically at the end of successful hook execution. + Otherwise, it should be called directly by user code. + + To disable automatic saves, set ``implicit_save=False`` on this + instance. + + """ + with open(self.path, 'w') as f: + os.fchmod(f.fileno(), 0o600) + json.dump(self, f) + + def _implicit_save(self): + if self.implicit_save: + self.save() + + +_cache_config = None + + +def config(scope=None): + """ + Get the juju charm configuration (scope==None) or individual key, + (scope=str). The returned value is a Python data structure loaded as + JSON from the Juju config command. + + :param scope: If set, return the value for the specified key. + :type scope: Optional[str] + :returns: Either the whole config as a Config, or a key from it. + :rtype: Any + """ + global _cache_config + config_cmd_line = ['config-get', '--all', '--format=json'] + try: + if _cache_config is None: + config_data = json.loads( + subprocess.check_output(config_cmd_line).decode('UTF-8')) + _cache_config = Config(config_data) + if scope is not None: + return _cache_config.get(scope) + return _cache_config + except (json.decoder.JSONDecodeError, UnicodeDecodeError) as e: + log('Unable to parse output from config-get: config_cmd_line="{}" ' + 'message="{}"' + .format(config_cmd_line, str(e)), level=ERROR) + return None + + +@cached +def relation_get(attribute=None, unit=None, rid=None, app=None): + """Get relation information""" + _args = ['relation-get', '--format=json'] + if app is not None: + if unit is not None: + raise ValueError("Cannot use both 'unit' and 'app'") + _args.append('--app') + if rid: + _args.append('-r') + _args.append(rid) + _args.append(attribute or '-') + # unit or application name + if unit or app: + _args.append(unit or app) + try: + return json.loads(subprocess.check_output(_args).decode('UTF-8')) + except ValueError: + return None + except CalledProcessError as e: + if e.returncode == 2: + return None + raise + + +@cached +def _relation_set_accepts_file(): + """Return True if the juju relation-set command accepts a file. + + Cache the result as it won't change during the execution of a hook, and + thus we can make relation_set() more efficient by only checking for the + first relation_set() call. + + :returns: True if relation_set accepts a file. + :rtype: bool + :raises: subprocess.CalledProcessError if the check fails. + """ + return "--file" in subprocess.check_output( + ["relation-set", "--help"], universal_newlines=True) + + +def relation_set(relation_id=None, relation_settings=None, app=False, **kwargs): + """Set relation information for the current unit""" + relation_settings = relation_settings if relation_settings else {} + relation_cmd_line = ['relation-set'] + if app: + relation_cmd_line.append('--app') + if relation_id is not None: + relation_cmd_line.extend(('-r', relation_id)) + settings = relation_settings.copy() + settings.update(kwargs) + for key, value in settings.items(): + # Force value to be a string: it always should, but some call + # sites pass in things like dicts or numbers. + if value is not None: + settings[key] = "{}".format(value) + if _relation_set_accepts_file(): + # --file was introduced in Juju 1.23.2. Use it by default if + # available, since otherwise we'll break if the relation data is + # too big. Ideally we should tell relation-set to read the data from + # stdin, but that feature is broken in 1.23.2: Bug #1454678. 
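+        # Write the settings to a temporary YAML file, pass its path via
+        # --file, and remove the file once relation-set returns.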
+ with tempfile.NamedTemporaryFile(delete=False) as settings_file: + settings_file.write(yaml.safe_dump(settings).encode("utf-8")) + subprocess.check_call( + relation_cmd_line + ["--file", settings_file.name]) + os.remove(settings_file.name) + else: + for key, value in settings.items(): + if value is None: + relation_cmd_line.append('{}='.format(key)) + else: + relation_cmd_line.append('{}={}'.format(key, value)) + subprocess.check_call(relation_cmd_line) + # Flush cache of any relation-gets for local unit + flush(local_unit()) + + +def relation_clear(r_id=None): + ''' Clears any relation data already set on relation r_id ''' + settings = relation_get(rid=r_id, + unit=local_unit()) + for setting in settings: + if setting not in ['public-address', 'private-address']: + settings[setting] = None + relation_set(relation_id=r_id, + **settings) + + +@cached +def relation_ids(reltype=None): + """A list of relation_ids""" + reltype = reltype or relation_type() + relid_cmd_line = ['relation-ids', '--format=json'] + if reltype is not None: + relid_cmd_line.append(reltype) + return json.loads( + subprocess.check_output(relid_cmd_line).decode('UTF-8')) or [] + return [] + + +@cached +def related_units(relid=None): + """A list of related units""" + relid = relid or relation_id() + units_cmd_line = ['relation-list', '--format=json'] + if relid is not None: + units_cmd_line.extend(('-r', relid)) + return json.loads( + subprocess.check_output(units_cmd_line).decode('UTF-8')) or [] + + +def expected_peer_units(): + """Get a generator for units we expect to join peer relation based on + goal-state. + + The local unit is excluded from the result to make it easy to gauge + completion of all peers joining the relation with existing hook tools. + + Example usage: + log('peer {} of {} joined peer relation' + .format(len(related_units()), + len(list(expected_peer_units())))) + + This function will raise NotImplementedError if used with juju versions + without goal-state support. + + :returns: iterator + :rtype: types.GeneratorType + :raises: NotImplementedError + """ + if not has_juju_version("2.4.0"): + # goal-state first appeared in 2.4.0. + raise NotImplementedError("goal-state") + _goal_state = goal_state() + return (key for key in _goal_state['units'] + if '/' in key and key != local_unit()) + + +def expected_related_units(reltype=None): + """Get a generator for units we expect to join relation based on + goal-state. + + Note that you can not use this function for the peer relation, take a look + at expected_peer_units() for that. + + This function will raise KeyError if you request information for a + relation type for which juju goal-state does not have information. It will + raise NotImplementedError if used with juju versions without goal-state + support. + + Example usage: + log('participant {} of {} joined relation {}' + .format(len(related_units()), + len(list(expected_related_units())), + relation_type())) + + :param reltype: Relation type to list data for, default is to list data for + the relation type we are currently executing a hook for. + :type reltype: str + :returns: iterator + :rtype: types.GeneratorType + :raises: KeyError, NotImplementedError + """ + if not has_juju_version("2.4.4"): + # goal-state existed in 2.4.0, but did not list individual units to + # join a relation in 2.4.1 through 2.4.3. 
(LP: #1794739) + raise NotImplementedError("goal-state relation unit count") + reltype = reltype or relation_type() + _goal_state = goal_state() + return (key for key in _goal_state['relations'][reltype] if '/' in key) + + +@cached +def relation_for_unit(unit=None, rid=None): + """Get the json representation of a unit's relation""" + unit = unit or remote_unit() + relation = relation_get(unit=unit, rid=rid) + for key in relation: + if key.endswith('-list'): + relation[key] = relation[key].split() + relation['__unit__'] = unit + return relation + + +@cached +def relations_for_id(relid=None): + """Get relations of a specific relation ID""" + relation_data = [] + relid = relid or relation_ids() + for unit in related_units(relid): + unit_data = relation_for_unit(unit, relid) + unit_data['__relid__'] = relid + relation_data.append(unit_data) + return relation_data + + +@cached +def relations_of_type(reltype=None): + """Get relations of a specific type""" + relation_data = [] + reltype = reltype or relation_type() + for relid in relation_ids(reltype): + for relation in relations_for_id(relid): + relation['__relid__'] = relid + relation_data.append(relation) + return relation_data + + +@cached +def metadata(): + """Get the current charm metadata.yaml contents as a python object""" + with open(os.path.join(charm_dir(), 'metadata.yaml')) as md: + return yaml.safe_load(md) + + +def _metadata_unit(unit): + """Given the name of a unit (e.g. apache2/0), get the unit charm's + metadata.yaml. Very similar to metadata() but allows us to inspect + other units. Unit needs to be co-located, such as a subordinate or + principal/primary. + + :returns: metadata.yaml as a python object. + + """ + basedir = os.sep.join(charm_dir().split(os.sep)[:-2]) + unitdir = 'unit-{}'.format(unit.replace(os.sep, '-')) + joineddir = os.path.join(basedir, unitdir, 'charm', 'metadata.yaml') + if not os.path.exists(joineddir): + return None + with open(joineddir) as md: + return yaml.safe_load(md) + + +@cached +def relation_types(): + """Get a list of relation types supported by this charm""" + rel_types = [] + md = metadata() + for key in ('provides', 'requires', 'peers'): + section = md.get(key) + if section: + rel_types.extend(section.keys()) + return rel_types + + +@cached +def peer_relation_id(): + '''Get the peers relation id if a peers relation has been joined, else None.''' + md = metadata() + section = md.get('peers') + if section: + for key in section: + relids = relation_ids(key) + if relids: + return relids[0] + return None + + +@cached +def relation_to_interface(relation_name): + """ + Given the name of a relation, return the interface that relation uses. + + :returns: The interface name, or ``None``. + """ + return relation_to_role_and_interface(relation_name)[1] + + +@cached +def relation_to_role_and_interface(relation_name): + """ + Given the name of a relation, return the role and the name of the interface + that relation uses (where role is one of ``provides``, ``requires``, or ``peers``). + + :returns: A tuple containing ``(role, interface)``, or ``(None, None)``. 
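+
+    For example, with ``requires: {mon: {interface: ceph-osd}}`` in
+    metadata.yaml (an illustrative snippet),
+    ``relation_to_role_and_interface('mon')`` returns
+    ``('requires', 'ceph-osd')``.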
+ """ + _metadata = metadata() + for role in ('provides', 'requires', 'peers'): + interface = _metadata.get(role, {}).get(relation_name, {}).get('interface') + if interface: + return role, interface + return None, None + + +@cached +def role_and_interface_to_relations(role, interface_name): + """ + Given a role and interface name, return a list of relation names for the + current charm that use that interface under that role (where role is one + of ``provides``, ``requires``, or ``peers``). + + :returns: A list of relation names. + """ + _metadata = metadata() + results = [] + for relation_name, relation in _metadata.get(role, {}).items(): + if relation['interface'] == interface_name: + results.append(relation_name) + return results + + +@cached +def interface_to_relations(interface_name): + """ + Given an interface, return a list of relation names for the current + charm that use that interface. + + :returns: A list of relation names. + """ + results = [] + for role in ('provides', 'requires', 'peers'): + results.extend(role_and_interface_to_relations(role, interface_name)) + return results + + +@cached +def charm_name(): + """Get the name of the current charm as is specified on metadata.yaml""" + return metadata().get('name') + + +@cached +def relations(): + """Get a nested dictionary of relation data for all related units""" + rels = {} + for reltype in relation_types(): + relids = {} + for relid in relation_ids(reltype): + units = {local_unit(): relation_get(unit=local_unit(), rid=relid)} + for unit in related_units(relid): + reldata = relation_get(unit=unit, rid=relid) + units[unit] = reldata + relids[relid] = units + rels[reltype] = relids + return rels + + +@cached +def is_relation_made(relation, keys='private-address'): + ''' + Determine whether a relation is established by checking for + presence of key(s). If a list of keys is provided, they + must all be present for the relation to be identified as made + ''' + if isinstance(keys, str): + keys = [keys] + for r_id in relation_ids(relation): + for unit in related_units(r_id): + context = {} + for k in keys: + context[k] = relation_get(k, rid=r_id, + unit=unit) + if None not in context.values(): + return True + return False + + +def _port_op(op_name, port, protocol="TCP"): + """Open or close a service network port""" + _args = [op_name] + icmp = protocol.upper() == "ICMP" + if icmp: + _args.append(protocol) + else: + _args.append('{}/{}'.format(port, protocol)) + try: + subprocess.check_call(_args) + except subprocess.CalledProcessError: + # Older Juju pre 2.3 doesn't support ICMP + # so treat it as a no-op if it fails. 
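+        # For regular port/protocol arguments the failure is genuine,
+        # so re-raise it.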
+ if not icmp: + raise + + +def open_port(port, protocol="TCP"): + """Open a service network port""" + _port_op('open-port', port, protocol) + + +def close_port(port, protocol="TCP"): + """Close a service network port""" + _port_op('close-port', port, protocol) + + +def open_ports(start, end, protocol="TCP"): + """Opens a range of service network ports""" + _args = ['open-port'] + _args.append('{}-{}/{}'.format(start, end, protocol)) + subprocess.check_call(_args) + + +def close_ports(start, end, protocol="TCP"): + """Close a range of service network ports""" + _args = ['close-port'] + _args.append('{}-{}/{}'.format(start, end, protocol)) + subprocess.check_call(_args) + + +def opened_ports(): + """Get the opened ports + + *Note that this will only show ports opened in a previous hook* + + :returns: Opened ports as a list of strings: ``['8080/tcp', '8081-8083/tcp']`` + """ + _args = ['opened-ports', '--format=json'] + return json.loads(subprocess.check_output(_args).decode('UTF-8')) + + +@cached +def unit_get(attribute): + """Get the unit ID for the remote unit""" + _args = ['unit-get', '--format=json', attribute] + try: + return json.loads(subprocess.check_output(_args).decode('UTF-8')) + except ValueError: + return None + + +def unit_public_ip(): + """Get this unit's public IP address""" + return unit_get('public-address') + + +def unit_private_ip(): + """Get this unit's private IP address""" + return unit_get('private-address') + + +@cached +def storage_get(attribute=None, storage_id=None): + """Get storage attributes""" + _args = ['storage-get', '--format=json'] + if storage_id: + _args.extend(('-s', storage_id)) + if attribute: + _args.append(attribute) + try: + return json.loads(subprocess.check_output(_args).decode('UTF-8')) + except ValueError: + return None + + +@cached +def storage_list(storage_name=None): + """List the storage IDs for the unit""" + _args = ['storage-list', '--format=json'] + if storage_name: + _args.append(storage_name) + try: + return json.loads(subprocess.check_output(_args).decode('UTF-8')) + except ValueError: + return None + except OSError as e: + import errno + if e.errno == errno.ENOENT: + # storage-list does not exist + return [] + raise + + +class UnregisteredHookError(Exception): + """Raised when an undefined hook is called""" + pass + + +class Hooks(object): + """A convenient handler for hook functions. + + Example:: + + hooks = Hooks() + + # register a hook, taking its name from the function name + @hooks.hook() + def install(): + pass # your code here + + # register a hook, providing a custom hook name + @hooks.hook("config-changed") + def config_changed(): + pass # your code here + + if __name__ == "__main__": + # execute a hook based on the name the program is called by + hooks.execute(sys.argv) + """ + + def __init__(self, config_save=None): + super(Hooks, self).__init__() + self._hooks = {} + + # For unknown reasons, we allow the Hooks constructor to override + # config().implicit_save. 
+ if config_save is not None: + config().implicit_save = config_save + + def register(self, name, function): + """Register a hook""" + self._hooks[name] = function + + def execute(self, args): + """Execute a registered hook based on args[0]""" + _run_atstart() + hook_name = os.path.basename(args[0]) + if hook_name in self._hooks: + try: + self._hooks[hook_name]() + except SystemExit as x: + if x.code is None or x.code == 0: + _run_atexit() + raise + _run_atexit() + else: + raise UnregisteredHookError(hook_name) + + def hook(self, *hook_names): + """Decorator, registering them as hooks""" + def wrapper(decorated): + for hook_name in hook_names: + self.register(hook_name, decorated) + else: + self.register(decorated.__name__, decorated) + if '_' in decorated.__name__: + self.register( + decorated.__name__.replace('_', '-'), decorated) + return decorated + return wrapper + + +class NoNetworkBinding(Exception): + pass + + +def charm_dir(): + """Return the root directory of the current charm""" + d = os.environ.get('JUJU_CHARM_DIR') + if d is not None: + return d + return os.environ.get('CHARM_DIR') + + +def cmd_exists(cmd): + """Return True if the specified cmd exists in the path""" + return any( + os.access(os.path.join(path, cmd), os.X_OK) + for path in os.environ["PATH"].split(os.pathsep) + ) + + +@cached +def action_get(key=None): + """Gets the value of an action parameter, or all key/value param pairs.""" + cmd = ['action-get'] + if key is not None: + cmd.append(key) + cmd.append('--format=json') + action_data = json.loads(subprocess.check_output(cmd).decode('UTF-8')) + return action_data + + +@cached +@deprecate("moved to action_get()", log=log) +def function_get(key=None): + """ + .. deprecated:: + Gets the value of an action parameter, or all key/value param pairs. + """ + cmd = ['function-get'] + # Fallback for older charms. + if not cmd_exists('function-get'): + cmd = ['action-get'] + + if key is not None: + cmd.append(key) + cmd.append('--format=json') + function_data = json.loads(subprocess.check_output(cmd).decode('UTF-8')) + return function_data + + +def action_set(values): + """Sets the values to be returned after the action finishes.""" + cmd = ['action-set'] + for k, v in list(values.items()): + cmd.append('{}={}'.format(k, v)) + subprocess.check_call(cmd) + + +@deprecate("moved to action_set()", log=log) +def function_set(values): + """ + .. deprecated:: + Sets the values to be returned after the function finishes. + """ + cmd = ['function-set'] + # Fallback for older charms. + if not cmd_exists('function-get'): + cmd = ['action-set'] + + for k, v in list(values.items()): + cmd.append('{}={}'.format(k, v)) + subprocess.check_call(cmd) + + +def action_fail(message): + """ + Sets the action status to failed and sets the error message. + + The results set by action_set are preserved. + """ + subprocess.check_call(['action-fail', message]) + + +@deprecate("moved to action_fail()", log=log) +def function_fail(message): + """ + .. deprecated:: + Sets the function status to failed and sets the error message. + + The results set by function_set are preserved. + """ + cmd = ['function-fail'] + # Fallback for older charms. 
+ if not cmd_exists('function-fail'): + cmd = ['action-fail'] + cmd.append(message) + + subprocess.check_call(cmd) + + +def action_name(): + """Get the name of the currently executing action.""" + return os.environ.get('JUJU_ACTION_NAME') + + +def function_name(): + """Get the name of the currently executing function.""" + return os.environ.get('JUJU_FUNCTION_NAME') or action_name() + + +def action_uuid(): + """Get the UUID of the currently executing action.""" + return os.environ.get('JUJU_ACTION_UUID') + + +def function_id(): + """Get the ID of the currently executing function.""" + return os.environ.get('JUJU_FUNCTION_ID') or action_uuid() + + +def action_tag(): + """Get the tag for the currently executing action.""" + return os.environ.get('JUJU_ACTION_TAG') + + +def function_tag(): + """Get the tag for the currently executing function.""" + return os.environ.get('JUJU_FUNCTION_TAG') or action_tag() + + +def status_set(workload_state, message, application=False): + """Set the workload state with a message + + Use status-set to set the workload state with a message which is visible + to the user via juju status. If the status-set command is not found then + assume this is juju < 1.23 and juju-log the message instead. + + workload_state -- valid juju workload state. str or WORKLOAD_STATES + message -- status update message + application -- Whether this is an application state set + """ + bad_state_msg = '{!r} is not a valid workload state' + + if isinstance(workload_state, str): + try: + # Convert string to enum. + workload_state = WORKLOAD_STATES[workload_state.upper()] + except KeyError: + raise ValueError(bad_state_msg.format(workload_state)) + + if workload_state not in WORKLOAD_STATES: + raise ValueError(bad_state_msg.format(workload_state)) + + cmd = ['status-set'] + if application: + cmd.append('--application') + cmd.extend([workload_state.value, message]) + try: + ret = subprocess.call(cmd) + if ret == 0: + return + except OSError as e: + if e.errno != errno.ENOENT: + raise + log_message = 'status-set failed: {} {}'.format(workload_state.value, + message) + log(log_message, level='INFO') + + +def status_get(): + """Retrieve the previously set juju workload state and message + + If the status-get command is not found then assume this is juju < 1.23 and + return 'unknown', "" + + """ + cmd = ['status-get', "--format=json", "--include-data"] + try: + raw_status = subprocess.check_output(cmd) + except OSError as e: + if e.errno == errno.ENOENT: + return ('unknown', "") + else: + raise + else: + status = json.loads(raw_status.decode("UTF-8")) + return (status["status"], status["message"]) + + +def translate_exc(from_exc, to_exc): + def inner_translate_exc1(f): + @wraps(f) + def inner_translate_exc2(*args, **kwargs): + try: + return f(*args, **kwargs) + except from_exc: + raise to_exc + + return inner_translate_exc2 + + return inner_translate_exc1 + + +def application_version_set(version): + """Charm authors may trigger this command from any hook to output what + version of the application is running. This could be a package version, + for instance postgres version 9.5. It could also be a build number or + version control revision identifier, for instance git sha 6fb7ba68. 
""" + + cmd = ['application-version-set'] + cmd.append(version) + try: + subprocess.check_call(cmd) + except OSError: + log("Application Version: {}".format(version)) + + +@translate_exc(from_exc=OSError, to_exc=NotImplementedError) +@cached +def goal_state(): + """Juju goal state values""" + cmd = ['goal-state', '--format=json'] + return json.loads(subprocess.check_output(cmd).decode('UTF-8')) + + +@translate_exc(from_exc=OSError, to_exc=NotImplementedError) +def is_leader(): + """Does the current unit hold the juju leadership + + Uses juju to determine whether the current unit is the leader of its peers + """ + cmd = ['is-leader', '--format=json'] + return json.loads(subprocess.check_output(cmd).decode('UTF-8')) + + +@translate_exc(from_exc=OSError, to_exc=NotImplementedError) +def leader_get(attribute=None): + """Juju leader get value(s)""" + cmd = ['leader-get', '--format=json'] + [attribute or '-'] + return json.loads(subprocess.check_output(cmd).decode('UTF-8')) + + +@translate_exc(from_exc=OSError, to_exc=NotImplementedError) +def leader_set(settings=None, **kwargs): + """Juju leader set value(s)""" + # Don't log secrets. + # log("Juju leader-set '%s'" % (settings), level=DEBUG) + cmd = ['leader-set'] + settings = settings or {} + settings.update(kwargs) + for k, v in settings.items(): + if v is None: + cmd.append('{}='.format(k)) + else: + cmd.append('{}={}'.format(k, v)) + subprocess.check_call(cmd) + + +@translate_exc(from_exc=OSError, to_exc=NotImplementedError) +def payload_register(ptype, klass, pid): + """ is used while a hook is running to let Juju know that a + payload has been started.""" + cmd = ['payload-register'] + for x in [ptype, klass, pid]: + cmd.append(x) + subprocess.check_call(cmd) + + +@translate_exc(from_exc=OSError, to_exc=NotImplementedError) +def payload_unregister(klass, pid): + """ is used while a hook is running to let Juju know + that a payload has been manually stopped. The and provided + must match a payload that has been previously registered with juju using + payload-register.""" + cmd = ['payload-unregister'] + for x in [klass, pid]: + cmd.append(x) + subprocess.check_call(cmd) + + +@translate_exc(from_exc=OSError, to_exc=NotImplementedError) +def payload_status_set(klass, pid, status): + """is used to update the current status of a registered payload. + The and provided must match a payload that has been previously + registered with juju using payload-register. The must be one of the + follow: starting, started, stopping, stopped""" + cmd = ['payload-status-set'] + for x in [klass, pid, status]: + cmd.append(x) + subprocess.check_call(cmd) + + +@translate_exc(from_exc=OSError, to_exc=NotImplementedError) +def resource_get(name): + """used to fetch the resource path of the given name. + + must match a name of defined resource in metadata.yaml + + returns either a path or False if resource not available + """ + if not name: + return False + + cmd = ['resource-get', name] + try: + return subprocess.check_output(cmd).decode('UTF-8') + except subprocess.CalledProcessError: + return False + + +@cached +def juju_version(): + """Full version string (eg. 
'1.23.3.1-trusty-amd64')""" + # Per https://bugs.launchpad.net/juju-core/+bug/1455368/comments/1 + jujud = glob.glob('/var/lib/juju/tools/machine-*/jujud')[0] + return subprocess.check_output([jujud, 'version'], + universal_newlines=True).strip() + + +def has_juju_version(minimum_version): + """Return True if the Juju version is at least the provided version""" + return LooseVersion(juju_version()) >= LooseVersion(minimum_version) + + +_atexit = [] +_atstart = [] + + +def atstart(callback, *args, **kwargs): + '''Schedule a callback to run before the main hook. + + Callbacks are run in the order they were added. + + This is useful for modules and classes to perform initialization + and inject behavior. In particular: + + - Run common code before all of your hooks, such as logging + the hook name or interesting relation data. + - Defer object or module initialization that requires a hook + context until we know there actually is a hook context, + making testing easier. + - Rather than requiring charm authors to include boilerplate to + invoke your helper's behavior, have it run automatically if + your object is instantiated or module imported. + + This is not at all useful after your hook framework as been launched. + ''' + global _atstart + _atstart.append((callback, args, kwargs)) + + +def atexit(callback, *args, **kwargs): + '''Schedule a callback to run on successful hook completion. + + Callbacks are run in the reverse order that they were added.''' + _atexit.append((callback, args, kwargs)) + + +def _run_atstart(): + '''Hook frameworks must invoke this before running the main hook body.''' + global _atstart + for callback, args, kwargs in _atstart: + callback(*args, **kwargs) + del _atstart[:] + + +def _run_atexit(): + '''Hook frameworks must invoke this after the main hook body has + successfully completed. Do not invoke it if the hook fails.''' + global _atexit + for callback, args, kwargs in reversed(_atexit): + callback(*args, **kwargs) + del _atexit[:] + + +@translate_exc(from_exc=OSError, to_exc=NotImplementedError) +def network_get_primary_address(binding): + ''' + Deprecated since Juju 2.3; use network_get() + + Retrieve the primary network address for a named binding + + :param binding: string. The name of a relation of extra-binding + :return: string. The primary IP address for the named binding + :raise: NotImplementedError if run on Juju < 2.0 + ''' + cmd = ['network-get', '--primary-address', binding] + try: + response = subprocess.check_output( + cmd, + stderr=subprocess.STDOUT).decode('UTF-8').strip() + except CalledProcessError as e: + if 'no network config found for binding' in e.output.decode('UTF-8'): + raise NoNetworkBinding("No network binding for {}" + .format(binding)) + else: + raise + return response + + +def network_get(endpoint, relation_id=None): + """ + Retrieve the network details for a relation endpoint + + :param endpoint: string. The name of a relation endpoint + :param relation_id: int. The ID of the relation for the current context. + :return: dict. The loaded YAML output of the network-get query. + :raise: NotImplementedError if request not supported by the Juju version. 
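+
+    Example (endpoint name and result keys are illustrative)::
+
+        info = network_get('public')
+        # info may include 'bind-addresses', 'ingress-addresses'
+        # and 'egress-subnets', depending on the Juju version.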
+ """ + if not has_juju_version('2.2'): + raise NotImplementedError(juju_version()) # earlier versions require --primary-address + if relation_id and not has_juju_version('2.3'): + raise NotImplementedError # 2.3 added the -r option + + cmd = ['network-get', endpoint, '--format', 'yaml'] + if relation_id: + cmd.append('-r') + cmd.append(relation_id) + response = subprocess.check_output( + cmd, + stderr=subprocess.STDOUT).decode('UTF-8').strip() + return yaml.safe_load(response) + + +def add_metric(*args, **kwargs): + """Add metric values. Values may be expressed with keyword arguments. For + metric names containing dashes, these may be expressed as one or more + 'key=value' positional arguments. May only be called from the collect-metrics + hook.""" + _args = ['add-metric'] + _kvpairs = [] + _kvpairs.extend(args) + _kvpairs.extend(['{}={}'.format(k, v) for k, v in kwargs.items()]) + _args.extend(sorted(_kvpairs)) + try: + subprocess.check_call(_args) + return + except EnvironmentError as e: + if e.errno != errno.ENOENT: + raise + log_message = 'add-metric failed: {}'.format(' '.join(_kvpairs)) + log(log_message, level='INFO') + + +def meter_status(): + """Get the meter status, if running in the meter-status-changed hook.""" + return os.environ.get('JUJU_METER_STATUS') + + +def meter_info(): + """Get the meter status information, if running in the meter-status-changed + hook.""" + return os.environ.get('JUJU_METER_INFO') + + +def iter_units_for_relation_name(relation_name): + """Iterate through all units in a relation + + Generator that iterates through all the units in a relation and yields + a named tuple with rid and unit field names. + + Usage: + data = [(u.rid, u.unit) + for u in iter_units_for_relation_name(relation_name)] + + :param relation_name: string relation name + :yield: Named Tuple with rid and unit field names + """ + RelatedUnit = namedtuple('RelatedUnit', 'rid, unit') + for rid in relation_ids(relation_name): + for unit in related_units(rid): + yield RelatedUnit(rid, unit) + + +def ingress_address(rid=None, unit=None): + """ + Retrieve the ingress-address from a relation when available. + Otherwise, return the private-address. + + When used on the consuming side of the relation (unit is a remote + unit), the ingress-address is the IP address that this unit needs + to use to reach the provided service on the remote unit. + + When used on the providing side of the relation (unit == local_unit()), + the ingress-address is the IP address that is advertised to remote + units on this relation. Remote units need to use this address to + reach the local provided service on this unit. + + Note that charms may document some other method to use in + preference to the ingress_address(), such as an address provided + on a different relation attribute or a service discovery mechanism. + This allows charms to redirect inbound connections to their peers + or different applications such as load balancers. + + Usage: + addresses = [ingress_address(rid=u.rid, unit=u.unit) + for u in iter_units_for_relation_name(relation_name)] + + :param rid: string relation id + :param unit: string unit name + :side effect: calls relation_get + :return: string IP address + """ + settings = relation_get(rid=rid, unit=unit) + return (settings.get('ingress-address') or + settings.get('private-address')) + + +def egress_subnets(rid=None, unit=None): + """ + Retrieve the egress-subnets from a relation. 
+ + This function is to be used on the providing side of the + relation, and provides the ranges of addresses that client + connections may come from. The result is uninteresting on + the consuming side of a relation (unit == local_unit()). + + Returns a stable list of subnets in CIDR format. + eg. ['192.168.1.0/24', '2001::F00F/128'] + + If egress-subnets is not available, falls back to using the published + ingress-address, or finally private-address. + + :param rid: string relation id + :param unit: string unit name + :side effect: calls relation_get + :return: list of subnets in CIDR format. eg. ['192.168.1.0/24', '2001::F00F/128'] + """ + def _to_range(addr): + if re.search(r'^(?:\d{1,3}\.){3}\d{1,3}$', addr) is not None: + addr += '/32' + elif ':' in addr and '/' not in addr: # IPv6 + addr += '/128' + return addr + + settings = relation_get(rid=rid, unit=unit) + if 'egress-subnets' in settings: + return [n.strip() for n in settings['egress-subnets'].split(',') if n.strip()] + if 'ingress-address' in settings: + return [_to_range(settings['ingress-address'])] + if 'private-address' in settings: + return [_to_range(settings['private-address'])] + return [] # Should never happen + + +def unit_doomed(unit=None): + """Determines if the unit is being removed from the model + + Requires Juju 2.4.1. + + :param unit: string unit name, defaults to local_unit + :side effect: calls goal_state + :side effect: calls local_unit + :side effect: calls has_juju_version + :return: True if the unit is being removed, already gone, or never existed + """ + if not has_juju_version("2.4.1"): + # We cannot risk blindly returning False for 'we don't know', + # because that could cause data loss; if call sites don't + # need an accurate answer, they likely don't need this helper + # at all. + # goal-state existed in 2.4.0, but did not handle removals + # correctly until 2.4.1. + raise NotImplementedError("is_doomed") + if unit is None: + unit = local_unit() + gs = goal_state() + units = gs.get('units', {}) + if unit not in units: + return True + # I don't think 'dead' units ever show up in the goal-state, but + # check anyway in addition to 'dying'. + return units[unit]['status'] in ('dying', 'dead') + + +def env_proxy_settings(selected_settings=None): + """Get proxy settings from process environment variables. + + Get charm proxy settings from environment variables that correspond to + juju-http-proxy, juju-https-proxy juju-no-proxy (available as of 2.4.2, see + lp:1782236) and juju-ftp-proxy in a format suitable for passing to an + application that reacts to proxy settings passed as environment variables. + Some applications support lowercase or uppercase notation (e.g. curl), some + support only lowercase (e.g. wget), there are also subjectively rare cases + of only uppercase notation support. no_proxy CIDR and wildcard support also + varies between runtimes and applications as there is no enforced standard. + + Some applications may connect to multiple destinations and expose config + options that would affect only proxy settings for a specific destination + these should be handled in charms in an application-specific manner. 
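+
+    Example (proxy values are illustrative)::
+
+        settings = env_proxy_settings(['http', 'https'])
+        # e.g. {'HTTP_PROXY': 'http://proxy.example:3128',
+        #       'http_proxy': 'http://proxy.example:3128', ...}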
+ + :param selected_settings: format only a subset of possible settings + :type selected_settings: list + :rtype: Option(None, dict[str, str]) + """ + SUPPORTED_SETTINGS = { + 'http': 'HTTP_PROXY', + 'https': 'HTTPS_PROXY', + 'no_proxy': 'NO_PROXY', + 'ftp': 'FTP_PROXY' + } + if selected_settings is None: + selected_settings = SUPPORTED_SETTINGS + + selected_vars = [v for k, v in SUPPORTED_SETTINGS.items() + if k in selected_settings] + proxy_settings = {} + for var in selected_vars: + var_val = os.getenv(var) + if var_val: + proxy_settings[var] = var_val + proxy_settings[var.lower()] = var_val + # Now handle juju-prefixed environment variables. The legacy vs new + # environment variable usage is mutually exclusive + charm_var_val = os.getenv('JUJU_CHARM_{}'.format(var)) + if charm_var_val: + proxy_settings[var] = charm_var_val + proxy_settings[var.lower()] = charm_var_val + if 'no_proxy' in proxy_settings: + if _contains_range(proxy_settings['no_proxy']): + log(RANGE_WARNING, level=WARNING) + return proxy_settings if proxy_settings else None + + +def _contains_range(addresses): + """Check for cidr or wildcard domain in a string. + + Given a string comprising a comma separated list of ip addresses + and domain names, determine whether the string contains IP ranges + or wildcard domains. + + :param addresses: comma separated list of domains and ip addresses. + :type addresses: str + """ + return ( + # Test for cidr (e.g. 10.20.20.0/24) + "/" in addresses or + # Test for wildcard domains (*.foo.com or .foo.com) + "*" in addresses or + addresses.startswith(".") or + ",." in addresses or + " ." in addresses) + + +def is_subordinate(): + """Check whether charm is subordinate in unit metadata. + + :returns: True if unit is subordniate, False otherwise. + :rtype: bool + """ + return metadata().get('subordinate') is True diff --git a/ceph-osd/hooks/charmhelpers/core/host.py b/ceph-osd/hooks/charmhelpers/core/host.py new file mode 100644 index 00000000..def403c5 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/core/host.py @@ -0,0 +1,1309 @@ +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tools for working with the host system""" +# Copyright 2012 Canonical Ltd. 
+# +# Authors: +# Nick Moffitt +# Matthew Wedgwood + +import errno +import os +import re +import pwd +import glob +import grp +import random +import string +import subprocess +import hashlib +import functools +import itertools + +from contextlib import contextmanager +from collections import OrderedDict, defaultdict +from .hookenv import log, INFO, DEBUG, local_unit, charm_name +from .fstab import Fstab +from charmhelpers.osplatform import get_platform + +__platform__ = get_platform() +if __platform__ == "ubuntu": + from charmhelpers.core.host_factory.ubuntu import ( # NOQA:F401 + service_available, + add_new_group, + lsb_release, + cmp_pkgrevno, + CompareHostReleases, + get_distrib_codename, + arch + ) # flake8: noqa -- ignore F401 for this import +elif __platform__ == "centos": + from charmhelpers.core.host_factory.centos import ( # NOQA:F401 + service_available, + add_new_group, + lsb_release, + cmp_pkgrevno, + CompareHostReleases, + ) # flake8: noqa -- ignore F401 for this import + +UPDATEDB_PATH = '/etc/updatedb.conf' +CA_CERT_DIR = '/usr/local/share/ca-certificates' + + +def service_start(service_name, **kwargs): + """Start a system service. + + The specified service name is managed via the system level init system. + Some init systems (e.g. upstart) require that additional arguments be + provided in order to directly control service instances whereas other init + systems allow for addressing instances of a service directly by name (e.g. + systemd). + + The kwargs allow for the additional parameters to be passed to underlying + init systems for those systems which require/allow for them. For example, + the ceph-osd upstart script requires the id parameter to be passed along + in order to identify which running daemon should be reloaded. The follow- + ing example stops the ceph-osd service for instance id=4: + + service_stop('ceph-osd', id=4) + + :param service_name: the name of the service to stop + :param **kwargs: additional parameters to pass to the init system when + managing services. These will be passed as key=value + parameters to the init system's commandline. kwargs + are ignored for systemd enabled systems. + """ + return service('start', service_name, **kwargs) + + +def service_stop(service_name, **kwargs): + """Stop a system service. + + The specified service name is managed via the system level init system. + Some init systems (e.g. upstart) require that additional arguments be + provided in order to directly control service instances whereas other init + systems allow for addressing instances of a service directly by name (e.g. + systemd). + + The kwargs allow for the additional parameters to be passed to underlying + init systems for those systems which require/allow for them. For example, + the ceph-osd upstart script requires the id parameter to be passed along + in order to identify which running daemon should be reloaded. The follow- + ing example stops the ceph-osd service for instance id=4: + + service_stop('ceph-osd', id=4) + + :param service_name: the name of the service to stop + :param **kwargs: additional parameters to pass to the init system when + managing services. These will be passed as key=value + parameters to the init system's commandline. kwargs + are ignored for systemd enabled systems. + """ + return service('stop', service_name, **kwargs) + + +def service_enable(service_name, **kwargs): + """Enable a system service. + + The specified service name is managed via the system level init system. + Some init systems (e.g. 
upstart) require that additional arguments be + provided in order to directly control service instances whereas other init + systems allow for addressing instances of a service directly by name (e.g. + systemd). + + The kwargs allow for the additional parameters to be passed to underlying + init systems for those systems which require/allow for them. For example, + the ceph-osd upstart script requires the id parameter to be passed along + in order to identify which running daemon should be restarted. The follow- + ing example restarts the ceph-osd service for instance id=4: + + service_enable('ceph-osd', id=4) + + :param service_name: the name of the service to enable + :param **kwargs: additional parameters to pass to the init system when + managing services. These will be passed as key=value + parameters to the init system's commandline. kwargs + are ignored for init systems not allowing additional + parameters via the commandline (systemd). + """ + return service('enable', service_name, **kwargs) + + +def service_restart(service_name, **kwargs): + """Restart a system service. + + The specified service name is managed via the system level init system. + Some init systems (e.g. upstart) require that additional arguments be + provided in order to directly control service instances whereas other init + systems allow for addressing instances of a service directly by name (e.g. + systemd). + + The kwargs allow for the additional parameters to be passed to underlying + init systems for those systems which require/allow for them. For example, + the ceph-osd upstart script requires the id parameter to be passed along + in order to identify which running daemon should be restarted. The follow- + ing example restarts the ceph-osd service for instance id=4: + + service_restart('ceph-osd', id=4) + + :param service_name: the name of the service to restart + :param **kwargs: additional parameters to pass to the init system when + managing services. These will be passed as key=value + parameters to the init system's commandline. kwargs + are ignored for init systems not allowing additional + parameters via the commandline (systemd). + """ + return service('restart', service_name) + + +def service_reload(service_name, restart_on_failure=False, **kwargs): + """Reload a system service, optionally falling back to restart if + reload fails. + + The specified service name is managed via the system level init system. + Some init systems (e.g. upstart) require that additional arguments be + provided in order to directly control service instances whereas other init + systems allow for addressing instances of a service directly by name (e.g. + systemd). + + The kwargs allow for the additional parameters to be passed to underlying + init systems for those systems which require/allow for them. For example, + the ceph-osd upstart script requires the id parameter to be passed along + in order to identify which running daemon should be reloaded. The follow- + ing example restarts the ceph-osd service for instance id=4: + + service_reload('ceph-osd', id=4) + + :param service_name: the name of the service to reload + :param restart_on_failure: boolean indicating whether to fallback to a + restart if the reload fails. + :param **kwargs: additional parameters to pass to the init system when + managing services. These will be passed as key=value + parameters to the init system's commandline. kwargs + are ignored for init systems not allowing additional + parameters via the commandline (systemd). 
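+
+    To fall back to a full restart when a reload fails::
+
+        service_reload('ceph-osd', restart_on_failure=True)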
+ """ + service_result = service('reload', service_name, **kwargs) + if not service_result and restart_on_failure: + service_result = service('restart', service_name, **kwargs) + return service_result + + +def service_pause(service_name, init_dir="/etc/init", initd_dir="/etc/init.d", + **kwargs): + """Pause a system service. + + Stop it, and prevent it from starting again at boot. + + :param service_name: the name of the service to pause + :param init_dir: path to the upstart init directory + :param initd_dir: path to the sysv init directory + :param **kwargs: additional parameters to pass to the init system when + managing services. These will be passed as key=value + parameters to the init system's commandline. kwargs + are ignored for init systems which do not support + key=value arguments via the commandline. + """ + stopped = True + if service_running(service_name, **kwargs): + stopped = service_stop(service_name, **kwargs) + upstart_file = os.path.join(init_dir, "{}.conf".format(service_name)) + sysv_file = os.path.join(initd_dir, service_name) + if init_is_systemd(service_name=service_name): + service('disable', service_name) + service('mask', service_name) + elif os.path.exists(upstart_file): + override_path = os.path.join( + init_dir, '{}.override'.format(service_name)) + with open(override_path, 'w') as fh: + fh.write("manual\n") + elif os.path.exists(sysv_file): + subprocess.check_call(["update-rc.d", service_name, "disable"]) + else: + raise ValueError( + "Unable to detect {0} as SystemD, Upstart {1} or" + " SysV {2}".format( + service_name, upstart_file, sysv_file)) + return stopped + + +def service_resume(service_name, init_dir="/etc/init", + initd_dir="/etc/init.d", **kwargs): + """Resume a system service. + + Re-enable starting again at boot. Start the service. + + :param service_name: the name of the service to resume + :param init_dir: the path to the init dir + :param initd dir: the path to the initd dir + :param **kwargs: additional parameters to pass to the init system when + managing services. These will be passed as key=value + parameters to the init system's commandline. kwargs + are ignored for systemd enabled systems. + """ + upstart_file = os.path.join(init_dir, "{}.conf".format(service_name)) + sysv_file = os.path.join(initd_dir, service_name) + if init_is_systemd(service_name=service_name): + if service('is-enabled', service_name): + log('service {} already enabled'.format(service_name), level=DEBUG) + else: + service('unmask', service_name) + service('enable', service_name) + elif os.path.exists(upstart_file): + override_path = os.path.join( + init_dir, '{}.override'.format(service_name)) + if os.path.exists(override_path): + os.unlink(override_path) + elif os.path.exists(sysv_file): + subprocess.check_call(["update-rc.d", service_name, "enable"]) + else: + raise ValueError( + "Unable to detect {0} as SystemD, Upstart {1} or" + " SysV {2}".format( + service_name, upstart_file, sysv_file)) + started = service_running(service_name, **kwargs) + + if not started: + started = service_start(service_name, **kwargs) + return started + + +def service(action, service_name=None, **kwargs): + """Control a system service. + + :param action: the action to take on the service + :param service_name: the name of the service to perform th action on + :param **kwargs: additional params to be passed to the service command in + the form of key=value. 
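+
+    Example::
+
+        service('restart', 'ceph-osd')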
+ """ + if init_is_systemd(service_name=service_name): + cmd = ['systemctl', action] + if service_name is not None: + cmd.append(service_name) + else: + cmd = ['service', service_name, action] + for key, value in kwargs.items(): + parameter = '%s=%s' % (key, value) + cmd.append(parameter) + return subprocess.call(cmd) == 0 + + +_UPSTART_CONF = "/etc/init/{}.conf" +_INIT_D_CONF = "/etc/init.d/{}" + + +def service_running(service_name, **kwargs): + """Determine whether a system service is running. + + :param service_name: the name of the service + :param **kwargs: additional args to pass to the service command. This is + used to pass additional key=value arguments to the + service command line for managing specific instance + units (e.g. service ceph-osd status id=2). The kwargs + are ignored in systemd services. + """ + if init_is_systemd(service_name=service_name): + return service('is-active', service_name) + else: + if os.path.exists(_UPSTART_CONF.format(service_name)): + try: + cmd = ['status', service_name] + for key, value in kwargs.items(): + parameter = '%s=%s' % (key, value) + cmd.append(parameter) + output = subprocess.check_output( + cmd, stderr=subprocess.STDOUT).decode('UTF-8') + except subprocess.CalledProcessError: + return False + else: + # This works for upstart scripts where the 'service' command + # returns a consistent string to represent running + # 'start/running' + if ("start/running" in output or + "is running" in output or + "up and running" in output): + return True + elif os.path.exists(_INIT_D_CONF.format(service_name)): + # Check System V scripts init script return codes + return service('status', service_name) + return False + + +SYSTEMD_SYSTEM = '/run/systemd/system' + + +def init_is_systemd(service_name=None): + """ + Returns whether the host uses systemd for the specified service. + + @param Optional[str] service_name: specific name of service + """ + if str(service_name).startswith("snap."): + return True + if lsb_release()['DISTRIB_CODENAME'] == 'trusty': + return False + return os.path.isdir(SYSTEMD_SYSTEM) + + +def adduser(username, password=None, shell='/bin/bash', + system_user=False, primary_group=None, + secondary_groups=None, uid=None, home_dir=None): + """Add a user to the system. + + Will log but otherwise succeed if the user already exists. 
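+
+    Example (values are illustrative)::
+
+        adduser('ceph', system_user=True, home_dir='/var/lib/ceph')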
+ + :param str username: Username to create + :param str password: Password for user; if ``None``, create a system user + :param str shell: The default shell for the user + :param bool system_user: Whether to create a login or system user + :param str primary_group: Primary group for user; defaults to username + :param list secondary_groups: Optional list of additional groups + :param int uid: UID for user being created + :param str home_dir: Home directory for user + + :returns: The password database entry struct, as returned by `pwd.getpwnam` + """ + try: + user_info = pwd.getpwnam(username) + log('user {0} already exists!'.format(username)) + if uid: + user_info = pwd.getpwuid(int(uid)) + log('user with uid {0} already exists!'.format(uid)) + except KeyError: + log('creating user {0}'.format(username)) + cmd = ['useradd'] + if uid: + cmd.extend(['--uid', str(uid)]) + if home_dir: + cmd.extend(['--home', str(home_dir)]) + if system_user or password is None: + cmd.append('--system') + else: + cmd.extend([ + '--create-home', + '--shell', shell, + '--password', password, + ]) + if not primary_group: + try: + grp.getgrnam(username) + primary_group = username # avoid "group exists" error + except KeyError: + pass + if primary_group: + cmd.extend(['-g', primary_group]) + if secondary_groups: + cmd.extend(['-G', ','.join(secondary_groups)]) + cmd.append(username) + subprocess.check_call(cmd) + user_info = pwd.getpwnam(username) + return user_info + + +def user_exists(username): + """Check if a user exists""" + try: + pwd.getpwnam(username) + user_exists = True + except KeyError: + user_exists = False + return user_exists + + +def uid_exists(uid): + """Check if a uid exists""" + try: + pwd.getpwuid(uid) + uid_exists = True + except KeyError: + uid_exists = False + return uid_exists + + +def group_exists(groupname): + """Check if a group exists""" + try: + grp.getgrnam(groupname) + group_exists = True + except KeyError: + group_exists = False + return group_exists + + +def gid_exists(gid): + """Check if a gid exists""" + try: + grp.getgrgid(gid) + gid_exists = True + except KeyError: + gid_exists = False + return gid_exists + + +def add_group(group_name, system_group=False, gid=None): + """Add a group to the system + + Will log but otherwise succeed if the group already exists. + + :param str group_name: group to create + :param bool system_group: Create system group + :param int gid: GID for user being created + + :returns: The password database entry struct, as returned by `grp.getgrnam` + """ + try: + group_info = grp.getgrnam(group_name) + log('group {0} already exists!'.format(group_name)) + if gid: + group_info = grp.getgrgid(gid) + log('group with gid {0} already exists!'.format(gid)) + except KeyError: + log('creating group {0}'.format(group_name)) + add_new_group(group_name, system_group, gid) + group_info = grp.getgrnam(group_name) + return group_info + + +def add_user_to_group(username, group): + """Add a user to a group""" + cmd = ['gpasswd', '-a', username, group] + log("Adding user {} to group {}".format(username, group)) + subprocess.check_call(cmd) + + +def chage(username, lastday=None, expiredate=None, inactive=None, + mindays=None, maxdays=None, root=None, warndays=None): + """Change user password expiry information + + :param str username: User to update + :param str lastday: Set when password was changed in YYYY-MM-DD format + :param str expiredate: Set when user's account will no longer be + accessible in YYYY-MM-DD format. + -1 will remove an account expiration date. 
+ :param str inactive: Set the number of days of inactivity after a password + has expired before the account is locked. + -1 will remove an account's inactivity. + :param str mindays: Set the minimum number of days between password + changes to MIN_DAYS. + 0 indicates the password can be changed anytime. + :param str maxdays: Set the maximum number of days during which a + password is valid. + -1 as MAX_DAYS will remove checking maxdays + :param str root: Apply changes in the CHROOT_DIR directory + :param str warndays: Set the number of days of warning before a password + change is required + :raises subprocess.CalledProcessError: if call to chage fails + """ + cmd = ['chage'] + if root: + cmd.extend(['--root', root]) + if lastday: + cmd.extend(['--lastday', lastday]) + if expiredate: + cmd.extend(['--expiredate', expiredate]) + if inactive: + cmd.extend(['--inactive', inactive]) + if mindays: + cmd.extend(['--mindays', mindays]) + if maxdays: + cmd.extend(['--maxdays', maxdays]) + if warndays: + cmd.extend(['--warndays', warndays]) + cmd.append(username) + subprocess.check_call(cmd) + + +remove_password_expiry = functools.partial(chage, expiredate='-1', inactive='-1', mindays='0', maxdays='-1') + + +def rsync(from_path, to_path, flags='-r', options=None, timeout=None): + """Replicate the contents of a path""" + options = options or ['--delete', '--executability'] + cmd = ['/usr/bin/rsync', flags] + if timeout: + cmd = ['timeout', str(timeout)] + cmd + cmd.extend(options) + cmd.append(from_path) + cmd.append(to_path) + log(" ".join(cmd)) + return subprocess.check_output(cmd, stderr=subprocess.STDOUT).decode('UTF-8').strip() + + +def symlink(source, destination): + """Create a symbolic link""" + log("Symlinking {} as {}".format(source, destination)) + cmd = [ + 'ln', + '-sf', + source, + destination, + ] + subprocess.check_call(cmd) + + +def mkdir(path, owner='root', group='root', perms=0o555, force=False): + """Create a directory""" + log("Making dir {} {}:{} {:o}".format(path, owner, group, + perms)) + uid = pwd.getpwnam(owner).pw_uid + gid = grp.getgrnam(group).gr_gid + realpath = os.path.abspath(path) + path_exists = os.path.exists(realpath) + if path_exists and force: + if not os.path.isdir(realpath): + log("Removing non-directory file {} prior to mkdir()".format(path)) + os.unlink(realpath) + os.makedirs(realpath, perms) + elif not path_exists: + os.makedirs(realpath, perms) + os.chown(realpath, uid, gid) + os.chmod(realpath, perms) + + +def write_file(path, content, owner='root', group='root', perms=0o444): + """Create or overwrite a file with the contents of a byte string.""" + uid = pwd.getpwnam(owner).pw_uid + gid = grp.getgrnam(group).gr_gid + # lets see if we can grab the file and compare the context, to avoid doing + # a write. + existing_content = None + existing_uid, existing_gid, existing_perms = None, None, None + try: + with open(path, 'rb') as target: + existing_content = target.read() + stat = os.stat(path) + existing_uid, existing_gid, existing_perms = ( + stat.st_uid, stat.st_gid, stat.st_mode + ) + except Exception: + pass + if content != existing_content: + log("Writing file {} {}:{} {:o}".format(path, owner, group, perms), + level=DEBUG) + with open(path, 'wb') as target: + os.fchown(target.fileno(), uid, gid) + os.fchmod(target.fileno(), perms) + if isinstance(content, str): + content = content.encode('UTF-8') + target.write(content) + return + # the contents were the same, but we might still need to change the + # ownership or permissions. 
+ if existing_uid != uid: + log("Changing uid on already existing content: {} -> {}" + .format(existing_uid, uid), level=DEBUG) + os.chown(path, uid, -1) + if existing_gid != gid: + log("Changing gid on already existing content: {} -> {}" + .format(existing_gid, gid), level=DEBUG) + os.chown(path, -1, gid) + if existing_perms != perms: + log("Changing permissions on existing content: {} -> {}" + .format(existing_perms, perms), level=DEBUG) + os.chmod(path, perms) + + +def fstab_remove(mp): + """Remove the given mountpoint entry from /etc/fstab""" + return Fstab.remove_by_mountpoint(mp) + + +def fstab_add(dev, mp, fs, options=None): + """Adds the given device entry to the /etc/fstab file""" + return Fstab.add(dev, mp, fs, options=options) + + +def mount(device, mountpoint, options=None, persist=False, filesystem="ext3"): + """Mount a filesystem at a particular mountpoint""" + cmd_args = ['mount'] + if options is not None: + cmd_args.extend(['-o', options]) + cmd_args.extend([device, mountpoint]) + try: + subprocess.check_output(cmd_args) + except subprocess.CalledProcessError as e: + log('Error mounting {} at {}\n{}'.format(device, mountpoint, e.output)) + return False + + if persist: + return fstab_add(device, mountpoint, filesystem, options=options) + return True + + +def umount(mountpoint, persist=False): + """Unmount a filesystem""" + cmd_args = ['umount', mountpoint] + try: + subprocess.check_output(cmd_args) + except subprocess.CalledProcessError as e: + log('Error unmounting {}\n{}'.format(mountpoint, e.output)) + return False + + if persist: + return fstab_remove(mountpoint) + return True + + +def mounts(): + """Get a list of all mounted volumes as [[mountpoint,device],[...]]""" + with open('/proc/mounts') as f: + # [['/mount/point','/dev/path'],[...]] + system_mounts = [m[1::-1] for m in [l.strip().split() + for l in f.readlines()]] + return system_mounts + + +def fstab_mount(mountpoint): + """Mount filesystem using fstab""" + cmd_args = ['mount', mountpoint] + try: + subprocess.check_output(cmd_args) + except subprocess.CalledProcessError as e: + log('Error unmounting {}\n{}'.format(mountpoint, e.output)) + return False + return True + + +def file_hash(path, hash_type='md5'): + """Generate a hash checksum of the contents of 'path' or None if not found. + + :param str hash_type: Any hash alrgorithm supported by :mod:`hashlib`, + such as md5, sha1, sha256, sha512, etc. + """ + if os.path.exists(path): + h = getattr(hashlib, hash_type)() + with open(path, 'rb') as source: + h.update(source.read()) + return h.hexdigest() + else: + return None + + +def path_hash(path): + """Generate a hash checksum of all files matching 'path'. Standard + wildcards like '*' and '?' are supported, see documentation for the 'glob' + module for more information. + + :return: dict: A { filename: hash } dictionary for all matched files. + Empty if none found. + """ + return { + filename: file_hash(filename) + for filename in glob.iglob(path) + } + + +def check_hash(path, checksum, hash_type='md5'): + """Validate a file using a cryptographic checksum. + + :param str checksum: Value of the checksum used to validate the file. + :param str hash_type: Hash algorithm used to generate `checksum`. + Can be any hash algorithm supported by :mod:`hashlib`, + such as md5, sha1, sha256, sha512, etc. 
+ :raises ChecksumError: If the file fails the checksum + + """ + actual_checksum = file_hash(path, hash_type) + if checksum != actual_checksum: + raise ChecksumError("'%s' != '%s'" % (checksum, actual_checksum)) + + +class ChecksumError(ValueError): + """A class derived from Value error to indicate the checksum failed.""" + pass + + +class restart_on_change(object): + """Decorator and context manager to handle restarts. + + Usage: + + @restart_on_change(restart_map, ...) + def function_that_might_trigger_a_restart(...) + ... + + Or: + + with restart_on_change(restart_map, ...): + do_stuff_that_might_trigger_a_restart() + ... + """ + + def __init__(self, restart_map, stopstart=False, restart_functions=None, + can_restart_now_f=None, post_svc_restart_f=None, + pre_restarts_wait_f=None): + """ + :param restart_map: {file: [service, ...]} + :type restart_map: Dict[str, List[str,]] + :param stopstart: whether to stop, start or restart a service + :type stopstart: booleean + :param restart_functions: nonstandard functions to use to restart + services {svc: func, ...} + :type restart_functions: Dict[str, Callable[[str], None]] + :param can_restart_now_f: A function used to check if the restart is + permitted. + :type can_restart_now_f: Callable[[str, List[str]], boolean] + :param post_svc_restart_f: A function run after a service has + restarted. + :type post_svc_restart_f: Callable[[str], None] + :param pre_restarts_wait_f: A function called before any restarts. + :type pre_restarts_wait_f: Callable[None, None] + """ + self.restart_map = restart_map + self.stopstart = stopstart + self.restart_functions = restart_functions + self.can_restart_now_f = can_restart_now_f + self.post_svc_restart_f = post_svc_restart_f + self.pre_restarts_wait_f = pre_restarts_wait_f + + def __call__(self, f): + """Work like a decorator. + + Returns a wrapped function that performs the restart if triggered. + + :param f: The function that is being wrapped. + :type f: Callable[[Any], Any] + :returns: the wrapped function + :rtype: Callable[[Any], Any] + """ + @functools.wraps(f) + def wrapped_f(*args, **kwargs): + return restart_on_change_helper( + (lambda: f(*args, **kwargs)), + self.restart_map, + stopstart=self.stopstart, + restart_functions=self.restart_functions, + can_restart_now_f=self.can_restart_now_f, + post_svc_restart_f=self.post_svc_restart_f, + pre_restarts_wait_f=self.pre_restarts_wait_f) + return wrapped_f + + def __enter__(self): + """Enter the runtime context related to this object. """ + self.checksums = _pre_restart_on_change_helper(self.restart_map) + + def __exit__(self, exc_type, exc_val, exc_tb): + """Exit the runtime context related to this object. + + The parameters describe the exception that caused the context to be + exited. If the context was exited without an exception, all three + arguments will be None. + """ + if exc_type is None: + _post_restart_on_change_helper( + self.checksums, + self.restart_map, + stopstart=self.stopstart, + restart_functions=self.restart_functions, + can_restart_now_f=self.can_restart_now_f, + post_svc_restart_f=self.post_svc_restart_f, + pre_restarts_wait_f=self.pre_restarts_wait_f) + # All is good, so return False; any exceptions will propagate. + return False + + +def restart_on_change_helper(lambda_f, restart_map, stopstart=False, + restart_functions=None, + can_restart_now_f=None, + post_svc_restart_f=None, + pre_restarts_wait_f=None): + """Helper function to perform the restart_on_change function. 
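+
+    Example (paths, services and the config writer are illustrative)::
+
+        restart_on_change_helper(
+            lambda: write_config(),
+            {'/etc/ceph/ceph.conf': ['ceph-osd']})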
+ + This is provided for decorators to restart services if files described + in the restart_map have changed after an invocation of lambda_f(). + + This functions allows for a number of helper functions to be passed. + + `restart_functions` is a map with a service as the key and the + corresponding value being the function to call to restart the service. For + example if `restart_functions={'some-service': my_restart_func}` then + `my_restart_func` should a function which takes one argument which is the + service name to be retstarted. + + `can_restart_now_f` is a function which checks that a restart is permitted. + It should return a bool which indicates if a restart is allowed and should + take a service name (str) and a list of changed files (List[str]) as + arguments. + + `post_svc_restart_f` is a function which runs after a service has been + restarted. It takes the service name that was restarted as an argument. + + `pre_restarts_wait_f` is a function which is called before any restarts + occur. The use case for this is an application which wants to try and + stagger restarts between units. + + :param lambda_f: function to call. + :type lambda_f: Callable[[], ANY] + :param restart_map: {file: [service, ...]} + :type restart_map: Dict[str, List[str,]] + :param stopstart: whether to stop, start or restart a service + :type stopstart: booleean + :param restart_functions: nonstandard functions to use to restart services + {svc: func, ...} + :type restart_functions: Dict[str, Callable[[str], None]] + :param can_restart_now_f: A function used to check if the restart is + permitted. + :type can_restart_now_f: Callable[[str, List[str]], boolean] + :param post_svc_restart_f: A function run after a service has + restarted. + :type post_svc_restart_f: Callable[[str], None] + :param pre_restarts_wait_f: A function called before any restarts. + :type pre_restarts_wait_f: Callable[None, None] + :returns: result of lambda_f() + :rtype: ANY + """ + checksums = _pre_restart_on_change_helper(restart_map) + r = lambda_f() + _post_restart_on_change_helper(checksums, + restart_map, + stopstart, + restart_functions, + can_restart_now_f, + post_svc_restart_f, + pre_restarts_wait_f) + return r + + +def _pre_restart_on_change_helper(restart_map): + """Take a snapshot of file hashes. + + :param restart_map: {file: [service, ...]} + :type restart_map: Dict[str, List[str,]] + :returns: Dictionary of file paths and the files checksum. + :rtype: Dict[str, str] + """ + return {path: path_hash(path) for path in restart_map} + + +def _post_restart_on_change_helper(checksums, + restart_map, + stopstart=False, + restart_functions=None, + can_restart_now_f=None, + post_svc_restart_f=None, + pre_restarts_wait_f=None): + """Check whether files have changed. + + :param checksums: Dictionary of file paths and the files checksum. + :type checksums: Dict[str, str] + :param restart_map: {file: [service, ...]} + :type restart_map: Dict[str, List[str,]] + :param stopstart: whether to stop, start or restart a service + :type stopstart: booleean + :param restart_functions: nonstandard functions to use to restart services + {svc: func, ...} + :type restart_functions: Dict[str, Callable[[str], None]] + :param can_restart_now_f: A function used to check if the restart is + permitted. + :type can_restart_now_f: Callable[[str, List[str]], boolean] + :param post_svc_restart_f: A function run after a service has + restarted. 
+ :type post_svc_restart_f: Callable[[str], None] + :param pre_restarts_wait_f: A function called before any restarts. + :type pre_restarts_wait_f: Callable[None, None] + """ + if restart_functions is None: + restart_functions = {} + changed_files = defaultdict(list) + restarts = [] + # create a list of lists of the services to restart + for path, services in restart_map.items(): + if path_hash(path) != checksums[path]: + restarts.append(services) + for svc in services: + changed_files[svc].append(path) + # create a flat list of ordered services without duplicates from lists + services_list = list(OrderedDict.fromkeys(itertools.chain(*restarts))) + if services_list: + if pre_restarts_wait_f: + pre_restarts_wait_f() + actions = ('stop', 'start') if stopstart else ('restart',) + for service_name in services_list: + if can_restart_now_f: + if not can_restart_now_f(service_name, + changed_files[service_name]): + continue + if service_name in restart_functions: + restart_functions[service_name](service_name) + else: + for action in actions: + service(action, service_name) + if post_svc_restart_f: + post_svc_restart_f(service_name) + + +def pwgen(length=None): + """Generate a random password.""" + if length is None: + # A random length is ok to use a weak PRNG + length = random.choice(range(35, 45)) + alphanumeric_chars = [ + l for l in (string.ascii_letters + string.digits) + if l not in 'l0QD1vAEIOUaeiou'] + # Use a crypto-friendly PRNG (e.g. /dev/urandom) for making the + # actual password + random_generator = random.SystemRandom() + random_chars = [ + random_generator.choice(alphanumeric_chars) for _ in range(length)] + return ''.join(random_chars) + + +def is_phy_iface(interface): + """Returns True if interface is not virtual, otherwise False.""" + if interface: + sys_net = '/sys/class/net' + if os.path.isdir(sys_net): + for iface in glob.glob(os.path.join(sys_net, '*')): + if '/virtual/' in os.path.realpath(iface): + continue + + if interface == os.path.basename(iface): + return True + + return False + + +def get_bond_master(interface): + """Returns bond master if interface is bond slave otherwise None. 
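+
+    Example::
+
+        get_bond_master('eth0')  # -> e.g. 'bond0', or None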
+ + NOTE: the provided interface is expected to be physical + """ + if interface: + iface_path = '/sys/class/net/%s' % (interface) + if os.path.exists(iface_path): + if '/virtual/' in os.path.realpath(iface_path): + return None + + master = os.path.join(iface_path, 'master') + if os.path.exists(master): + master = os.path.realpath(master) + # make sure it is a bond master + if os.path.exists(os.path.join(master, 'bonding')): + return os.path.basename(master) + + return None + + +def list_nics(nic_type=None): + """Return a list of nics of given type(s)""" + if isinstance(nic_type, str): + int_types = [nic_type] + else: + int_types = nic_type + + interfaces = [] + if nic_type: + for int_type in int_types: + cmd = ['ip', 'addr', 'show', 'label', int_type + '*'] + ip_output = subprocess.check_output( + cmd).decode('UTF-8', errors='replace') + ip_output = ip_output.split('\n') + ip_output = (line for line in ip_output if line) + for line in ip_output: + if line.split()[1].startswith(int_type): + matched = re.search('.*: (' + int_type + + r'[0-9]+\.[0-9]+)@.*', line) + if matched: + iface = matched.groups()[0] + else: + iface = line.split()[1].replace(":", "") + + if iface not in interfaces: + interfaces.append(iface) + else: + cmd = ['ip', 'a'] + ip_output = subprocess.check_output( + cmd).decode('UTF-8', errors='replace').split('\n') + ip_output = (line.strip() for line in ip_output if line) + + key = re.compile(r'^[0-9]+:\s+(.+):') + for line in ip_output: + matched = re.search(key, line) + if matched: + iface = matched.group(1) + iface = iface.partition("@")[0] + if iface not in interfaces: + interfaces.append(iface) + + return interfaces + + +def set_nic_mtu(nic, mtu): + """Set the Maximum Transmission Unit (MTU) on a network interface.""" + cmd = ['ip', 'link', 'set', nic, 'mtu', mtu] + subprocess.check_call(cmd) + + +def get_nic_mtu(nic): + """Return the Maximum Transmission Unit (MTU) for a network interface.""" + cmd = ['ip', 'addr', 'show', nic] + ip_output = subprocess.check_output( + cmd).decode('UTF-8', errors='replace').split('\n') + mtu = "" + for line in ip_output: + words = line.split() + if 'mtu' in words: + mtu = words[words.index("mtu") + 1] + return mtu + + +def get_nic_hwaddr(nic): + """Return the Media Access Control (MAC) for a network interface.""" + cmd = ['ip', '-o', '-0', 'addr', 'show', nic] + ip_output = subprocess.check_output(cmd).decode('UTF-8', errors='replace') + hwaddr = "" + words = ip_output.split() + if 'link/ether' in words: + hwaddr = words[words.index('link/ether') + 1] + return hwaddr + + +@contextmanager +def chdir(directory): + """Change the current working directory to a different directory for a code + block and return the previous directory after the block exits. Useful to + run commands from a specified directory. + + :param str directory: The directory path to change to for this context. + """ + cur = os.getcwd() + try: + yield os.chdir(directory) + finally: + os.chdir(cur) + + +def chownr(path, owner, group, follow_links=True, chowntopdir=False): + """Recursively change user and group ownership of files and directories + in given path. Doesn't chown path itself by default, only its children. + + :param str path: The string path to start changing ownership. + :param str owner: The owner string to use when looking up the uid. + :param str group: The group string to use when looking up the gid. 
+ :param bool follow_links: Also follow and chown links if True + :param bool chowntopdir: Also chown path itself if True + """ + uid = pwd.getpwnam(owner).pw_uid + gid = grp.getgrnam(group).gr_gid + if follow_links: + chown = os.chown + else: + chown = os.lchown + + if chowntopdir: + broken_symlink = os.path.lexists(path) and not os.path.exists(path) + if not broken_symlink: + chown(path, uid, gid) + for root, dirs, files in os.walk(path, followlinks=follow_links): + for name in dirs + files: + full = os.path.join(root, name) + try: + chown(full, uid, gid) + except (IOError, OSError) as e: + # Intended to ignore "file not found". + if e.errno == errno.ENOENT: + pass + + +def lchownr(path, owner, group): + """Recursively change user and group ownership of files and directories + in a given path, not following symbolic links. See the documentation for + 'os.lchown' for more information. + + :param str path: The string path to start changing ownership. + :param str owner: The owner string to use when looking up the uid. + :param str group: The group string to use when looking up the gid. + """ + chownr(path, owner, group, follow_links=False) + + +def owner(path): + """Returns a tuple containing the username & groupname owning the path. + + :param str path: the string path to retrieve the ownership + :return tuple(str, str): A (username, groupname) tuple containing the + name of the user and group owning the path. + :raises OSError: if the specified path does not exist + """ + stat = os.stat(path) + username = pwd.getpwuid(stat.st_uid)[0] + groupname = grp.getgrgid(stat.st_gid)[0] + return username, groupname + + +def get_total_ram(): + """The total amount of system RAM in bytes. + + This is what is reported by the OS, and may be overcommitted when + there are multiple containers hosted on the same machine. + """ + with open('/proc/meminfo', 'r') as f: + for line in f.readlines(): + if line: + key, value, unit = line.split() + if key == 'MemTotal:': + assert unit == 'kB', 'Unknown unit' + return int(value) * 1024 # Classic, not KiB. + raise NotImplementedError() + + +UPSTART_CONTAINER_TYPE = '/run/container_type' + + +def is_container(): + """Determine whether unit is running in a container + + @return: boolean indicating if unit is in a container + """ + if init_is_systemd(): + # Detect using systemd-detect-virt + return subprocess.call(['systemd-detect-virt', + '--container']) == 0 + else: + # Detect using upstart container file marker + return os.path.exists(UPSTART_CONTAINER_TYPE) + + +def add_to_updatedb_prunepath(path, updatedb_path=UPDATEDB_PATH): + """Adds the specified path to the mlocate's udpatedb.conf PRUNEPATH list. + + This method has no effect if the path specified by updatedb_path does not + exist or is not a file. 
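+
+    Example (path is illustrative)::
+
+        add_to_updatedb_prunepath('/srv/ceph')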
+
+    @param path: string the path to add to the updatedb.conf PRUNEPATHS value
+    @param updatedb_path: the path to the updatedb.conf file
+    """
+    if not os.path.exists(updatedb_path) or os.path.isdir(updatedb_path):
+        # If the updatedb.conf file doesn't exist then don't attempt to update
+        # the file as the package providing mlocate may not be installed on
+        # the local system
+        return
+
+    with open(updatedb_path, 'r+') as f_id:
+        updatedb_text = f_id.read()
+        output = updatedb(updatedb_text, path)
+        f_id.seek(0)
+        f_id.write(output)
+        f_id.truncate()
+
+
+def updatedb(updatedb_text, new_path):
+    """Add new_path to the PRUNEPATHS entry in the given updatedb.conf text.
+
+    @param updatedb_text: the current contents of updatedb.conf
+    @param new_path: the path to append to PRUNEPATHS
+    @return: the updated updatedb.conf contents
+    """
+    lines = [line for line in updatedb_text.split("\n")]
+    for i, line in enumerate(lines):
+        if line.startswith("PRUNEPATHS="):
+            paths_line = line.split("=")[1].replace('"', '')
+            paths = paths_line.split(" ")
+            if new_path not in paths:
+                paths.append(new_path)
+                lines[i] = 'PRUNEPATHS="{}"'.format(' '.join(paths))
+    output = "\n".join(lines)
+    return output
+
+
+def modulo_distribution(modulo=3, wait=30, non_zero_wait=False):
+    """ Modulo distribution
+
+    This helper uses the unit number, a modulo value and a constant wait time
+    to produce a calculated wait time distribution. This is useful in large
+    scale deployments to distribute load during an expensive operation such as
+    service restarts.
+
+    If you have 1000 nodes that need to restart, 100 at a time, one minute
+    apart:
+
+        time.sleep(modulo_distribution(modulo=100, wait=60))
+        restart()
+
+    If you need restarts to happen serially, set modulo to the exact number of
+    nodes and set a high constant wait time:
+
+        time.sleep(modulo_distribution(modulo=10, wait=120))
+        restart()
+
+    @param modulo: int The modulo number creates the group distribution
+    @param wait: int The constant time wait value
+    @param non_zero_wait: boolean Override unit % modulo == 0,
+                          return modulo * wait. Used to avoid collisions with
+                          leader nodes which are often given priority.
+    @return: int Calculated time to wait for unit operation
+    """
+    unit_number = int(local_unit().split('/')[1])
+    calculated_wait_time = (unit_number % modulo) * wait
+    if non_zero_wait and calculated_wait_time == 0:
+        return modulo * wait
+    else:
+        return calculated_wait_time
+
+
+def ca_cert_absolute_path(basename_without_extension):
+    """Returns absolute path to CA certificate.
+
+    :param basename_without_extension: Filename without extension
+    :type basename_without_extension: str
+    :returns: Absolute full path
+    :rtype: str
+    """
+    return '{}/{}.crt'.format(CA_CERT_DIR, basename_without_extension)
+
+
+def install_ca_cert(ca_cert, name=None):
+    """
+    Install the given cert as a trusted CA.
+
+    The ``name`` is the stem of the filename where the cert is written, and if
+    not provided, it will default to ``juju-{charm_name}``.
+
+    If the cert is empty or None, or is unchanged, nothing is done.
+    """
+    if not ca_cert:
+        return
+    if not isinstance(ca_cert, bytes):
+        ca_cert = ca_cert.encode('utf8')
+    if not name:
+        name = 'juju-{}'.format(charm_name())
+    cert_file = ca_cert_absolute_path(name)
+    # md5 is used here only to detect changes, not for security.
+    new_hash = hashlib.md5(ca_cert).hexdigest()
+    if file_hash(cert_file) == new_hash:
+        return
+    log("Installing new CA cert at: {}".format(cert_file), level=INFO)
+    write_file(cert_file, ca_cert)
+    subprocess.check_call(['update-ca-certificates', '--fresh'])
+
+
+def get_system_env(key, default=None):
+    """Get data from system environment as represented in ``/etc/environment``.
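+
+    For example (the result depends on the host's ``/etc/environment``)::
+
+        path = get_system_env('PATH', default='/usr/bin:/bin')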
+
+    :param key: Key to look up
+    :type key: str
+    :param default: Value to return if key is not found
+    :type default: any
+    :returns: Value for key if found or contents of default parameter
+    :rtype: any
+    :raises: subprocess.CalledProcessError
+    """
+    env_file = '/etc/environment'
+    # use the shell and env(1) to parse the global environments file. This is
+    # done to get the correct result even if the user has shell variable
+    # substitutions or other shell logic in that file.
+    output = subprocess.check_output(
+        ['env', '-i', '/bin/bash', '-c',
+         'set -a && source {} && env'.format(env_file)],
+        universal_newlines=True)
+    for k, v in (line.split('=', 1)
+                 for line in output.splitlines() if '=' in line):
+        if k == key:
+            return v
+    else:
+        return default
diff --git a/ceph-osd/hooks/charmhelpers/core/host_factory/__init__.py b/ceph-osd/hooks/charmhelpers/core/host_factory/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/ceph-osd/hooks/charmhelpers/core/host_factory/centos.py b/ceph-osd/hooks/charmhelpers/core/host_factory/centos.py
new file mode 100644
index 00000000..7781a396
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/core/host_factory/centos.py
@@ -0,0 +1,72 @@
+import subprocess
+import yum
+import os
+
+from charmhelpers.core.strutils import BasicStringComparator
+
+
+class CompareHostReleases(BasicStringComparator):
+    """Provide comparisons of Host releases.
+
+    Use in the form of
+
+    if CompareHostReleases(release) > 'trusty':
+        # do something with mitaka
+    """
+
+    def __init__(self, item):
+        raise NotImplementedError(
+            "CompareHostReleases() is not implemented for CentOS")
+
+
+def service_available(service_name):
+    """Determine whether a system service is available."""
+    if os.path.isdir('/run/systemd/system'):
+        cmd = ['systemctl', 'is-enabled', service_name]
+    else:
+        cmd = ['service', service_name, 'is-enabled']
+    return subprocess.call(cmd) == 0
+
+
+def add_new_group(group_name, system_group=False, gid=None):
+    cmd = ['groupadd']
+    if gid:
+        cmd.extend(['--gid', str(gid)])
+    if system_group:
+        cmd.append('-r')
+    cmd.append(group_name)
+    subprocess.check_call(cmd)
+
+
+def lsb_release():
+    """Return /etc/os-release in a dict."""
+    d = {}
+    with open('/etc/os-release', 'r') as lsb:
+        for l in lsb:
+            s = l.split('=')
+            if len(s) != 2:
+                continue
+            d[s[0].strip()] = s[1].strip()
+    return d
+
+
+def cmp_pkgrevno(package, revno, pkgcache=None):
+    """Compare supplied revno with the revno of the installed package.
+
+    *  1 => Installed revno is greater than supplied arg
+    *  0 => Installed revno is the same as supplied arg
+    * -1 => Installed revno is less than supplied arg
+
+    This function builds the package cache with yum's YumBase if the
+    pkgcache argument is None.
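+
+    Example (the package name is illustrative)::
+
+        if cmp_pkgrevno('ceph', '12.2.0') >= 0:
+            pass  # installed ceph is at least 12.2.0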
+ """ + if not pkgcache: + y = yum.YumBase() + packages = y.doPackageLists() + pkgcache = {i.Name: i.version for i in packages['installed']} + pkg = pkgcache[package] + if pkg > revno: + return 1 + if pkg < revno: + return -1 + return 0 diff --git a/ceph-osd/hooks/charmhelpers/core/host_factory/ubuntu.py b/ceph-osd/hooks/charmhelpers/core/host_factory/ubuntu.py new file mode 100644 index 00000000..732d76c3 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/core/host_factory/ubuntu.py @@ -0,0 +1,125 @@ +import subprocess + +from charmhelpers.core.hookenv import cached +from charmhelpers.core.strutils import BasicStringComparator + + +UBUNTU_RELEASES = ( + 'lucid', + 'maverick', + 'natty', + 'oneiric', + 'precise', + 'quantal', + 'raring', + 'saucy', + 'trusty', + 'utopic', + 'vivid', + 'wily', + 'xenial', + 'yakkety', + 'zesty', + 'artful', + 'bionic', + 'cosmic', + 'disco', + 'eoan', + 'focal', + 'groovy', + 'hirsute', + 'impish', + 'jammy', + 'kinetic', + 'lunar', + 'mantic', +) + + +class CompareHostReleases(BasicStringComparator): + """Provide comparisons of Ubuntu releases. + + Use in the form of + + if CompareHostReleases(release) > 'trusty': + # do something with mitaka + """ + _list = UBUNTU_RELEASES + + +def service_available(service_name): + """Determine whether a system service is available""" + try: + subprocess.check_output( + ['service', service_name, 'status'], + stderr=subprocess.STDOUT).decode('UTF-8') + except subprocess.CalledProcessError as e: + return b'unrecognized service' not in e.output + else: + return True + + +def add_new_group(group_name, system_group=False, gid=None): + cmd = ['addgroup'] + if gid: + cmd.extend(['--gid', str(gid)]) + if system_group: + cmd.append('--system') + else: + cmd.extend([ + '--group', + ]) + cmd.append(group_name) + subprocess.check_call(cmd) + + +def lsb_release(): + """Return /etc/lsb-release in a dict""" + d = {} + with open('/etc/lsb-release', 'r') as lsb: + for l in lsb: + k, v = l.split('=') + d[k.strip()] = v.strip() + return d + + +def get_distrib_codename(): + """Return the codename of the distribution + :returns: The codename + :rtype: str + """ + return lsb_release()['DISTRIB_CODENAME'].lower() + + +def cmp_pkgrevno(package, revno, pkgcache=None): + """Compare supplied revno with the revno of the installed package. + + * 1 => Installed revno is greater than supplied arg + * 0 => Installed revno is the same as supplied arg + * -1 => Installed revno is less than supplied arg + + This function imports apt_cache function from charmhelpers.fetch if + the pkgcache argument is None. Be sure to add charmhelpers.fetch if + you call this function, or pass an apt_pkg.Cache() instance. + """ + from charmhelpers.fetch import apt_pkg, get_installed_version + if not pkgcache: + current_ver = get_installed_version(package) + else: + pkg = pkgcache[package] + current_ver = pkg.current_ver + + return apt_pkg.version_compare(current_ver.ver_str, revno) + + +@cached +def arch(): + """Return the package architecture as a string. + + :returns: the architecture + :rtype: str + :raises: subprocess.CalledProcessError if dpkg command fails + """ + return subprocess.check_output( + ['dpkg', '--print-architecture'] + ).rstrip().decode('UTF-8') diff --git a/ceph-osd/hooks/charmhelpers/core/hugepage.py b/ceph-osd/hooks/charmhelpers/core/hugepage.py new file mode 100644 index 00000000..54b5b5e2 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/core/hugepage.py @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- + +# Copyright 2014-2015 Canonical Limited. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import yaml
+from charmhelpers.core import fstab
+from charmhelpers.core import sysctl
+from charmhelpers.core.host import (
+    add_group,
+    add_user_to_group,
+    fstab_mount,
+    mkdir,
+)
+from charmhelpers.core.strutils import bytes_from_string
+from subprocess import check_output
+
+
+def hugepage_support(user, group='hugetlb', nr_hugepages=256,
+                     max_map_count=65536, mnt_point='/run/hugepages/kvm',
+                     pagesize='2MB', mount=True, set_shmmax=False):
+    """Enable hugepages on system.
+
+    Args:
+        user (str) -- Username to allow access to hugepages to
+        group (str) -- Group name to own hugepages
+        nr_hugepages (int) -- Number of pages to reserve
+        max_map_count (int) -- Number of Virtual Memory Areas a process can own
+        mnt_point (str) -- Directory to mount hugepages on
+        pagesize (str) -- Size of hugepages
+        mount (bool) -- Whether to mount hugepages
+        set_shmmax (bool) -- Whether to raise kernel.shmmax so it can hold the
+                             reserved pages
+    """
+    group_info = add_group(group)
+    gid = group_info.gr_gid
+    add_user_to_group(user, group)
+    if max_map_count < 2 * nr_hugepages:
+        max_map_count = 2 * nr_hugepages
+    sysctl_settings = {
+        'vm.nr_hugepages': nr_hugepages,
+        'vm.max_map_count': max_map_count,
+        'vm.hugetlb_shm_group': gid,
+    }
+    if set_shmmax:
+        shmmax_current = int(check_output(['sysctl', '-n', 'kernel.shmmax']))
+        shmmax_minsize = bytes_from_string(pagesize) * nr_hugepages
+        if shmmax_minsize > shmmax_current:
+            sysctl_settings['kernel.shmmax'] = shmmax_minsize
+    sysctl.create(yaml.dump(sysctl_settings), '/etc/sysctl.d/10-hugepage.conf')
+    mkdir(mnt_point, owner='root', group='root', perms=0o755, force=False)
+    lfstab = fstab.Fstab()
+    fstab_entry = lfstab.get_entry_by_attr('mountpoint', mnt_point)
+    if fstab_entry:
+        lfstab.remove_entry(fstab_entry)
+    entry = lfstab.Entry('nodev', mnt_point, 'hugetlbfs',
+                         'mode=1770,gid={},pagesize={}'.format(gid, pagesize), 0, 0)
+    lfstab.add_entry(entry)
+    if mount:
+        fstab_mount(mnt_point)
diff --git a/ceph-osd/hooks/charmhelpers/core/kernel.py b/ceph-osd/hooks/charmhelpers/core/kernel.py
new file mode 100644
index 00000000..e01f4f8b
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/core/kernel.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+import subprocess
+
+from charmhelpers.osplatform import get_platform
+from charmhelpers.core.hookenv import (
+    log,
+    INFO
+)
+
+__platform__ = get_platform()
+if __platform__ == "ubuntu":
+    from charmhelpers.core.kernel_factory.ubuntu import (  # NOQA:F401
+        persistent_modprobe,
+        update_initramfs,
+    )  # flake8: noqa -- ignore F401 for this import
+elif __platform__ == "centos":
+    from charmhelpers.core.kernel_factory.centos import (  # NOQA:F401
+        persistent_modprobe,
+        update_initramfs,
+    )  # flake8: noqa -- ignore F401 for this import
+
+__author__ = "Jorge Niedbalski "
+
+
+def modprobe(module, persist=True):
+    """Load a kernel module and configure for auto-load on reboot."""
+    cmd = ['modprobe', module]
+
+    log('Loading kernel module %s' % module, level=INFO)
+
+    subprocess.check_call(cmd)
+    if persist:
+        persistent_modprobe(module)
+
+
+def rmmod(module, force=False):
+    """Remove a module from the Linux kernel"""
+    cmd = ['rmmod']
+    if force:
+        cmd.append('-f')
+    cmd.append(module)
+    log('Removing kernel module %s' % module, level=INFO)
+    return subprocess.check_call(cmd)
+
+
+def lsmod():
+    """Shows what kernel modules are currently loaded"""
+    return subprocess.check_output(['lsmod'],
+                                   universal_newlines=True)
+
+
+def is_module_loaded(module):
+    """Checks if a kernel module is already loaded"""
+    matches = re.findall('^%s[ ]+' % module, lsmod(), re.M)
+    return len(matches) > 0
diff --git a/ceph-osd/hooks/charmhelpers/core/kernel_factory/__init__.py b/ceph-osd/hooks/charmhelpers/core/kernel_factory/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/ceph-osd/hooks/charmhelpers/core/kernel_factory/centos.py b/ceph-osd/hooks/charmhelpers/core/kernel_factory/centos.py
new file mode 100644
index 00000000..1c402c11
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/core/kernel_factory/centos.py
@@ -0,0 +1,17 @@
+import subprocess
+import os
+
+
+def persistent_modprobe(module):
+    """Load a kernel module and configure for auto-load on reboot."""
+    if not os.path.exists('/etc/rc.modules'):
+        open('/etc/rc.modules', 'a').close()
+        os.chmod('/etc/rc.modules', 0o111)  # executable, as rc.modules is run at boot
+    with open('/etc/rc.modules', 'r+') as modules:
+        if module not in modules.read():
+            modules.write('modprobe %s\n' % module)
+
+
+def update_initramfs(version='all'):
+    """Updates an initramfs image."""
+    return subprocess.check_call(["dracut", "-f", version])
diff --git a/ceph-osd/hooks/charmhelpers/core/kernel_factory/ubuntu.py b/ceph-osd/hooks/charmhelpers/core/kernel_factory/ubuntu.py
new file mode 100644
index 00000000..3de372fd
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/core/kernel_factory/ubuntu.py
@@ -0,0 +1,13 @@
+import subprocess
+
+
+def persistent_modprobe(module):
+    """Load a kernel module and configure for auto-load on reboot."""
+    with open('/etc/modules', 'r+') as modules:
+        if module not in modules.read():
+            modules.write(module + "\n")
+
+
+def update_initramfs(version='all'):
+    """Updates an initramfs image."""
+    return subprocess.check_call(["update-initramfs", "-k", version, "-u"])
diff --git a/ceph-osd/hooks/charmhelpers/core/services/__init__.py b/ceph-osd/hooks/charmhelpers/core/services/__init__.py
new file mode 100644
index 00000000..61fd074e
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/core/services/__init__.py
@@ -0,0 +1,16 @@
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .base import *  # NOQA
+from .helpers import *  # NOQA
diff --git a/ceph-osd/hooks/charmhelpers/core/services/base.py b/ceph-osd/hooks/charmhelpers/core/services/base.py
new file mode 100644
index 00000000..8d217b59
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/core/services/base.py
@@ -0,0 +1,363 @@
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import json
+import inspect
+from collections import OrderedDict
+from collections.abc import Iterable
+
+from charmhelpers.core import host
+from charmhelpers.core import hookenv
+
+
+__all__ = ['ServiceManager', 'ManagerCallback',
+           'PortManagerCallback', 'open_ports', 'close_ports', 'manage_ports',
+           'service_restart', 'service_stop']
+
+
+class ServiceManager(object):
+    def __init__(self, services=None):
+        """
+        Register a list of services, given their definitions.
+
+        Service definitions are dicts in the following format (all keys except
+        'service' are optional)::
+
+            {
+                "service": <service name>,
+                "required_data": <list of required data contexts>,
+                "provided_data": <list of provided data contexts>,
+                "data_ready": <one or more callbacks>,
+                "data_lost": <one or more callbacks>,
+                "start": <one or more callbacks>,
+                "stop": <one or more callbacks>,
+                "ports": <list of ports to manage>,
+            }
+
+        The 'required_data' list should contain dicts of required data (or
+        dependency managers that act like dicts and know how to collect the data).
+        Only when all items in the 'required_data' list are populated are the
+        'data_ready' and 'start' callbacks executed. See `is_ready()` for more
+        information.
+
+        The 'provided_data' list should contain relation data providers, most likely
+        a subclass of :class:`charmhelpers.core.services.helpers.RelationContext`,
+        that will indicate a set of data to set on a given relation.
+
+        The 'data_ready' value should be either a single callback, or a list of
+        callbacks, to be called when all items in 'required_data' pass `is_ready()`.
+        Each callback will be called with the service name as the only parameter.
+        After all of the 'data_ready' callbacks are called, the 'start' callbacks
+        are fired.
+
+        The 'data_lost' value should be either a single callback, or a list of
+        callbacks, to be called when a 'required_data' item no longer passes
+        `is_ready()`. Each callback will be called with the service name as the
+        only parameter. After all of the 'data_lost' callbacks are called,
+        the 'stop' callbacks are fired.
+
+        The 'start' value should be either a single callback, or a list of
+        callbacks, to be called when starting the service, after the 'data_ready'
+        callbacks are complete. Each callback will be called with the service
+        name as the only parameter. This defaults to
+        `[host.service_start, services.open_ports]`.
+ + The 'stop' value should be either a single callback, or a list of + callbacks, to be called when stopping the service. If the service is + being stopped because it no longer has all of its 'required_data', this + will be called after all of the 'data_lost' callbacks are complete. + Each callback will be called with the service name as the only parameter. + This defaults to `[services.close_ports, host.service_stop]`. + + The 'ports' value should be a list of ports to manage. The default + 'start' handler will open the ports after the service is started, + and the default 'stop' handler will close the ports prior to stopping + the service. + + + Examples: + + The following registers an Upstart service called bingod that depends on + a mongodb relation and which runs a custom `db_migrate` function prior to + restarting the service, and a Runit service called spadesd:: + + manager = services.ServiceManager([ + { + 'service': 'bingod', + 'ports': [80, 443], + 'required_data': [MongoRelation(), config(), {'my': 'data'}], + 'data_ready': [ + services.template(source='bingod.conf'), + services.template(source='bingod.ini', + target='/etc/bingod.ini', + owner='bingo', perms=0400), + ], + }, + { + 'service': 'spadesd', + 'data_ready': services.template(source='spadesd_run.j2', + target='/etc/sv/spadesd/run', + perms=0555), + 'start': runit_start, + 'stop': runit_stop, + }, + ]) + manager.manage() + """ + self._ready_file = os.path.join(hookenv.charm_dir(), 'READY-SERVICES.json') + self._ready = None + self.services = OrderedDict() + for service in services or []: + service_name = service['service'] + self.services[service_name] = service + + def manage(self): + """ + Handle the current hook by doing The Right Thing with the registered services. + """ + hookenv._run_atstart() + try: + hook_name = hookenv.hook_name() + if hook_name == 'stop': + self.stop_services() + else: + self.reconfigure_services() + self.provide_data() + except SystemExit as x: + if x.code is None or x.code == 0: + hookenv._run_atexit() + hookenv._run_atexit() + + def provide_data(self): + """ + Set the relation data for each provider in the ``provided_data`` list. + + A provider must have a `name` attribute, which indicates which relation + to set data on, and a `provide_data()` method, which returns a dict of + data to set. + + The `provide_data()` method can optionally accept two parameters: + + * ``remote_service`` The name of the remote service that the data will + be provided to. The `provide_data()` method will be called once + for each connected service (not unit). This allows the method to + tailor its data to the given service. + * ``service_ready`` Whether or not the service definition had all of + its requirements met, and thus the ``data_ready`` callbacks run. + + Note that the ``provided_data`` methods are now called **after** the + ``data_ready`` callbacks are run. This gives the ``data_ready`` callbacks + a chance to generate any data necessary for the providing to the remote + services. 
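+
+        A minimal provider might look like this (a sketch; the names and
+        values are illustrative)::
+
+            class WebsiteProvider(object):
+                name = 'website'
+
+                def provide_data(self, remote_service, service_ready):
+                    return {'host': hookenv.unit_get('private-address'),
+                            'port': 80}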
+ """ + for service_name, service in self.services.items(): + service_ready = self.is_ready(service_name) + for provider in service.get('provided_data', []): + for relid in hookenv.relation_ids(provider.name): + units = hookenv.related_units(relid) + if not units: + continue + remote_service = units[0].split('/')[0] + argspec = inspect.getfullargspec(provider.provide_data) + if len(argspec.args) > 1: + data = provider.provide_data(remote_service, service_ready) + else: + data = provider.provide_data() + if data: + hookenv.relation_set(relid, data) + + def reconfigure_services(self, *service_names): + """ + Update all files for one or more registered services, and, + if ready, optionally restart them. + + If no service names are given, reconfigures all registered services. + """ + for service_name in service_names or self.services.keys(): + if self.is_ready(service_name): + self.fire_event('data_ready', service_name) + self.fire_event('start', service_name, default=[ + service_restart, + manage_ports]) + self.save_ready(service_name) + else: + if self.was_ready(service_name): + self.fire_event('data_lost', service_name) + self.fire_event('stop', service_name, default=[ + manage_ports, + service_stop]) + self.save_lost(service_name) + + def stop_services(self, *service_names): + """ + Stop one or more registered services, by name. + + If no service names are given, stops all registered services. + """ + for service_name in service_names or self.services.keys(): + self.fire_event('stop', service_name, default=[ + manage_ports, + service_stop]) + + def get_service(self, service_name): + """ + Given the name of a registered service, return its service definition. + """ + service = self.services.get(service_name) + if not service: + raise KeyError('Service not registered: %s' % service_name) + return service + + def fire_event(self, event_name, service_name, default=None): + """ + Fire a data_ready, data_lost, start, or stop event on a given service. + """ + service = self.get_service(service_name) + callbacks = service.get(event_name, default) + if not callbacks: + return + if not isinstance(callbacks, Iterable): + callbacks = [callbacks] + for callback in callbacks: + if isinstance(callback, ManagerCallback): + callback(self, service_name, event_name) + else: + callback(service_name) + + def is_ready(self, service_name): + """ + Determine if a registered service is ready, by checking its 'required_data'. + + A 'required_data' item can be any mapping type, and is considered ready + if `bool(item)` evaluates as True. + """ + service = self.get_service(service_name) + reqs = service.get('required_data', []) + return all(bool(req) for req in reqs) + + def _load_ready_file(self): + if self._ready is not None: + return + if os.path.exists(self._ready_file): + with open(self._ready_file) as fp: + self._ready = set(json.load(fp)) + else: + self._ready = set() + + def _save_ready_file(self): + if self._ready is None: + return + with open(self._ready_file, 'w') as fp: + json.dump(list(self._ready), fp) + + def save_ready(self, service_name): + """ + Save an indicator that the given service is now data_ready. + """ + self._load_ready_file() + self._ready.add(service_name) + self._save_ready_file() + + def save_lost(self, service_name): + """ + Save an indicator that the given service is no longer data_ready. + """ + self._load_ready_file() + self._ready.discard(service_name) + self._save_ready_file() + + def was_ready(self, service_name): + """ + Determine if the given service was previously data_ready. 
+ """ + self._load_ready_file() + return service_name in self._ready + + +class ManagerCallback(object): + """ + Special case of a callback that takes the `ServiceManager` instance + in addition to the service name. + + Subclasses should implement `__call__` which should accept three parameters: + + * `manager` The `ServiceManager` instance + * `service_name` The name of the service it's being triggered for + * `event_name` The name of the event that this callback is handling + """ + def __call__(self, manager, service_name, event_name): + raise NotImplementedError() + + +class PortManagerCallback(ManagerCallback): + """ + Callback class that will open or close ports, for use as either + a start or stop action. + """ + def __call__(self, manager, service_name, event_name): + service = manager.get_service(service_name) + # turn this generator into a list, + # as we'll be going over it multiple times + new_ports = list(service.get('ports', [])) + port_file = os.path.join(hookenv.charm_dir(), '.{}.ports'.format(service_name)) + if os.path.exists(port_file): + with open(port_file) as fp: + old_ports = fp.read().split(',') + for old_port in old_ports: + if bool(old_port) and not self.ports_contains(old_port, new_ports): + hookenv.close_port(old_port) + with open(port_file, 'w') as fp: + fp.write(','.join(str(port) for port in new_ports)) + for port in new_ports: + # A port is either a number or 'ICMP' + protocol = 'TCP' + if str(port).upper() == 'ICMP': + protocol = 'ICMP' + if event_name == 'start': + hookenv.open_port(port, protocol) + elif event_name == 'stop': + hookenv.close_port(port, protocol) + + def ports_contains(self, port, ports): + if not bool(port): + return False + if str(port).upper() != 'ICMP': + port = int(port) + return port in ports + + +def service_stop(service_name): + """ + Wrapper around host.service_stop to prevent spurious "unknown service" + messages in the logs. + """ + if host.service_running(service_name): + host.service_stop(service_name) + + +def service_restart(service_name): + """ + Wrapper around host.service_restart to prevent spurious "unknown service" + messages in the logs. + """ + if host.service_available(service_name): + if host.service_running(service_name): + host.service_restart(service_name) + else: + host.service_start(service_name) + + +# Convenience aliases +open_ports = close_ports = manage_ports = PortManagerCallback() diff --git a/ceph-osd/hooks/charmhelpers/core/services/helpers.py b/ceph-osd/hooks/charmhelpers/core/services/helpers.py new file mode 100644 index 00000000..5bf62dd5 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/core/services/helpers.py @@ -0,0 +1,290 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import os
+import yaml
+
+from charmhelpers.core import hookenv
+from charmhelpers.core import host
+from charmhelpers.core import templating
+
+from charmhelpers.core.services.base import ManagerCallback
+
+
+__all__ = ['RelationContext', 'TemplateCallback',
+           'render_template', 'template']
+
+
+class RelationContext(dict):
+    """
+    Base class for a context generator that gets relation data from juju.
+
+    Subclasses must provide the attributes `name`, which is the name of the
+    interface of interest, `interface`, which is the type of the interface of
+    interest, and `required_keys`, which is the set of keys required for the
+    relation to be considered complete. The data for all interfaces matching
+    the `name` attribute that are complete will be used to populate the
+    dictionary values (see `get_data`, below).
+
+    The generated context will be namespaced under the relation :attr:`name`,
+    to prevent potential naming conflicts.
+
+    :param str name: Override the relation :attr:`name`, since it can vary from charm to charm
+    :param list additional_required_keys: Extend the list of :attr:`required_keys`
+    """
+    name = None
+    interface = None
+
+    def __init__(self, name=None, additional_required_keys=None):
+        if not hasattr(self, 'required_keys'):
+            self.required_keys = []
+
+        if name is not None:
+            self.name = name
+        if additional_required_keys:
+            self.required_keys.extend(additional_required_keys)
+        self.get_data()
+
+    def __bool__(self):
+        """
+        Returns True if all of the required_keys are available.
+        """
+        return self.is_ready()
+
+    __nonzero__ = __bool__
+
+    def __repr__(self):
+        return super(RelationContext, self).__repr__()
+
+    def is_ready(self):
+        """
+        Returns True if all of the `required_keys` are available from any units.
+        """
+        ready = len(self.get(self.name, [])) > 0
+        if not ready:
+            hookenv.log('Incomplete relation: {}'.format(self.__class__.__name__), hookenv.DEBUG)
+        return ready
+
+    def _is_ready(self, unit_data):
+        """
+        Helper method that tests a set of relation data and returns True if
+        all of the `required_keys` are present.
+        """
+        return set(unit_data.keys()).issuperset(set(self.required_keys))
+
+    def get_data(self):
+        """
+        Retrieve the relation data for each unit involved in a relation and,
+        if complete, store it in a list under `self[self.name]`. This
+        is automatically called when the RelationContext is instantiated.
+
+        The units are sorted lexicographically first by the service ID, then by
+        the unit ID. Thus, if an interface has two other services, 'db:1'
+        and 'db:2', with 'db:1' having two units, 'wordpress/0' and 'wordpress/1',
+        and 'db:2' having one unit, 'mediawiki/0', all of which have a complete
+        set of data, the relation data for the units will be stored in the
+        order: 'wordpress/0', 'wordpress/1', 'mediawiki/0'.
+
+        If you only care about a single unit on the relation, you can just
+        access it as `{{ interface[0]['key'] }}`. However, if you can at all
+        support multiple units on a relation, you should iterate over the list,
+        like::
+
+            {% for unit in interface -%}
+                {{ unit['key'] }}{% if not loop.last %},{% endif %}
+            {%- endfor %}
+
+        Note that since all sets of relation data from all related services and
+        units are in a single list, if you need to know which service or unit a
+        set of data came from, you'll need to extend this class to preserve
+        that information.
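+
+        One way to preserve the unit name is a subclass along these lines
+        (a sketch, not provided by this helper; the '__unit__' key is
+        illustrative)::
+
+            class UnitAwareContext(RelationContext):
+                def get_data(self):
+                    if not hookenv.relation_ids(self.name):
+                        return
+                    ns = self.setdefault(self.name, [])
+                    for rid in sorted(hookenv.relation_ids(self.name)):
+                        for unit in sorted(hookenv.related_units(rid)):
+                            reldata = hookenv.relation_get(rid=rid, unit=unit)
+                            if self._is_ready(reldata):
+                                reldata['__unit__'] = unit
+                                ns.append(reldata)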
+ """ + if not hookenv.relation_ids(self.name): + return + + ns = self.setdefault(self.name, []) + for rid in sorted(hookenv.relation_ids(self.name)): + for unit in sorted(hookenv.related_units(rid)): + reldata = hookenv.relation_get(rid=rid, unit=unit) + if self._is_ready(reldata): + ns.append(reldata) + + def provide_data(self): + """ + Return data to be relation_set for this interface. + """ + return {} + + +class MysqlRelation(RelationContext): + """ + Relation context for the `mysql` interface. + + :param str name: Override the relation :attr:`name`, since it can vary from charm to charm + :param list additional_required_keys: Extend the list of :attr:`required_keys` + """ + name = 'db' + interface = 'mysql' + + def __init__(self, *args, **kwargs): + self.required_keys = ['host', 'user', 'password', 'database'] + RelationContext.__init__(self, *args, **kwargs) + + +class HttpRelation(RelationContext): + """ + Relation context for the `http` interface. + + :param str name: Override the relation :attr:`name`, since it can vary from charm to charm + :param list additional_required_keys: Extend the list of :attr:`required_keys` + """ + name = 'website' + interface = 'http' + + def __init__(self, *args, **kwargs): + self.required_keys = ['host', 'port'] + RelationContext.__init__(self, *args, **kwargs) + + def provide_data(self): + return { + 'host': hookenv.unit_get('private-address'), + 'port': 80, + } + + +class RequiredConfig(dict): + """ + Data context that loads config options with one or more mandatory options. + + Once the required options have been changed from their default values, all + config options will be available, namespaced under `config` to prevent + potential naming conflicts (for example, between a config option and a + relation property). + + :param list *args: List of options that must be changed from their default values. + """ + + def __init__(self, *args): + self.required_options = args + self['config'] = hookenv.config() + with open(os.path.join(hookenv.charm_dir(), 'config.yaml')) as fp: + self.config = yaml.safe_load(fp).get('options', {}) + + def __bool__(self): + for option in self.required_options: + if option not in self['config']: + return False + current_value = self['config'][option] + default_value = self.config[option].get('default') + if current_value == default_value: + return False + if current_value in (None, '') and default_value in (None, ''): + return False + return True + + def __nonzero__(self): + return self.__bool__() + + +class StoredContext(dict): + """ + A data context that always returns the data that it was first created with. + + This is useful to do a one-time generation of things like passwords, that + will thereafter use the same value that was originally generated, instead + of generating a new value each time it is run. + """ + def __init__(self, file_name, config_data): + """ + If the file exists, populate `self` with the data from the file. + Otherwise, populate with the given data and persist it to the file. 
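+
+        Example (the file name and key are illustrative)::
+
+            ctx = StoredContext('generated-secrets.yaml',
+                                {'password': host.pwgen()})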
+ """ + if os.path.exists(file_name): + self.update(self.read_context(file_name)) + else: + self.store_context(file_name, config_data) + self.update(config_data) + + def store_context(self, file_name, config_data): + if not os.path.isabs(file_name): + file_name = os.path.join(hookenv.charm_dir(), file_name) + with open(file_name, 'w') as file_stream: + os.fchmod(file_stream.fileno(), 0o600) + yaml.dump(config_data, file_stream) + + def read_context(self, file_name): + if not os.path.isabs(file_name): + file_name = os.path.join(hookenv.charm_dir(), file_name) + with open(file_name, 'r') as file_stream: + data = yaml.safe_load(file_stream) + if not data: + raise OSError("%s is empty" % file_name) + return data + + +class TemplateCallback(ManagerCallback): + """ + Callback class that will render a Jinja2 template, for use as a ready + action. + + :param str source: The template source file, relative to + `$CHARM_DIR/templates` + + :param str target: The target to write the rendered template to (or None) + :param str owner: The owner of the rendered file + :param str group: The group of the rendered file + :param int perms: The permissions of the rendered file + :param partial on_change_action: functools partial to be executed when + rendered file changes + :param jinja2 loader template_loader: A jinja2 template loader + + :return str: The rendered template + """ + def __init__(self, source, target, + owner='root', group='root', perms=0o444, + on_change_action=None, template_loader=None): + self.source = source + self.target = target + self.owner = owner + self.group = group + self.perms = perms + self.on_change_action = on_change_action + self.template_loader = template_loader + + def __call__(self, manager, service_name, event_name): + pre_checksum = '' + if self.on_change_action and os.path.isfile(self.target): + pre_checksum = host.file_hash(self.target) + service = manager.get_service(service_name) + context = {'ctx': {}} + for ctx in service.get('required_data', []): + context.update(ctx) + context['ctx'].update(ctx) + + result = templating.render(self.source, self.target, context, + self.owner, self.group, self.perms, + template_loader=self.template_loader) + if self.on_change_action: + if pre_checksum == host.file_hash(self.target): + hookenv.log( + 'No change detected: {}'.format(self.target), + hookenv.DEBUG) + else: + self.on_change_action() + + return result + + +# Convenience aliases for templates +render_template = template = TemplateCallback diff --git a/ceph-osd/hooks/charmhelpers/core/strutils.py b/ceph-osd/hooks/charmhelpers/core/strutils.py new file mode 100644 index 00000000..31366871 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/core/strutils.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import re
+
+TRUTHY_STRINGS = {'y', 'yes', 'true', 't', 'on'}
+FALSEY_STRINGS = {'n', 'no', 'false', 'f', 'off'}
+
+
+def bool_from_string(value, truthy_strings=TRUTHY_STRINGS,
+                     falsey_strings=FALSEY_STRINGS, assume_false=False):
+    """Interpret string value as boolean.
+
+    Returns True if value translates to True, otherwise False.
+    """
+    if isinstance(value, str):
+        value = str(value)
+    else:
+        msg = "Unable to interpret non-string value '%s' as boolean" % (value)
+        raise ValueError(msg)
+
+    value = value.strip().lower()
+
+    if value in truthy_strings:
+        return True
+    elif value in falsey_strings or assume_false:
+        return False
+
+    msg = "Unable to interpret string value '%s' as boolean" % (value)
+    raise ValueError(msg)
+
+
+def bytes_from_string(value):
+    """Interpret a human readable string value as bytes.
+
+    Returns int
+    """
+    BYTE_POWER = {
+        'K': 1,
+        'KB': 1,
+        'M': 2,
+        'MB': 2,
+        'G': 3,
+        'GB': 3,
+        'T': 4,
+        'TB': 4,
+        'P': 5,
+        'PB': 5,
+    }
+    if isinstance(value, str):
+        value = str(value)
+    else:
+        msg = "Unable to interpret non-string value '%s' as bytes" % (value)
+        raise ValueError(msg)
+    matches = re.match("([0-9]+)([a-zA-Z]+)", value)
+    if matches:
+        size = int(matches.group(1)) * (1024 ** BYTE_POWER[matches.group(2)])
+    else:
+        # Assume that value passed in is bytes
+        try:
+            size = int(value)
+        except ValueError:
+            msg = "Unable to interpret string value '%s' as bytes" % (value)
+            raise ValueError(msg)
+    return size
+
+
+class BasicStringComparator(object):
+    """Provides a class that will compare strings from an iterator type object.
+    Used to provide > and < comparisons on strings that may not necessarily be
+    alphanumerically ordered. e.g. OpenStack or Ubuntu releases AFTER the
+    z-wrap.
+    """
+
+    _list = None
+
+    def __init__(self, item):
+        if self._list is None:
+            raise Exception("Must define the _list in the class definition!")
+        try:
+            self.index = self._list.index(item)
+        except Exception:
+            raise KeyError("Item '{}' is not in list '{}'"
+                           .format(item, self._list))
+
+    def __eq__(self, other):
+        assert isinstance(other, str) or isinstance(other, self.__class__)
+        return self.index == self._list.index(other)
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def __lt__(self, other):
+        assert isinstance(other, str) or isinstance(other, self.__class__)
+        return self.index < self._list.index(other)
+
+    def __ge__(self, other):
+        return not self.__lt__(other)
+
+    def __gt__(self, other):
+        assert isinstance(other, str) or isinstance(other, self.__class__)
+        return self.index > self._list.index(other)
+
+    def __le__(self, other):
+        return not self.__gt__(other)
+
+    def __str__(self):
+        """Always give back the item at the index so it can be used in
+        comparisons like:
+
+            s_mitaka = CompareOpenStack('mitaka')
+            s_newton = CompareOpenStack('newton')
+
+            assert s_newton > s_mitaka
+
+        @returns: <string>
+        """
+        return self._list[self.index]
diff --git a/ceph-osd/hooks/charmhelpers/core/sysctl.py b/ceph-osd/hooks/charmhelpers/core/sysctl.py
new file mode 100644
index 00000000..386428d6
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/core/sysctl.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import yaml + +from subprocess import check_call, CalledProcessError + +from charmhelpers.core.hookenv import ( + log, + DEBUG, + ERROR, + WARNING, +) + +from charmhelpers.core.host import is_container + +__author__ = 'Jorge Niedbalski R. ' + + +def create(sysctl_dict, sysctl_file, ignore=False): + """Creates a sysctl.conf file from a YAML associative array + + :param sysctl_dict: a dict or YAML-formatted string of sysctl + options eg "{ 'kernel.max_pid': 1337 }" + :type sysctl_dict: str + :param sysctl_file: path to the sysctl file to be saved + :type sysctl_file: str or unicode + :param ignore: If True, ignore "unknown variable" errors. + :type ignore: bool + :returns: None + """ + if type(sysctl_dict) is not dict: + try: + sysctl_dict_parsed = yaml.safe_load(sysctl_dict) + except yaml.YAMLError: + log("Error parsing YAML sysctl_dict: {}".format(sysctl_dict), + level=ERROR) + return + else: + sysctl_dict_parsed = sysctl_dict + + with open(sysctl_file, "w") as fd: + for key, value in sysctl_dict_parsed.items(): + fd.write("{}={}\n".format(key, value)) + + log("Updating sysctl_file: {} values: {}".format(sysctl_file, + sysctl_dict_parsed), + level=DEBUG) + + call = ["sysctl", "-p", sysctl_file] + if ignore: + call.append("-e") + + try: + check_call(call) + except CalledProcessError as e: + if is_container(): + log("Error setting some sysctl keys in this container: {}".format(e.output), + level=WARNING) + else: + raise e diff --git a/ceph-osd/hooks/charmhelpers/core/templating.py b/ceph-osd/hooks/charmhelpers/core/templating.py new file mode 100644 index 00000000..cb0213dc --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/core/templating.py @@ -0,0 +1,88 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from charmhelpers.core import host +from charmhelpers.core import hookenv + + +def render(source, target, context, owner='root', group='root', + perms=0o444, templates_dir=None, encoding='UTF-8', + template_loader=None, config_template=None): + """ + Render a template. + + The `source` path, if not absolute, is relative to the `templates_dir`. + + The `target` path should be absolute. It can also be `None`, in which + case no file will be written. + + The context should be a dict containing the values to be replaced in the + template. + + config_template may be provided to render from a provided template instead + of loading from a file. + + The `owner`, `group`, and `perms` options will be passed to `write_file`. + + If omitted, `templates_dir` defaults to the `templates` folder in the charm. 
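+
+    A minimal call might look like this (the paths and context are
+    illustrative)::
+
+        render('ceph.conf', '/etc/ceph/ceph.conf',
+               {'mon_hosts': '10.0.0.1 10.0.0.2'},
+               owner='ceph', group='ceph', perms=0o640)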
+ + The rendered template will be written to the file as well as being returned + as a string. + + Note: Using this requires python3-jinja2; if it is not installed, calling + this will attempt to use charmhelpers.fetch.apt_install to install it. + """ + try: + from jinja2 import FileSystemLoader, Environment, exceptions + except ImportError: + try: + from charmhelpers.fetch import apt_install + except ImportError: + hookenv.log('Could not import jinja2, and could not import ' + 'charmhelpers.fetch to install it', + level=hookenv.ERROR) + raise + apt_install('python3-jinja2', fatal=True) + from jinja2 import FileSystemLoader, Environment, exceptions + + if template_loader: + template_env = Environment(loader=template_loader) + else: + if templates_dir is None: + templates_dir = os.path.join(hookenv.charm_dir(), 'templates') + template_env = Environment(loader=FileSystemLoader(templates_dir)) + + # load from a string if provided explicitly + if config_template is not None: + template = template_env.from_string(config_template) + else: + try: + source = source + template = template_env.get_template(source) + except exceptions.TemplateNotFound as e: + hookenv.log('Could not load template %s from %s.' % + (source, templates_dir), + level=hookenv.ERROR) + raise e + content = template.render(context) + if target is not None: + target_dir = os.path.dirname(target) + if not os.path.exists(target_dir): + # This is a terrible default directory permission, as the file + # or its siblings will often contain secrets. + host.mkdir(os.path.dirname(target), owner, group, perms=0o755) + host.write_file(target, content.encode(encoding), owner, group, perms) + return content diff --git a/ceph-osd/hooks/charmhelpers/core/unitdata.py b/ceph-osd/hooks/charmhelpers/core/unitdata.py new file mode 100644 index 00000000..65153f1f --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/core/unitdata.py @@ -0,0 +1,563 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Authors: +# Kapil Thangavelu +# +""" +Intro +----- + +A simple way to store state in units. This provides a key value +storage with support for versioned, transactional operation, +and can calculate deltas from previous values to simplify unit logic +when processing changes. + + +Hook Integration +---------------- + +There are several extant frameworks for hook execution, including + + - charmhelpers.core.hookenv.Hooks + - charmhelpers.core.services.ServiceManager + +The storage classes are framework agnostic, one simple integration is +via the HookData contextmanager. It will record the current hook +execution environment (including relation data, config data, etc.), +setup a transaction and allow easy access to the changes from +previously seen values. One consequence of the integration is the +reservation of particular keys ('rels', 'unit', 'env', 'config', +'charm_revisions') for their respective values. 
+
+Here's a fully worked integration example using hookenv.Hooks::
+
+    from charmhelpers.core import hookenv, unitdata
+
+    hook_data = unitdata.HookData()
+    db = unitdata.kv()
+    hooks = hookenv.Hooks()
+
+    @hooks.hook
+    def config_changed():
+        # Print all changes to configuration from previously seen
+        # values.
+        for changed, (prev, cur) in hook_data.conf.items():
+            print('config changed', changed,
+                  'previous value', prev,
+                  'current value', cur)
+
+        # Get some unit specific bookkeeping
+        if not db.get('pkg_key'):
+            key = urllib.urlopen('https://example.com/pkg_key').read()
+            db.set('pkg_key', key)
+
+        # Directly access all charm config as a mapping.
+        conf = db.getrange('config', True)
+
+        # Directly access all relation data as a mapping
+        rels = db.getrange('rels', True)
+
+    if __name__ == '__main__':
+        with hook_data():
+            hooks.execute(sys.argv)
+
+
+A more basic integration is via the hook_scope context manager which simply
+manages transaction scope (and records hook name, and timestamp)::
+
+    >>> from unitdata import kv
+    >>> db = kv()
+    >>> with db.hook_scope('install'):
+    ...    # do work, in transactional scope.
+    ...    db.set('x', 1)
+    >>> db.get('x')
+    1
+
+
+Usage
+-----
+
+Values are automatically json de/serialized to preserve basic typing
+and complex data struct capabilities (dicts, lists, ints, booleans, etc).
+
+Individual values can be manipulated via get/set::
+
+    >>> db.set('y', True)
+    >>> db.get('y')
+    True
+
+    # We can set complex values (dicts, lists) as a single key.
+    >>> db.set('config', {'a': 1, 'b': True})
+
+    # Also supports returning dictionaries as a record which
+    # provides attribute access.
+    >>> config = db.get('config', record=True)
+    >>> config.b
+    True
+
+
+Groups of keys can be manipulated with update/getrange::
+
+    >>> db.update({'z': 1, 'y': 2}, prefix="gui.")
+    >>> db.getrange('gui.', strip=True)
+    {'z': 1, 'y': 2}
+
+When updating values, it's very helpful to understand which values
+have actually changed and how they have changed. The storage
+provides a delta method for this::
+
+    >>> data = {'debug': True, 'option': 2}
+    >>> delta = db.delta(data, 'config.')
+    >>> delta.debug.previous
+    None
+    >>> delta.debug.current
+    True
+    >>> delta
+    {'debug': (None, True), 'option': (None, 2)}
+
+Note the delta method does not persist the actual change; it needs to
+be explicitly saved via the 'update' method::
+
+    >>> db.update(data, 'config.')
+
+Values modified in the context of a hook scope retain historical values
+associated with the hookname.
+
+    >>> with db.hook_scope('config-changed'):
+    ...    db.set('x', 42)
+    >>> db.gethistory('x')
+    [(1, u'x', 1, u'install', u'2015-01-21T16:49:30.038372'),
+     (2, u'x', 42, u'config-changed', u'2015-01-21T16:49:30.038786')]
+
+"""
+
+import collections
+import contextlib
+import datetime
+import itertools
+import json
+import logging
+import os
+import pprint
+import sqlite3
+import sys
+
+__author__ = 'Kapil Thangavelu '
+
+
+class Storage(object):
+    """Simple key value database for local unit state within charms.
+
+    Modifications are not persisted unless :meth:`flush` is called.
+
+    To support dicts, lists, integers, floats, and booleans, values
+    are automatically json encoded/decoded.
+
+    Note: to facilitate unit testing, ':memory:' can be passed as the
+    path parameter which causes sqlite3 to only build the db in memory.
+    This should only be used for testing purposes.
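+
+    Example::
+
+        db = Storage(':memory:')
+        db.set('greeting', 'hello')
+        assert db.get('greeting') == 'hello'
+        db.flush()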
+ """ + def __init__(self, path=None, keep_revisions=False): + self.db_path = path + self.keep_revisions = keep_revisions + if path is None: + if 'UNIT_STATE_DB' in os.environ: + self.db_path = os.environ['UNIT_STATE_DB'] + else: + self.db_path = os.path.join( + os.environ.get('CHARM_DIR', ''), '.unit-state.db') + if self.db_path != ':memory:': + with open(self.db_path, 'a') as f: + os.fchmod(f.fileno(), 0o600) + self.conn = sqlite3.connect('%s' % self.db_path) + self.cursor = self.conn.cursor() + self.revision = None + self._closed = False + self._init() + + def close(self): + if self._closed: + return + self.flush(False) + self.cursor.close() + self.conn.close() + self._closed = True + + def get(self, key, default=None, record=False): + self.cursor.execute('select data from kv where key=?', [key]) + result = self.cursor.fetchone() + if not result: + return default + if record: + return Record(json.loads(result[0])) + return json.loads(result[0]) + + def getrange(self, key_prefix, strip=False): + """ + Get a range of keys starting with a common prefix as a mapping of + keys to values. + + :param str key_prefix: Common prefix among all keys + :param bool strip: Optionally strip the common prefix from the key + names in the returned dict + :return dict: A (possibly empty) dict of key-value mappings + """ + self.cursor.execute("select key, data from kv where key like ?", + ['%s%%' % key_prefix]) + result = self.cursor.fetchall() + + if not result: + return {} + if not strip: + key_prefix = '' + return dict([ + (k[len(key_prefix):], json.loads(v)) for k, v in result]) + + def update(self, mapping, prefix=""): + """ + Set the values of multiple keys at once. + + :param dict mapping: Mapping of keys to values + :param str prefix: Optional prefix to apply to all keys in `mapping` + before setting + """ + for k, v in mapping.items(): + self.set("%s%s" % (prefix, k), v) + + def unset(self, key): + """ + Remove a key from the database entirely. + """ + self.cursor.execute('delete from kv where key=?', [key]) + if self.keep_revisions and self.revision and self.cursor.rowcount: + self.cursor.execute( + 'insert into kv_revisions values (?, ?, ?)', + [key, self.revision, json.dumps('DELETED')]) + + def unsetrange(self, keys=None, prefix=""): + """ + Remove a range of keys starting with a common prefix, from the database + entirely. + + :param list keys: List of keys to remove. + :param str prefix: Optional prefix to apply to all keys in ``keys`` + before removing. + """ + if keys is not None: + keys = ['%s%s' % (prefix, key) for key in keys] + self.cursor.execute('delete from kv where key in (%s)' % ','.join(['?'] * len(keys)), keys) + if self.keep_revisions and self.revision and self.cursor.rowcount: + self.cursor.execute( + 'insert into kv_revisions values %s' % ','.join(['(?, ?, ?)'] * len(keys)), + list(itertools.chain.from_iterable((key, self.revision, json.dumps('DELETED')) for key in keys))) + else: + self.cursor.execute('delete from kv where key like ?', + ['%s%%' % prefix]) + if self.keep_revisions and self.revision and self.cursor.rowcount: + self.cursor.execute( + 'insert into kv_revisions values (?, ?, ?)', + ['%s%%' % prefix, self.revision, json.dumps('DELETED')]) + + def set(self, key, value): + """ + Set a value in the database. 
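+
+        For example::
+
+            kv().set('seeded', True)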
+ + :param str key: Key to set the value for + :param value: Any JSON-serializable value to be set + """ + serialized = json.dumps(value) + + self.cursor.execute('select data from kv where key=?', [key]) + exists = self.cursor.fetchone() + + # Skip mutations to the same value + if exists: + if exists[0] == serialized: + return value + + if not exists: + self.cursor.execute( + 'insert into kv (key, data) values (?, ?)', + (key, serialized)) + else: + self.cursor.execute(''' + update kv + set data = ? + where key = ?''', [serialized, key]) + + # Save + if (not self.keep_revisions) or (not self.revision): + return value + + self.cursor.execute( + 'select 1 from kv_revisions where key=? and revision=?', + [key, self.revision]) + exists = self.cursor.fetchone() + + if not exists: + self.cursor.execute( + '''insert into kv_revisions ( + revision, key, data) values (?, ?, ?)''', + (self.revision, key, serialized)) + else: + self.cursor.execute( + ''' + update kv_revisions + set data = ? + where key = ? + and revision = ?''', + [serialized, key, self.revision]) + + return value + + def delta(self, mapping, prefix): + """ + return a delta containing values that have changed. + """ + previous = self.getrange(prefix, strip=True) + if not previous: + pk = set() + else: + pk = set(previous.keys()) + ck = set(mapping.keys()) + delta = DeltaSet() + + # added + for k in ck.difference(pk): + delta[k] = Delta(None, mapping[k]) + + # removed + for k in pk.difference(ck): + delta[k] = Delta(previous[k], None) + + # changed + for k in pk.intersection(ck): + c = mapping[k] + p = previous[k] + if c != p: + delta[k] = Delta(p, c) + + return delta + + @contextlib.contextmanager + def hook_scope(self, name=""): + """Scope all future interactions to the current hook execution + revision.""" + assert not self.revision + self.cursor.execute( + 'insert into hooks (hook, date) values (?, ?)', + (name or sys.argv[0], + datetime.datetime.utcnow().isoformat())) + self.revision = self.cursor.lastrowid + try: + yield self.revision + self.revision = None + except Exception: + self.flush(False) + self.revision = None + raise + else: + self.flush() + + def flush(self, save=True): + if save: + self.conn.commit() + elif self._closed: + return + else: + self.conn.rollback() + + def _init(self): + self.cursor.execute(''' + create table if not exists kv ( + key text, + data text, + primary key (key) + )''') + self.cursor.execute(''' + create table if not exists kv_revisions ( + key text, + revision integer, + data text, + primary key (key, revision) + )''') + self.cursor.execute(''' + create table if not exists hooks ( + version integer primary key autoincrement, + hook text, + date text + )''') + self.conn.commit() + + def gethistory(self, key, deserialize=False): + self.cursor.execute( + ''' + select kv.revision, kv.key, kv.data, h.hook, h.date + from kv_revisions kv, + hooks h + where kv.key=? + and kv.revision = h.version + ''', [key]) + if deserialize is False: + return self.cursor.fetchall() + return map(_parse_history, self.cursor.fetchall()) + + def debug(self, fh=sys.stderr): + self.cursor.execute('select * from kv') + pprint.pprint(self.cursor.fetchall(), stream=fh) + self.cursor.execute('select * from kv_revisions') + pprint.pprint(self.cursor.fetchall(), stream=fh) + + +def _parse_history(d): + return (d[0], d[1], json.loads(d[2]), d[3], + datetime.datetime.strptime(d[-1], "%Y-%m-%dT%H:%M:%S.%f")) + + +class HookData(object): + """Simple integration for existing hook exec frameworks. 
+
+    Records all unit information, and stores deltas for processing
+    by the hook.
+
+    Sample::
+
+       from charmhelpers.core import hookenv, unitdata
+
+       changes = unitdata.HookData()
+       db = unitdata.kv()
+       hooks = hookenv.Hooks()
+
+       @hooks.hook
+       def config_changed():
+           # View all changes to configuration
+           for changed, (prev, cur) in changes.conf.items():
+               print('config changed', changed,
+                     'previous value', prev,
+                     'current value', cur)
+
+           # Get some unit specific bookkeeping
+           if not db.get('pkg_key'):
+               key = urllib.request.urlopen('https://example.com/pkg_key').read()
+               db.set('pkg_key', key)
+
+       if __name__ == '__main__':
+           with changes():
+               hooks.execute(sys.argv)
+
+    """
+    def __init__(self):
+        self.kv = kv()
+        self.conf = None
+        self.rels = None
+
+    @contextlib.contextmanager
+    def __call__(self):
+        from charmhelpers.core import hookenv
+        hook_name = hookenv.hook_name()
+
+        with self.kv.hook_scope(hook_name):
+            self._record_charm_version(hookenv.charm_dir())
+            delta_config, delta_relation = self._record_hook(hookenv)
+            yield self.kv, delta_config, delta_relation
+
+    def _record_charm_version(self, charm_dir):
+        # Record revisions: charm revisions are meaningless to charm
+        # authors, as they don't control the revision, so logic dependent
+        # on revision is not particularly useful; however, it is useful
+        # for debugging analysis.
+        charm_rev = open(
+            os.path.join(charm_dir, 'revision')).read().strip()
+        charm_rev = charm_rev or '0'
+        revs = self.kv.get('charm_revisions', [])
+        if charm_rev not in revs:
+            revs.append(charm_rev.strip() or '0')
+            self.kv.set('charm_revisions', revs)
+
+    def _record_hook(self, hookenv):
+        data = hookenv.execution_environment()
+        self.conf = conf_delta = self.kv.delta(data['conf'], 'config')
+        self.rels = rels_delta = self.kv.delta(data['rels'], 'rels')
+        self.kv.set('env', dict(data['env']))
+        self.kv.set('unit', data['unit'])
+        self.kv.set('relid', data.get('relid'))
+        return conf_delta, rels_delta
+
+
+class Record(dict):
+
+    __slots__ = ()
+
+    def __getattr__(self, k):
+        if k in self:
+            return self[k]
+        raise AttributeError(k)
+
+
+class DeltaSet(Record):
+
+    __slots__ = ()
+
+
+Delta = collections.namedtuple('Delta', ['previous', 'current'])
+
+
+_KV = None
+
+
+def kv():
+    global _KV
+
+    # If we are running unit tests, it is useful to use a memory-backed KV
+    # store to avoid concurrency issues when running multiple tests. This
+    # is not a problem when juju is running normally.
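+    #
+    # For example, a test suite could force the in-memory store before the
+    # first call to kv() (hypothetical usage):
+    #
+    #     os.environ['CHARM_HELPERS_TESTMODE'] = 'yes'
+    #     db = unitdata.kv()   # backed by sqlite3 ':memory:'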
+ + env_var = os.environ.get("CHARM_HELPERS_TESTMODE", "auto").lower() + if env_var not in ["auto", "no", "yes"]: + logging.warning("Unknown value for CHARM_HELPERS_TESTMODE '%s'" + ", assuming 'no'", env_var) + env_var = "no" + + if env_var == "no": + in_memory_db = False + elif env_var == "yes": + in_memory_db = True + elif env_var == "auto": + # If UNIT_STATE_DB is set, respect this request + if "UNIT_STATE_DB" in os.environ: + in_memory_db = False + # Autodetect normal juju execution by looking for juju variables + elif "JUJU_CHARM_DIR" in os.environ or "JUJU_UNIT_NAME" in os.environ: + in_memory_db = False + else: + # We are probably running in unit test mode + logging.warning("Auto-detected unit test environment for KV store.") + in_memory_db = True + else: + # Help the linter realise that in_memory_db is always set + raise Exception("Cannot reach this line") + + if _KV is None: + if in_memory_db: + _KV = Storage(":memory:") + else: + _KV = Storage() + else: + if in_memory_db and _KV.db_path != ":memory:": + logging.warning("Running with in_memory_db and KV is not set to :memory:") + return _KV diff --git a/ceph-osd/hooks/charmhelpers/fetch/__init__.py b/ceph-osd/hooks/charmhelpers/fetch/__init__.py new file mode 100644 index 00000000..1283f25b --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/fetch/__init__.py @@ -0,0 +1,208 @@ +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import importlib +from charmhelpers.osplatform import get_platform +from yaml import safe_load +from charmhelpers.core.hookenv import ( + config, + log, +) + +from urllib.parse import urlparse, urlunparse + + +# The order of this list is very important. Handlers should be listed in from +# least- to most-specific URL matching. +FETCH_HANDLERS = ( + 'charmhelpers.fetch.archiveurl.ArchiveUrlFetchHandler', + 'charmhelpers.fetch.bzrurl.BzrUrlFetchHandler', + 'charmhelpers.fetch.giturl.GitUrlFetchHandler', +) + + +class SourceConfigError(Exception): + pass + + +class UnhandledSource(Exception): + pass + + +class AptLockError(Exception): + pass + + +class GPGKeyError(Exception): + """Exception occurs when a GPG key cannot be fetched or used. The message + indicates what the problem is. + """ + pass + + +class BaseFetchHandler(object): + + """Base class for FetchHandler implementations in fetch plugins""" + + def can_handle(self, source): + """Returns True if the source can be handled. Otherwise returns + a string explaining why it cannot""" + return "Wrong source type" + + def install(self, source): + """Try to download and unpack the source. 
Return the path to the
+        unpacked files or raise UnhandledSource."""
+        raise UnhandledSource("Wrong source type {}".format(source))
+
+    def parse_url(self, url):
+        return urlparse(url)
+
+    def base_url(self, url):
+        """Return url without querystring or fragment"""
+        parts = list(self.parse_url(url))
+        parts[4:] = ['' for i in parts[4:]]
+        return urlunparse(parts)
+
+
+__platform__ = get_platform()
+module = "charmhelpers.fetch.%s" % __platform__
+fetch = importlib.import_module(module)
+
+filter_installed_packages = fetch.filter_installed_packages
+filter_missing_packages = fetch.filter_missing_packages
+install = fetch.apt_install
+upgrade = fetch.apt_upgrade
+update = _fetch_update = fetch.apt_update
+purge = fetch.apt_purge
+add_source = fetch.add_source
+
+if __platform__ == "ubuntu":
+    apt_cache = fetch.apt_cache
+    apt_install = fetch.apt_install
+    apt_update = fetch.apt_update
+    apt_upgrade = fetch.apt_upgrade
+    apt_purge = fetch.apt_purge
+    apt_autoremove = fetch.apt_autoremove
+    apt_mark = fetch.apt_mark
+    apt_hold = fetch.apt_hold
+    apt_unhold = fetch.apt_unhold
+    import_key = fetch.import_key
+    get_upstream_version = fetch.get_upstream_version
+    apt_pkg = fetch.ubuntu_apt_pkg
+    get_apt_dpkg_env = fetch.get_apt_dpkg_env
+    get_installed_version = fetch.get_installed_version
+    OPENSTACK_RELEASES = fetch.OPENSTACK_RELEASES
+    UBUNTU_OPENSTACK_RELEASE = fetch.UBUNTU_OPENSTACK_RELEASE
+elif __platform__ == "centos":
+    yum_search = fetch.yum_search
+
+
+def configure_sources(update=False,
+                      sources_var='install_sources',
+                      keys_var='install_keys'):
+    """Configure multiple sources from charm configuration.
+
+    The lists are encoded as yaml fragments in the configuration.
+    The fragment needs to be included as a string. Sources and their
+    corresponding keys are of the types supported by add_source().
+
+    Example config:
+        install_sources: |
+          - "ppa:foo"
+          - "http://example.com/repo precise main"
+        install_keys: |
+          - null
+          - "a1b2c3d4"
+
+    Note that 'null' (a.k.a. None) should not be quoted.
+    """
+    sources = safe_load((config(sources_var) or '').strip()) or []
+    keys = safe_load((config(keys_var) or '').strip()) or None
+
+    if isinstance(sources, str):
+        sources = [sources]
+
+    if keys is None:
+        for source in sources:
+            add_source(source, None)
+    else:
+        if isinstance(keys, str):
+            keys = [keys]
+
+        if len(sources) != len(keys):
+            raise SourceConfigError(
+                'Install sources and keys lists are different lengths')
+        for source, key in zip(sources, keys):
+            add_source(source, key)
+    if update:
+        _fetch_update(fatal=True)
+
+
+def install_remote(source, *args, **kwargs):
+    """Install a file tree from a remote source.
+
+    The specified source should be a url of the form:
+        scheme://[host]/path[#[option=value][&...]]
+
+    Schemes supported are based on this module's submodules.
+    Options supported are submodule-specific.
+    Additional arguments are passed through to the submodule.
+
+    For example::
+
+        dest = install_remote('http://example.com/archive.tgz',
+                              checksum='deadbeef',
+                              hash_type='sha1')
+
+    This will download `archive.tgz`, validate it using SHA1 and, if
+    the file is ok, extract it and return the directory in which it
+    was extracted. If the checksum fails, it will raise
+    :class:`charmhelpers.core.host.ChecksumError`.
+    """
+    # We ONLY check for True here because can_handle may return a string
+    # explaining why it can't handle a given source.
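+    # For example, ArchiveUrlFetchHandler.can_handle() returns the string
+    # "Wrong source type" for URLs it cannot handle; bool() of a non-empty
+    # string is True, so the explicit "is True" comparison below is required.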
+ handlers = [h for h in plugins() if h.can_handle(source) is True] + for handler in handlers: + try: + return handler.install(source, *args, **kwargs) + except UnhandledSource as e: + log('Install source attempt unsuccessful: {}'.format(e), + level='WARNING') + raise UnhandledSource("No handler found for source {}".format(source)) + + +def install_from_config(config_var_name): + """Install a file from config.""" + charm_config = config() + source = charm_config[config_var_name] + return install_remote(source) + + +def plugins(fetch_handlers=None): + if not fetch_handlers: + fetch_handlers = FETCH_HANDLERS + plugin_list = [] + for handler_name in fetch_handlers: + package, classname = handler_name.rsplit('.', 1) + try: + handler_class = getattr( + importlib.import_module(package), + classname) + plugin_list.append(handler_class()) + except NotImplementedError: + # Skip missing plugins so that they can be omitted from + # installation if desired + log("FetchHandler {} not found, skipping plugin".format( + handler_name)) + return plugin_list diff --git a/ceph-osd/hooks/charmhelpers/fetch/archiveurl.py b/ceph-osd/hooks/charmhelpers/fetch/archiveurl.py new file mode 100644 index 00000000..0e35c901 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/fetch/archiveurl.py @@ -0,0 +1,173 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import contextlib +import os +import hashlib +import re + +from charmhelpers.fetch import ( + BaseFetchHandler, + UnhandledSource +) +from charmhelpers.payload.archive import ( + get_archive_handler, + extract, +) +from charmhelpers.core.hookenv import ( + env_proxy_settings, +) +from charmhelpers.core.host import mkdir, check_hash + +from urllib.request import ( + build_opener, install_opener, urlopen, urlretrieve, + HTTPPasswordMgrWithDefaultRealm, HTTPBasicAuthHandler, + ProxyHandler +) +from urllib.parse import urlparse, urlunparse, parse_qs +from urllib.error import URLError + + +def splituser(host): + _userprog = re.compile('^(.*)@(.*)$') + match = _userprog.match(host) + if match: + return match.group(1, 2) + return None, host + + +def splitpasswd(user): + _passwdprog = re.compile('^([^:]*):(.*)$', re.S) + match = _passwdprog.match(user) + if match: + return match.group(1, 2) + return user, None + + +@contextlib.contextmanager +def proxy_env(): + """ + Creates a context which temporarily modifies the proxy settings in os.environ. + """ + restore = {**os.environ} # Copy the current os.environ + juju_proxies = env_proxy_settings() or {} + os.environ.update(**juju_proxies) # Insert or Update the os.environ + yield os.environ + for key in juju_proxies: + del os.environ[key] # remove any keys which were added or updated + os.environ.update(**restore) # restore any original values + + +class ArchiveUrlFetchHandler(BaseFetchHandler): + """ + Handler to download archive files from arbitrary URLs. + + Can fetch from http, https, ftp, and file URLs. + + Can install either tarballs (.tar, .tgz, .tbz2, etc) or zip files. 
+
+    Installs the contents of the archive in $CHARM_DIR/fetched/.
+    """
+    def can_handle(self, source):
+        url_parts = self.parse_url(source)
+        if url_parts.scheme not in ('http', 'https', 'ftp', 'file'):
+            # XXX: Why is this returning a boolean and a string? It's
+            # doomed to fail since "bool(can_handle('foo://'))" will be True.
+            return "Wrong source type"
+        if get_archive_handler(self.base_url(source)):
+            return True
+        return False
+
+    def download(self, source, dest):
+        """
+        Download an archive file.
+
+        :param str source: URL pointing to an archive file.
+        :param str dest: Local path location to download archive file to.
+        """
+        # propagate all exceptions
+        # URLError, OSError, etc
+        proto, netloc, path, params, query, fragment = urlparse(source)
+        handlers = []
+        if proto in ('http', 'https'):
+            auth, barehost = splituser(netloc)
+            if auth is not None:
+                source = urlunparse((proto, barehost, path, params, query, fragment))
+                username, password = splitpasswd(auth)
+                passman = HTTPPasswordMgrWithDefaultRealm()
+                # Realm is set to None in add_password to force the username and password
+                # to be used whatever the realm
+                passman.add_password(None, source, username, password)
+                handlers.append(HTTPBasicAuthHandler(passman))
+
+        with proxy_env():
+            handlers.append(ProxyHandler())
+            opener = build_opener(*handlers)
+            install_opener(opener)
+            response = urlopen(source)
+
+        try:
+            with open(dest, 'wb') as dest_file:
+                dest_file.write(response.read())
+        except Exception as e:
+            if os.path.isfile(dest):
+                os.unlink(dest)
+            raise e
+
+    # Mandatory file validation via SHA-1 or MD5 hashing.
+    def download_and_validate(self, url, hashsum, validate="sha1"):
+        tempfile, headers = urlretrieve(url)
+        check_hash(tempfile, hashsum, validate)
+        return tempfile
+
+    def install(self, source, dest=None, checksum=None, hash_type='sha1'):
+        """
+        Download and install an archive file, with optional checksum validation.
+
+        The checksum can also be given on the `source` URL's fragment.
+        For example::
+
+            handler.install('http://example.com/file.tgz#sha1=deadbeef')
+
+        :param str source: URL pointing to an archive file.
+        :param str dest: Local destination path to install to. If not given,
+            installs to `$CHARM_DIR/archives/archive_file_name`.
+        :param str checksum: If given, validate the archive file after download.
+        :param str hash_type: Algorithm used to generate `checksum`.
+            Can be any hash algorithm supported by :mod:`hashlib`,
+            such as md5, sha1, sha256, sha512, etc.
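+
+            Any algorithm available in :mod:`hashlib` can likewise be
+            supplied in the URL fragment, e.g. (illustrative)::
+
+                handler.install('http://example.com/file.tgz#sha256=0f0f...')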
+ + """ + url_parts = self.parse_url(source) + dest_dir = os.path.join(os.environ.get('CHARM_DIR'), 'fetched') + if not os.path.exists(dest_dir): + mkdir(dest_dir, perms=0o755) + dld_file = os.path.join(dest_dir, os.path.basename(url_parts.path)) + try: + self.download(source, dld_file) + except URLError as e: + raise UnhandledSource(e.reason) + except OSError as e: + raise UnhandledSource(e.strerror) + options = parse_qs(url_parts.fragment) + for key, value in options.items(): + algorithms = hashlib.algorithms_available + if key in algorithms: + if len(value) != 1: + raise TypeError( + "Expected 1 hash value, not %d" % len(value)) + expected = value[0] + check_hash(dld_file, expected, key) + if checksum: + check_hash(dld_file, checksum, hash_type) + return extract(dld_file, dest) diff --git a/ceph-osd/hooks/charmhelpers/fetch/bzrurl.py b/ceph-osd/hooks/charmhelpers/fetch/bzrurl.py new file mode 100644 index 00000000..c4ab3ff1 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/fetch/bzrurl.py @@ -0,0 +1,76 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from subprocess import STDOUT, check_output +from charmhelpers.fetch import ( + BaseFetchHandler, + UnhandledSource, + filter_installed_packages, + install, +) +from charmhelpers.core.host import mkdir + + +if filter_installed_packages(['bzr']) != []: + install(['bzr']) + if filter_installed_packages(['bzr']) != []: + raise NotImplementedError('Unable to install bzr') + + +class BzrUrlFetchHandler(BaseFetchHandler): + """Handler for bazaar branches via generic and lp URLs.""" + + def can_handle(self, source): + url_parts = self.parse_url(source) + if url_parts.scheme not in ('bzr+ssh', 'lp', ''): + return False + elif not url_parts.scheme: + return os.path.exists(os.path.join(source, '.bzr')) + else: + return True + + def branch(self, source, dest, revno=None): + if not self.can_handle(source): + raise UnhandledSource("Cannot handle {}".format(source)) + cmd_opts = [] + if revno: + cmd_opts += ['-r', str(revno)] + if os.path.exists(dest): + cmd = ['bzr', 'pull'] + cmd += cmd_opts + cmd += ['--overwrite', '-d', dest, source] + else: + cmd = ['bzr', 'branch'] + cmd += cmd_opts + cmd += [source, dest] + check_output(cmd, stderr=STDOUT) + + def install(self, source, dest=None, revno=None): + url_parts = self.parse_url(source) + branch_name = url_parts.path.strip("/").split("/")[-1] + if dest: + dest_dir = os.path.join(dest, branch_name) + else: + dest_dir = os.path.join(os.environ.get('CHARM_DIR'), "fetched", + branch_name) + + if dest and not os.path.exists(dest): + mkdir(dest, perms=0o755) + + try: + self.branch(source, dest_dir, revno) + except OSError as e: + raise UnhandledSource(e.strerror) + return dest_dir diff --git a/ceph-osd/hooks/charmhelpers/fetch/centos.py b/ceph-osd/hooks/charmhelpers/fetch/centos.py new file mode 100644 index 00000000..f8492018 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/fetch/centos.py @@ -0,0 +1,170 @@ +# Copyright 2014-2015 Canonical Limited. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import subprocess
+import os
+import time
+import yum
+
+from tempfile import NamedTemporaryFile
+from charmhelpers.core.hookenv import log
+
+YUM_NO_LOCK = 1  # The return code for "couldn't acquire lock" in YUM.
+YUM_NO_LOCK_RETRY_DELAY = 10  # Wait 10 seconds between yum lock checks.
+YUM_NO_LOCK_RETRY_COUNT = 30  # Retry to acquire the lock X times.
+
+
+def filter_installed_packages(packages):
+    """Return a list of packages that require installation."""
+    yb = yum.YumBase()
+    package_list = yb.doPackageLists()
+    temp_cache = {p.base_package_name: 1 for p in package_list['installed']}
+
+    _pkgs = [p for p in packages if not temp_cache.get(p, False)]
+    return _pkgs
+
+
+def install(packages, options=None, fatal=False):
+    """Install one or more packages."""
+    cmd = ['yum', '--assumeyes']
+    if options is not None:
+        cmd.extend(options)
+    cmd.append('install')
+    if isinstance(packages, str):
+        cmd.append(packages)
+    else:
+        cmd.extend(packages)
+    log("Installing {} with options: {}".format(packages,
+                                                options))
+    _run_yum_command(cmd, fatal)
+
+
+def upgrade(options=None, fatal=False, dist=False):
+    """Upgrade all packages."""
+    cmd = ['yum', '--assumeyes']
+    if options is not None:
+        cmd.extend(options)
+    cmd.append('upgrade')
+    log("Upgrading with options: {}".format(options))
+    _run_yum_command(cmd, fatal)
+
+
+def update(fatal=False):
+    """Update local yum cache."""
+    cmd = ['yum', '--assumeyes', 'update']
+    log("Update with fatal: {}".format(fatal))
+    _run_yum_command(cmd, fatal)
+
+
+def purge(packages, fatal=False):
+    """Purge one or more packages."""
+    cmd = ['yum', '--assumeyes', 'remove']
+    if isinstance(packages, str):
+        cmd.append(packages)
+    else:
+        cmd.extend(packages)
+    log("Purging {}".format(packages))
+    _run_yum_command(cmd, fatal)
+
+
+def yum_search(packages):
+    """Search for a package."""
+    output = {}
+    cmd = ['yum', 'search']
+    if isinstance(packages, str):
+        cmd.append(packages)
+    else:
+        cmd.extend(packages)
+    log("Searching for {}".format(packages))
+    # check_output returns bytes on Python 3; decode before the
+    # substring test below, which uses str package names.
+    result = subprocess.check_output(cmd).decode('utf-8')
+    for package in list(packages):
+        output[package] = package in result
+    return output
+
+
+def add_source(source, key=None):
+    """Add a package source to this system.
+
+    @param source: a URL with an rpm package
+
+    @param key: A key to be added to the system's keyring and used
+    to verify the signatures on packages. Ideally, this should be an
+    ASCII format GPG public key including the block headers. A GPG key
+    id may also be used, but be aware that only insecure protocols are
+    available to retrieve the actual public key from a public keyserver
+    placing your Juju environment at risk.
+    """
+    if source is None:
+        log('Source is not present. Skipping')
+        return
+
+    if source.startswith('http'):
+        directory = '/etc/yum.repos.d/'
+        for filename in os.listdir(directory):
+            with open(directory + filename, 'r') as rpm_file:
+                if source in rpm_file.read():
+                    break
+        else:
+            log("Add source: {!r}".format(source))
+            # write in the charms.repo
+            with open(directory + 'Charms.repo', 'a') as rpm_file:
+                rpm_file.write('[%s]\n' % source[7:].replace('/', '_'))
+                rpm_file.write('name=%s\n' % source[7:])
+                rpm_file.write('baseurl=%s\n\n' % source)
+    else:
+        log("Unknown source: {!r}".format(source))
+
+    if key:
+        if '-----BEGIN PGP PUBLIC KEY BLOCK-----' in key:
+            with NamedTemporaryFile('w+') as key_file:
+                key_file.write(key)
+                key_file.flush()
+                key_file.seek(0)
+                subprocess.check_call(['rpm', '--import', key_file.name])
+        else:
+            subprocess.check_call(['rpm', '--import', key])
+
+
+def _run_yum_command(cmd, fatal=False):
+    """Run a YUM command.
+
+    Checks the output and retries if the fatal flag is set to True.
+
+    :param cmd: The yum command to run.
+    :type cmd: list
+    :param fatal: Whether the command's output should be checked and
+                  retried.
+    :type fatal: bool
+    """
+    env = os.environ.copy()
+
+    if fatal:
+        retry_count = 0
+        result = None
+
+        # If the command is considered "fatal", we need to retry if the yum
+        # lock was not acquired.
+
+        while result is None or result == YUM_NO_LOCK:
+            try:
+                result = subprocess.check_call(cmd, env=env)
+            except subprocess.CalledProcessError as e:
+                retry_count = retry_count + 1
+                if retry_count > YUM_NO_LOCK_RETRY_COUNT:
+                    raise
+                result = e.returncode
+                log("Couldn't acquire YUM lock. Will retry in {} seconds."
+                    "".format(YUM_NO_LOCK_RETRY_DELAY))
+                time.sleep(YUM_NO_LOCK_RETRY_DELAY)
+
+    else:
+        subprocess.call(cmd, env=env)
diff --git a/ceph-osd/hooks/charmhelpers/fetch/giturl.py b/ceph-osd/hooks/charmhelpers/fetch/giturl.py
new file mode 100644
index 00000000..070ca9bb
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/fetch/giturl.py
@@ -0,0 +1,69 @@
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
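+#
+# Usage sketch (URL is illustrative; dest defaults to
+# $CHARM_DIR/fetched/<repo-name>):
+#
+#     handler = GitUrlFetchHandler()
+#     path = handler.install('https://github.com/juju/charm-helpers',
+#                            branch='master')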
+ +import os +from subprocess import check_output, CalledProcessError, STDOUT +from charmhelpers.fetch import ( + BaseFetchHandler, + UnhandledSource, + filter_installed_packages, + install, +) + +if filter_installed_packages(['git']) != []: + install(['git']) + if filter_installed_packages(['git']) != []: + raise NotImplementedError('Unable to install git') + + +class GitUrlFetchHandler(BaseFetchHandler): + """Handler for git branches via generic and github URLs.""" + + def can_handle(self, source): + url_parts = self.parse_url(source) + # TODO (mattyw) no support for ssh git@ yet + if url_parts.scheme not in ('http', 'https', 'git', ''): + return False + elif not url_parts.scheme: + return os.path.exists(os.path.join(source, '.git')) + else: + return True + + def clone(self, source, dest, branch="master", depth=None): + if not self.can_handle(source): + raise UnhandledSource("Cannot handle {}".format(source)) + + if os.path.exists(dest): + cmd = ['git', '-C', dest, 'pull', source, branch] + else: + cmd = ['git', 'clone', source, dest, '--branch', branch] + if depth: + cmd.extend(['--depth', depth]) + check_output(cmd, stderr=STDOUT) + + def install(self, source, branch="master", dest=None, depth=None): + url_parts = self.parse_url(source) + branch_name = url_parts.path.strip("/").split("/")[-1] + if dest: + dest_dir = os.path.join(dest, branch_name) + else: + dest_dir = os.path.join(os.environ.get('CHARM_DIR'), "fetched", + branch_name) + try: + self.clone(source, dest_dir, branch, depth) + except CalledProcessError as e: + raise UnhandledSource(e) + except OSError as e: + raise UnhandledSource(e.strerror) + return dest_dir diff --git a/ceph-osd/hooks/charmhelpers/fetch/python/__init__.py b/ceph-osd/hooks/charmhelpers/fetch/python/__init__.py new file mode 100644 index 00000000..bff99dc9 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/fetch/python/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2014-2019 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-osd/hooks/charmhelpers/fetch/python/debug.py b/ceph-osd/hooks/charmhelpers/fetch/python/debug.py new file mode 100644 index 00000000..dd5cca80 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/fetch/python/debug.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python +# coding: utf-8 + +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import atexit
+import sys
+
+from charmhelpers.fetch.python.rpdb import Rpdb
+from charmhelpers.core.hookenv import (
+    open_port,
+    close_port,
+    ERROR,
+    log
+)
+
+__author__ = "Jorge Niedbalski "
+
+DEFAULT_ADDR = "0.0.0.0"
+DEFAULT_PORT = 4444
+
+
+def _error(message):
+    log(message, level=ERROR)
+
+
+def set_trace(addr=DEFAULT_ADDR, port=DEFAULT_PORT):
+    """
+    Set a trace point using the remote debugger
+    """
+    atexit.register(close_port, port)
+    try:
+        log("Starting a remote python debugger session on %s:%s" % (addr,
+                                                                    port))
+        open_port(port)
+        debugger = Rpdb(addr=addr, port=port)
+        debugger.set_trace(sys._getframe().f_back)
+    except Exception:
+        _error("Cannot start a remote debug session on %s:%s" % (addr,
+                                                                 port))
diff --git a/ceph-osd/hooks/charmhelpers/fetch/python/packages.py b/ceph-osd/hooks/charmhelpers/fetch/python/packages.py
new file mode 100644
index 00000000..93f1fa3f
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/fetch/python/packages.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# Copyright 2014-2021 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import subprocess
+import sys
+
+from charmhelpers.fetch import apt_install, apt_update
+from charmhelpers.core.hookenv import charm_dir, log
+
+__author__ = "Jorge Niedbalski "
+
+
+def pip_execute(*args, **kwargs):
+    """Overridden pip_execute() to stop sys.path being changed.
+
+    The act of importing main from the pip module seems to add wheels
+    from /usr/share/python-wheels (installed by various tools) onto
+    sys.path. This function ensures that sys.path remains the same
+    after the call is executed.
+    """
+    try:
+        _path = sys.path
+        try:
+            from pip import main as _pip_execute
+        except ImportError:
+            apt_update()
+            apt_install('python3-pip')
+            from pip import main as _pip_execute
+        _pip_execute(*args, **kwargs)
+    finally:
+        sys.path = _path
+
+
+def parse_options(given, available):
+    """Given a set of options, check if available"""
+    for key, value in sorted(given.items()):
+        if not value:
+            continue
+        if key in available:
+            yield "--{0}={1}".format(key, value)
+
+
+def pip_install_requirements(requirements, constraints=None, **options):
+    """Install a requirements file.
+
+    :param constraints: Path to pip constraints file.
+ http://pip.readthedocs.org/en/stable/user_guide/#constraints-files + """ + command = ["install"] + + available_options = ('proxy', 'src', 'log', ) + for option in parse_options(options, available_options): + command.append(option) + + command.append("-r {0}".format(requirements)) + if constraints: + command.append("-c {0}".format(constraints)) + log("Installing from file: {} with constraints {} " + "and options: {}".format(requirements, constraints, command)) + else: + log("Installing from file: {} with options: {}".format(requirements, + command)) + pip_execute(command) + + +def pip_install(package, fatal=False, upgrade=False, venv=None, + constraints=None, **options): + """Install a python package""" + if venv: + venv_python = os.path.join(venv, 'bin/pip') + command = [venv_python, "install"] + else: + command = ["install"] + + available_options = ('proxy', 'src', 'log', 'index-url', ) + for option in parse_options(options, available_options): + command.append(option) + + if upgrade: + command.append('--upgrade') + + if constraints: + command.extend(['-c', constraints]) + + if isinstance(package, list): + command.extend(package) + else: + command.append(package) + + log("Installing {} package with options: {}".format(package, + command)) + if venv: + subprocess.check_call(command) + else: + pip_execute(command) + + +def pip_uninstall(package, **options): + """Uninstall a python package""" + command = ["uninstall", "-q", "-y"] + + available_options = ('proxy', 'log', ) + for option in parse_options(options, available_options): + command.append(option) + + if isinstance(package, list): + command.extend(package) + else: + command.append(package) + + log("Uninstalling {} package with options: {}".format(package, + command)) + pip_execute(command) + + +def pip_list(): + """Returns the list of current python installed packages + """ + return pip_execute(["list"]) + + +def pip_create_virtualenv(path=None): + """Create an isolated Python environment.""" + apt_install(['python3-virtualenv', 'virtualenv']) + extra_flags = ['--python=python3'] + + if path: + venv_path = path + else: + venv_path = os.path.join(charm_dir(), 'venv') + + if not os.path.exists(venv_path): + subprocess.check_call(['virtualenv', venv_path] + extra_flags) diff --git a/ceph-osd/hooks/charmhelpers/fetch/python/rpdb.py b/ceph-osd/hooks/charmhelpers/fetch/python/rpdb.py new file mode 100644 index 00000000..9b31610c --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/fetch/python/rpdb.py @@ -0,0 +1,56 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
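+#
+# Usage sketch (port and address are illustrative; see debug.set_trace for
+# the charm-aware wrapper):
+#
+#     from charmhelpers.fetch.python.rpdb import Rpdb
+#     Rpdb(addr="0.0.0.0", port=4444).set_trace()
+#
+# A client can then attach with e.g. `nc <unit-address> 4444`.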
+ +"""Remote Python Debugger (pdb wrapper).""" + +import pdb +import socket +import sys + +__author__ = "Bertrand Janin " +__version__ = "0.1.3" + + +class Rpdb(pdb.Pdb): + + def __init__(self, addr="127.0.0.1", port=4444): + """Initialize the socket and initialize pdb.""" + + # Backup stdin and stdout before replacing them by the socket handle + self.old_stdout = sys.stdout + self.old_stdin = sys.stdin + + # Open a 'reusable' socket to let the webapp reload on the same port + self.skt = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.skt.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, True) + self.skt.bind((addr, port)) + self.skt.listen(1) + (clientsocket, address) = self.skt.accept() + handle = clientsocket.makefile('rw') + pdb.Pdb.__init__(self, completekey='tab', stdin=handle, stdout=handle) + sys.stdout = sys.stdin = handle + + def shutdown(self): + """Revert stdin and stdout, close the socket.""" + sys.stdout = self.old_stdout + sys.stdin = self.old_stdin + self.skt.close() + self.set_continue() + + def do_continue(self, arg): + """Stop all operation on ``continue``.""" + self.shutdown() + return 1 + + do_EOF = do_quit = do_exit = do_c = do_cont = do_continue diff --git a/ceph-osd/hooks/charmhelpers/fetch/python/version.py b/ceph-osd/hooks/charmhelpers/fetch/python/version.py new file mode 100644 index 00000000..3eb42103 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/fetch/python/version.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python +# coding: utf-8 + +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys + +__author__ = "Jorge Niedbalski " + + +def current_version(): + """Current system python version""" + return sys.version_info + + +def current_version_string(): + """Current system python version as string major.minor.micro""" + return "{0}.{1}.{2}".format(sys.version_info.major, + sys.version_info.minor, + sys.version_info.micro) diff --git a/ceph-osd/hooks/charmhelpers/fetch/snap.py b/ceph-osd/hooks/charmhelpers/fetch/snap.py new file mode 100644 index 00000000..7ab7ce3e --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/fetch/snap.py @@ -0,0 +1,150 @@ +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Charm helpers snap for classic charms. 
+
+If writing reactive charms, use the snap layer:
+https://lists.ubuntu.com/archives/snapcraft/2016-September/001114.html
+"""
+import subprocess
+import os
+from time import sleep
+from charmhelpers.core.hookenv import log
+
+__author__ = 'Joseph Borg '
+
+# The return code for "couldn't acquire lock" in Snap
+# (hopefully this will be improved).
+SNAP_NO_LOCK = 1
+SNAP_NO_LOCK_RETRY_DELAY = 10  # Wait X seconds between Snap lock checks.
+SNAP_NO_LOCK_RETRY_COUNT = 30  # Retry to acquire the lock X times.
+SNAP_CHANNELS = [
+    'edge',
+    'beta',
+    'candidate',
+    'stable',
+]
+
+
+class CouldNotAcquireLockException(Exception):
+    pass
+
+
+class InvalidSnapChannel(Exception):
+    pass
+
+
+def _snap_exec(commands):
+    """
+    Execute snap commands.
+
+    :param commands: List commands
+    :return: Integer exit code
+    """
+    assert isinstance(commands, list)
+
+    retry_count = 0
+    return_code = None
+
+    while return_code is None or return_code == SNAP_NO_LOCK:
+        try:
+            return_code = subprocess.check_call(['snap'] + commands,
+                                                env=os.environ)
+        except subprocess.CalledProcessError as e:
+            retry_count += 1
+            if retry_count > SNAP_NO_LOCK_RETRY_COUNT:
+                raise CouldNotAcquireLockException(
+                    'Could not acquire lock after {} attempts'
+                    .format(SNAP_NO_LOCK_RETRY_COUNT))
+            return_code = e.returncode
+            log('Snap failed to acquire lock, trying again in {} seconds.'
+                .format(SNAP_NO_LOCK_RETRY_DELAY), level='WARN')
+            sleep(SNAP_NO_LOCK_RETRY_DELAY)
+
+    return return_code
+
+
+def snap_install(packages, *flags):
+    """
+    Install a snap package.
+
+    :param packages: String or List String package name
+    :param flags: List String flags to pass to install command
+    :return: Integer return code from snap
+    """
+    if type(packages) is not list:
+        packages = [packages]
+
+    flags = list(flags)
+
+    message = 'Installing snap(s) "%s"' % ', '.join(packages)
+    if flags:
+        message += ' with option(s) "%s"' % ', '.join(flags)
+
+    log(message, level='INFO')
+    return _snap_exec(['install'] + flags + packages)
+
+
+def snap_remove(packages, *flags):
+    """
+    Remove a snap package.
+
+    :param packages: String or List String package name
+    :param flags: List String flags to pass to remove command
+    :return: Integer return code from snap
+    """
+    if type(packages) is not list:
+        packages = [packages]
+
+    flags = list(flags)
+
+    message = 'Removing snap(s) "%s"' % ', '.join(packages)
+    if flags:
+        message += ' with options "%s"' % ', '.join(flags)
+
+    log(message, level='INFO')
+    return _snap_exec(['remove'] + flags + packages)
+
+
+def snap_refresh(packages, *flags):
+    """
+    Refresh / Update snap package.
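+
+    For example (the channel flag is illustrative)::
+
+        snap_refresh('lxd', '--channel=5.21/stable')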
+
+    :param packages: String or List String package name
+    :param flags: List String flags to pass to refresh command
+    :return: Integer return code from snap
+    """
+    if type(packages) is not list:
+        packages = [packages]
+
+    flags = list(flags)
+
+    message = 'Refreshing snap(s) "%s"' % ', '.join(packages)
+    if flags:
+        message += ' with options "%s"' % ', '.join(flags)
+
+    log(message, level='INFO')
+    return _snap_exec(['refresh'] + flags + packages)
+
+
+def valid_snap_channel(channel):
+    """ Validate snap channel exists
+
+    :raises InvalidSnapChannel: When channel does not exist
+    :return: Boolean
+    """
+    if channel.lower() in SNAP_CHANNELS:
+        return True
+    else:
+        raise InvalidSnapChannel("Invalid Snap Channel: {}".format(channel))
diff --git a/ceph-osd/hooks/charmhelpers/fetch/ubuntu.py b/ceph-osd/hooks/charmhelpers/fetch/ubuntu.py
new file mode 100644
index 00000000..d0089eb7
--- /dev/null
+++ b/ceph-osd/hooks/charmhelpers/fetch/ubuntu.py
@@ -0,0 +1,1061 @@
+# Copyright 2014-2021 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from collections import OrderedDict
+import platform
+import re
+import subprocess
+import sys
+import time
+
+from charmhelpers import deprecate
+from charmhelpers.core.host import get_distrib_codename, get_system_env
+
+from charmhelpers.core.hookenv import (
+    log,
+    DEBUG,
+    WARNING,
+    env_proxy_settings,
+)
+from charmhelpers.fetch import SourceConfigError, GPGKeyError
+from charmhelpers.fetch import ubuntu_apt_pkg
+
+PROPOSED_POCKET = (
+    "# Proposed\n"
+    "deb http://archive.ubuntu.com/ubuntu {}-proposed main universe "
+    "multiverse restricted\n")
+PROPOSED_PORTS_POCKET = (
+    "# Proposed\n"
+    "deb http://ports.ubuntu.com/ubuntu-ports {}-proposed main universe "
+    "multiverse restricted\n")
+# Only x86_64, ppc64le, aarch64 and s390x are supported at the moment.
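+# Keys match machine architecture strings as returned by platform.machine().
+# An illustrative lookup:
+#
+#     ARCH_TO_PROPOSED_POCKET['aarch64'].format('noble')
+#     # -> "# Proposed\ndeb http://ports.ubuntu.com/ubuntu-ports noble-proposed ..."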
+ARCH_TO_PROPOSED_POCKET = { + 'x86_64': PROPOSED_POCKET, + 'ppc64le': PROPOSED_PORTS_POCKET, + 'aarch64': PROPOSED_PORTS_POCKET, + 's390x': PROPOSED_PORTS_POCKET, +} +CLOUD_ARCHIVE_URL = "http://ubuntu-cloud.archive.canonical.com/ubuntu" +CLOUD_ARCHIVE_KEY_ID = '5EDB1B62EC4926EA' +CLOUD_ARCHIVE = """# Ubuntu Cloud Archive +deb http://ubuntu-cloud.archive.canonical.com/ubuntu {} main +""" +CLOUD_ARCHIVE_POCKETS = { + # Folsom + 'folsom': 'precise-updates/folsom', + 'folsom/updates': 'precise-updates/folsom', + 'precise-folsom': 'precise-updates/folsom', + 'precise-folsom/updates': 'precise-updates/folsom', + 'precise-updates/folsom': 'precise-updates/folsom', + 'folsom/proposed': 'precise-proposed/folsom', + 'precise-folsom/proposed': 'precise-proposed/folsom', + 'precise-proposed/folsom': 'precise-proposed/folsom', + # Grizzly + 'grizzly': 'precise-updates/grizzly', + 'grizzly/updates': 'precise-updates/grizzly', + 'precise-grizzly': 'precise-updates/grizzly', + 'precise-grizzly/updates': 'precise-updates/grizzly', + 'precise-updates/grizzly': 'precise-updates/grizzly', + 'grizzly/proposed': 'precise-proposed/grizzly', + 'precise-grizzly/proposed': 'precise-proposed/grizzly', + 'precise-proposed/grizzly': 'precise-proposed/grizzly', + # Havana + 'havana': 'precise-updates/havana', + 'havana/updates': 'precise-updates/havana', + 'precise-havana': 'precise-updates/havana', + 'precise-havana/updates': 'precise-updates/havana', + 'precise-updates/havana': 'precise-updates/havana', + 'havana/proposed': 'precise-proposed/havana', + 'precise-havana/proposed': 'precise-proposed/havana', + 'precise-proposed/havana': 'precise-proposed/havana', + # Icehouse + 'icehouse': 'precise-updates/icehouse', + 'icehouse/updates': 'precise-updates/icehouse', + 'precise-icehouse': 'precise-updates/icehouse', + 'precise-icehouse/updates': 'precise-updates/icehouse', + 'precise-updates/icehouse': 'precise-updates/icehouse', + 'icehouse/proposed': 'precise-proposed/icehouse', + 'precise-icehouse/proposed': 'precise-proposed/icehouse', + 'precise-proposed/icehouse': 'precise-proposed/icehouse', + # Juno + 'juno': 'trusty-updates/juno', + 'juno/updates': 'trusty-updates/juno', + 'trusty-juno': 'trusty-updates/juno', + 'trusty-juno/updates': 'trusty-updates/juno', + 'trusty-updates/juno': 'trusty-updates/juno', + 'juno/proposed': 'trusty-proposed/juno', + 'trusty-juno/proposed': 'trusty-proposed/juno', + 'trusty-proposed/juno': 'trusty-proposed/juno', + # Kilo + 'kilo': 'trusty-updates/kilo', + 'kilo/updates': 'trusty-updates/kilo', + 'trusty-kilo': 'trusty-updates/kilo', + 'trusty-kilo/updates': 'trusty-updates/kilo', + 'trusty-updates/kilo': 'trusty-updates/kilo', + 'kilo/proposed': 'trusty-proposed/kilo', + 'trusty-kilo/proposed': 'trusty-proposed/kilo', + 'trusty-proposed/kilo': 'trusty-proposed/kilo', + # Liberty + 'liberty': 'trusty-updates/liberty', + 'liberty/updates': 'trusty-updates/liberty', + 'trusty-liberty': 'trusty-updates/liberty', + 'trusty-liberty/updates': 'trusty-updates/liberty', + 'trusty-updates/liberty': 'trusty-updates/liberty', + 'liberty/proposed': 'trusty-proposed/liberty', + 'trusty-liberty/proposed': 'trusty-proposed/liberty', + 'trusty-proposed/liberty': 'trusty-proposed/liberty', + # Mitaka + 'mitaka': 'trusty-updates/mitaka', + 'mitaka/updates': 'trusty-updates/mitaka', + 'trusty-mitaka': 'trusty-updates/mitaka', + 'trusty-mitaka/updates': 'trusty-updates/mitaka', + 'trusty-updates/mitaka': 'trusty-updates/mitaka', + 'mitaka/proposed': 'trusty-proposed/mitaka', + 
'trusty-mitaka/proposed': 'trusty-proposed/mitaka', + 'trusty-proposed/mitaka': 'trusty-proposed/mitaka', + # Newton + 'newton': 'xenial-updates/newton', + 'newton/updates': 'xenial-updates/newton', + 'xenial-newton': 'xenial-updates/newton', + 'xenial-newton/updates': 'xenial-updates/newton', + 'xenial-updates/newton': 'xenial-updates/newton', + 'newton/proposed': 'xenial-proposed/newton', + 'xenial-newton/proposed': 'xenial-proposed/newton', + 'xenial-proposed/newton': 'xenial-proposed/newton', + # Ocata + 'ocata': 'xenial-updates/ocata', + 'ocata/updates': 'xenial-updates/ocata', + 'xenial-ocata': 'xenial-updates/ocata', + 'xenial-ocata/updates': 'xenial-updates/ocata', + 'xenial-updates/ocata': 'xenial-updates/ocata', + 'ocata/proposed': 'xenial-proposed/ocata', + 'xenial-ocata/proposed': 'xenial-proposed/ocata', + 'xenial-proposed/ocata': 'xenial-proposed/ocata', + # Pike + 'pike': 'xenial-updates/pike', + 'xenial-pike': 'xenial-updates/pike', + 'xenial-pike/updates': 'xenial-updates/pike', + 'xenial-updates/pike': 'xenial-updates/pike', + 'pike/proposed': 'xenial-proposed/pike', + 'xenial-pike/proposed': 'xenial-proposed/pike', + 'xenial-proposed/pike': 'xenial-proposed/pike', + # Queens + 'queens': 'xenial-updates/queens', + 'xenial-queens': 'xenial-updates/queens', + 'xenial-queens/updates': 'xenial-updates/queens', + 'xenial-updates/queens': 'xenial-updates/queens', + 'queens/proposed': 'xenial-proposed/queens', + 'xenial-queens/proposed': 'xenial-proposed/queens', + 'xenial-proposed/queens': 'xenial-proposed/queens', + # Rocky + 'rocky': 'bionic-updates/rocky', + 'bionic-rocky': 'bionic-updates/rocky', + 'bionic-rocky/updates': 'bionic-updates/rocky', + 'bionic-updates/rocky': 'bionic-updates/rocky', + 'rocky/proposed': 'bionic-proposed/rocky', + 'bionic-rocky/proposed': 'bionic-proposed/rocky', + 'bionic-proposed/rocky': 'bionic-proposed/rocky', + # Stein + 'stein': 'bionic-updates/stein', + 'bionic-stein': 'bionic-updates/stein', + 'bionic-stein/updates': 'bionic-updates/stein', + 'bionic-updates/stein': 'bionic-updates/stein', + 'stein/proposed': 'bionic-proposed/stein', + 'bionic-stein/proposed': 'bionic-proposed/stein', + 'bionic-proposed/stein': 'bionic-proposed/stein', + # Train + 'train': 'bionic-updates/train', + 'bionic-train': 'bionic-updates/train', + 'bionic-train/updates': 'bionic-updates/train', + 'bionic-updates/train': 'bionic-updates/train', + 'train/proposed': 'bionic-proposed/train', + 'bionic-train/proposed': 'bionic-proposed/train', + 'bionic-proposed/train': 'bionic-proposed/train', + # Ussuri + 'ussuri': 'bionic-updates/ussuri', + 'bionic-ussuri': 'bionic-updates/ussuri', + 'bionic-ussuri/updates': 'bionic-updates/ussuri', + 'bionic-updates/ussuri': 'bionic-updates/ussuri', + 'ussuri/proposed': 'bionic-proposed/ussuri', + 'bionic-ussuri/proposed': 'bionic-proposed/ussuri', + 'bionic-proposed/ussuri': 'bionic-proposed/ussuri', + # Victoria + 'victoria': 'focal-updates/victoria', + 'focal-victoria': 'focal-updates/victoria', + 'focal-victoria/updates': 'focal-updates/victoria', + 'focal-updates/victoria': 'focal-updates/victoria', + 'victoria/proposed': 'focal-proposed/victoria', + 'focal-victoria/proposed': 'focal-proposed/victoria', + 'focal-proposed/victoria': 'focal-proposed/victoria', + # Wallaby + 'wallaby': 'focal-updates/wallaby', + 'focal-wallaby': 'focal-updates/wallaby', + 'focal-wallaby/updates': 'focal-updates/wallaby', + 'focal-updates/wallaby': 'focal-updates/wallaby', + 'wallaby/proposed': 'focal-proposed/wallaby', + 'focal-wallaby/proposed': 
'focal-proposed/wallaby', + 'focal-proposed/wallaby': 'focal-proposed/wallaby', + # Xena + 'xena': 'focal-updates/xena', + 'focal-xena': 'focal-updates/xena', + 'focal-xena/updates': 'focal-updates/xena', + 'focal-updates/xena': 'focal-updates/xena', + 'xena/proposed': 'focal-proposed/xena', + 'focal-xena/proposed': 'focal-proposed/xena', + 'focal-proposed/xena': 'focal-proposed/xena', + # Yoga + 'yoga': 'focal-updates/yoga', + 'focal-yoga': 'focal-updates/yoga', + 'focal-yoga/updates': 'focal-updates/yoga', + 'focal-updates/yoga': 'focal-updates/yoga', + 'yoga/proposed': 'focal-proposed/yoga', + 'focal-yoga/proposed': 'focal-proposed/yoga', + 'focal-proposed/yoga': 'focal-proposed/yoga', + # Zed + 'zed': 'jammy-updates/zed', + 'jammy-zed': 'jammy-updates/zed', + 'jammy-zed/updates': 'jammy-updates/zed', + 'jammy-updates/zed': 'jammy-updates/zed', + 'zed/proposed': 'jammy-proposed/zed', + 'jammy-zed/proposed': 'jammy-proposed/zed', + 'jammy-proposed/zed': 'jammy-proposed/zed', + # antelope + 'antelope': 'jammy-updates/antelope', + 'jammy-antelope': 'jammy-updates/antelope', + 'jammy-antelope/updates': 'jammy-updates/antelope', + 'jammy-updates/antelope': 'jammy-updates/antelope', + 'antelope/proposed': 'jammy-proposed/antelope', + 'jammy-antelope/proposed': 'jammy-proposed/antelope', + 'jammy-proposed/antelope': 'jammy-proposed/antelope', + # bobcat + 'bobcat': 'jammy-updates/bobcat', + 'jammy-bobcat': 'jammy-updates/bobcat', + 'jammy-bobcat/updates': 'jammy-updates/bobcat', + 'jammy-updates/bobcat': 'jammy-updates/bobcat', + 'bobcat/proposed': 'jammy-proposed/bobcat', + 'jammy-bobcat/proposed': 'jammy-proposed/bobcat', + 'jammy-proposed/bobcat': 'jammy-proposed/bobcat', + # caracal + 'caracal': 'jammy-updates/caracal', + 'jammy-caracal': 'jammy-updates/caracal', + 'jammy-caracal/updates': 'jammy-updates/caracal', + 'jammy-updates/caracal': 'jammy-updates/caracal', + 'caracal/proposed': 'jammy-proposed/caracal', + 'jammy-caracal/proposed': 'jammy-proposed/caracal', + 'jammy-proposed/caracal': 'jammy-proposed/caracal', + + # OVN + 'focal-ovn-22.03': 'focal-updates/ovn-22.03', + 'focal-ovn-22.03/proposed': 'focal-proposed/ovn-22.03', +} + + +OPENSTACK_RELEASES = ( + 'diablo', + 'essex', + 'folsom', + 'grizzly', + 'havana', + 'icehouse', + 'juno', + 'kilo', + 'liberty', + 'mitaka', + 'newton', + 'ocata', + 'pike', + 'queens', + 'rocky', + 'stein', + 'train', + 'ussuri', + 'victoria', + 'wallaby', + 'xena', + 'yoga', + 'zed', + 'antelope', + 'bobcat', + 'caracal', +) + + +UBUNTU_OPENSTACK_RELEASE = OrderedDict([ + ('oneiric', 'diablo'), + ('precise', 'essex'), + ('quantal', 'folsom'), + ('raring', 'grizzly'), + ('saucy', 'havana'), + ('trusty', 'icehouse'), + ('utopic', 'juno'), + ('vivid', 'kilo'), + ('wily', 'liberty'), + ('xenial', 'mitaka'), + ('yakkety', 'newton'), + ('zesty', 'ocata'), + ('artful', 'pike'), + ('bionic', 'queens'), + ('cosmic', 'rocky'), + ('disco', 'stein'), + ('eoan', 'train'), + ('focal', 'ussuri'), + ('groovy', 'victoria'), + ('hirsute', 'wallaby'), + ('impish', 'xena'), + ('jammy', 'yoga'), + ('kinetic', 'zed'), + ('lunar', 'antelope'), + ('mantic', 'bobcat'), + ('noble', 'caracal'), +]) + + +APT_NO_LOCK = 100 # The return code for "couldn't acquire lock" in APT. +CMD_RETRY_DELAY = 10 # Wait 10 seconds between command retries. +CMD_RETRY_COUNT = 10 # Retry a failing fatal command X times. 
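+# A typical consumer pattern for the helpers below (illustrative only):
+#
+#     missing = filter_installed_packages(['ceph', 'gdisk'])
+#     if missing:
+#         apt_install(missing, fatal=True)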
+
+
+def filter_installed_packages(packages):
+    """Return a list of packages that require installation."""
+    cache = apt_cache()
+    _pkgs = []
+    for package in packages:
+        try:
+            p = cache[package]
+            p.current_ver or _pkgs.append(package)
+        except KeyError:
+            log('Package {} has no installation candidate.'.format(package),
+                level='WARNING')
+            _pkgs.append(package)
+    return _pkgs
+
+
+def filter_missing_packages(packages):
+    """Return a list of packages that are installed.
+
+    :param packages: list of packages to evaluate.
+    :returns list: Packages that are installed.
+    """
+    return list(
+        set(packages) -
+        set(filter_installed_packages(packages))
+    )
+
+
+def apt_cache(*_, **__):
+    """Shim returning an object simulating the apt_pkg Cache.
+
+    :param _: Accept arguments for compatibility, not used.
+    :type _: any
+    :param __: Accept keyword arguments for compatibility, not used.
+    :type __: any
+    :returns: Object used to interrogate the system apt and dpkg databases.
+    :rtype: ubuntu_apt_pkg.Cache
+    """
+    if 'apt_pkg' in sys.modules:
+        # NOTE(fnordahl): When consumers use the upstream ``apt_pkg`` module
+        # in conjunction with the apt_cache helper function, they may expect
+        # us to call ``apt_pkg.init()`` for them.
+        #
+        # Detect this situation, log a warning and make the call to
+        # ``apt_pkg.init()`` to prevent the consumer's Python interpreter
+        # from crashing with a segmentation fault.
+        @deprecate(
+            'Support for use of upstream ``apt_pkg`` module in conjunction '
+            'with charm-helpers is deprecated since 2019-06-25',
+            date=None, log=lambda x: log(x, level=WARNING))
+        def one_shot_log():
+            pass
+
+        one_shot_log()
+        sys.modules['apt_pkg'].init()
+    return ubuntu_apt_pkg.Cache()
+
+
+def apt_install(packages, options=None, fatal=False, quiet=False):
+    """Install one or more packages.
+
+    :param packages: Package(s) to install
+    :type packages: Option[str, List[str]]
+    :param options: Options to pass on to apt-get
+    :type options: Option[None, List[str]]
+    :param fatal: Whether the command's output should be checked and
+                  retried.
+    :type fatal: bool
+    :param quiet: if True, suppress log message to stdout/stderr
+                  (default: False)
+    :type quiet: bool
+    :raises: subprocess.CalledProcessError
+    """
+    if not packages:
+        log("Nothing to install", level=DEBUG)
+        return
+    if options is None:
+        options = ['--option=Dpkg::Options::=--force-confold']
+
+    cmd = ['apt-get', '--assume-yes']
+    cmd.extend(options)
+    cmd.append('install')
+    if isinstance(packages, str):
+        cmd.append(packages)
+    else:
+        cmd.extend(packages)
+    if not quiet:
+        log("Installing {} with options: {}"
+            .format(packages, options))
+    _run_apt_command(cmd, fatal, quiet=quiet)
+
+
+def apt_upgrade(options=None, fatal=False, dist=False):
+    """Upgrade all packages.
+
+    :param options: Options to pass on to apt-get
+    :type options: Option[None, List[str]]
+    :param fatal: Whether the command's output should be checked and
+                  retried.
+    :type fatal: bool
+    :param dist: Whether ``dist-upgrade`` should be used over ``upgrade``
+    :type dist: bool
+    :raises: subprocess.CalledProcessError
+    """
+    if options is None:
+        options = ['--option=Dpkg::Options::=--force-confold']
+
+    cmd = ['apt-get', '--assume-yes']
+    cmd.extend(options)
+    if dist:
+        cmd.append('dist-upgrade')
+    else:
+        cmd.append('upgrade')
+    log("Upgrading with options: {}".format(options))
+    _run_apt_command(cmd, fatal)
+
+
+def apt_update(fatal=False):
+    """Update local apt cache."""
+    cmd = ['apt-get', 'update']
+    _run_apt_command(cmd, fatal)
+
+
+def apt_purge(packages, fatal=False):
+    """Purge one or more packages.
+
+    :param packages: Package(s) to purge
+    :type packages: Option[str, List[str]]
+    :param fatal: Whether the command's output should be checked and
+                  retried.
+    :type fatal: bool
+    :raises: subprocess.CalledProcessError
+    """
+    cmd = ['apt-get', '--assume-yes', 'purge']
+    if isinstance(packages, str):
+        cmd.append(packages)
+    else:
+        cmd.extend(packages)
+    log("Purging {}".format(packages))
+    _run_apt_command(cmd, fatal)
+
+
+def apt_autoremove(purge=True, fatal=False):
+    """Remove packages that are no longer required.
+
+    :param purge: Whether the ``--purge`` option should be passed on or not.
+    :type purge: bool
+    :param fatal: Whether the command's output should be checked and
+                  retried.
+    :type fatal: bool
+    :raises: subprocess.CalledProcessError
+    """
+    cmd = ['apt-get', '--assume-yes', 'autoremove']
+    if purge:
+        cmd.append('--purge')
+    _run_apt_command(cmd, fatal)
+
+
+def apt_mark(packages, mark, fatal=False):
+    """Flag one or more packages using apt-mark."""
+    log("Marking {} as {}".format(packages, mark))
+    cmd = ['apt-mark', mark]
+    if isinstance(packages, str):
+        cmd.append(packages)
+    else:
+        cmd.extend(packages)
+
+    if fatal:
+        subprocess.check_call(cmd, universal_newlines=True)
+    else:
+        subprocess.call(cmd, universal_newlines=True)
+
+
+def apt_hold(packages, fatal=False):
+    return apt_mark(packages, 'hold', fatal=fatal)
+
+
+def apt_unhold(packages, fatal=False):
+    return apt_mark(packages, 'unhold', fatal=fatal)
+
+
+def import_key(key):
+    """Import an ASCII Armor key.
+
+    A Radix64 format keyid is also supported for backwards
+    compatibility. In this case Ubuntu keyserver will be
+    queried for a key via HTTPS by its keyid. This method
+    is less preferable because https proxy servers may
+    require traffic decryption which is equivalent to a
+    man-in-the-middle attack (a proxy server impersonates
+    keyserver TLS certificates and has to be explicitly
+    trusted by the system).
+
+    :param key: A GPG key in ASCII armor format,
+                including BEGIN and END markers or a keyid.
+    :type key: (bytes, str)
+    :raises: GPGKeyError if the key could not be imported
+    """
+    key = key.strip()
+    if '-' in key or '\n' in key:
+        # Send everything not obviously a keyid to GPG to import, as
+        # we trust its validation better than our own. eg. handling
+        # comments before the key.
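+        # An ASCII-armored key has this general shape (abridged):
+        #
+        #     -----BEGIN PGP PUBLIC KEY BLOCK-----
+        #     <base64-encoded key material>
+        #     -----END PGP PUBLIC KEY BLOCK-----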
+
+
+def import_key(key):
+    """Import an ASCII Armor key.
+
+    A Radix64 format keyid is also supported for backwards
+    compatibility. In this case the Ubuntu keyserver will be
+    queried for a key via HTTPS by its keyid. This method
+    is less preferable because https proxy servers may
+    require traffic decryption which is equivalent to a
+    man-in-the-middle attack (a proxy server impersonates
+    keyserver TLS certificates and has to be explicitly
+    trusted by the system).
+
+    :param key: A GPG key in ASCII armor format,
+                including BEGIN and END markers or a keyid.
+    :type key: (bytes, str)
+    :raises: GPGKeyError if the key could not be imported
+    """
+    key = key.strip()
+    if '-' in key or '\n' in key:
+        # Send everything not obviously a keyid to GPG to import, as
+        # we trust its validation better than our own, e.g. handling
+        # comments before the key.
+        log("PGP key found (looks like ASCII Armor format)", level=DEBUG)
+        if ('-----BEGIN PGP PUBLIC KEY BLOCK-----' in key and
+                '-----END PGP PUBLIC KEY BLOCK-----' in key):
+            log("Writing provided PGP key in the binary format", level=DEBUG)
+            key_bytes = key.encode('utf-8')
+            key_name = _get_keyid_by_gpg_key(key_bytes)
+            key_gpg = _dearmor_gpg_key(key_bytes)
+            _write_apt_gpg_keyfile(key_name=key_name, key_material=key_gpg)
+        else:
+            raise GPGKeyError("ASCII armor markers missing from GPG key")
+    else:
+        log("PGP key found (looks like Radix64 format)", level=WARNING)
+        log("SECURELY importing PGP key from keyserver; "
+            "full key not provided.", level=WARNING)
+        # As of bionic, add-apt-repository uses curl with an HTTPS keyserver
+        # URL to retrieve GPG keys. The `apt-key adv` command is deprecated,
+        # as is apt-key in general, as noted in its manpage. See lp:1433761
+        # for more history. Instead, /etc/apt/trusted.gpg.d is used directly
+        # to drop gpg keys.
+        key_asc = _get_key_by_keyid(key)
+        # write the key in GPG format so that apt-key list shows it
+        key_gpg = _dearmor_gpg_key(key_asc)
+        _write_apt_gpg_keyfile(key_name=key, key_material=key_gpg)
+
+
+def _get_keyid_by_gpg_key(key_material):
+    """Get a GPG key fingerprint by GPG key material.
+
+    Gets a GPG key fingerprint (40-digit, 160-bit) by the ASCII armor-encoded
+    or binary GPG key material. Can be used, for example, to generate file
+    names for keys passed via charm options.
+
+    :param key_material: ASCII armor-encoded or binary GPG key material
+    :type key_material: bytes
+    :raises: GPGKeyError if invalid key material has been provided
+    :returns: A GPG key fingerprint
+    :rtype: str
+    """
+    # Use the same gpg command for both Xenial and Bionic
+    cmd = 'gpg --with-colons --with-fingerprint'
+    ps = subprocess.Popen(cmd.split(),
+                          stdout=subprocess.PIPE,
+                          stderr=subprocess.PIPE,
+                          stdin=subprocess.PIPE)
+    out, err = ps.communicate(input=key_material)
+    out = out.decode('utf-8')
+    err = err.decode('utf-8')
+    if 'gpg: no valid OpenPGP data found.' in err:
+        raise GPGKeyError('Invalid GPG key material provided')
+    # from gnupg2 docs: fpr :: Fingerprint (fingerprint is in field 10)
+    return re.search(r"^fpr:{9}([0-9A-F]{40}):$", out, re.MULTILINE).group(1)
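+
+
+# NOTE: illustrative sketch, not part of the upstream module; the key
+# material and keyid below are placeholders, not real keys.
+def _example_import_key():
+    # Full ASCII-armored key: dearmored locally and written to
+    # /etc/apt/trusted.gpg.d/<fingerprint>.gpg; no network access needed.
+    # (Real armored key material is required for this to succeed.)
+    import_key('-----BEGIN PGP PUBLIC KEY BLOCK-----\n'
+               '...placeholder key material...\n'
+               '-----END PGP PUBLIC KEY BLOCK-----')
+    # Bare keyid: fetched from keyserver.ubuntu.com over HTTPS first.
+    import_key('6E85A86E4652B4E6')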
+
+
+def _get_key_by_keyid(keyid):
+    """Get a key via HTTPS from the Ubuntu keyserver.
+
+    Different key ID formats are supported by SKS keyservers (the longer ones
+    are more secure, see "dead beef attack" and https://evil32.com/). Since
+    HTTPS is used, if SSLBump-like HTTPS proxies are in place, they will
+    impersonate keyserver.ubuntu.com and generate a certificate with
+    keyserver.ubuntu.com in the CN field or in SubjAltName fields of a
+    certificate. If such proxy behavior is expected it is necessary to add
+    the CA certificate chain containing the intermediate CA of the SSLBump
+    proxy to every machine that this code runs on via the ca-certs cloud-init
+    directive (via cloudinit-userdata model-config) or via other means (such
+    as through a custom charm option). Also note that DNS resolution for the
+    hostname in a URL is done at the proxy server - not at the client side.
+
+    8-digit (32 bit) key ID
+    https://keyserver.ubuntu.com/pks/lookup?search=0x4652B4E6
+    16-digit (64 bit) key ID
+    https://keyserver.ubuntu.com/pks/lookup?search=0x6E85A86E4652B4E6
+    40-digit key ID:
+    https://keyserver.ubuntu.com/pks/lookup?search=0x35F77D63B5CEC106C577ED856E85A86E4652B4E6
+
+    :param keyid: An 8, 16 or 40 hex digit keyid to find a key for
+    :type keyid: (bytes, str)
+    :returns: A key material for the specified GPG key id
+    :rtype: (str, bytes)
+    :raises: subprocess.CalledProcessError
+    """
+    # options=mr - machine-readable output (disables html wrappers)
+    keyserver_url = ('https://keyserver.ubuntu.com'
+                     '/pks/lookup?op=get&options=mr&exact=on&search=0x{}')
+    curl_cmd = ['curl', keyserver_url.format(keyid)]
+    # use proxy server settings in order to retrieve the key
+    return subprocess.check_output(
+        curl_cmd,
+        env=env_proxy_settings(['https', 'no_proxy']))
+
+
+def _dearmor_gpg_key(key_asc):
+    """Convert a GPG key in the ASCII armor format to the binary format.
+
+    :param key_asc: A GPG key in ASCII armor format.
+    :type key_asc: (str, bytes)
+    :returns: A GPG key in binary format
+    :rtype: (str, bytes)
+    :raises: GPGKeyError
+    """
+    ps = subprocess.Popen(['gpg', '--dearmor'],
+                          stdout=subprocess.PIPE,
+                          stderr=subprocess.PIPE,
+                          stdin=subprocess.PIPE)
+    out, err = ps.communicate(input=key_asc)
+    # no need to decode output as it is binary (invalid utf-8), only error
+    err = err.decode('utf-8')
+    if 'gpg: no valid OpenPGP data found.' in err:
+        raise GPGKeyError('Invalid GPG key material. Check your network setup'
+                          ' (MTU, routing, DNS) and/or proxy server settings'
+                          ' as well as destination keyserver status.')
+    else:
+        return out
+
+
+def _write_apt_gpg_keyfile(key_name, key_material):
+    """Write GPG key material into a file at a provided path.
+
+    :param key_name: A key name to use for a key file (could be a fingerprint)
+    :type key_name: str
+    :param key_material: A GPG key material (binary)
+    :type key_material: (str, bytes)
+    """
+    with open('/etc/apt/trusted.gpg.d/{}.gpg'.format(key_name),
+              'wb') as keyf:
+        keyf.write(key_material)
+
+
+def add_source(source, key=None, fail_invalid=False):
+    """Add a package source to this system.
+
+    @param source: a URL or sources.list entry, as supported by
+    add-apt-repository(1). Examples::
+
+        ppa:charmers/example
+        deb https://stub:key@private.example.com/ubuntu trusty main
+
+    In addition:
+        'proposed:' may be used to enable the standard 'proposed'
+        pocket for the release.
+        'cloud:' may be used to activate official cloud archive pockets,
+        such as 'cloud:icehouse'
+        'distro' may be used as a noop
+
+    Full list of source specifications supported by the function are:
+
+    'distro': A NOP; i.e. it has no effect.
+    'proposed': the proposed deb spec [2] is written to
+      /etc/apt/sources.list.d/proposed.list
+    'distro-proposed': adds <version>-proposed to the debs [2]
+    'ppa:<ppa-name>': add-apt-repository --yes <ppa-name>
+    'deb <deb-spec>': add-apt-repository --yes deb <deb-spec>
+    'http://....': add-apt-repository --yes http://...
+    'cloud-archive:<spec>': add-apt-repository --yes cloud-archive:<spec>
+    'cloud:<release>[-staging]': specify a Cloud Archive pocket with
+        optional staging version. If staging is used then the staging PPA [2]
+        will be used. If staging is NOT used then the cloud archive [3] will
+        be added, and the 'ubuntu-cloud-keyring' package will be added for
+        the current distro.
+    '<openstack-version>': translate to cloud:<release>-<openstack-version>
+        based on the current distro version (i.e. for 'ussuri' this will
+        either be 'bionic-ussuri' or 'distro').
+    '<openstack-version>/proposed': as above, but for proposed.
+
+    Otherwise the source is not recognised and this is logged to the juju
+    log. However, no error is raised, unless fail_invalid is True.
+
+    [1] deb http://ubuntu-cloud.archive.canonical.com/ubuntu {} main
+        where {} is replaced with the derived pocket name.
+    [2] deb http://archive.ubuntu.com/ubuntu {}-proposed \
+        main universe multiverse restricted
+        where {} is replaced with the lsb_release codename (e.g. xenial)
+    [3] deb http://ubuntu-cloud.archive.canonical.com/ubuntu <pocket>
+        to /etc/apt/sources.list.d/cloud-archive-list
+
+    @param key: A key to be added to the system's APT keyring and used
+    to verify the signatures on packages. Ideally, this should be an
+    ASCII format GPG public key including the block headers. A GPG key
+    id may also be used, but be aware that only insecure protocols are
+    available to retrieve the actual public key from a public keyserver
+    placing your Juju environment at risk. ppa and cloud archive keys
+    are securely added automatically, so should not be provided.
+
+    @param fail_invalid: (boolean) if True, then the function raises a
+    SourceConfigError if there is no matching installation source.
+
+    @raises SourceConfigError() if for cloud:<pocket>, the <pocket> is not a
+    valid pocket in CLOUD_ARCHIVE_POCKETS
+    """
+    # extract the OpenStack versions from the CLOUD_ARCHIVE_POCKETS; can't use
+    # the list in contrib.openstack.utils as it might not be included in
+    # classic charms and would break everything. Having OpenStack specific
+    # code in this file is a bit of an antipattern, anyway.
+    os_versions_regex = "({})".format("|".join(OPENSTACK_RELEASES))
+
+    _mapping = OrderedDict([
+        (r"^distro$", lambda: None),  # This is a NOP
+        (r"^(?:proposed|distro-proposed)$", _add_proposed),
+        (r"^cloud-archive:(.*)$", _add_apt_repository),
+        (r"^((?:deb |http:|https:|ppa:).*)$", _add_apt_repository),
+        (r"^cloud:(.*)-(.*)\/staging$", _add_cloud_staging),
+        (r"^cloud:(.*)-(ovn-.*)$", _add_cloud_distro_check),
+        (r"^cloud:(.*)-(.*)$", _add_cloud_distro_check),
+        (r"^cloud:(.*)$", _add_cloud_pocket),
+        (r"^snap:.*-(.*)-(.*)$", _add_cloud_distro_check),
+        (r"^{}\/proposed$".format(os_versions_regex),
+         _add_bare_openstack_proposed),
+        (r"^{}$".format(os_versions_regex), _add_bare_openstack),
+    ])
+    if source is None:
+        source = ''
+    for r, fn in _mapping.items():
+        m = re.match(r, source)
+        if m:
+            if key:
+                # Import key before adding the source which depends on it,
+                # as refreshing packages could fail otherwise.
+                try:
+                    import_key(key)
+                except GPGKeyError as e:
+                    raise SourceConfigError(str(e))
+            # call the associated function with the captured groups
+            # raises SourceConfigError on error.
+            fn(*m.groups())
+            break
+    else:
+        # nothing matched. log an error and maybe sys.exit
+        err = "Unknown source: {!r}".format(source)
+        log(err)
+        if fail_invalid:
+            raise SourceConfigError(err)
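+
+
+# NOTE: illustrative calls, not part of the upstream module; the release and
+# PPA names are examples only. They exercise a few of the specs matched by
+# the _mapping table above.
+#
+#   add_source('cloud:focal-victoria')   # On a focal host: installs
+#                                        # ubuntu-cloud-keyring and writes
+#                                        # cloud-archive.list
+#   add_source('ppa:charmers/example')   # delegated to add-apt-repository
+#   add_source('bogus:spec', fail_invalid=True)  # raises SourceConfigError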
+
+
+def _add_proposed():
+    """Add the PROPOSED_POCKET as /etc/apt/sources.list.d/proposed.list
+
+    Uses get_distrib_codename to determine the correct stanza for
+    the deb line.
+
+    For Intel architectures PROPOSED_POCKET is used for the release, but for
+    other architectures PROPOSED_PORTS_POCKET is used for the release.
+    """
+    release = get_distrib_codename()
+    arch = platform.machine()
+    if arch not in ARCH_TO_PROPOSED_POCKET.keys():
+        raise SourceConfigError("Arch {} not supported for (distro-)proposed"
+                                .format(arch))
+    with open('/etc/apt/sources.list.d/proposed.list', 'w') as apt:
+        apt.write(ARCH_TO_PROPOSED_POCKET[arch].format(release))
+
+
+def _add_apt_repository(spec):
+    """Add the spec using add-apt-repository
+
+    :param spec: the parameter to pass to add-apt-repository
+    :type spec: str
+    """
+    if '{series}' in spec:
+        series = get_distrib_codename()
+        spec = spec.replace('{series}', series)
+    _run_with_retries(['add-apt-repository', '--yes', spec],
+                      cmd_env=env_proxy_settings(['https', 'http',
+                                                  'no_proxy']))
+
+
+def __write_sources_list_d_actual_pocket(file, actual_pocket):
+    with open('/etc/apt/sources.list.d/{}'.format(file), 'w') as apt:
+        apt.write(CLOUD_ARCHIVE.format(actual_pocket))
+
+
+def _add_cloud_pocket(pocket):
+    """Add a cloud pocket as /etc/apt/sources.list.d/cloud-archive.list
+
+    Note that this overwrites the existing file if there is one.
+
+    This function also converts the simple pocket into the actual pocket
+    using the CLOUD_ARCHIVE_POCKETS mapping.
+
+    :param pocket: string representing the pocket to add a deb spec for.
+    :raises: SourceConfigError if the cloud pocket doesn't exist or the
+        requested release doesn't match the current distro version.
+    """
+    apt_install(filter_installed_packages(['ubuntu-cloud-keyring']),
+                fatal=True)
+    if pocket not in CLOUD_ARCHIVE_POCKETS:
+        raise SourceConfigError(
+            'Unsupported cloud: source option %s' %
+            pocket)
+    actual_pocket = CLOUD_ARCHIVE_POCKETS[pocket]
+    __write_sources_list_d_actual_pocket(
+        'cloud-archive{}.list'.format('' if 'ovn' not in pocket else '-ovn'),
+        actual_pocket)
+
+
+def _add_cloud_staging(cloud_archive_release, openstack_release):
+    """Add the cloud staging repository which is in
+    ppa:ubuntu-cloud-archive/<openstack_release>-staging
+
+    This function checks that the cloud_archive_release matches the current
+    codename for the distro that the charm is being installed on.
+
+    :param cloud_archive_release: string, codename for the release.
+    :param openstack_release: String, codename for the openstack release.
+    :raises: SourceConfigError if the cloud_archive_release doesn't match the
+        current version of the os.
+    """
+    _verify_is_ubuntu_rel(cloud_archive_release, openstack_release)
+    ppa = 'ppa:ubuntu-cloud-archive/{}-staging'.format(openstack_release)
+    cmd = 'add-apt-repository -y {}'.format(ppa)
+    _run_with_retries(cmd.split(' '))
+
+
+def _add_cloud_distro_check(cloud_archive_release, openstack_release):
+    """Add the cloud pocket, but also check the cloud_archive_release against
+    the current distro, and use the openstack_release as the full lookup.
+
+    This just calls _add_cloud_pocket() with the openstack_release as pocket
+    to get the correct cloud-archive.list for dpkg to work with.
+
+    :param cloud_archive_release: String, codename for the distro release.
+    :param openstack_release: String, spec for the release to look up in the
+        CLOUD_ARCHIVE_POCKETS
+    :raises: SourceConfigError if this is the wrong distro, or the pocket
+        spec doesn't exist.
+    """
+    _verify_is_ubuntu_rel(cloud_archive_release, openstack_release)
+    _add_cloud_pocket("{}-{}".format(cloud_archive_release,
+                                     openstack_release))
+
+
+def _verify_is_ubuntu_rel(release, os_release):
+    """Verify that the release is the same as the current ubuntu release.
+
+    :param release: String, lowercase codename for the release.
+    :param os_release: String, the os_release being asked for
+    :raises: SourceConfigError if the release is not the same as the ubuntu
+        release.
+    """
+    ubuntu_rel = get_distrib_codename()
+    if release != ubuntu_rel:
+        raise SourceConfigError(
+            'Invalid Cloud Archive release specified: {}-{} on this Ubuntu '
+            'version ({})'.format(release, os_release, ubuntu_rel))
+
+
+def _add_bare_openstack(openstack_release):
+    """Add cloud or distro based on the release given.
+
+    The spec given is, say, 'ussuri', but this could apply cloud:bionic-ussuri
+    or 'distro' depending on whether the ubuntu release is bionic or focal.
+
+    :param openstack_release: the OpenStack codename to determine the release
+        for.
+    :type openstack_release: str
+    :raises: SourceConfigError
+    """
+    # TODO(ajkavanagh) - surely this means we should be removing cloud
+    # archives if they exist?
+    __add_bare_helper(openstack_release, "{}-{}", lambda: None)
+
+
+def _add_bare_openstack_proposed(openstack_release):
+    """Add cloud or distro but with proposed.
+
+    The spec given is, say, 'ussuri' but this could apply
+    cloud:bionic-ussuri/proposed or 'distro/proposed' depending on whether
+    the ubuntu release is bionic or focal.
+
+    :param openstack_release: the OpenStack codename to determine the release
+        for.
+    :type openstack_release: str
+    :raises: SourceConfigError
+    """
+    __add_bare_helper(openstack_release, "{}-{}/proposed", _add_proposed)
+
+
+def __add_bare_helper(openstack_release, pocket_format, final_function):
+    """Helper for _add_bare_openstack[_proposed]
+
+    The bulk of the work between the two functions is exactly the same except
+    for the pocket format and the function that is run if it's the distro
+    version.
+
+    :param openstack_release: the OpenStack codename. e.g. ussuri
+    :type openstack_release: str
+    :param pocket_format: the pocket formatter string to construct a pocket
+        str from the openstack_release and the current ubuntu version.
+    :type pocket_format: str
+    :param final_function: the function to call if it is the distro version.
+    :type final_function: Callable
+    :raises SourceConfigError on error
+    """
+    ubuntu_version = get_distrib_codename()
+    possible_pocket = pocket_format.format(ubuntu_version, openstack_release)
+    if possible_pocket in CLOUD_ARCHIVE_POCKETS:
+        _add_cloud_pocket(possible_pocket)
+        return
+    # Otherwise it's almost certainly the distro version; verify that it
+    # exists.
+    try:
+        assert UBUNTU_OPENSTACK_RELEASE[ubuntu_version] == openstack_release
+    except KeyError:
+        raise SourceConfigError(
+            "Ubuntu version {} isn't known to this library"
+            .format(ubuntu_version))
+    except AssertionError:
+        raise SourceConfigError(
+            'Invalid OpenStack release specified: {} for Ubuntu version {}'
+            .format(openstack_release, ubuntu_version))
+    final_function()
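+
+
+# NOTE: illustrative sketch, not part of the upstream module; assumes the
+# usual CLOUD_ARCHIVE_POCKETS and UBUNTU_OPENSTACK_RELEASE tables.
+#
+#   On a bionic host, add_source('ussuri') forms the candidate pocket
+#   'bionic-ussuri', which is in CLOUD_ARCHIVE_POCKETS, so the Ubuntu Cloud
+#   Archive is configured.
+#
+#   On a focal host the candidate 'focal-ussuri' is not a Cloud Archive
+#   pocket, but UBUNTU_OPENSTACK_RELEASE['focal'] == 'ussuri', so the spec
+#   is treated as the distro release and final_function() (a no-op for
+#   _add_bare_openstack) runs instead.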
+
+
+def _run_with_retries(cmd, max_retries=CMD_RETRY_COUNT, retry_exitcodes=(1,),
+                      retry_message="", cmd_env=None, quiet=False):
+    """Run a command and retry until success or max_retries is reached.
+
+    :param cmd: The apt command to run.
+    :type cmd: List[str]
+    :param max_retries: The number of retries to attempt on a fatal
+        command. Defaults to CMD_RETRY_COUNT.
+    :type max_retries: int
+    :param retry_exitcodes: Optional additional exit codes to retry.
+        Defaults to retry on exit code 1.
+    :type retry_exitcodes: tuple
+    :param retry_message: Optional log prefix emitted during retries.
+    :type retry_message: str
+    :param cmd_env: Environment variables to add to the command run.
+    :type cmd_env: Option[None, Dict[str, str]]
+    :param quiet: if True, silence the output of the command from stdout and
+        stderr
+    :type quiet: bool
+    """
+    env = get_apt_dpkg_env()
+    if cmd_env:
+        env.update(cmd_env)
+
+    kwargs = {}
+    if quiet:
+        kwargs['stdout'] = subprocess.DEVNULL
+        kwargs['stderr'] = subprocess.DEVNULL
+
+    if not retry_message:
+        retry_message = "Failed executing '{}'".format(" ".join(cmd))
+    retry_message += ". Will retry in {} seconds".format(CMD_RETRY_DELAY)
+
+    retry_count = 0
+    result = None
+
+    retry_results = (None,) + retry_exitcodes
+    while result in retry_results:
+        try:
+            result = subprocess.check_call(cmd, env=env, **kwargs)
+        except subprocess.CalledProcessError as e:
+            result = e.returncode
+            if result not in retry_results:
+                # a non-retriable exitcode was produced
+                raise
+            retry_count += 1
+            if retry_count > max_retries:
+                # a retriable exitcode was produced more than {max_retries}
+                # times
+                raise
+            log(retry_message)
+            time.sleep(CMD_RETRY_DELAY)
+
+
+def _run_apt_command(cmd, fatal=False, quiet=False):
+    """Run an apt command with optional retries.
+
+    :param cmd: The apt command to run.
+    :type cmd: List[str]
+    :param fatal: Whether the command's output should be checked and
+        retried.
+    :type fatal: bool
+    :param quiet: if True, silence the output of the command from stdout and
+        stderr
+    :type quiet: bool
+    """
+    if fatal:
+        _run_with_retries(
+            cmd, retry_exitcodes=(1, APT_NO_LOCK,),
+            retry_message="Couldn't acquire DPKG lock",
+            quiet=quiet)
+    else:
+        kwargs = {}
+        if quiet:
+            kwargs['stdout'] = subprocess.DEVNULL
+            kwargs['stderr'] = subprocess.DEVNULL
+        subprocess.call(cmd, env=get_apt_dpkg_env(), **kwargs)
+
+
+def get_upstream_version(package):
+    """Determine upstream version based on installed package
+
+    @returns None (if not installed) or the upstream version
+    """
+    cache = apt_cache()
+    try:
+        pkg = cache[package]
+    except Exception:
+        # the package is unknown to the current apt cache.
+        return None
+
+    if not pkg.current_ver:
+        # package is known, but no version is currently installed.
+        return None
+
+    return ubuntu_apt_pkg.upstream_version(pkg.current_ver.ver_str)
+
+
+def get_installed_version(package):
+    """Determine installed version of a package
+
+    @returns None (if not installed) or the installed version as
+        Version object
+    """
+    cache = apt_cache()
+    dpkg_result = cache.dpkg_list([package]).get(package, {})
+    current_ver = None
+    installed_version = dpkg_result.get('version')
+
+    if installed_version:
+        current_ver = ubuntu_apt_pkg.Version({'ver_str': installed_version})
+    return current_ver
+
+
+def get_apt_dpkg_env():
+    """Get environment suitable for execution of APT and DPKG tools.
+
+    We keep this in a helper function instead of in a global constant to
+    avoid execution on import of the library.
+
+    :returns: Environment suitable for execution of APT and DPKG tools.
+    :rtype: Dict[str, str]
+    """
+    # The fallback is used in the event of ``/etc/environment`` not
+    # containing a valid PATH variable.
+ return {'DEBIAN_FRONTEND': 'noninteractive', + 'PATH': get_system_env('PATH', '/usr/sbin:/usr/bin:/sbin:/bin')} diff --git a/ceph-osd/hooks/charmhelpers/fetch/ubuntu_apt_pkg.py b/ceph-osd/hooks/charmhelpers/fetch/ubuntu_apt_pkg.py new file mode 100644 index 00000000..f4dde4a9 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/fetch/ubuntu_apt_pkg.py @@ -0,0 +1,327 @@ +# Copyright 2019-2021 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Provide a subset of the ``python-apt`` module API. + +Data collection is done through subprocess calls to ``apt-cache`` and +``dpkg-query`` commands. + +The main purpose for this module is to avoid dependency on the +``python-apt`` python module. + +The indicated python module is a wrapper around the ``apt`` C++ library +which is tightly connected to the version of the distribution it was +shipped on. It is not developed in a backward/forward compatible manner. + +This in turn makes it incredibly hard to distribute as a wheel for a piece +of python software that supports a span of distro releases [0][1]. + +Upstream feedback like [2] does not give confidence in this ever changing, +so with this we get rid of the dependency. + +0: https://github.com/juju-solutions/layer-basic/pull/135 +1: https://bugs.launchpad.net/charm-octavia/+bug/1824112 +2: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=845330#10 +""" + +import locale +import os +import subprocess +import sys + +from charmhelpers import deprecate +from charmhelpers.core.hookenv import log + + +class _container(dict): + """Simple container for attributes.""" + __getattr__ = dict.__getitem__ + __setattr__ = dict.__setitem__ + + +class Package(_container): + """Simple container for package attributes.""" + + +class Version(_container): + """Simple container for version attributes.""" + + +class Cache(object): + """Simulation of ``apt_pkg`` Cache object.""" + def __init__(self, progress=None): + pass + + def __contains__(self, package): + try: + pkg = self.__getitem__(package) + return pkg is not None + except KeyError: + return False + + def __getitem__(self, package): + """Get information about a package from apt and dpkg databases. + + :param package: Name of package + :type package: str + :returns: Package object + :rtype: object + :raises: KeyError, subprocess.CalledProcessError + """ + apt_result = self._apt_cache_show([package])[package] + apt_result['name'] = apt_result.pop('package') + pkg = Package(apt_result) + dpkg_result = self.dpkg_list([package]).get(package, {}) + current_ver = None + installed_version = dpkg_result.get('version') + if installed_version: + current_ver = Version({'ver_str': installed_version}) + pkg.current_ver = current_ver + pkg.architecture = dpkg_result.get('architecture') + return pkg + + @deprecate("use dpkg_list() instead.", "2022-05", log=log) + def _dpkg_list(self, packages): + return self.dpkg_list(packages) + + def dpkg_list(self, packages): + """Get data from system dpkg database for package. 
+ + Note that this method is also useful for querying package names + containing wildcards, for example + + apt_cache().dpkg_list(['nvidia-vgpu-ubuntu-*']) + + may return + + { + 'nvidia-vgpu-ubuntu-470': { + 'name': 'nvidia-vgpu-ubuntu-470', + 'version': '470.68', + 'architecture': 'amd64', + 'description': 'NVIDIA vGPU driver - version 470.68' + } + } + + :param packages: Packages to get data from + :type packages: List[str] + :returns: Structured data about installed packages, keys like + ``dpkg-query --list`` + :rtype: dict + :raises: subprocess.CalledProcessError + """ + pkgs = {} + cmd = [ + 'dpkg-query', '--show', + '--showformat', + r'${db:Status-Abbrev}\t${Package}\t${Version}\t${Architecture}\t${binary:Summary}\n' + ] + cmd.extend(packages) + try: + output = subprocess.check_output(cmd, + stderr=subprocess.STDOUT, + universal_newlines=True) + except subprocess.CalledProcessError as cp: + # ``dpkg-query`` may return error and at the same time have + # produced useful output, for example when asked for multiple + # packages where some are not installed + if cp.returncode != 1: + raise + output = cp.output + for line in output.splitlines(): + # only process lines for successfully installed packages + if not (line.startswith('ii ') or line.startswith('hi ')): + continue + status, name, version, arch, desc = line.split('\t', 4) + pkgs[name] = { + 'name': name, + 'version': version, + 'architecture': arch, + 'description': desc, + } + return pkgs + + def _apt_cache_show(self, packages): + """Get data from system apt cache for package. + + :param packages: Packages to get data from + :type packages: List[str] + :returns: Structured data about package, keys like + ``apt-cache show`` + :rtype: dict + :raises: subprocess.CalledProcessError + """ + pkgs = {} + cmd = ['apt-cache', 'show', '--no-all-versions'] + cmd.extend(packages) + if locale.getlocale() == (None, None): + # subprocess calls out to locale.getpreferredencoding(False) to + # determine encoding. Workaround for Trusty where the + # environment appears to not be set up correctly. + locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') + try: + output = subprocess.check_output(cmd, + stderr=subprocess.STDOUT, + universal_newlines=True) + previous = None + pkg = {} + for line in output.splitlines(): + if not line: + if 'package' in pkg: + pkgs.update({pkg['package']: pkg}) + pkg = {} + continue + if line.startswith(' '): + if previous and previous in pkg: + pkg[previous] += os.linesep + line.lstrip() + continue + if ':' in line: + kv = line.split(':', 1) + key = kv[0].lower() + if key == 'n': + continue + previous = key + pkg.update({key: kv[1].lstrip()}) + except subprocess.CalledProcessError as cp: + # ``apt-cache`` returns 100 if none of the packages asked for + # exist in the apt cache. + if cp.returncode != 100: + raise + return pkgs + + +class Config(_container): + def __init__(self): + super(Config, self).__init__(self._populate()) + + def _populate(self): + cfgs = {} + cmd = ['apt-config', 'dump'] + output = subprocess.check_output(cmd, + stderr=subprocess.STDOUT, + universal_newlines=True) + for line in output.splitlines(): + if not line.startswith("CommandLine"): + k, v = line.split(" ", 1) + cfgs[k] = v.strip(";").strip("\"") + + return cfgs + + +# Backwards compatibility with old apt_pkg module +sys.modules[__name__].config = Config() + + +def init(): + """Compatibility shim that does nothing.""" + pass + + +def upstream_version(version): + """Extracts upstream version from a version string. 
+ + Upstream reference: https://salsa.debian.org/apt-team/apt/blob/master/ + apt-pkg/deb/debversion.cc#L259 + + :param version: Version string + :type version: str + :returns: Upstream version + :rtype: str + """ + if version: + version = version.split(':')[-1] + version = version.split('-')[0] + return version + + +def version_compare(a, b): + """Compare the given versions. + + Call out to ``dpkg`` to make sure the code doing the comparison is + compatible with what the ``apt`` library would do. Mimic the return + values. + + Upstream reference: + https://apt-team.pages.debian.net/python-apt/library/apt_pkg.html + ?highlight=version_compare#apt_pkg.version_compare + + :param a: version string + :type a: str + :param b: version string + :type b: str + :returns: >0 if ``a`` is greater than ``b``, 0 if a equals b, + <0 if ``a`` is smaller than ``b`` + :rtype: int + :raises: subprocess.CalledProcessError, RuntimeError + """ + for op in ('gt', 1), ('eq', 0), ('lt', -1): + try: + subprocess.check_call(['dpkg', '--compare-versions', + a, op[0], b], + stderr=subprocess.STDOUT, + universal_newlines=True) + return op[1] + except subprocess.CalledProcessError as cp: + if cp.returncode == 1: + continue + raise + else: + raise RuntimeError('Unable to compare "{}" and "{}", according to ' + 'our logic they are neither greater, equal nor ' + 'less than each other.'.format(a, b)) + + +class PkgVersion(): + """Allow package versions to be compared. + + For example:: + + >>> import charmhelpers.fetch as fetch + >>> (fetch.apt_pkg.PkgVersion('2:20.4.0') < + ... fetch.apt_pkg.PkgVersion('2:20.5.0')) + True + >>> pkgs = [fetch.apt_pkg.PkgVersion('2:20.4.0'), + ... fetch.apt_pkg.PkgVersion('2:21.4.0'), + ... fetch.apt_pkg.PkgVersion('2:17.4.0')] + >>> pkgs.sort() + >>> pkgs + [2:17.4.0, 2:20.4.0, 2:21.4.0] + """ + + def __init__(self, version): + self.version = version + + def __lt__(self, other): + return version_compare(self.version, other.version) == -1 + + def __le__(self, other): + return self.__lt__(other) or self.__eq__(other) + + def __gt__(self, other): + return version_compare(self.version, other.version) == 1 + + def __ge__(self, other): + return self.__gt__(other) or self.__eq__(other) + + def __eq__(self, other): + return version_compare(self.version, other.version) == 0 + + def __ne__(self, other): + return not self.__eq__(other) + + def __repr__(self): + return self.version + + def __hash__(self): + return hash(repr(self)) diff --git a/ceph-osd/hooks/charmhelpers/osplatform.py b/ceph-osd/hooks/charmhelpers/osplatform.py new file mode 100644 index 00000000..5d121866 --- /dev/null +++ b/ceph-osd/hooks/charmhelpers/osplatform.py @@ -0,0 +1,61 @@ +import platform +import os + + +def get_platform(): + """Return the current OS platform. + + For example: if current os platform is Ubuntu then a string "ubuntu" + will be returned (which is the name of the module). + This string is used to decide which platform module should be imported. + """ + current_platform = _get_current_platform() + + if "Ubuntu" in current_platform: + return "ubuntu" + elif "CentOS" in current_platform: + return "centos" + elif "debian" in current_platform or "Debian" in current_platform: + # Stock Python does not detect Ubuntu and instead returns debian. + # Or at least it does in some build environments like Travis CI + return "ubuntu" + elif "elementary" in current_platform: + # ElementaryOS fails to run tests locally without this. 
+        return "ubuntu"
+    elif "Pop!_OS" in current_platform:
+        # Pop!_OS also fails to run tests locally without this.
+        return "ubuntu"
+    else:
+        raise RuntimeError("This module is not supported on {}."
+                           .format(current_platform))
+
+
+def _get_current_platform():
+    """Return the current platform information for the OS.
+
+    Attempts to look up linux distribution information from the platform
+    module for releases of python < 3.8. For newer versions of python,
+    the platform is determined from the /etc/os-release file.
+    """
+    # linux_distribution was deprecated and then removed in Python 3.8.
+    # Warnings *not* disabled, as we certainly need to fix this.
+    if hasattr(platform, 'linux_distribution'):
+        tuple_platform = platform.linux_distribution()
+        current_platform = tuple_platform[0]
+    else:
+        current_platform = _get_platform_from_fs()
+
+    return current_platform
+
+
+def _get_platform_from_fs():
+    """Get Platform from /etc/os-release."""
+    with open(os.path.join(os.sep, 'etc', 'os-release')) as fin:
+        content = dict(
+            line.split('=', 1)
+            for line in fin.read().splitlines()
+            if '=' in line
+        )
+    for k, v in content.items():
+        content[k] = v.strip('"')
+    return content["NAME"]
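+
+# NOTE: illustrative sketch, not part of the module above. On Ubuntu and the
+# Ubuntu derivatives special-cased above this resolves to 'ubuntu', which is
+# then used to pick the platform-specific charmhelpers module:
+#
+#     from charmhelpers.osplatform import get_platform
+#     module = get_platform()   # e.g. 'ubuntu'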
diff --git a/ceph-osd/hooks/config-changed b/ceph-osd/hooks/config-changed
new file mode 120000
index 00000000..52d96630
--- /dev/null
+++ b/ceph-osd/hooks/config-changed
@@ -0,0 +1 @@
+ceph_hooks.py
\ No newline at end of file
diff --git a/ceph-osd/hooks/install b/ceph-osd/hooks/install
new file mode 100755
index 00000000..0064ac5f
--- /dev/null
+++ b/ceph-osd/hooks/install
@@ -0,0 +1,21 @@
+#!/bin/bash -e
+# Wrapper to deal with newer Ubuntu versions that don't have py2 installed
+# by default.
+
+declare -a DEPS=('apt' 'pip' 'yaml' 'tabulate')
+
+check_and_install() {
+    pkg="${1}-${2}"
+    if ! dpkg -s ${pkg} > /dev/null 2>&1; then
+        apt-get -y install ${pkg}
+    fi
+}
+
+PYTHON="python3"
+
+for dep in ${DEPS[@]}; do
+    check_and_install ${PYTHON} ${dep}
+done
+
+./hooks/install_deps
+exec ./hooks/install.real
diff --git a/ceph-osd/hooks/install.real b/ceph-osd/hooks/install.real
new file mode 120000
index 00000000..52d96630
--- /dev/null
+++ b/ceph-osd/hooks/install.real
@@ -0,0 +1 @@
+ceph_hooks.py
\ No newline at end of file
diff --git a/ceph-osd/hooks/install_deps b/ceph-osd/hooks/install_deps
new file mode 100755
index 00000000..d0775edc
--- /dev/null
+++ b/ceph-osd/hooks/install_deps
@@ -0,0 +1,18 @@
+#!/bin/bash -e
+# Wrapper to ensure that python dependencies are installed before we get into
+# the python part of the hook execution
+
+declare -a DEPS=('dnspython' 'pyudev' 'netaddr' 'netifaces')
+
+check_and_install() {
+    pkg="${1}-${2}"
+    if ! dpkg -s ${pkg} > /dev/null 2>&1; then
+        apt-get -y install ${pkg}
+    fi
+}
+
+PYTHON="python3"
+
+for dep in ${DEPS[@]}; do
+    check_and_install ${PYTHON} ${dep}
+done
diff --git a/ceph-osd/hooks/mon-relation-changed b/ceph-osd/hooks/mon-relation-changed
new file mode 120000
index 00000000..52d96630
--- /dev/null
+++ b/ceph-osd/hooks/mon-relation-changed
@@ -0,0 +1 @@
+ceph_hooks.py
\ No newline at end of file
diff --git a/ceph-osd/hooks/mon-relation-departed b/ceph-osd/hooks/mon-relation-departed
new file mode 120000
index 00000000..52d96630
--- /dev/null
+++ b/ceph-osd/hooks/mon-relation-departed
@@ -0,0 +1 @@
+ceph_hooks.py
\ No newline at end of file
diff --git a/ceph-osd/hooks/nrpe-external-master-relation-changed b/ceph-osd/hooks/nrpe-external-master-relation-changed
new file mode 120000
index 00000000..52d96630
--- /dev/null
+++ b/ceph-osd/hooks/nrpe-external-master-relation-changed
@@ -0,0 +1 @@
+ceph_hooks.py
\ No newline at end of file
diff --git a/ceph-osd/hooks/nrpe-external-master-relation-joined b/ceph-osd/hooks/nrpe-external-master-relation-joined
new file mode 120000
index 00000000..52d96630
--- /dev/null
+++ b/ceph-osd/hooks/nrpe-external-master-relation-joined
@@ -0,0 +1 @@
+ceph_hooks.py
\ No newline at end of file
diff --git a/ceph-osd/hooks/osd-devices-storage-attached b/ceph-osd/hooks/osd-devices-storage-attached
new file mode 120000
index 00000000..68134a91
--- /dev/null
+++ b/ceph-osd/hooks/osd-devices-storage-attached
@@ -0,0 +1 @@
+add-storage
\ No newline at end of file
diff --git a/ceph-osd/hooks/osd-devices-storage-detaching b/ceph-osd/hooks/osd-devices-storage-detaching
new file mode 120000
index 00000000..68134a91
--- /dev/null
+++ b/ceph-osd/hooks/osd-devices-storage-detaching
@@ -0,0 +1 @@
+add-storage
\ No newline at end of file
diff --git a/ceph-osd/hooks/post-series-upgrade b/ceph-osd/hooks/post-series-upgrade
new file mode 120000
index 00000000..52d96630
--- /dev/null
+++ b/ceph-osd/hooks/post-series-upgrade
@@ -0,0 +1 @@
+ceph_hooks.py
\ No newline at end of file
diff --git a/ceph-osd/hooks/pre-series-upgrade b/ceph-osd/hooks/pre-series-upgrade
new file mode 120000
index 00000000..52d96630
--- /dev/null
+++ b/ceph-osd/hooks/pre-series-upgrade
@@ -0,0 +1 @@
+ceph_hooks.py
\ No newline at end of file
diff --git a/ceph-osd/hooks/secrets-storage-relation-broken b/ceph-osd/hooks/secrets-storage-relation-broken
new file mode 120000
index 00000000..52d96630
--- /dev/null
+++ b/ceph-osd/hooks/secrets-storage-relation-broken
@@ -0,0 +1 @@
+ceph_hooks.py
\ No newline at end of file
diff --git a/ceph-osd/hooks/secrets-storage-relation-changed b/ceph-osd/hooks/secrets-storage-relation-changed
new file mode 120000
index 00000000..52d96630
--- /dev/null
+++ b/ceph-osd/hooks/secrets-storage-relation-changed
@@ -0,0 +1 @@
+ceph_hooks.py
\ No newline at end of file
diff --git a/ceph-osd/hooks/secrets-storage-relation-departed b/ceph-osd/hooks/secrets-storage-relation-departed
new file mode 120000
index 00000000..52d96630
--- /dev/null
+++ b/ceph-osd/hooks/secrets-storage-relation-departed
@@ -0,0 +1 @@
+ceph_hooks.py
\ No newline at end of file
diff --git a/ceph-osd/hooks/secrets-storage-relation-joined b/ceph-osd/hooks/secrets-storage-relation-joined
new file mode 120000
index 00000000..52d96630
--- /dev/null
+++ b/ceph-osd/hooks/secrets-storage-relation-joined
@@ -0,0 +1 @@
+ceph_hooks.py
\ No newline at end of file
diff --git a/ceph-osd/hooks/start b/ceph-osd/hooks/start
new file mode 120000
index 00000000..52d96630
--- /dev/null
+++ b/ceph-osd/hooks/start
@@ -0,0 +1
@@ +ceph_hooks.py \ No newline at end of file diff --git a/ceph-osd/hooks/stop b/ceph-osd/hooks/stop new file mode 120000 index 00000000..52d96630 --- /dev/null +++ b/ceph-osd/hooks/stop @@ -0,0 +1 @@ +ceph_hooks.py \ No newline at end of file diff --git a/ceph-osd/hooks/storage.real b/ceph-osd/hooks/storage.real new file mode 120000 index 00000000..52d96630 --- /dev/null +++ b/ceph-osd/hooks/storage.real @@ -0,0 +1 @@ +ceph_hooks.py \ No newline at end of file diff --git a/ceph-osd/hooks/update-status b/ceph-osd/hooks/update-status new file mode 120000 index 00000000..52d96630 --- /dev/null +++ b/ceph-osd/hooks/update-status @@ -0,0 +1 @@ +ceph_hooks.py \ No newline at end of file diff --git a/ceph-osd/hooks/upgrade-charm b/ceph-osd/hooks/upgrade-charm new file mode 100755 index 00000000..dc22fdf4 --- /dev/null +++ b/ceph-osd/hooks/upgrade-charm @@ -0,0 +1,7 @@ +#!/bin/bash -e +# Wrapper to ensure that old python bytecode isn't hanging around +# after we upgrade the charm with newer libraries +find . -iname '*.pyc' -delete +find . -name '__pycache__' -prune -exec rm -rf "{}" \; +./hooks/install_deps +exec ./hooks/upgrade-charm.real diff --git a/ceph-osd/hooks/upgrade-charm.real b/ceph-osd/hooks/upgrade-charm.real new file mode 120000 index 00000000..52d96630 --- /dev/null +++ b/ceph-osd/hooks/upgrade-charm.real @@ -0,0 +1 @@ +ceph_hooks.py \ No newline at end of file diff --git a/ceph-osd/hooks/utils.py b/ceph-osd/hooks/utils.py new file mode 100644 index 00000000..b293e6a0 --- /dev/null +++ b/ceph-osd/hooks/utils.py @@ -0,0 +1,732 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import re +import os +import socket +import subprocess +import sys +import time + +sys.path.append('lib') +import charms_ceph.utils as ceph + +from charmhelpers.core.hookenv import ( + unit_get, + cached, + config, + network_get_primary_address, + log, + DEBUG, + WARNING, + status_set, + storage_get, + storage_list, + function_get, +) +from charmhelpers.core import unitdata +from charmhelpers.fetch import ( + apt_install, + filter_installed_packages +) + +from charmhelpers.core.host import ( + lsb_release, + CompareHostReleases, +) + +from charmhelpers.contrib.network.ip import ( + get_address_in_network, + get_ipv6_addr +) + +ALL = "all" # string value representing all "OSD devices" +TEMPLATES_DIR = 'templates' + +try: + import jinja2 +except ImportError: + apt_install(filter_installed_packages(['python3-jinja2']), + fatal=True) + import jinja2 + +try: + import dns.resolver +except ImportError: + apt_install(filter_installed_packages(['python3-dnspython']), + fatal=True) + import dns.resolver + + +_bootstrap_keyring = "/var/lib/ceph/bootstrap-osd/ceph.keyring" +_upgrade_keyring = "/var/lib/ceph/osd/ceph.client.osd-upgrade.keyring" +_removal_keyring = "/var/lib/ceph/osd/ceph.client.osd-removal.keyring" +_client_crash_keyring = "/var/lib/ceph/osd/ceph.client.crash.keyring" + + +def is_osd_bootstrap_ready(): + """ + Is this machine ready to add OSDs. 
+
+    :returns: boolean: Is the OSD bootstrap key present
+    """
+    return os.path.exists(_bootstrap_keyring)
+
+
+def _import_key(key, path, name, override=False):
+    exists = os.path.exists(path)
+    if not exists or override:
+        create = ['--create-keyring'] if not exists else []
+        cmd = [
+            'sudo',
+            '-u',
+            ceph.ceph_user(),
+            'ceph-authtool',
+            path
+        ] + create + [
+            '--name={}'.format(name),
+            '--add-key={}'.format(key)
+        ]
+        subprocess.check_call(cmd)
+
+
+def import_osd_bootstrap_key(key):
+    """
+    Ensure that the osd-bootstrap keyring is set up.
+
+    :param key: The cephx key to add to the bootstrap keyring
+    :type key: str
+    :raises: subprocess.CalledProcessError"""
+    _import_key(key, _bootstrap_keyring, 'client.bootstrap-osd')
+
+
+def import_osd_upgrade_key(key):
+    """
+    Ensure that the osd-upgrade keyring is set up.
+
+    :param key: The cephx key to add to the upgrade keyring
+    :type key: str
+    :raises: subprocess.CalledProcessError"""
+    _import_key(key, _upgrade_keyring, 'client.osd-upgrade')
+
+
+def import_osd_removal_key(key):
+    """
+    Ensure that the osd-removal keyring is set up.
+
+    :param key: The cephx key to add to the removal keyring
+    :type key: str
+    :raises: subprocess.CalledProcessError"""
+    _import_key(key, _removal_keyring, 'client.osd-removal')
+
+
+def import_client_crash_key(key):
+    """
+    Ensure that the client.crash keyring is set up.
+
+    :param key: The cephx key to add to the client.crash keyring
+    :type key: str
+    :raises: subprocess.CalledProcessError"""
+    _import_key(key, _client_crash_keyring, 'client.crash')
+
+
+def import_pending_key(key, osd_id):
+    """
+    Import a pending key, used for key rotation.
+
+    :param key: The pending cephx key that will replace the current one.
+    :type key: str
+    :param osd_id: The OSD id whose key will be replaced.
+    :type osd_id: str
+    :raises: subprocess.CalledProcessError"""
+    _import_key(key, '/var/lib/ceph/osd/ceph-%s/keyring' % osd_id,
+                'osd.%s' % osd_id, override=True)
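+
+
+# NOTE: illustrative sequence, not part of the charm source; the key string
+# is a placeholder. In the charm these keys arrive over the mon relation and
+# are landed with the wrappers above.
+def _example_import_mon_keys():
+    key = 'AQD...placeholder-cephx-key...=='  # placeholder, not a real key
+    import_osd_bootstrap_key(key)   # writes _bootstrap_keyring
+    import_osd_upgrade_key(key)     # writes _upgrade_keyring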
+
+
+def render_template(template_name, context, template_dir=TEMPLATES_DIR):
+    """Render Jinja2 template.
+
+    In addition to the template directory specified by the caller, the shared
+    'templates' directory in the ``charmhelpers.contrib.openstack`` module
+    will be searched.
+
+    :param template_name: Name of template file.
+    :type template_name: str
+    :param context: Template context.
+    :type context: Dict[str, any]
+    :param template_dir: Primary path to search for templates.
+        (default: contents of the ``TEMPLATES_DIR`` global)
+    :type template_dir: Optional[str]
+    :returns: The rendered template
+    :rtype: str
+    """
+    templates = jinja2.Environment(
+        loader=jinja2.ChoiceLoader((
+            jinja2.FileSystemLoader(template_dir),
+            jinja2.PackageLoader('charmhelpers.contrib.openstack',
+                                 'templates'),
+        )))
+    template = templates.get_template(template_name)
+    return template.render(context)
+
+
+def enable_pocket(pocket):
+    """Uncomment any matching pocket entries in /etc/apt/sources.list."""
+    apt_sources = "/etc/apt/sources.list"
+    with open(apt_sources, "rt", encoding='UTF-8') as sources:
+        lines = sources.readlines()
+    with open(apt_sources, "wt", encoding='UTF-8') as sources:
+        for line in lines:
+            if pocket in line:
+                sources.write(re.sub('^# deb', 'deb', line))
+            else:
+                sources.write(line)
+
+
+@cached
+def get_unit_hostname():
+    return socket.gethostname()
+
+
+@cached
+def get_host_ip(hostname=None):
+    if config('prefer-ipv6'):
+        return get_ipv6_addr()[0]
+
+    hostname = hostname or unit_get('private-address')
+    try:
+        # Test to see if already an IPv4 address
+        socket.inet_aton(hostname)
+        return hostname
+    except socket.error:
+        # This may throw an NXDOMAIN exception; in which case
+        # things are badly broken so just let it kill the hook
+        answers = dns.resolver.query(hostname, 'A')
+        if answers:
+            return answers[0].address
+
+
+@cached
+def get_public_addr():
+    if config('ceph-public-network'):
+        return get_network_addrs('ceph-public-network')[0]
+
+    try:
+        return network_get_primary_address('public')
+    except NotImplementedError:
+        log("network-get not supported", DEBUG)
+
+    return get_host_ip()
+
+
+@cached
+def get_cluster_addr():
+    if config('ceph-cluster-network'):
+        return get_network_addrs('ceph-cluster-network')[0]
+
+    try:
+        return network_get_primary_address('cluster')
+    except NotImplementedError:
+        log("network-get not supported", DEBUG)
+
+    return get_host_ip()
+
+
+def get_networks(config_opt='ceph-public-network'):
+    """Get all configured networks from provided config option.
+
+    If public network(s) are provided, go through them and return those for
+    which we have an address configured.
+    """
+    networks = config(config_opt)
+    if networks:
+        networks = networks.split()
+        return [n for n in networks if get_address_in_network(n)]
+
+    return []
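+
+
+# NOTE: illustrative sketch, not part of the charm source; the CIDRs are
+# examples only.
+#
+#   With config('ceph-public-network') == '10.0.0.0/24 192.168.0.0/24' and
+#   a local address only in 10.0.0.0/24, get_networks('ceph-public-network')
+#   returns ['10.0.0.0/24'], while get_network_addrs() (below) would return
+#   the matching local address instead.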
+
+
+def get_network_addrs(config_opt):
+    """Get all configured public networks addresses.
+
+    If public network(s) are provided, go through them and return the
+    addresses we have configured on any of those networks.
+    """
+    addrs = []
+    networks = config(config_opt)
+    if networks:
+        networks = networks.split()
+        addrs = [get_address_in_network(n) for n in networks]
+        addrs = [a for a in addrs if a]
+
+    if not addrs:
+        if networks:
+            msg = ("Could not find an address on any of '%s' - resolve this "
+                   "error to retry" % (networks))
+            status_set('blocked', msg)
+            raise Exception(msg)
+        else:
+            return [get_host_ip()]
+
+    return addrs
+
+
+def assert_charm_supports_ipv6():
+    """Check whether the charm is able to support IPv6 on this host."""
+    _release = lsb_release()['DISTRIB_CODENAME'].lower()
+    if CompareHostReleases(_release) < "trusty":
+        raise Exception("IPv6 is not supported in the charms for Ubuntu "
+                        "versions less than Trusty 14.04")
+
+
+def get_blacklist():
+    """Get blacklist stored in the local kv() store"""
+    db = unitdata.kv()
+    return db.get('osd-blacklist', [])
+
+
+def get_journal_devices():
+    if config('osd-journal'):
+        devices = [el.strip() for el in config('osd-journal').split(' ')]
+    else:
+        devices = []
+    storage_ids = storage_list('osd-journals')
+    devices.extend((storage_get('location', s) for s in storage_ids))
+
+    # Filter out any devices in the action managed unit-local device blacklist
+    _blacklist = get_blacklist()
+    return set(device for device in devices
+               if device not in _blacklist and os.path.exists(device))
+
+
+def should_enable_discard(devices):
+    """
+    Tries to autodetect if we can enable discard on devices and if that
+    discard can be asynchronous. We want to enable both options if there
+    are any SSDs, unless any of them are using SATA <= 3.0, in which case
+    discard is supported but is a blocking operation.
+    """
+    discard_enable = True
+    for device in devices:
+        # whitelist some devices that do not need checking
+        if (device.startswith("/dev/nvme") or
+                device.startswith("/dev/vd")):
+            continue
+        try:
+            sata_3_or_less = is_sata30orless(device)
+        except subprocess.CalledProcessError:
+            sata_3_or_less = True
+        if (device.startswith("/dev/") and
+                os.path.exists(device) and
+                sata_3_or_less):
+            discard_enable = False
+            log("SSD Discard autodetection: {} is forcing discard off "
+                "(sata <= 3.0)".format(device), level=WARNING)
+    return discard_enable
+
+
+def is_sata30orless(device):
+    result = subprocess.check_output(["/usr/sbin/smartctl", "-i", device])
+    for line in str(result).split("\\n"):
+        if re.match(r"SATA Version is: *SATA (1\.|2\.|3\.0)", str(line)):
+            return True
+    return False
+
+
+def parse_osds_arguments():
+    """Parse OSD IDs from action `osds` argument.
+
+    Fetch action arguments and parse them from comma separated list to
+    the set of OSD IDs.
+
+    :return: Set of OSD IDs
+    :rtype: set(str)
+    """
+    raw_arg = function_get("osds")
+
+    if raw_arg is None:
+        raise RuntimeError("Action argument \"osds\" is missing")
+
+    # convert OSD IDs from user's input into the set
+    args = {osd_id.strip() for osd_id in str(raw_arg).split(',')}
+
+    if ALL in args and len(args) != 1:
+        args = {ALL}
+        log("keyword \"all\" was found in \"osds\" argument. Dropping other "
+            "explicitly defined OSD IDs", WARNING)
+
+    return args
+
+
+class DeviceError(Exception):
+
+    """Exception type used to signal errors raised by calling
+    external commands that manipulate devices.
+ """ + pass + + +def _check_output(args, **kwargs): + try: + return subprocess.check_output(args, **kwargs).decode('UTF-8') + except subprocess.CalledProcessError as e: + raise DeviceError(str(e)) + + +def _check_call(args, **kwargs): + try: + return subprocess.check_call(args, **kwargs) + except subprocess.CalledProcessError as e: + raise DeviceError(str(e)) + + +def setup_bcache(backing, cache): + """Create a bcache device out of the backing storage and caching device. + + :param backing: The path to the backing device. + :type backing: str + + :param cache: The path to the caching device. + :type cache: str + + :returns: The full path of the newly created bcache device. + :rtype: str + """ + _check_call(['sudo', 'make-bcache', '-B', backing, + '-C', cache, '--writeback']) + + def bcache_name(dev): + rv = _check_output(['lsblk', '-p', '-b', cache, '-J', '-o', 'NAME']) + for x in json.loads(rv)['blockdevices'][0].get('children', []): + if x['name'] != dev: + return x['name'] + + for _ in range(100): + rv = bcache_name(cache) + if rv is not None: + return rv + + # Tell the kernel to refresh the partitions. + time.sleep(0.3) + _check_call(['sudo', 'partprobe']) + + +def get_partition_names(dev): + """Given a raw device, return a set of the partitions it contains. + + :param dev: The path to the device. + :type dev: str + + :returns: A set with the partitions of the passed device. + :rtype: set[str] + """ + rv = _check_output(['lsblk', '-b', dev, '-J', '-p', '-o', 'NAME']) + rv = json.loads(rv)['blockdevices'][0].get('children', {}) + return set(x['name'] for x in rv) + + +def create_partition(cache, size, n_iter): + """Create a partition of a specific size in a device. If needed, + make sure the device has a GPT ready. + + :param cache: The path to the caching device from which to create + the partition. + :type cache: str + + :param size: The size (in GB) of the partition to create. + :type size: int + + :param n_iter: The iteration number. If zero, this function will + also create the GPT on the caching device. + :type n_iter: int + + :returns: The full path of the newly created partition. + :rtype: str + """ + if not n_iter: + # In our first iteration, make sure the device has a GPT. + _check_call(['sudo', 'parted', '-s', cache, 'mklabel', 'gpt']) + prev_partitions = get_partition_names(cache) + cmd = ['sudo', 'parted', '-s', cache, 'mkpart', 'primary', + str(n_iter * size) + 'GB', str((n_iter + 1) * size) + 'GB'] + + _check_call(cmd) + for _ in range(100): + ret = get_partition_names(cache) - prev_partitions + if ret: + return next(iter(ret)) + + time.sleep(0.3) + _check_call(['sudo', 'partprobe']) + + raise DeviceError('Failed to create partition') + + +def device_size(dev): + """Compute the size of a device, in GB. + + :param dev: The full path to the device. + :type dev: str + + :returns: The size in GB of the specified device. + :rtype: int + """ + ret = _check_output(['lsblk', '-b', '-d', dev, '-J', '-o', 'SIZE']) + ret = int(json.loads(ret)['blockdevices'][0]['size']) + return ret / (1024 * 1024 * 1024) # Return size in GB. + + +def remove_lvm(device): + """Remove any physical and logical volumes associated to a device.""" + vgs = [] + try: + rv = _check_output(['sudo', 'pvdisplay', device]) + except DeviceError: + # Assume no physical volumes. 
+        return
+
+    for line in rv.splitlines():
+        line = line.strip()
+        if line.startswith('VG Name'):
+            vgs.append(line.split()[2])
+    if vgs:
+        _check_call(['sudo', 'vgremove', '-y'] + vgs)
+    _check_call(['sudo', 'pvremove', '-y', device])
+
+
+def bcache_remove(bcache, backing, caching):
+    """Remove a bcache kernel device, given its backing and caching devices.
+
+    :param bcache: The path of the bcache device.
+    :type bcache: str
+
+    :param backing: The backing device for bcache
+    :type backing: str
+
+    :param caching: The caching device for bcache
+    :type caching: str
+    """
+    rv = _check_output(['sudo', 'bcache-super-show', backing])
+    uuid = None
+    # Fetch the UUID for the caching device.
+    for line in rv.split('\n'):
+        idx = line.find('cset.uuid')
+        if idx >= 0:
+            uuid = line[idx + 9:].strip()
+            break
+    else:
+        return
+    bcache_name = bcache[bcache.rfind('/') + 1:]
+
+    def write_one(path):
+        os.system('echo 1 | sudo tee {}'.format(path))
+
+    # The command ceph-volume typically creates PV's and VG's for the
+    # OSD device. Remove them now before deleting the bcache.
+    remove_lvm(bcache)
+
+    # NOTE: We *must* do the following steps in this order. For
+    # kernels 4.x and prior, not doing so will cause the bcache device
+    # to be undeletable.
+    # In addition, we have to use 'sudo tee' as done above, since it
+    # can cause permission issues in some implementations.
+    write_one('/sys/block/{}/bcache/detach'.format(bcache_name))
+    write_one('/sys/block/{}/bcache/stop'.format(bcache_name))
+    write_one('/sys/fs/bcache/{}/stop'.format(uuid))
+
+    # We wipe the bcache signatures here because the bcache tools will not
+    # create the devices otherwise. There is a 'force' option, but it's not
+    # always available, so we do the portable thing here.
+    wipefs_safely(backing)
+    wipefs_safely(caching)
+
+
+def wipe_disk(dev, timeout=None):
+    """Destroy all data in a specific device, including partition tables."""
+    _check_call(['sudo', 'wipefs', '-a', dev], timeout=timeout)
+
+
+def wipefs_safely(dev):
+    for _ in range(10):
+        try:
+            wipe_disk(dev, 1)
+            return
+        except DeviceError:
+            time.sleep(0.3)
+        except subprocess.TimeoutExpired:
+            # If this command times out, then it's likely because
+            # the disk is dead, so give up.
+            return
+    raise DeviceError('Failed to wipe bcache device: {}'.format(dev))
+
+
+class PartitionIter:
+
+    """Class used to create partitions iteratively.
+
+    Objects of this type are used to create partitions out of
+    the specified cache devices, either with a specific size,
+    or with a size proportional to what is needed."""
+
+    def __init__(self, caches, psize, devices):
+        """Construct a partition iterator.
+
+        :param caches: The list of cache devices to use.
+        :type caches: iterable
+
+        :param psize: The size of the partitions (in GB), or None
+        :type psize: Option[int, None]
+
+        :param devices: The backing devices. Only used to get their length.
+        :type devices: iterable
+        """
+        self.caches = [[cache, 0] for cache in caches]
+        self.idx = 0
+        if not psize:
+            factor = min(1.0, len(caches) / len(devices))
+            self.psize = [factor * device_size(cache) for cache in caches]
+        else:
+            self.psize = psize
+        self.created = {}
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        """Return a newly created partition.
+
+        The object keeps track of the currently used caching device,
+        so upon creating a new partition, it will move to the next one,
+        distributing the load among them in a round-robin fashion.
+        """
+        cache, n_iter = self.caches[self.idx]
+        size = self.psize
+        if not isinstance(size, (int, float)):
+            size = self.psize[self.idx]
+
+        self.caches[self.idx][1] += 1
+        self.idx = (self.idx + 1) % len(self.caches)
+        log('Creating partition in device {} of size {}'.format(cache, size))
+        return create_partition(cache, size, n_iter)
+
+    def create_bcache(self, backing):
+        """Create a bcache device, using the internal caching device,
+        and an external backing one.
+
+        :param backing: The path to the backing device.
+        :type backing: str
+
+        :returns: The name for the newly created bcache device.
+        :rtype: str
+        """
+        cache = next(self)
+        ret = setup_bcache(backing, cache)
+        if ret is not None:
+            self.created[backing] = (ret, cache)
+            log('Bcache device created: {}'.format(cache))
+        return ret
+
+    def cleanup(self, device):
+        """Destroy any created partitions and bcache names for a device."""
+        args = self.created.get(device)
+        if not args:
+            return
+
+        bcache, caching = args
+        try:
+            bcache_remove(bcache, device, caching)
+        except DeviceError:
+            log('Failed to cleanup bcache device: {}'.format(bcache))
+
+
+def _device_suffix(dev):
+    ix = dev.rfind('/')
+    if ix >= 0:
+        dev = dev[ix + 1:]
+    return dev
+
+
+def get_bcache_names(dev):
+    """Return the backing and caching devices for a bcache device,
+    in that specific order.
+
+    :param dev: The path to the bcache device, i.e: /dev/bcache0
+    :type dev: str
+
+    :returns: A tuple with the backing and caching devices.
+    :rtype: Tuple[Optional[str], Optional[str]]
+    """
+    if dev is None:
+        return None, None
+
+    dev_name = _device_suffix(dev)
+    bcache_path = '/sys/block/{}/slaves'.format(dev_name)
+    if (not os.path.exists('/sys/block/{}/bcache'.format(dev_name)) or
+            not os.path.exists(bcache_path)):
+        return None, None
+
+    cache = os.listdir(bcache_path)
+    if len(cache) < 2:
+        return None, None
+
+    backing = '/dev/' + cache[0]
+    caching = '/dev/' + cache[1]
+    out = _check_output(['sudo', 'bcache-super-show', backing])
+    if 'backing device' not in out:
+        return caching, backing
+    return backing, caching
+
+
+def get_parent_device(dev):
+    """Return the device's parent, if it's a block device."""
+    try:
+        rv = subprocess.check_output(['lsblk', '-as', dev, '-J'])
+        rv = json.loads(rv.decode('UTF-8'))
+    except subprocess.CalledProcessError:
+        return dev
+
+    children = rv.get('blockdevices', [])
+    if not children:
+        return dev
+
+    children = children[0].get('children', [])
+    for child in children:
+        if 'children' not in child:
+            return '/dev/' + child['name']
+
+    return dev
+
+
+def find_filestore_osds():
+    """Return the local OSD directories that are backed by filestore."""
+    # Path to Ceph OSD
+    osd_path = '/var/lib/ceph/osd'
+
+    # Search through OSD directories in path starting with 'ceph-'
+    dirs = [d for d in os.listdir(osd_path)
+            if d.startswith('ceph-')
+            and os.path.isdir(os.path.join(osd_path, d))]
+
+    found = []
+    for dir in dirs:
+        # Construct the full path
+        type_file_path = os.path.join(osd_path, dir, 'type')
+        # Open and read the type file
+        with open(type_file_path, 'r') as f:
+            content = f.read()
+        # Check if the content includes 'filestore'
+        if 'filestore' in content:
+            found.append(dir)
+
+    return found
diff --git a/ceph-osd/icon.svg b/ceph-osd/icon.svg
new file mode 100644
index 00000000..e9383990
--- /dev/null
+++ b/ceph-osd/icon.svg
@@ -0,0 +1,311 @@
+[icon.svg content omitted: 311 lines of SVG markup (image/svg+xml) for the charm icon]
diff --git a/ceph-osd/lib/charms_ceph/__init__.py
b/ceph-osd/lib/charms_ceph/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ceph-osd/lib/charms_ceph/broker.py b/ceph-osd/lib/charms_ceph/broker.py new file mode 100644 index 00000000..7ca96922 --- /dev/null +++ b/ceph-osd/lib/charms_ceph/broker.py @@ -0,0 +1,980 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import collections +import json +import os + +from subprocess import check_call, check_output, CalledProcessError +from tempfile import NamedTemporaryFile + +from charms_ceph.utils import ( + get_cephfs, + get_osd_weight +) +from charms_ceph.crush_utils import Crushmap + +from charmhelpers.core.hookenv import ( + log, + DEBUG, + INFO, + ERROR, +) +from charmhelpers.contrib.storage.linux.ceph import ( + create_erasure_profile, + delete_pool, + erasure_profile_exists, + get_osds, + monitor_key_get, + monitor_key_set, + pool_exists, + pool_set, + remove_pool_snapshot, + rename_pool, + snapshot_pool, + validator, + ErasurePool, + BasePool, + ReplicatedPool, +) + +# This comes from http://docs.ceph.com/docs/master/rados/operations/pools/ +# This should do a decent job of preventing people from passing in bad values. +# It will give a useful error message + +POOL_KEYS = { + # "Ceph Key Name": [Python type, [Valid Range]] + "size": [int], + "min_size": [int], + "crash_replay_interval": [int], + "pgp_num": [int], # = or < pg_num + "crush_ruleset": [int], + "hashpspool": [bool], + "nodelete": [bool], + "nopgchange": [bool], + "nosizechange": [bool], + "write_fadvise_dontneed": [bool], + "noscrub": [bool], + "nodeep-scrub": [bool], + "hit_set_type": [str, ["bloom", "explicit_hash", + "explicit_object"]], + "hit_set_count": [int, [1, 1]], + "hit_set_period": [int], + "hit_set_fpp": [float, [0.0, 1.0]], + "cache_target_dirty_ratio": [float], + "cache_target_dirty_high_ratio": [float], + "cache_target_full_ratio": [float], + "target_max_bytes": [int], + "target_max_objects": [int], + "cache_min_flush_age": [int], + "cache_min_evict_age": [int], + "fast_read": [bool], + "allow_ec_overwrites": [bool], + "compression_mode": [str, ["none", "passive", "aggressive", "force"]], + "compression_algorithm": [str, ["lz4", "snappy", "zlib", "zstd"]], + "compression_required_ratio": [float, [0.0, 1.0]], + "crush_rule": [str], +} + +CEPH_BUCKET_TYPES = [ + 'osd', + 'host', + 'chassis', + 'rack', + 'row', + 'pdu', + 'pod', + 'room', + 'datacenter', + 'region', + 'root' +] + + +def decode_req_encode_rsp(f): + """Decorator to decode incoming requests and encode responses.""" + + def decode_inner(req): + if isinstance(req, bytes): + req = req.decode('utf-8') + return json.dumps(f(json.loads(req))) + + return decode_inner + + +@decode_req_encode_rsp +def process_requests(reqs): + """Process Ceph broker request(s). + + This is a versioned api. API version must be supplied by the client making + the request. + + :param reqs: dict of request parameters. + :returns: dict. 
exit-code and reason if not 0 + """ + request_id = reqs.get('request-id') + try: + version = reqs.get('api-version') + if version == 1: + log('Processing request {}'.format(request_id), level=DEBUG) + resp = process_requests_v1(reqs['ops']) + if request_id: + resp['request-id'] = request_id + + return resp + + except Exception as exc: + log(str(exc), level=ERROR) + msg = ("Unexpected error occurred while processing requests: %s" % + reqs) + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + msg = ("Missing or invalid api version ({})".format(version)) + resp = {'exit-code': 1, 'stderr': msg} + if request_id: + resp['request-id'] = request_id + + return resp + + +def handle_create_erasure_profile(request, service): + """Create an erasure profile. + + :param request: dict of request operations and params + :param service: The ceph client to run the command under. + :returns: dict. exit-code and reason if not 0 + """ + # "isa" | "lrc" | "shec" | "clay" or it defaults to "jerasure" + erasure_type = request.get('erasure-type') + # dependent on erasure coding type + erasure_technique = request.get('erasure-technique') + # "host" | "rack" | ... + failure_domain = request.get('failure-domain') + name = request.get('name') + # Binary Distribution Matrix (BDM) parameters + bdm_k = request.get('k') + bdm_m = request.get('m') + # LRC parameters + bdm_l = request.get('l') + crush_locality = request.get('crush-locality') + # SHEC parameters + bdm_c = request.get('c') + # CLAY parameters + bdm_d = request.get('d') + scalar_mds = request.get('scalar-mds') + # Device Class + device_class = request.get('device-class') + + if failure_domain and failure_domain not in CEPH_BUCKET_TYPES: + msg = "failure-domain must be one of {}".format(CEPH_BUCKET_TYPES) + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + create_erasure_profile(service=service, + erasure_plugin_name=erasure_type, + profile_name=name, + failure_domain=failure_domain, + data_chunks=bdm_k, + coding_chunks=bdm_m, + locality=bdm_l, + durability_estimator=bdm_d, + helper_chunks=bdm_c, + scalar_mds=scalar_mds, + crush_locality=crush_locality, + device_class=device_class, + erasure_plugin_technique=erasure_technique) + + return {'exit-code': 0} + + +def handle_add_permissions_to_key(request, service): + """Groups are defined by the key cephx.groups.(namespace-)?-(name). This + key will contain a dict serialized to JSON with data about the group, + including pools and members. + + A group can optionally have a namespace defined that will be used to + further restrict pool access. 
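+
+    A sketch of the request shape this handler consumes (field values are
+    illustrative, not taken from a real deployment):
+
+        {
+            "op": "add-permissions-to-key",
+            "name": "glance",
+            "group": "images",
+            "group-namespace": "prod",     # optional
+            "group-permission": "rwx",     # defaults to "rwx"
+        }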
+ """ + resp = {'exit-code': 0} + + service_name = request.get('name') + group_name = request.get('group') + group_namespace = request.get('group-namespace') + if group_namespace: + group_name = "{}-{}".format(group_namespace, group_name) + group = get_group(group_name=group_name) + service_obj = get_service_groups(service=service_name, + namespace=group_namespace) + if request.get('object-prefix-permissions'): + service_obj['object_prefix_perms'] = request.get( + 'object-prefix-permissions') + format("Service object: {}".format(service_obj)) + permission = request.get('group-permission') or "rwx" + if service_name not in group['services']: + group['services'].append(service_name) + save_group(group=group, group_name=group_name) + if permission not in service_obj['group_names']: + service_obj['group_names'][permission] = [] + if group_name not in service_obj['group_names'][permission]: + service_obj['group_names'][permission].append(group_name) + save_service(service=service_obj, service_name=service_name) + service_obj['groups'] = _build_service_groups(service_obj, + group_namespace) + update_service_permissions(service_name, service_obj, group_namespace) + + return resp + + +def handle_set_key_permissions(request, service): + """Ensure the key has the requested permissions.""" + permissions = request.get('permissions') + client = request.get('client') + call = ['ceph', '--id', service, 'auth', 'caps', + 'client.{}'.format(client)] + permissions + try: + check_call(call) + except CalledProcessError as e: + log("Error updating key capabilities: {}".format(e), level=ERROR) + + +def update_service_permissions(service, service_obj=None, namespace=None): + """Update the key permissions for the named client in Ceph""" + if not service_obj: + service_obj = get_service_groups(service=service, namespace=namespace) + permissions = pool_permission_list_for_service(service_obj) + call = ['ceph', 'auth', 'caps', 'client.{}'.format(service)] + permissions + try: + check_call(call) + except CalledProcessError as e: + log("Error updating key capabilities: {}".format(e)) + + +def add_pool_to_group(pool, group, namespace=None): + """Add a named pool to a named group""" + group_name = group + if namespace: + group_name = "{}-{}".format(namespace, group_name) + group = get_group(group_name=group_name) + if pool not in group['pools']: + group["pools"].append(pool) + save_group(group, group_name=group_name) + for service in group['services']: + update_service_permissions(service, namespace=namespace) + + +def pool_permission_list_for_service(service): + """Build the permission string for Ceph for a given service""" + permissions = [] + permission_types = collections.OrderedDict() + for permission, group in sorted(service["group_names"].items()): + if permission not in permission_types: + permission_types[permission] = [] + for item in group: + permission_types[permission].append(item) + for permission, groups in permission_types.items(): + permission = "allow {}".format(permission) + for group in groups: + for pool in service['groups'][group].get('pools', []): + permissions.append("{} pool={}".format(permission, pool)) + for permission, prefixes in sorted( + service.get("object_prefix_perms", {}).items()): + for prefix in prefixes: + permissions.append("allow {} object_prefix {}".format(permission, + prefix)) + return ['mon', ('allow r, allow command "osd blacklist"' + ', allow command "osd blocklist"'), + 'osd', ', '.join(permissions)] + + +def get_service_groups(service, namespace=None): + """Services are 
objects stored with some metadata, they look like (for a + service named "nova"): + { + group_names: {'rwx': ['images']}, + groups: {} + } + After populating the group, it looks like: + { + group_names: {'rwx': ['images']}, + groups: { + 'images': { + pools: ['glance'], + services: ['nova'] + } + } + } + """ + service_json = monitor_key_get(service='admin', + key="cephx.services.{}".format(service)) + try: + service = json.loads(service_json) + except (TypeError, ValueError): + service = None + if service: + service['groups'] = _build_service_groups(service, namespace) + else: + service = {'group_names': {}, 'groups': {}} + return service + + +def _build_service_groups(service, namespace=None): + """Rebuild the 'groups' dict for a service group + + :returns: dict: dictionary keyed by group name of the following + format: + + { + 'images': { + pools: ['glance'], + services: ['nova', 'glance] + }, + 'vms':{ + pools: ['nova'], + services: ['nova'] + } + } + """ + all_groups = {} + for groups in service['group_names'].values(): + for group in groups: + name = group + if namespace: + name = "{}-{}".format(namespace, name) + all_groups[group] = get_group(group_name=name) + return all_groups + + +def get_group(group_name): + """A group is a structure to hold data about a named group, structured as: + { + pools: ['glance'], + services: ['nova'] + } + """ + group_key = get_group_key(group_name=group_name) + group_json = monitor_key_get(service='admin', key=group_key) + try: + group = json.loads(group_json) + except (TypeError, ValueError): + group = None + if not group: + group = { + 'pools': [], + 'services': [] + } + return group + + +def save_service(service_name, service): + """Persist a service in the monitor cluster""" + service['groups'] = {} + return monitor_key_set(service='admin', + key="cephx.services.{}".format(service_name), + value=json.dumps(service, sort_keys=True)) + + +def save_group(group, group_name): + """Persist a group in the monitor cluster""" + group_key = get_group_key(group_name=group_name) + return monitor_key_set(service='admin', + key=group_key, + value=json.dumps(group, sort_keys=True)) + + +def get_group_key(group_name): + """Build group key""" + return 'cephx.groups.{}'.format(group_name) + + +def handle_erasure_pool(request, service): + """Create a new erasure coded pool. + + :param request: dict of request operations and params. + :param service: The ceph client to run the command under. + :returns: dict. exit-code and reason if not 0. + """ + pool_name = request.get('name') + erasure_profile = request.get('erasure-profile') + group_name = request.get('group') + + if erasure_profile is None: + erasure_profile = "default-canonical" + + if group_name: + group_namespace = request.get('group-namespace') + # Add the pool to the group named "group_name" + add_pool_to_group(pool=pool_name, + group=group_name, + namespace=group_namespace) + + # TODO: Default to 3/2 erasure coding. I believe this requires min 5 osds + if not erasure_profile_exists(service=service, name=erasure_profile): + # TODO: Fail and tell them to create the profile or default + msg = ("erasure-profile {} does not exist. Please create it with: " + "create-erasure-profile".format(erasure_profile)) + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + try: + pool = ErasurePool(service=service, + op=request) + except KeyError: + msg = "Missing parameter." 
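+        # NOTE: a KeyError from the charmhelpers ErasurePool constructor
+        # indicates the op dict lacked a field it requires (e.g. the pool
+        # name), hence the generic message here.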
+ log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + # Ok make the erasure pool + if not pool_exists(service=service, name=pool_name): + log("Creating pool '{}' (erasure_profile={})" + .format(pool.name, erasure_profile), level=INFO) + pool.create() + + # Set/update properties that are allowed to change after pool creation. + pool.update() + + +def handle_replicated_pool(request, service): + """Create a new replicated pool. + + :param request: dict of request operations and params. + :param service: The ceph client to run the command under. + :returns: dict. exit-code and reason if not 0. + """ + pool_name = request.get('name') + group_name = request.get('group') + + # Optional params + # NOTE: Check this against the handling in the Pool classes, reconcile and + # remove. + pg_num = request.get('pg_num') + replicas = request.get('replicas') + if pg_num: + # Cap pg_num to max allowed just in case. + osds = get_osds(service) + if osds: + pg_num = min(pg_num, (len(osds) * 100 // replicas)) + request.update({'pg_num': pg_num}) + + if group_name: + group_namespace = request.get('group-namespace') + # Add the pool to the group named "group_name" + add_pool_to_group(pool=pool_name, + group=group_name, + namespace=group_namespace) + + try: + pool = ReplicatedPool(service=service, + op=request) + except KeyError: + msg = "Missing parameter." + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + if not pool_exists(service=service, name=pool_name): + log("Creating pool '{}' (replicas={})".format(pool.name, replicas), + level=INFO) + pool.create() + else: + log("Pool '{}' already exists - skipping create".format(pool.name), + level=DEBUG) + + # Set/update properties that are allowed to change after pool creation. + pool.update() + + +def handle_create_cache_tier(request, service): + """Create a cache tier on a cold pool. Modes supported are + "writeback" and "readonly". + + :param request: dict of request operations and params + :param service: The ceph client to run the command under. + :returns: dict. exit-code and reason if not 0 + """ + # mode = "writeback" | "readonly" + storage_pool = request.get('cold-pool') + cache_pool = request.get('hot-pool') + cache_mode = request.get('mode') + + if cache_mode is None: + cache_mode = "writeback" + + # cache and storage pool must exist first + if not pool_exists(service=service, name=storage_pool) or not pool_exists( + service=service, name=cache_pool): + msg = ("cold-pool: {} and hot-pool: {} must exist. Please create " + "them first".format(storage_pool, cache_pool)) + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + p = BasePool(service=service, name=storage_pool) + p.add_cache_tier(cache_pool=cache_pool, mode=cache_mode) + + +def handle_remove_cache_tier(request, service): + """Remove a cache tier from the cold pool. + + :param request: dict of request operations and params + :param service: The ceph client to run the command under. + :returns: dict. exit-code and reason if not 0 + """ + storage_pool = request.get('cold-pool') + cache_pool = request.get('hot-pool') + # cache and storage pool must exist first + if not pool_exists(service=service, name=storage_pool) or not pool_exists( + service=service, name=cache_pool): + msg = ("cold-pool: {} or hot-pool: {} doesn't exist. 
Not " + "deleting cache tier".format(storage_pool, cache_pool)) + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + pool = BasePool(name=storage_pool, service=service) + pool.remove_cache_tier(cache_pool=cache_pool) + + +def handle_set_pool_value(request, service, coerce=False): + """Sets an arbitrary pool value. + + :param request: dict of request operations and params + :param service: The ceph client to run the command under. + :param coerce: Try to parse/coerce the value into the correct type. + Used by the action code that only gets Str from Juju + :returns: dict. exit-code and reason if not 0 + """ + # Set arbitrary pool values + params = {'pool': request.get('name'), + 'key': request.get('key'), + 'value': request.get('value')} + if params['key'] not in POOL_KEYS: + msg = "Invalid key '{}'".format(params['key']) + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + # Get the validation method + validator_params = POOL_KEYS[params['key']] + # BUG: #1838650 - the function needs to try to coerce the value param to + # the type required for the validator to pass. Note, if this blows, then + # the param isn't parsable to the correct type. + if coerce: + try: + params['value'] = validator_params[0](params['value']) + except ValueError: + raise RuntimeError("Value {} isn't of type {}" + .format(params['value'], validator_params[0])) + # end of BUG: #1838650 + if len(validator_params) == 1: + # Validate that what the user passed is actually legal per Ceph's rules + validator(params['value'], validator_params[0]) + else: + # Validate that what the user passed is actually legal per Ceph's rules + validator(params['value'], validator_params[0], validator_params[1]) + + # Set the value + pool_set(service=service, pool_name=params['pool'], key=params['key'], + value=params['value']) + + +def handle_rgw_regionmap_update(request, service): + """Change the radosgw region map. + + :param request: dict of request operations and params + :param service: The ceph client to run the command under. + :returns: dict. exit-code and reason if not 0 + """ + name = request.get('client-name') + if not name: + msg = "Missing rgw-region or client-name params" + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + try: + check_output(['radosgw-admin', + '--id', service, + 'regionmap', 'update', '--name', name]) + except CalledProcessError as err: + log(err.output, level=ERROR) + return {'exit-code': 1, 'stderr': err.output} + + +def handle_rgw_regionmap_default(request, service): + """Create a radosgw region map. + + :param request: dict of request operations and params + :param service: The ceph client to run the command under. + :returns: dict. exit-code and reason if not 0 + """ + region = request.get('rgw-region') + name = request.get('client-name') + if not region or not name: + msg = "Missing rgw-region or client-name params" + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + try: + check_output( + [ + 'radosgw-admin', + '--id', service, + 'regionmap', + 'default', + '--rgw-region', region, + '--name', name]) + except CalledProcessError as err: + log(err.output, level=ERROR) + return {'exit-code': 1, 'stderr': err.output} + + +def handle_rgw_zone_set(request, service): + """Create a radosgw zone. + + :param request: dict of request operations and params + :param service: The ceph client to run the command under. + :returns: dict. 
exit-code and reason if not 0 + """ + json_file = request.get('zone-json') + name = request.get('client-name') + region_name = request.get('region-name') + zone_name = request.get('zone-name') + if not json_file or not name or not region_name or not zone_name: + msg = "Missing json-file or client-name params" + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + infile = NamedTemporaryFile(delete=False) + with open(infile.name, 'w') as infile_handle: + infile_handle.write(json_file) + try: + check_output( + [ + 'radosgw-admin', + '--id', service, + 'zone', + 'set', + '--rgw-zone', zone_name, + '--infile', infile.name, + '--name', name, + ] + ) + except CalledProcessError as err: + log(err.output, level=ERROR) + return {'exit-code': 1, 'stderr': err.output} + os.unlink(infile.name) + + +def handle_put_osd_in_bucket(request, service): + """Move an osd into a specified crush bucket. + + :param request: dict of request operations and params + :param service: The ceph client to run the command under. + :returns: dict. exit-code and reason if not 0 + """ + osd_id = request.get('osd') + target_bucket = request.get('bucket') + if not osd_id or not target_bucket: + msg = "Missing OSD ID or Bucket" + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + crushmap = Crushmap() + try: + crushmap.ensure_bucket_is_present(target_bucket) + check_output( + [ + 'ceph', + '--id', service, + 'osd', + 'crush', + 'set', + str(osd_id), + str(get_osd_weight(osd_id)), + "root={}".format(target_bucket) + ] + ) + + except Exception as exc: + msg = "Failed to move OSD " \ + "{} into Bucket {} :: {}".format(osd_id, target_bucket, exc) + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + +def handle_rgw_create_user(request, service): + """Create a new rados gateway user. + + :param request: dict of request operations and params + :param service: The ceph client to run the command under. + :returns: dict. exit-code and reason if not 0 + """ + user_id = request.get('rgw-uid') + display_name = request.get('display-name') + name = request.get('client-name') + if not name or not display_name or not user_id: + msg = "Missing client-name, display-name or rgw-uid" + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + try: + create_output = check_output( + [ + 'radosgw-admin', + '--id', service, + 'user', + 'create', + '--uid', user_id, + '--display-name', display_name, + '--name', name, + '--system' + ] + ) + try: + user_json = json.loads(str(create_output.decode('UTF-8'))) + return {'exit-code': 0, 'user': user_json} + except ValueError as err: + log(err, level=ERROR) + return {'exit-code': 1, 'stderr': err} + + except CalledProcessError as err: + log(err.output, level=ERROR) + return {'exit-code': 1, 'stderr': err.output} + + +def handle_create_cephfs(request, service): + """Create a new cephfs. + + :param request: The broker request + :param service: The ceph client to run the command under. + :returns: dict. 
exit-code and reason if not 0
+    """
+    cephfs_name = request.get('mds_name')
+    data_pool = request.get('data_pool')
+    extra_pools = request.get('extra_pools', None) or []
+    metadata_pool = request.get('metadata_pool')
+    # Check if the user params were provided
+    if not cephfs_name or not data_pool or not metadata_pool:
+        msg = "Missing mds_name, data_pool or metadata_pool params"
+        log(msg, level=ERROR)
+        return {'exit-code': 1, 'stderr': msg}
+
+    # Sanity check that the required pools exist
+    for pool_name in [data_pool, metadata_pool] + extra_pools:
+        if not pool_exists(service=service, name=pool_name):
+            msg = "CephFS pool {} does not exist. Cannot create CephFS".format(
+                pool_name)
+            log(msg, level=ERROR)
+            return {'exit-code': 1, 'stderr': msg}
+
+    if cephfs_name in get_cephfs(service=service):
+        # 'ceph fs new' has already been called for this filesystem
+        log("CephFS already created")
+        return
+
+    # Finally create CephFS
+    try:
+        check_output(["ceph",
+                      '--id', service,
+                      "fs", "new", cephfs_name,
+                      metadata_pool,
+                      data_pool])
+    except CalledProcessError as err:
+        if err.returncode == 22:
+            log("CephFS already created")
+            return
+        else:
+            log(err.output, level=ERROR)
+            return {'exit-code': 1, 'stderr': err.output}
+    for pool_name in extra_pools:
+        cmd = ["ceph", '--id', service, "fs", "add_data_pool", cephfs_name,
+               pool_name]
+        try:
+            check_output(cmd)
+        except CalledProcessError as err:
+            log(err.output, level=ERROR)
+            return {'exit-code': 1, 'stderr': err.output}
+
+
+def handle_rgw_region_set(request, service):
+    # radosgw-admin region set --infile us.json --name client.radosgw.us-east-1
+    """Set the rados gateway region.
+
+    :param request: dict. The broker request.
+    :param service: The ceph client to run the command under.
+    :returns: dict. exit-code and reason if not 0
+    """
+    json_file = request.get('region-json')
+    name = request.get('client-name')
+    region_name = request.get('region-name')
+    zone_name = request.get('zone-name')
+    if not json_file or not name or not region_name or not zone_name:
+        msg = ("Missing region-json, client-name, region-name or "
+               "zone-name params")
+        log(msg, level=ERROR)
+        return {'exit-code': 1, 'stderr': msg}
+    infile = NamedTemporaryFile(delete=False)
+    with open(infile.name, 'w') as infile_handle:
+        infile_handle.write(json_file)
+    try:
+        check_output(
+            [
+                'radosgw-admin',
+                '--id', service,
+                'region',
+                'set',
+                '--rgw-zone', zone_name,
+                '--infile', infile.name,
+                '--name', name,
+            ]
+        )
+    except CalledProcessError as err:
+        log(err.output, level=ERROR)
+        return {'exit-code': 1, 'stderr': err.output}
+    os.unlink(infile.name)
+
+
+def handle_create_cephfs_client(request, service):
+    """Create a new CephFS client for a filesystem.
+
+    :param request: The broker request
+    :param service: The ceph client to run the command under.
+    :returns: dict. exit-code and reason if not 0.
+    """
+    fs_name = request.get('fs_name')
+    client_id = request.get('client_id')
+    # TODO: fs allows setting write permissions for a list of paths.
+    path = request.get('path')
+    perms = request.get('perms')
+    # Need all parameters
+    if not fs_name or not client_id or not path or not perms:
+        msg = "Missing fs_name, client_id, path or perms params"
+        log(msg, level=ERROR)
+        return {'exit-code': 1, 'stderr': msg}
+
+    # Skip creation if this request has already been fulfilled.
+    # This makes it a bit more compatible with older Ceph versions
+    # that throw when trying to authorize a user with the same
+    # capabilities that it currently has.
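+    # The 'ceph auth ls' lookup below is what makes this handler
+    # idempotent: if the client entity already exists, its current key is
+    # returned instead of attempting a second authorization.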
+ try: + cmd = ["ceph", "--id", service, "auth", "ls", "-f", "json"] + auth_ls = json.loads(check_output(cmd, encoding="utf-8")) + except CalledProcessError as err: + log(err.output, level=ERROR) + return {'exit-code': 1, 'stderr': err.output} + except ValueError as err: + log(str(err), level=ERROR) + return {'exit-code': 1, 'stderr': str(err)} + + client = "client.{}".format(client_id) + for elem in auth_ls["auth_dump"]: + if client == elem["entity"]: + log("Client {} has already been created".format(client)) + return {'exit-code': 0, 'key': elem["key"]} + + # Try to authorize the client + # `ceph fs authorize` already returns the correct error + # message if the filesystem doesn't exist. + try: + cmd = [ + "ceph", + "--id", service, + "fs", "authorize", + fs_name, + client, + path, + perms, + "-f", "json" + ] + fs_auth = json.loads(check_output(cmd, encoding="utf-8")) + except CalledProcessError as err: + log(err.output, level=ERROR) + return {'exit-code': 1, 'stderr': err.output} + except ValueError as err: + log(str(err), level=ERROR) + return {'exit-code': 1, 'stderr': str(err)} + + return {'exit-code': 0, 'key': fs_auth[0]["key"]} + + +def process_requests_v1(reqs): + """Process v1 requests. + + Takes a list of requests (dicts) and processes each one. If an error is + found, processing stops and the client is notified in the response. + + Returns a response dict containing the exit code (non-zero if any + operation failed along with an explanation). + """ + ret = None + log("Processing {} ceph broker requests".format(len(reqs)), level=INFO) + for req in reqs: + op = req.get('op') + log("Processing op='{}'".format(op), level=DEBUG) + # Use admin client since we do not have other client key locations + # setup to use them for these operations. + svc = 'admin' + if op == "create-pool": + pool_type = req.get('pool-type') # "replicated" | "erasure" + + # Default to replicated if pool_type isn't given + if pool_type == 'erasure': + ret = handle_erasure_pool(request=req, service=svc) + else: + ret = handle_replicated_pool(request=req, service=svc) + elif op == "create-cephfs": + ret = handle_create_cephfs(request=req, service=svc) + elif op == "create-cache-tier": + ret = handle_create_cache_tier(request=req, service=svc) + elif op == "remove-cache-tier": + ret = handle_remove_cache_tier(request=req, service=svc) + elif op == "create-erasure-profile": + ret = handle_create_erasure_profile(request=req, service=svc) + elif op == "delete-pool": + pool = req.get('name') + ret = delete_pool(service=svc, name=pool) + elif op == "rename-pool": + old_name = req.get('name') + new_name = req.get('new-name') + ret = rename_pool(service=svc, old_name=old_name, + new_name=new_name) + elif op == "snapshot-pool": + pool = req.get('name') + snapshot_name = req.get('snapshot-name') + ret = snapshot_pool(service=svc, pool_name=pool, + snapshot_name=snapshot_name) + elif op == "remove-pool-snapshot": + pool = req.get('name') + snapshot_name = req.get('snapshot-name') + ret = remove_pool_snapshot(service=svc, pool_name=pool, + snapshot_name=snapshot_name) + elif op == "set-pool-value": + ret = handle_set_pool_value(request=req, service=svc) + elif op == "rgw-region-set": + ret = handle_rgw_region_set(request=req, service=svc) + elif op == "rgw-zone-set": + ret = handle_rgw_zone_set(request=req, service=svc) + elif op == "rgw-regionmap-update": + ret = handle_rgw_regionmap_update(request=req, service=svc) + elif op == "rgw-regionmap-default": + ret = handle_rgw_regionmap_default(request=req, service=svc) + elif 
op == "rgw-create-user": + ret = handle_rgw_create_user(request=req, service=svc) + elif op == "move-osd-to-bucket": + ret = handle_put_osd_in_bucket(request=req, service=svc) + elif op == "add-permissions-to-key": + ret = handle_add_permissions_to_key(request=req, service=svc) + elif op == 'set-key-permissions': + ret = handle_set_key_permissions(request=req, service=svc) + elif op == "create-cephfs-client": + ret = handle_create_cephfs_client(request=req, service=svc) + else: + msg = "Unknown operation '{}'".format(op) + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + if isinstance(ret, dict) and 'exit-code' in ret: + return ret + + return {'exit-code': 0} diff --git a/ceph-osd/lib/charms_ceph/crush_utils.py b/ceph-osd/lib/charms_ceph/crush_utils.py new file mode 100644 index 00000000..37084bf1 --- /dev/null +++ b/ceph-osd/lib/charms_ceph/crush_utils.py @@ -0,0 +1,154 @@ +# Copyright 2014 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re + +from subprocess import check_output, CalledProcessError + +from charmhelpers.core.hookenv import ( + log, + ERROR, +) + +CRUSH_BUCKET = """root {name} {{ + id {id} # do not change unnecessarily + # weight 0.000 + alg straw2 + hash 0 # rjenkins1 +}} + +rule {name} {{ + ruleset 0 + type replicated + min_size 1 + max_size 10 + step take {name} + step chooseleaf firstn 0 type host + step emit +}}""" + +# This regular expression looks for a string like: +# root NAME { +# id NUMBER +# so that we can extract NAME and ID from the crushmap +CRUSHMAP_BUCKETS_RE = re.compile(r"root\s+(.+)\s+\{\s*id\s+(-?\d+)") + +# This regular expression looks for ID strings in the crushmap like: +# id NUMBER +# so that we can extract the IDs from a crushmap +CRUSHMAP_ID_RE = re.compile(r"id\s+(-?\d+)") + + +class Crushmap(object): + """An object oriented approach to Ceph crushmap management.""" + + def __init__(self): + self._crushmap = self.load_crushmap() + roots = re.findall(CRUSHMAP_BUCKETS_RE, self._crushmap) + buckets = [] + ids = list(map( + lambda x: int(x), + re.findall(CRUSHMAP_ID_RE, self._crushmap))) + ids = sorted(ids) + if roots != []: + for root in roots: + buckets.append(CRUSHBucket(root[0], root[1], True)) + + self._buckets = buckets + if ids != []: + self._ids = ids + else: + self._ids = [0] + + def load_crushmap(self): + try: + crush = str(check_output(['ceph', 'osd', 'getcrushmap']) + .decode('UTF-8')) + return str(check_output(['crushtool', '-d', '-'], + stdin=crush.stdout) + .decode('UTF-8')) + except CalledProcessError as e: + log("Error occurred while loading and decompiling CRUSH map:" + "{}".format(e), ERROR) + raise + + def ensure_bucket_is_present(self, bucket_name): + if bucket_name not in [bucket.name for bucket in self.buckets()]: + self.add_bucket(bucket_name) + self.save() + + def buckets(self): + """Return a list of buckets that are in the Crushmap.""" + return self._buckets + + def add_bucket(self, bucket_name): + """Add a named bucket to Ceph""" + new_id = min(self._ids) - 1 + 
self._ids.append(new_id)
+        self._buckets.append(CRUSHBucket(bucket_name, new_id))
+
+    def save(self):
+        """Persist Crushmap to Ceph"""
+        try:
+            crushmap = self.build_crushmap()
+            # Compile the textual map and feed the compiled bytes to
+            # 'ceph osd setcrushmap' via stdin.
+            compiled = check_output(['crushtool', '-c', '/dev/stdin', '-o',
+                                     '/dev/stdout'],
+                                    input=crushmap.encode('UTF-8'))
+            ceph_output = check_output(['ceph', 'osd', 'setcrushmap', '-i',
+                                        '/dev/stdin'],
+                                       input=compiled).decode('UTF-8')
+            return ceph_output
+        except CalledProcessError as e:
+            log("save error: {}".format(e))
+            raise
+
+    def build_crushmap(self):
+        """Modify the current CRUSH map to include the new buckets."""
+        tmp_crushmap = self._crushmap
+        for bucket in self._buckets:
+            if not bucket.default:
+                tmp_crushmap = "{}\n\n{}".format(
+                    tmp_crushmap,
+                    Crushmap.bucket_string(bucket.name, bucket.id))
+
+        return tmp_crushmap
+
+    @staticmethod
+    def bucket_string(name, id):
+        return CRUSH_BUCKET.format(name=name, id=id)
+
+
+class CRUSHBucket(object):
+    """CRUSH bucket description object."""
+
+    def __init__(self, name, id, default=False):
+        self.name = name
+        self.id = int(id)
+        self.default = default
+
+    def __repr__(self):
+        return "Bucket {{Name: {name}, ID: {id}}}".format(
+            name=self.name, id=self.id)
+
+    def __eq__(self, other):
+        """Override the default Equals behavior"""
+        if isinstance(other, self.__class__):
+            return self.__dict__ == other.__dict__
+        return NotImplemented
+
+    def __ne__(self, other):
+        """Define a non-equality test"""
+        if isinstance(other, self.__class__):
+            return not self.__eq__(other)
+        return NotImplemented
diff --git a/ceph-osd/lib/charms_ceph/utils.py b/ceph-osd/lib/charms_ceph/utils.py
new file mode 100644
index 00000000..85e6249b
--- /dev/null
+++ b/ceph-osd/lib/charms_ceph/utils.py
@@ -0,0 +1,3571 @@
+# Copyright 2017-2021 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
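+
+# This module collects the helpers shared by the Ceph charms: block device
+# discovery and tuning, OSD and monitor bookkeeping, and the CRUSH map and
+# quorum queries used by the hooks.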
+ +import collections +import glob +import itertools +import json +import os +import pyudev +import random +import re +import socket +import subprocess +import sys +import time +import uuid +import functools + +from contextlib import contextmanager +from datetime import datetime + +from charmhelpers.core import hookenv +from charmhelpers.core import templating +from charmhelpers.core.host import ( + chownr, + cmp_pkgrevno, + lsb_release, + mkdir, + owner, + service_restart, + service_start, + service_stop, + CompareHostReleases, + write_file, + is_container, +) +from charmhelpers.core.hookenv import ( + cached, + config, + log, + status_set, + DEBUG, + ERROR, + WARNING, + storage_get, + storage_list, +) +from charmhelpers.fetch import ( + add_source, + apt_install, + apt_purge, + apt_update, + filter_missing_packages, + get_installed_version +) +from charmhelpers.contrib.storage.linux.ceph import ( + get_mon_map, + monitor_key_set, + monitor_key_exists, + monitor_key_get, +) +from charmhelpers.contrib.storage.linux.utils import ( + is_block_device, + is_device_mounted, +) +from charmhelpers.contrib.openstack.utils import ( + get_os_codename_install_source, +) +from charmhelpers.contrib.storage.linux import lvm +from charmhelpers.core.unitdata import kv + +CEPH_BASE_DIR = os.path.join(os.sep, 'var', 'lib', 'ceph') +OSD_BASE_DIR = os.path.join(CEPH_BASE_DIR, 'osd') +HDPARM_FILE = os.path.join(os.sep, 'etc', 'hdparm.conf') + +LEADER = 'leader' +PEON = 'peon' +QUORUM = [LEADER, PEON] + +PACKAGES = ['ceph', 'gdisk', + 'radosgw', 'xfsprogs', + 'lvm2', 'parted', 'smartmontools'] + +REMOVE_PACKAGES = [] +CHRONY_PACKAGE = 'chrony' + +CEPH_KEY_MANAGER = 'ceph' +VAULT_KEY_MANAGER = 'vault' +KEY_MANAGERS = [ + CEPH_KEY_MANAGER, + VAULT_KEY_MANAGER, +] + +LinkSpeed = { + "BASE_10": 10, + "BASE_100": 100, + "BASE_1000": 1000, + "GBASE_10": 10000, + "GBASE_40": 40000, + "GBASE_100": 100000, + "UNKNOWN": None +} + +# Mapping of adapter speed to sysctl settings +NETWORK_ADAPTER_SYSCTLS = { + # 10Gb + LinkSpeed["GBASE_10"]: { + 'net.core.rmem_default': 524287, + 'net.core.wmem_default': 524287, + 'net.core.rmem_max': 524287, + 'net.core.wmem_max': 524287, + 'net.core.optmem_max': 524287, + 'net.core.netdev_max_backlog': 300000, + 'net.ipv4.tcp_rmem': '10000000 10000000 10000000', + 'net.ipv4.tcp_wmem': '10000000 10000000 10000000', + 'net.ipv4.tcp_mem': '10000000 10000000 10000000' + }, + # Mellanox 10/40Gb + LinkSpeed["GBASE_40"]: { + 'net.ipv4.tcp_timestamps': 0, + 'net.ipv4.tcp_sack': 1, + 'net.core.netdev_max_backlog': 250000, + 'net.core.rmem_max': 4194304, + 'net.core.wmem_max': 4194304, + 'net.core.rmem_default': 4194304, + 'net.core.wmem_default': 4194304, + 'net.core.optmem_max': 4194304, + 'net.ipv4.tcp_rmem': '4096 87380 4194304', + 'net.ipv4.tcp_wmem': '4096 65536 4194304', + 'net.ipv4.tcp_low_latency': 1, + 'net.ipv4.tcp_adv_win_scale': 1 + } +} + + +class Partition(object): + def __init__(self, name, number, size, start, end, sectors, uuid): + """A block device partition. 
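+
+        Instances are normally built by get_partition_list() below from one
+        row of partition-table output, rather than constructed by hand.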
+
+        :param name: Name of block device
+        :param number: Partition number
+        :param size: Capacity of the device
+        :param start: Starting block
+        :param end: Ending block
+        :param sectors: Number of blocks
+        :param uuid: UUID of the partition
+        """
+        self.name = name
+        self.number = number
+        self.size = size
+        self.start = start
+        self.end = end
+        self.sectors = sectors
+        self.uuid = uuid
+
+    def __str__(self):
+        return "number: {} start: {} end: {} sectors: {} size: {} " \
+               "name: {} uuid: {}".format(self.number, self.start,
+                                          self.end,
+                                          self.sectors, self.size,
+                                          self.name, self.uuid)
+
+    def __eq__(self, other):
+        if isinstance(other, self.__class__):
+            return self.__dict__ == other.__dict__
+        return False
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+
+def unmounted_disks():
+    """List of unmounted block devices on the current host."""
+    disks = []
+    context = pyudev.Context()
+    for device in context.list_devices(DEVTYPE='disk'):
+        if device['SUBSYSTEM'] == 'block':
+            if device.device_node is None:
+                continue
+
+            matched = False
+            for block_type in [u'dm-', u'loop', u'ram', u'nbd']:
+                if block_type in device.device_node:
+                    matched = True
+            if matched:
+                continue
+
+            disks.append(device.device_node)
+    log("Found disks: {}".format(disks))
+    return [disk for disk in disks if not is_device_mounted(disk)]
+
+
+def save_sysctls(sysctl_dict, save_location):
+    """Persist the sysctls to the hard drive.
+
+    :param sysctl_dict: dict
+    :param save_location: path to save the settings to
+    :raises: IOError if anything goes wrong with writing.
+    """
+    try:
+        # Persist the settings for reboots
+        with open(save_location, "w") as fd:
+            for key, value in sysctl_dict.items():
+                fd.write("{}={}\n".format(key, value))
+
+    except IOError as e:
+        log("Unable to persist sysctl settings to {}. Error {}".format(
+            save_location, e), level=ERROR)
+        raise
+
+
+def tune_nic(network_interface):
+    """Set optimal sysctls for the particular network adapter.
+
+    :param network_interface: string The network adapter name.
+    """
+    speed = get_link_speed(network_interface)
+    if speed in NETWORK_ADAPTER_SYSCTLS:
+        status_set('maintenance', 'Tuning device {}'.format(
+            network_interface))
+        sysctl_file = os.path.join(
+            os.sep,
+            'etc',
+            'sysctl.d',
+            '51-ceph-osd-charm-{}.conf'.format(network_interface))
+        try:
+            log("Saving sysctl_file: {} values: {}".format(
+                sysctl_file, NETWORK_ADAPTER_SYSCTLS[speed]),
+                level=DEBUG)
+            save_sysctls(sysctl_dict=NETWORK_ADAPTER_SYSCTLS[speed],
+                         save_location=sysctl_file)
+        except IOError as e:
+            log("Write to /etc/sysctl.d/51-ceph-osd-charm-{} "
+                "failed. {}".format(network_interface, e),
+                level=ERROR)
+
+        try:
+            # Apply the settings
+            log("Applying sysctl settings", level=DEBUG)
+            subprocess.check_output(["sysctl", "-p", sysctl_file])
+        except subprocess.CalledProcessError as err:
+            log('sysctl -p {} failed with error {}'.format(sysctl_file,
+                                                           err.output),
+                level=ERROR)
+    else:
+        log("No settings found for network adapter: {}".format(
+            network_interface), level=DEBUG)
+
+
+def get_link_speed(network_interface):
+    """This will find the link speed for a given network device. Returns None
+    if an error occurs.
+
+    :param network_interface: string The network adapter interface.
+    :returns: LinkSpeed
+    """
+    speed_path = os.path.join(os.sep, 'sys', 'class', 'net',
+                              network_interface, 'speed')
+    # I'm not sure where else we'd check if this doesn't exist
+    if not os.path.exists(speed_path):
+        return LinkSpeed["UNKNOWN"]
+
+    try:
+        with open(speed_path, 'r') as sysfs:
+            nic_speed = sysfs.readlines()
+
+        # Did we actually read anything?
+        if not nic_speed:
+            return LinkSpeed["UNKNOWN"]
+
+        # Try to find a sysctl match for this particular speed
+        for name, speed in LinkSpeed.items():
+            if speed == int(nic_speed[0].strip()):
+                return speed
+        # Default to UNKNOWN if we can't find a match
+        return LinkSpeed["UNKNOWN"]
+    except IOError as e:
+        log("Unable to open {path} because of error: {error}".format(
+            path=speed_path,
+            error=e), level=ERROR)
+        return LinkSpeed["UNKNOWN"]
+
+
+def persist_settings(settings_dict):
+    """Persist the hard drive settings to the /etc/hdparm.conf file.
+
+    The settings_dict should be in the form of {"uuid": {"key": "value"}}
+
+    :param settings_dict: dict of settings to save
+    """
+    if not settings_dict:
+        return
+
+    try:
+        templating.render(source='hdparm.conf', target=HDPARM_FILE,
+                          context=settings_dict)
+    except IOError as err:
+        log("Unable to open {path} because of error: {error}".format(
+            path=HDPARM_FILE, error=err), level=ERROR)
+    except Exception as e:
+        # templating.render can raise a jinja2 exception if the
+        # template is not found. Rather than polluting the import
+        # space of this charm, simply catch Exception
+        log('Unable to render {path} due to error: {error}'.format(
+            path=HDPARM_FILE, error=e), level=ERROR)
+
+
+def set_max_sectors_kb(dev_name, max_sectors_size):
+    """This function sets the max_sectors_kb size of a given block device.
+
+    :param dev_name: Name of the block device to query
+    :param max_sectors_size: int of the max_sectors_size to save
+    """
+    max_sectors_kb_path = os.path.join(os.sep, 'sys', 'block', dev_name,
+                                       'queue', 'max_sectors_kb')
+    try:
+        with open(max_sectors_kb_path, 'w') as f:
+            f.write(str(max_sectors_size))
+    except IOError as e:
+        log('Failed to write max_sectors_kb to {}. Error: {}'.format(
+            max_sectors_kb_path, e), level=ERROR)
+
+
+def get_max_sectors_kb(dev_name):
+    """This function gets the max_sectors_kb size of a given block device.
+
+    :param dev_name: Name of the block device to query
+    :returns: int which is either the max_sectors_kb or 0 on error.
+    """
+    max_sectors_kb_path = os.path.join(os.sep, 'sys', 'block', dev_name,
+                                       'queue', 'max_sectors_kb')
+
+    # Read in what Linux has set by default
+    if os.path.exists(max_sectors_kb_path):
+        try:
+            with open(max_sectors_kb_path, 'r') as f:
+                max_sectors_kb = f.read().strip()
+                return int(max_sectors_kb)
+        except IOError as e:
+            log('Failed to read max_sectors_kb from {}. Error: {}'.format(
+                max_sectors_kb_path, e), level=ERROR)
+            # Bail.
+            return 0
+    return 0
+
+
+def get_max_hw_sectors_kb(dev_name):
+    """This function gets the max_hw_sectors_kb for a given block device.
+
+    :param dev_name: Name of the block device to query
+    :returns: int which is either the max_hw_sectors_kb or 0 on error.
+    """
+    max_hw_sectors_kb_path = os.path.join(os.sep, 'sys', 'block', dev_name,
+                                          'queue', 'max_hw_sectors_kb')
+    # Read in what the hardware supports
+    if os.path.exists(max_hw_sectors_kb_path):
+        try:
+            with open(max_hw_sectors_kb_path, 'r') as f:
+                max_hw_sectors_kb = f.read().strip()
+                return int(max_hw_sectors_kb)
+        except IOError as e:
+            log('Failed to read max_hw_sectors_kb from {}.
Error: {}'.format( + max_hw_sectors_kb_path, e), level=ERROR) + return 0 + return 0 + + +def set_hdd_read_ahead(dev_name, read_ahead_sectors=256): + """This function sets the hard drive read ahead. + + :param dev_name: Name of the block device to set read ahead on. + :param read_ahead_sectors: int How many sectors to read ahead. + """ + try: + # Set the read ahead sectors to 256 + log('Setting read ahead to {} for device {}'.format( + read_ahead_sectors, + dev_name)) + subprocess.check_output(['hdparm', + '-a{}'.format(read_ahead_sectors), + dev_name]) + except subprocess.CalledProcessError as e: + log('hdparm failed with error: {}'.format(e.output), + level=ERROR) + + +def get_block_uuid(block_dev): + """This queries blkid to get the uuid for a block device. + + :param block_dev: Name of the block device to query. + :returns: The UUID of the device or None on Error. + """ + try: + block_info = str(subprocess + .check_output(['blkid', '-o', 'export', block_dev]) + .decode('UTF-8')) + for tag in block_info.split('\n'): + parts = tag.split('=') + if parts[0] == 'UUID': + return parts[1] + return None + except subprocess.CalledProcessError as err: + log('get_block_uuid failed with error: {}'.format(err.output), + level=ERROR) + return None + + +def check_max_sectors(save_settings_dict, + block_dev, + uuid): + """Tune the max_hw_sectors if needed. + + make sure that /sys/.../max_sectors_kb matches max_hw_sectors_kb or at + least 1MB for spinning disks + If the box has a RAID card with cache this could go much bigger. + + :param save_settings_dict: The dict used to persist settings + :param block_dev: A block device name: Example: /dev/sda + :param uuid: The uuid of the block device + """ + dev_name = None + path_parts = os.path.split(block_dev) + if len(path_parts) == 2: + dev_name = path_parts[1] + else: + log('Unable to determine the block device name from path: {}'.format( + block_dev)) + # Play it safe and bail + return + max_sectors_kb = get_max_sectors_kb(dev_name=dev_name) + max_hw_sectors_kb = get_max_hw_sectors_kb(dev_name=dev_name) + + if max_sectors_kb < max_hw_sectors_kb: + # OK we have a situation where the hardware supports more than Linux is + # currently requesting + config_max_sectors_kb = hookenv.config('max-sectors-kb') + if config_max_sectors_kb < max_hw_sectors_kb: + # Set the max_sectors_kb to the config.yaml value if it is less + # than the max_hw_sectors_kb + log('Setting max_sectors_kb for device {} to {}'.format( + dev_name, config_max_sectors_kb)) + save_settings_dict[ + "drive_settings"][uuid][ + "read_ahead_sect"] = config_max_sectors_kb + set_max_sectors_kb(dev_name=dev_name, + max_sectors_size=config_max_sectors_kb) + else: + # Set to the max_hw_sectors_kb + log('Setting max_sectors_kb for device {} to {}'.format( + dev_name, max_hw_sectors_kb)) + save_settings_dict[ + "drive_settings"][uuid]['read_ahead_sect'] = max_hw_sectors_kb + set_max_sectors_kb(dev_name=dev_name, + max_sectors_size=max_hw_sectors_kb) + else: + log('max_sectors_kb match max_hw_sectors_kb. No change needed for ' + 'device: {}'.format(block_dev)) + + +def tune_dev(block_dev): + """Try to make some intelligent decisions with HDD tuning. Future work will + include optimizing SSDs. + + This function will change the read ahead sectors and the max write + sectors for each block device. + + :param block_dev: A block device name: Example: /dev/sda + """ + uuid = get_block_uuid(block_dev) + if uuid is None: + log('block device {} uuid is None. 
Unable to save to ' + 'hdparm.conf'.format(block_dev), level=DEBUG) + return + save_settings_dict = {} + log('Tuning device {}'.format(block_dev)) + status_set('maintenance', 'Tuning device {}'.format(block_dev)) + set_hdd_read_ahead(block_dev) + save_settings_dict["drive_settings"] = {} + save_settings_dict["drive_settings"][uuid] = {} + save_settings_dict["drive_settings"][uuid]['read_ahead_sect'] = 256 + + check_max_sectors(block_dev=block_dev, + save_settings_dict=save_settings_dict, + uuid=uuid) + + persist_settings(settings_dict=save_settings_dict) + status_set('maintenance', 'Finished tuning device {}'.format(block_dev)) + + +def ceph_user(): + return 'ceph' + + +class CrushLocation(object): + def __init__(self, identifier, name, osd="", host="", chassis="", + rack="", row="", pdu="", pod="", room="", + datacenter="", zone="", region="", root=""): + self.identifier = identifier + self.name = name + self.osd = osd + self.host = host + self.chassis = chassis + self.rack = rack + self.row = row + self.pdu = pdu + self.pod = pod + self.room = room + self.datacenter = datacenter + self.zone = zone + self.region = region + self.root = root + + def __str__(self): + return "name: {} id: {} osd: {} host: {} chassis: {} rack: {} " \ + "row: {} pdu: {} pod: {} room: {} datacenter: {} zone: {} " \ + "region: {} root: {}".format(self.name, self.identifier, + self.osd, self.host, self.chassis, + self.rack, self.row, self.pdu, + self.pod, self.room, + self.datacenter, self.zone, + self.region, self.root) + + def __eq__(self, other): + return not self.name < other.name and not other.name < self.name + + def __ne__(self, other): + return self.name < other.name or other.name < self.name + + def __gt__(self, other): + return self.name > other.name + + def __ge__(self, other): + return not self.name < other.name + + def __le__(self, other): + return self.name < other.name + + +def get_osd_weight(osd_id): + """Returns the weight of the specified OSD. + + :returns: Float + :raises: ValueError if the monmap fails to parse. + :raises: CalledProcessError if our Ceph command fails. + """ + try: + tree = str(subprocess + .check_output(['ceph', 'osd', 'tree', '--format=json']) + .decode('UTF-8')) + try: + json_tree = json.loads(tree) + # Make sure children are present in the JSON + if not json_tree['nodes']: + return None + for device in json_tree['nodes']: + if device['type'] == 'osd' and device['name'] == osd_id: + return device['crush_weight'] + except ValueError as v: + log("Unable to parse ceph tree json: {}. Error: {}".format( + tree, v)) + raise + except subprocess.CalledProcessError as e: + log("ceph osd tree command failed with message: {}".format( + e)) + raise + + +def _filter_nodes_and_set_attributes(node, node_lookup_map, lookup_type): + """Get all nodes of the desired type, with all their attributes. + + These attributes can be direct or inherited from ancestors. 
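+
+    For a 'host' lookup, for example, entries come back shaped like
+    {'root': 'default', 'host': 'node-1', 'name': 'node-1',
+    'identifier': -2} (names illustrative).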
+ """ + attribute_dict = {node['type']: node['name']} + if node['type'] == lookup_type: + attribute_dict['name'] = node['name'] + attribute_dict['identifier'] = node['id'] + return [attribute_dict] + elif not node.get('children'): + return [attribute_dict] + else: + descendant_attribute_dicts = [ + _filter_nodes_and_set_attributes(node_lookup_map[node_id], + node_lookup_map, lookup_type) + for node_id in node.get('children', []) + ] + return [dict(attribute_dict, **descendant_attribute_dict) + for descendant_attribute_dict + in itertools.chain.from_iterable(descendant_attribute_dicts)] + + +def _flatten_roots(nodes, lookup_type='host'): + """Get a flattened list of nodes of the desired type. + + :param nodes: list of nodes defined as a dictionary of attributes and + children + :type nodes: List[Dict[int, Any]] + :param lookup_type: type of searched node + :type lookup_type: str + :returns: flattened list of nodes + :rtype: List[Dict[str, Any]] + """ + lookup_map = {node['id']: node for node in nodes} + root_attributes_dicts = [_filter_nodes_and_set_attributes(node, lookup_map, + lookup_type) + for node in nodes if node['type'] == 'root'] + # get a flattened list of roots. + return list(itertools.chain.from_iterable(root_attributes_dicts)) + + +def get_osd_tree(service): + """Returns the current OSD map in JSON. + + :returns: List. + :rtype: List[CrushLocation] + :raises: ValueError if the monmap fails to parse. + Also raises CalledProcessError if our Ceph command fails + """ + try: + tree = str(subprocess + .check_output(['ceph', '--id', service, + 'osd', 'tree', '--format=json']) + .decode('UTF-8')) + try: + json_tree = json.loads(tree) + roots = _flatten_roots(json_tree["nodes"]) + return [CrushLocation(**host) for host in roots] + except ValueError as v: + log("Unable to parse ceph tree json: {}. Error: {}".format( + tree, v)) + raise + except subprocess.CalledProcessError as e: + log("ceph osd tree command failed with message: {}".format(e)) + raise + + +def _get_child_dirs(path): + """Returns a list of directory names in the specified path. + + :param path: a full path listing of the parent directory to return child + directory names + :returns: list. A list of child directories under the parent directory + :raises: ValueError if the specified path does not exist or is not a + directory, + OSError if an error occurs reading the directory listing + """ + if not os.path.exists(path): + raise ValueError('Specified path "%s" does not exist' % path) + if not os.path.isdir(path): + raise ValueError('Specified path "%s" is not a directory' % path) + + files_in_dir = [os.path.join(path, f) for f in os.listdir(path)] + return list(filter(os.path.isdir, files_in_dir)) + + +def _get_osd_num_from_dirname(dirname): + """Parses the dirname and returns the OSD id. + + Parses a string in the form of 'ceph-{osd#}' and returns the OSD number + from the directory name. + + :param dirname: the directory name to return the OSD number from + :return int: the OSD number the directory name corresponds to + :raises ValueError: if the OSD number cannot be parsed from the provided + directory name. 
+ """ + match = re.search(r'ceph-(?P\d+)', dirname) + if not match: + raise ValueError("dirname not in correct format: {}".format(dirname)) + + return match.group('osd_id') + + +def get_crimson_osd_ids(): + """Return a set of the OSDs that are running with the Crimson backend.""" + rv = set() + try: + out = subprocess.check_output(['pgrep', 'crimson-osd', '-a']) + for line in out.decode('utf8').splitlines(): + rv.add(line.split()[-1]) + except Exception: + pass + + return rv + + +def get_local_osd_ids(): + """This will list the /var/lib/ceph/osd/* directories and try + to split the ID off of the directory name and return it in + a list. Excludes crimson OSD's from the returned list. + + :returns: list. A list of OSD identifiers + :raises: OSError if something goes wrong with listing the directory. + """ + osd_ids = [] + crimson_osds = get_crimson_osd_ids() + osd_path = os.path.join(os.sep, 'var', 'lib', 'ceph', 'osd') + if os.path.exists(osd_path): + try: + dirs = os.listdir(osd_path) + for osd_dir in dirs: + osd_id = osd_dir.split('-')[1] if '-' in osd_dir else '' + if (_is_int(osd_id) and + filesystem_mounted(os.path.join( + os.sep, osd_path, osd_dir)) and + osd_id not in crimson_osds): + osd_ids.append(osd_id) + except OSError: + raise + return osd_ids + + +def get_local_mon_ids(): + """This will list the /var/lib/ceph/mon/* directories and try + to split the ID off of the directory name and return it in + a list. + + :returns: list. A list of monitor identifiers + :raises: OSError if something goes wrong with listing the directory. + """ + mon_ids = [] + mon_path = os.path.join(os.sep, 'var', 'lib', 'ceph', 'mon') + if os.path.exists(mon_path): + try: + dirs = os.listdir(mon_path) + for mon_dir in dirs: + # Basically this takes everything after ceph- as the monitor ID + match = re.search('ceph-(?P.*)', mon_dir) + if match: + mon_ids.append(match.group('mon_id')) + except OSError: + raise + return mon_ids + + +def _is_int(v): + """Return True if the object v can be turned into an integer.""" + try: + int(v) + return True + except ValueError: + return False + + +def get_version(): + """Derive Ceph release from an installed package.""" + import apt_pkg as apt + + package = "ceph" + + current_ver = get_installed_version(package) + if not current_ver: + # package is known, but no version is currently installed. 
+ e = 'Could not determine version of uninstalled package: %s' % package + error_out(e) + + vers = apt.upstream_version(current_ver.ver_str) + + # x.y match only for 20XX.X + # and ignore patch level for other packages + match = re.match(r'^(\d+)\.(\d+)', vers) + + if match: + vers = match.group(0) + return float(vers) + + +def error_out(msg): + log("FATAL ERROR: {}".format(msg), + level=ERROR) + sys.exit(1) + + +def is_quorum(): + asok = "/var/run/ceph/ceph-mon.{}.asok".format(socket.gethostname()) + cmd = [ + "sudo", + "-u", + ceph_user(), + "ceph", + "--admin-daemon", + asok, + "mon_status" + ] + if os.path.exists(asok): + try: + result = json.loads(str(subprocess + .check_output(cmd) + .decode('UTF-8'))) + except subprocess.CalledProcessError: + return False + except ValueError: + # Non JSON response from mon_status + return False + if result['state'] in QUORUM: + return True + else: + return False + else: + return False + + +def is_leader(): + asok = "/var/run/ceph/ceph-mon.{}.asok".format(socket.gethostname()) + cmd = [ + "sudo", + "-u", + ceph_user(), + "ceph", + "--admin-daemon", + asok, + "mon_status" + ] + if os.path.exists(asok): + try: + result = json.loads(str(subprocess + .check_output(cmd) + .decode('UTF-8'))) + except subprocess.CalledProcessError: + return False + except ValueError: + # Non JSON response from mon_status + return False + if result['state'] == LEADER: + return True + else: + return False + else: + return False + + +def manager_available(): + # if manager daemon isn't on this release, just say it is Fine + if cmp_pkgrevno('ceph', '11.0.0') < 0: + return True + cmd = ["sudo", "-u", "ceph", "ceph", "mgr", "dump", "-f", "json"] + try: + result = json.loads(subprocess.check_output(cmd).decode('UTF-8')) + return result['available'] + except subprocess.CalledProcessError as e: + log("'{}' failed: {}".format(" ".join(cmd), str(e))) + return False + except Exception: + return False + + +def wait_for_quorum(): + while not is_quorum(): + log("Waiting for quorum to be reached") + time.sleep(3) + + +def wait_for_manager(): + while not manager_available(): + log("Waiting for manager to be available") + time.sleep(5) + + +def add_bootstrap_hint(peer): + asok = "/var/run/ceph/ceph-mon.{}.asok".format(socket.gethostname()) + cmd = [ + "sudo", + "-u", + ceph_user(), + "ceph", + "--admin-daemon", + asok, + "add_bootstrap_peer_hint", + peer + ] + if os.path.exists(asok): + # Ignore any errors for this call + subprocess.call(cmd) + + +DISK_FORMATS = [ + 'xfs', + 'ext4', + 'btrfs' +] + +CEPH_PARTITIONS = [ + '89C57F98-2FE5-4DC0-89C1-5EC00CEFF2BE', # Ceph encrypted disk in creation + '45B0969E-9B03-4F30-B4C6-5EC00CEFF106', # Ceph encrypted journal + '4FBD7E29-9D25-41B8-AFD0-5EC00CEFF05D', # Ceph encrypted OSD data + '4FBD7E29-9D25-41B8-AFD0-062C0CEFF05D', # Ceph OSD data + '45B0969E-9B03-4F30-B4C6-B4B80CEFF106', # Ceph OSD journal + '89C57F98-2FE5-4DC0-89C1-F3AD0CEFF2BE', # Ceph disk in creation +] + + +def get_partition_list(dev): + """Lists the partitions of a block device. + + :param dev: Path to a block device. ex: /dev/sda + :returns: Returns a list of Partition objects. 
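+              (A hedged note: judging by the constructor calls in the body
+              below, Partition is a namedtuple with the fields number,
+              start, end, sectors, size, name and uuid; its definition
+              lives elsewhere in this module.)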
+    :raises: CalledProcessError if lsblk fails
+    """
+    partitions_list = []
+    try:
+        partitions = get_partitions(dev)
+        # For each line of output
+        for partition in partitions:
+            parts = partition.split()
+            try:
+                partitions_list.append(
+                    Partition(number=parts[0],
+                              start=parts[1],
+                              end=parts[2],
+                              sectors=parts[3],
+                              size=parts[4],
+                              name=parts[5],
+                              uuid=parts[6])
+                )
+            except IndexError:
+                partitions_list.append(
+                    Partition(number=parts[0],
+                              start=parts[1],
+                              end=parts[2],
+                              sectors=parts[3],
+                              size=parts[4],
+                              name="",
+                              uuid=parts[5])
+                )
+
+        return partitions_list
+    except subprocess.CalledProcessError:
+        raise
+
+
+def is_pristine_disk(dev):
+    """
+    Read the first 2048 bytes (LBA 0 - 3) of the block device to determine
+    whether it is actually all zeroes and safe for us to use.
+
+    Existing partitioning tools do not discern between a failure to read
+    from a block device, a failure to understand a partition table and the
+    fact that a block device has no partition table. Since we need to be
+    positive about which is which we need to read the device directly and
+    confirm ourselves.
+
+    :param dev: Path to block device
+    :type dev: str
+    :returns: True if all 2048 bytes == 0x0, False if not
+    :rtype: bool
+    """
+    want_bytes = 2048
+
+    try:
+        f = open(dev, 'rb')
+    except OSError as e:
+        log(e)
+        return False
+
+    with f:
+        data = f.read(want_bytes)
+    read_bytes = len(data)
+    if read_bytes != want_bytes:
+        log('{}: short read, got {} bytes expected {}.'
+            .format(dev, read_bytes, want_bytes), level=WARNING)
+        return False
+
+    return all(byte == 0x0 for byte in data)
+
+
+def is_osd_disk(dev):
+    db = kv()
+    osd_devices = db.get('osd-devices', [])
+    if dev in osd_devices:
+        log('Device {} already processed by charm,'
+            ' skipping'.format(dev))
+        return True
+
+    partitions = get_partition_list(dev)
+    for partition in partitions:
+        try:
+            info = str(subprocess
+                       .check_output(['sgdisk', '-i', partition.number, dev])
+                       .decode('UTF-8'))
+            info = info.split("\n")  # IGNORE:E1103
+            for line in info:
+                for ptype in CEPH_PARTITIONS:
+                    sig = 'Partition GUID code: {}'.format(ptype)
+                    if line.startswith(sig):
+                        return True
+        except subprocess.CalledProcessError as e:
+            log("sgdisk inspection of partition {} on {} failed with "
+                "error: {}. Skipping".format(partition.number, dev, e),
+                level=ERROR)
+    return False
+
+
+def start_osds(devices):
+    # Scan for Ceph block devices
+    rescan_osd_devices()
+    if (cmp_pkgrevno('ceph', '0.56.6') >= 0 and
+            cmp_pkgrevno('ceph', '14.2.0') < 0):
+        # Use ceph-disk activate for directory based OSDs
+        for dev_or_path in devices:
+            if os.path.exists(dev_or_path) and os.path.isdir(dev_or_path):
+                subprocess.check_call(
+                    ['ceph-disk', 'activate', dev_or_path])
+
+
+def udevadm_settle():
+    cmd = ['udevadm', 'settle']
+    subprocess.call(cmd)
+
+
+def rescan_osd_devices():
+    cmd = [
+        'udevadm', 'trigger',
+        '--subsystem-match=block', '--action=add'
+    ]
+
+    subprocess.call(cmd)
+
+    udevadm_settle()
+
+
+_client_admin_keyring = '/etc/ceph/ceph.client.admin.keyring'
+
+
+def is_bootstrapped():
+    return os.path.exists(
+        '/var/lib/ceph/mon/ceph-{}/done'.format(socket.gethostname()))
+
+
+def wait_for_bootstrap():
+    while not is_bootstrapped():
+        time.sleep(3)
+
+
+def generate_monitor_secret():
+    cmd = [
+        'ceph-authtool',
+        '/dev/stdout',
+        '--name=mon.',
+        '--gen-key'
+    ]
+    res = str(subprocess.check_output(cmd).decode('UTF-8'))
+
+    return "{}==".format(res.split('=')[1].strip())
+
+
+# OSD caps taken from ceph-create-keys
+_osd_bootstrap_caps = {
+    'mon': [
+        'allow command osd create ...',
+        'allow command osd crush set ...',
+        r'allow command auth add * osd allow\ * mon allow\ rwx',
+        'allow command mon getmap'
+    ]
+}
+
+_osd_bootstrap_caps_profile = {
+    'mon': [
+        'allow profile bootstrap-osd'
+    ]
+}
+
+
+def parse_key(raw_key):
+    # get-or-create appears to have different output depending
+    # on whether it's 'get' or 'create':
+    # 'create' just returns the key, 'get' is more verbose and
+    # needs parsing
+    key = None
+    if len(raw_key.splitlines()) == 1:
+        key = raw_key
+    else:
+        for element in raw_key.splitlines():
+            if 'key' in element:
+                return element.split(' = ')[1].strip()  # IGNORE:E1103
+    return key
+
+
+def get_osd_bootstrap_key():
+    try:
+        # Attempt to get/create a key using the OSD bootstrap profile first
+        key = get_named_key('bootstrap-osd',
+                            _osd_bootstrap_caps_profile)
+    except Exception:
+        # If that fails try with the older style permissions
+        key = get_named_key('bootstrap-osd',
+                            _osd_bootstrap_caps)
+    return key
+
+
+_radosgw_keyring = "/etc/ceph/keyring.rados.gateway"
+
+
+def import_radosgw_key(key):
+    if not os.path.exists(_radosgw_keyring):
+        cmd = [
+            "sudo",
+            "-u",
+            ceph_user(),
+            'ceph-authtool',
+            _radosgw_keyring,
+            '--create-keyring',
+            '--name=client.radosgw.gateway',
+            '--add-key={}'.format(key)
+        ]
+        subprocess.check_call(cmd)
+
+
+# RADOS gateway caps taken from ceph-create-keys
+_radosgw_caps = {
+    'mon': ['allow rw'],
+    'osd': ['allow rwx']
+}
+_upgrade_caps = {
+    'mon': ['allow rwx']
+}
+
+
+def get_radosgw_key(pool_list=None, name=None):
+    return get_named_key(name=name or 'radosgw.gateway',
+                         caps=_radosgw_caps,
+                         pool_list=pool_list)
+
+
+def get_mds_key(name):
+    return create_named_keyring(entity='mds',
+                                name=name,
+                                caps=mds_caps)
+
+
+_mds_bootstrap_caps_profile = {
+    'mon': [
+        'allow profile bootstrap-mds'
+    ]
+}
+
+
+def get_mds_bootstrap_key():
+    return get_named_key('bootstrap-mds',
+                         _mds_bootstrap_caps_profile)
+
+
+_default_caps = collections.OrderedDict([
+    ('mon', ['allow r',
+             'allow command "osd blacklist"',
+             'allow command "osd blocklist"']),
+    ('osd', ['allow rwx']),
+])
+
+admin_caps = collections.OrderedDict([
+    ('mds', ['allow *']),
+    ('mgr', ['allow *']),
+    ('mon', ['allow *']),
+    ('osd', ['allow *'])
+])
+
+mds_caps = collections.OrderedDict([
+
('osd', ['allow *']), + ('mds', ['allow']), + ('mon', ['allow rwx']), +]) + +osd_upgrade_caps = collections.OrderedDict([ + ('mon', ['allow command "config-key"', + 'allow command "osd tree"', + 'allow command "config-key list"', + 'allow command "config-key put"', + 'allow command "config-key get"', + 'allow command "config-key exists"', + 'allow command "osd out"', + 'allow command "osd in"', + 'allow command "osd rm"', + 'allow command "auth del"', + ]) +]) + +rbd_mirror_caps = collections.OrderedDict([ + ('mon', ['allow profile rbd-mirror-peer', + 'allow command "service dump"', + 'allow command "service status"' + ]), + ('osd', ['profile rbd']), + ('mgr', ['allow r']), +]) + + +def get_rbd_mirror_key(name): + return get_named_key(name=name, caps=rbd_mirror_caps) + + +def create_named_keyring(entity, name, caps=None): + caps = caps or _default_caps + cmd = [ + "sudo", + "-u", + ceph_user(), + 'ceph', + '--name', 'mon.', + '--keyring', + '/var/lib/ceph/mon/ceph-{}/keyring'.format( + socket.gethostname() + ), + 'auth', 'get-or-create', '{entity}.{name}'.format(entity=entity, + name=name), + ] + for subsystem, subcaps in caps.items(): + cmd.extend([subsystem, '; '.join(subcaps)]) + log("Calling check_output: {}".format(cmd), level=DEBUG) + return (parse_key(str(subprocess + .check_output(cmd) + .decode('UTF-8')) + .strip())) # IGNORE:E1103 + + +def get_upgrade_key(): + return get_named_key('upgrade-osd', _upgrade_caps) + + +def is_internal_client(name): + keys = ('osd-upgrade', 'osd-removal', 'admin', 'rbd-mirror', 'mds') + return any(name.startswith(key) for key in keys) + + +def get_named_key(name, caps=None, pool_list=None): + """Retrieve a specific named cephx key. + + :param name: String Name of key to get. + :param pool_list: The list of pools to give access to + :param caps: dict of cephx capabilities + :returns: Returns a cephx key + """ + caps = caps or _default_caps + key_name = 'client.{}'.format(name) + + key = ceph_auth_get(key_name) + if key: + if is_internal_client(name): + upgrade_key_caps(key_name, caps) + return key + + log("Creating new key for {}".format(name), level=DEBUG) + cmd = [ + "sudo", + "-u", + ceph_user(), + 'ceph', + '--name', 'mon.', + '--keyring', + '/var/lib/ceph/mon/ceph-{}/keyring'.format( + socket.gethostname() + ), + 'auth', 'get-or-create', key_name, + ] + # Add capabilities + for subsystem, subcaps in caps.items(): + if subsystem == 'osd': + if pool_list: + # This will output a string similar to: + # "pool=rgw pool=rbd pool=something" + pools = " ".join(['pool={0}'.format(i) for i in pool_list]) + subcaps[0] = subcaps[0] + " " + pools + cmd.extend([subsystem, '; '.join(subcaps)]) + ceph_auth_get.cache_clear() + + log("Calling check_output: {}".format(cmd), level=DEBUG) + return parse_key(str(subprocess + .check_output(cmd) + .decode('UTF-8')) + .strip()) # IGNORE:E1103 + + +@functools.lru_cache() +def ceph_auth_get(key_name): + try: + # Does the key already exist? 
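+        # A hedged sketch of the expected output (the secret below is
+        # illustrative only): 'ceph auth get client.<name>' prints a
+        # keyring block such as
+        #
+        #   [client.radosgw.gateway]
+        #       key = AQBx2s1hAAAAABAAtl3Cqke1pbmgurbyBYhC+w==
+        #
+        # parse_key() then extracts the base64 secret from the 'key = ' line.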
+ output = str(subprocess.check_output( + [ + 'sudo', + '-u', ceph_user(), + 'ceph', + '--name', 'mon.', + '--keyring', + '/var/lib/ceph/mon/ceph-{}/keyring'.format( + socket.gethostname() + ), + 'auth', + 'get', + key_name, + ]).decode('UTF-8')).strip() + return parse_key(output) + except subprocess.CalledProcessError: + # Couldn't get the key + pass + + +def upgrade_key_caps(key, caps, pool_list=None): + """Upgrade key to have capabilities caps""" + if not is_leader(): + # Not the MON leader OR not clustered + return + cmd = [ + "sudo", "-u", ceph_user(), 'ceph', 'auth', 'caps', key + ] + for subsystem, subcaps in caps.items(): + if subsystem == 'osd': + if pool_list: + # This will output a string similar to: + # "pool=rgw pool=rbd pool=something" + pools = " ".join(['pool={0}'.format(i) for i in pool_list]) + subcaps[0] = subcaps[0] + " " + pools + cmd.extend([subsystem, '; '.join(subcaps)]) + subprocess.check_call(cmd) + + +@cached +def systemd(): + return CompareHostReleases(lsb_release()['DISTRIB_CODENAME']) >= 'vivid' + + +def bootstrap_monitor_cluster(secret): + """Bootstrap local Ceph mon into the Ceph cluster + + :param secret: cephx secret to use for monitor authentication + :type secret: str + :raises: Exception if Ceph mon cannot be bootstrapped + """ + hostname = socket.gethostname() + path = '/var/lib/ceph/mon/ceph-{}'.format(hostname) + done = '{}/done'.format(path) + if systemd(): + init_marker = '{}/systemd'.format(path) + else: + init_marker = '{}/upstart'.format(path) + + keyring = '/var/lib/ceph/tmp/{}.mon.keyring'.format(hostname) + + if os.path.exists(done): + log('bootstrap_monitor_cluster: mon already initialized.') + else: + # Ceph >= 0.61.3 needs this for ceph-mon fs creation + mkdir('/var/run/ceph', owner=ceph_user(), + group=ceph_user(), perms=0o755) + mkdir(path, owner=ceph_user(), group=ceph_user(), + perms=0o755) + # end changes for Ceph >= 0.61.3 + try: + _create_monitor(keyring, + secret, + hostname, + path, + done, + init_marker) + except Exception: + raise + finally: + os.unlink(keyring) + + +def _create_monitor(keyring, secret, hostname, path, done, init_marker): + """Create monitor filesystem and enable and start ceph-mon process + + :param keyring: path to temporary keyring on disk + :type keyring: str + :param secret: cephx secret to use for monitor authentication + :type: secret: str + :param hostname: hostname of the local unit + :type hostname: str + :param path: full path to Ceph mon directory + :type path: str + :param done: full path to 'done' marker for Ceph mon + :type done: str + :param init_marker: full path to 'init' marker for Ceph mon + :type init_marker: str + """ + subprocess.check_call(['ceph-authtool', keyring, + '--create-keyring', '--name=mon.', + '--add-key={}'.format(secret), + '--cap', 'mon', 'allow *']) + subprocess.check_call(['ceph-mon', '--mkfs', + '-i', hostname, + '--keyring', keyring]) + chownr('/var/log/ceph', ceph_user(), ceph_user()) + chownr(path, ceph_user(), ceph_user()) + with open(done, 'w'): + pass + with open(init_marker, 'w'): + pass + + if systemd(): + if cmp_pkgrevno('ceph', '14.0.0') >= 0: + systemd_unit = 'ceph-mon@{}'.format(socket.gethostname()) + else: + systemd_unit = 'ceph-mon' + subprocess.check_call(['systemctl', 'enable', systemd_unit]) + service_restart(systemd_unit) + else: + service_restart('ceph-mon-all') + + +def create_keyrings(): + """Create keyrings for operation of ceph-mon units + + NOTE: The quorum should be done before to execute this function. 
+ + :raises: Exception if keyrings cannot be created + """ + if cmp_pkgrevno('ceph', '14.0.0') >= 0: + # NOTE(jamespage): At Nautilus, keys are created by the + # monitors automatically and just need + # exporting. + output = str(subprocess.check_output( + [ + 'sudo', + '-u', ceph_user(), + 'ceph', + '--name', 'mon.', + '--keyring', + '/var/lib/ceph/mon/ceph-{}/keyring'.format( + socket.gethostname() + ), + 'auth', 'get', 'client.admin', + ]).decode('UTF-8')).strip() + if not output: + # NOTE: key not yet created, raise exception and retry + raise Exception + # NOTE: octopus wants newline at end of file LP: #1864706 + output += '\n' + write_file(_client_admin_keyring, output, + owner=ceph_user(), group=ceph_user(), + perms=0o400) + else: + # NOTE(jamespage): Later Ceph releases require explicit + # call to ceph-create-keys to setup the + # admin keys for the cluster; this command + # will wait for quorum in the cluster before + # returning. + # NOTE(fnordahl): Explicitly run `ceph-create-keys` for older + # Ceph releases too. This improves bootstrap + # resilience as the charm will wait for + # presence of peer units before attempting + # to bootstrap. Note that charms deploying + # ceph-mon service should disable running of + # `ceph-create-keys` service in init system. + cmd = ['ceph-create-keys', '--id', socket.gethostname()] + if cmp_pkgrevno('ceph', '12.0.0') >= 0: + # NOTE(fnordahl): The default timeout in ceph-create-keys of 600 + # seconds is not adequate. Increase timeout when + # timeout parameter available. For older releases + # we rely on retry_on_exception decorator. + # LP#1719436 + cmd.extend(['--timeout', '1800']) + subprocess.check_call(cmd) + osstat = os.stat(_client_admin_keyring) + if not osstat.st_size: + # NOTE(fnordahl): Retry will fail as long as this file exists. + # LP#1719436 + os.remove(_client_admin_keyring) + raise Exception + + +def update_monfs(): + hostname = socket.gethostname() + monfs = '/var/lib/ceph/mon/ceph-{}'.format(hostname) + if systemd(): + init_marker = '{}/systemd'.format(monfs) + else: + init_marker = '{}/upstart'.format(monfs) + if os.path.exists(monfs) and not os.path.exists(init_marker): + # Mark mon as managed by upstart so that + # it gets start correctly on reboots + with open(init_marker, 'w'): + pass + + +def get_partitions(dev): + cmd = ['partx', '--raw', '--noheadings', dev] + try: + out = str(subprocess.check_output(cmd).decode('UTF-8')).splitlines() + log("get partitions: {}".format(out), level=DEBUG) + return out + except subprocess.CalledProcessError as e: + log("Can't get info for {0}: {1}".format(dev, e.output)) + return [] + + +def get_lvs(dev): + """ + List logical volumes for the provided block device + + :param: dev: Full path to block device. + :raises subprocess.CalledProcessError: in the event that any supporting + operation failed. + :returns: list: List of logical volumes provided by the block device + """ + if not lvm.is_lvm_physical_volume(dev): + return [] + vg_name = lvm.list_lvm_volume_group(dev) + return lvm.list_logical_volumes('vg_name={}'.format(vg_name)) + + +def find_least_used_utility_device(utility_devices, lvs=False): + """ + Find a utility device which has the smallest number of partitions + among other devices in the supplied list. + + :utility_devices: A list of devices to be used for filestore journal + or bluestore wal or db. 
+ :lvs: flag to indicate whether inspection should be based on LVM LV's + :return: string device name + """ + if lvs: + usages = map(lambda a: (len(get_lvs(a)), a), utility_devices) + else: + usages = map(lambda a: (len(get_partitions(a)), a), utility_devices) + least = min(usages, key=lambda t: t[0]) + return least[1] + + +def get_devices(name): + """Merge config and Juju storage based devices + + :name: The name of the device type, e.g.: wal, osd, journal + :returns: Set(device names), which are strings + """ + if config(name): + devices = [dev.strip() for dev in config(name).split(' ')] + else: + devices = [] + storage_ids = storage_list(name) + devices.extend((storage_get('location', sid) for sid in storage_ids)) + devices = filter(os.path.exists, devices) + + return set(devices) + + +def osdize(dev, osd_format, osd_journal, ignore_errors=False, encrypt=False, + key_manager=CEPH_KEY_MANAGER, osd_id=None, bluestore_skip=None): + if dev.startswith('/dev'): + osdize_dev(dev, osd_format, osd_journal, + ignore_errors, encrypt, + key_manager, osd_id, bluestore_skip) + else: + if cmp_pkgrevno('ceph', '14.0.0') >= 0: + log("Directory backed OSDs can not be created on Nautilus", + level=WARNING) + return + osdize_dir(dev, encrypt) + + +def osdize_dev(dev, osd_format, osd_journal, ignore_errors=False, + encrypt=False, key_manager=CEPH_KEY_MANAGER, + osd_id=None, bluestore_skip=None): + """ + Prepare a block device for use as a Ceph OSD + + A block device will only be prepared once during the lifetime + of the calling charm unit; future executions will be skipped. + + :param: dev: Full path to block device to use + :param: osd_format: Format for OSD filesystem + :param: osd_journal: List of block devices to use for OSD journals + :param: ignore_errors: Don't fail in the event of any errors during + processing + :param: encrypt: Encrypt block devices using 'key_manager' + :param: key_manager: Key management approach for encryption keys + :param: osd_id: The ID for the newly created OSD + :param: bluestore_skip: Bluestore parameters to skip ('wal' and/or 'db') + :raises subprocess.CalledProcessError: in the event that any supporting + subprocess operation failed + :raises ValueError: if an invalid key_manager is provided + """ + if key_manager not in KEY_MANAGERS: + raise ValueError('Unsupported key manager: {}'.format(key_manager)) + + db = kv() + osd_devices = db.get('osd-devices', []) + try: + if dev in osd_devices: + log('Device {} already processed by charm,' + ' skipping'.format(dev)) + return + + if not os.path.exists(dev): + log('Path {} does not exist - bailing'.format(dev)) + return + + if not is_block_device(dev): + log('Path {} is not a block device - bailing'.format(dev)) + return + + if is_osd_disk(dev): + log('Looks like {} is already an' + ' OSD data or journal, skipping.'.format(dev)) + if is_device_mounted(dev): + osd_devices.append(dev) + return + + if is_device_mounted(dev): + log('Looks like {} is in use, skipping.'.format(dev)) + return + + if is_active_bluestore_device(dev): + log('{} is in use as an active bluestore block device,' + ' skipping.'.format(dev)) + osd_devices.append(dev) + return + + if is_mapped_luks_device(dev): + log('{} is a mapped LUKS device,' + ' skipping.'.format(dev)) + return + + if cmp_pkgrevno('ceph', '12.2.4') >= 0: + cmd = _ceph_volume(dev, + osd_journal, + encrypt, + key_manager, + osd_id, + bluestore_skip) + else: + cmd = _ceph_disk(dev, + osd_format, + osd_journal, + encrypt) + + try: + status_set('maintenance', 'Initializing device 
{}'.format(dev))
+            log("osdize cmd: {}".format(cmd))
+            subprocess.check_call(cmd)
+        except subprocess.CalledProcessError:
+            # Initialise before the inner try so a failed lsblk call cannot
+            # leave the name unbound when it is checked below.
+            lsblk_output = None
+            try:
+                lsblk_output = subprocess.check_output(
+                    ['lsblk', '-P']).decode('UTF-8')
+            except subprocess.CalledProcessError as e:
+                log("Couldn't get lsblk output: {}".format(e), ERROR)
+            if ignore_errors:
+                log('Unable to initialize device: {}'.format(dev), WARNING)
+                if lsblk_output:
+                    log('lsblk output: {}'.format(lsblk_output), DEBUG)
+            else:
+                log('Unable to initialize device: {}'.format(dev), ERROR)
+                if lsblk_output:
+                    log('lsblk output: {}'.format(lsblk_output), WARNING)
+                raise
+
+        # NOTE: Record processing of device only on success to ensure that
+        #       the charm only tries to initialize a device of OSD usage
+        #       once during its lifetime.
+        osd_devices.append(dev)
+    finally:
+        db.set('osd-devices', osd_devices)
+        db.flush()
+
+
+def _ceph_disk(dev, osd_format, osd_journal, encrypt=False):
+    """
+    Prepare a device for usage as a Ceph OSD using ceph-disk
+
+    :param: dev: Full path to use for OSD block device setup,
+                 the function looks up the realpath of the device
+    :param: osd_format: Format for OSD filesystem (unused here, as
+                        bluestore is always used)
+    :param: osd_journal: List of block devices to use for OSD journals
+    :param: encrypt: Use block device encryption (unsupported)
+    :returns: list. 'ceph-disk' command and required parameters for
+                    execution by check_call
+    """
+    cmd = ['ceph-disk', 'prepare']
+
+    if encrypt:
+        cmd.append('--dmcrypt')
+
+    cmd.append('--bluestore')
+    wal = get_devices('bluestore-wal')
+    if wal:
+        cmd.append('--block.wal')
+        least_used_wal = find_least_used_utility_device(wal)
+        cmd.append(least_used_wal)
+    db = get_devices('bluestore-db')
+    if db:
+        cmd.append('--block.db')
+        least_used_db = find_least_used_utility_device(db)
+        cmd.append(least_used_db)
+
+    cmd.append(os.path.realpath(dev))
+
+    if osd_journal:
+        least_used = find_least_used_utility_device(osd_journal)
+        cmd.append(least_used)
+
+    return cmd
+
+
+def _ceph_volume(dev, osd_journal, encrypt=False, key_manager=CEPH_KEY_MANAGER,
+                 osd_id=None, bluestore_skip=None):
+    """
+    Prepare and activate a device for usage as a Ceph OSD using ceph-volume.
+
+    This also includes creation of all PVs, VGs and LVs required to
+    support the initialization of the OSD.
+
+    :param: dev: Full path to use for OSD block device setup
+    :param: osd_journal: List of block devices to use for OSD journals
+    :param: encrypt: Use block device encryption
+    :param: key_manager: dm-crypt Key Manager to use
+    :param: osd_id: The OSD-id to recycle, or None to create a new one
+    :param: bluestore_skip: Bluestore parameters to skip ('wal' and/or 'db')
+    :raises subprocess.CalledProcessError: in the event that any supporting
+                                           LVM operation failed.
+    :returns: list.
'ceph-volume' command and required parameters for + execution by check_call + """ + cmd = ['ceph-volume', 'lvm', 'create'] + + osd_fsid = str(uuid.uuid4()) + cmd.append('--osd-fsid') + cmd.append(osd_fsid) + cmd.append('--bluestore') + main_device_type = 'block' + + if encrypt and key_manager == CEPH_KEY_MANAGER: + cmd.append('--dmcrypt') + + if osd_id is not None: + cmd.extend(['--osd-id', str(osd_id)]) + + cmd.append('--data') + cmd.append(_allocate_logical_volume(dev=dev, + lv_type=main_device_type, + osd_fsid=osd_fsid, + encrypt=encrypt, + key_manager=key_manager)) + + extras = ('wal', 'db') + if bluestore_skip: + extras = tuple(set(extras) - set(bluestore_skip)) + + for extra_volume in extras: + devices = get_devices('bluestore-{}'.format(extra_volume)) + if devices: + cmd.append('--block.{}'.format(extra_volume)) + least_used = find_least_used_utility_device(devices, + lvs=True) + cmd.append(_allocate_logical_volume( + dev=least_used, + lv_type=extra_volume, + osd_fsid=osd_fsid, + size='{}M'.format(calculate_volume_size(extra_volume)), + shared=True, + encrypt=encrypt, + key_manager=key_manager) + ) + + return cmd + + +def _partition_name(dev): + """ + Derive the first partition name for a block device + + :param: dev: Full path to block device. + :returns: str: Full path to first partition on block device. + """ + if dev[-1].isdigit(): + return '{}p1'.format(dev) + else: + return '{}1'.format(dev) + + +def is_active_bluestore_device(dev): + """ + Determine whether provided device is part of an active + bluestore based OSD (as its block component). + + :param: dev: Full path to block device to check for Bluestore usage. + :returns: boolean: indicating whether device is in active use. + """ + if not lvm.is_lvm_physical_volume(dev): + return False + + vg_name = lvm.list_lvm_volume_group(dev) + try: + lv_name = lvm.list_logical_volumes('vg_name={}'.format(vg_name))[0] + except IndexError: + return False + + block_symlinks = glob.glob('/var/lib/ceph/osd/ceph-*/block') + for block_candidate in block_symlinks: + if os.path.islink(block_candidate): + target = os.readlink(block_candidate) + if target.endswith(lv_name): + return True + + return False + + +def is_luks_device(dev): + """ + Determine if dev is a LUKS-formatted block device. + + :param: dev: A full path to a block device to check for LUKS header + presence + :returns: boolean: indicates whether a device is used based on LUKS header. + """ + return True if _luks_uuid(dev) else False + + +def is_mapped_luks_device(dev): + """ + Determine if dev is a mapped LUKS device + :param: dev: A full path to a block device to be checked + :returns: boolean: indicates whether a device is mapped + """ + _, dirs, _ = next(os.walk( + '/sys/class/block/{}/holders/' + .format(os.path.basename(os.path.realpath(dev)))) + ) + is_held = len(dirs) > 0 + return is_held and is_luks_device(dev) + + +def get_conf(variable): + """ + Get the value of the given configuration variable from the + cluster. + + :param variable: Ceph configuration variable + :returns: str. configured value for provided variable + + """ + return subprocess.check_output([ + 'ceph-osd', + '--show-config-value={}'.format(variable), + '--no-mon-config', + ]).strip() + + +def calculate_volume_size(lv_type): + """ + Determine the configured size for Bluestore DB/WAL or + Filestore Journal devices + + :param lv_type: volume type (db, wal or journal) + :raises KeyError: if invalid lv_type is supplied + :returns: int. 
Configured size in megabytes for volume type + """ + # lv_type -> Ceph configuration option + _config_map = { + 'db': 'bluestore_block_db_size', + 'wal': 'bluestore_block_wal_size', + 'journal': 'osd_journal_size', + } + + # default sizes in MB + _default_size = { + 'db': 1024, + 'wal': 576, + 'journal': 1024, + } + + # conversion of Ceph config units to MB + _units = { + 'db': 1048576, # Bytes -> MB + 'wal': 1048576, # Bytes -> MB + 'journal': 1, # Already in MB + } + + configured_size = get_conf(_config_map[lv_type]) + + if configured_size is None or int(configured_size) == 0: + return _default_size[lv_type] + else: + return int(configured_size) / _units[lv_type] + + +def _luks_uuid(dev): + """ + Check to see if dev is a LUKS encrypted volume, returning the UUID + of volume if it is. + + :param: dev: path to block device to check. + :returns: str. UUID of LUKS device or None if not a LUKS device + """ + try: + cmd = ['cryptsetup', 'luksUUID', dev] + return subprocess.check_output(cmd).decode('UTF-8').strip() + except subprocess.CalledProcessError: + return None + + +def _initialize_disk(dev, dev_uuid, encrypt=False, + key_manager=CEPH_KEY_MANAGER): + """ + Initialize a raw block device consuming 100% of the available + disk space. + + Function assumes that block device has already been wiped. + + :param: dev: path to block device to initialize + :param: dev_uuid: UUID to use for any dm-crypt operations + :param: encrypt: Encrypt OSD devices using dm-crypt + :param: key_manager: Key management approach for dm-crypt keys + :raises: subprocess.CalledProcessError: if any parted calls fail + :returns: str: Full path to new partition. + """ + use_vaultlocker = encrypt and key_manager == VAULT_KEY_MANAGER + + if use_vaultlocker: + # NOTE(jamespage): Check to see if already initialized as a LUKS + # volume, which indicates this is a shared block + # device for journal, db or wal volumes. + luks_uuid = _luks_uuid(dev) + if luks_uuid: + return '/dev/mapper/crypt-{}'.format(luks_uuid) + + dm_crypt = '/dev/mapper/crypt-{}'.format(dev_uuid) + + if use_vaultlocker and not os.path.exists(dm_crypt): + subprocess.check_call([ + 'vaultlocker', + 'encrypt', + '--uuid', dev_uuid, + dev, + ]) + subprocess.check_call([ + 'dd', + 'if=/dev/zero', + 'of={}'.format(dm_crypt), + 'bs=512', + 'count=1', + ]) + + if use_vaultlocker: + return dm_crypt + else: + return dev + + +def _allocate_logical_volume(dev, lv_type, osd_fsid, + size=None, shared=False, + encrypt=False, + key_manager=CEPH_KEY_MANAGER): + """ + Allocate a logical volume from a block device, ensuring any + required initialization and setup of PV's and VG's to support + the LV. + + :param: dev: path to block device to allocate from. + :param: lv_type: logical volume type to create + (data, block, journal, wal, db) + :param: osd_fsid: UUID of the OSD associate with the LV + :param: size: Size in LVM format for the device; + if unset 100% of VG + :param: shared: Shared volume group (journal, wal, db) + :param: encrypt: Encrypt OSD devices using dm-crypt + :param: key_manager: dm-crypt Key Manager to use + :raises subprocess.CalledProcessError: in the event that any supporting + LVM or parted operation fails. + :returns: str: String in the format 'vg_name/lv_name'. 
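+
+    Illustrative example (hypothetical fsid, following the naming scheme
+    in the body below): a non-shared 'block' volume with osd_fsid='1234'
+    is returned as 'ceph-1234/osd-block-1234'.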
+ """ + lv_name = "osd-{}-{}".format(lv_type, osd_fsid) + current_volumes = lvm.list_logical_volumes() + if shared: + dev_uuid = str(uuid.uuid4()) + else: + dev_uuid = osd_fsid + pv_dev = _initialize_disk(dev, dev_uuid, encrypt, key_manager) + + vg_name = None + if not lvm.is_lvm_physical_volume(pv_dev): + lvm.create_lvm_physical_volume(pv_dev) + if not os.path.exists(pv_dev): + # NOTE: trigger rescan to work around bug 1878752 + rescan_osd_devices() + if shared: + vg_name = 'ceph-{}-{}'.format(lv_type, + str(uuid.uuid4())) + else: + vg_name = 'ceph-{}'.format(osd_fsid) + lvm.create_lvm_volume_group(vg_name, pv_dev) + else: + vg_name = lvm.list_lvm_volume_group(pv_dev) + + if lv_name not in current_volumes: + lvm.create_logical_volume(lv_name, vg_name, size) + + return "{}/{}".format(vg_name, lv_name) + + +def osdize_dir(path, encrypt=False): + """Ask ceph-disk to prepare a directory to become an OSD. + + :param path: str. The directory to osdize + :param encrypt: bool. Should the OSD directory be encrypted at rest + :returns: None + """ + + db = kv() + osd_devices = db.get('osd-devices', []) + if path in osd_devices: + log('Device {} already processed by charm,' + ' skipping'.format(path)) + return + + for t in ['upstart', 'systemd']: + if os.path.exists(os.path.join(path, t)): + log('Path {} is already used as an OSD dir - bailing'.format(path)) + return + + if cmp_pkgrevno('ceph', "0.56.6") < 0: + log('Unable to use directories for OSDs with ceph < 0.56.6', + level=ERROR) + return + + mkdir(path, owner=ceph_user(), group=ceph_user(), perms=0o755) + chownr('/var/lib/ceph', ceph_user(), ceph_user()) + cmd = [ + 'sudo', '-u', ceph_user(), + 'ceph-disk', + 'prepare', + '--data-dir', + path + ] + if cmp_pkgrevno('ceph', '0.60') >= 0: + if encrypt: + cmd.append('--dmcrypt') + cmd.append('--bluestore') + + log("osdize dir cmd: {}".format(cmd)) + subprocess.check_call(cmd) + + # NOTE: Record processing of device only on success to ensure that + # the charm only tries to initialize a device of OSD usage + # once during its lifetime. + osd_devices.append(path) + db.set('osd-devices', osd_devices) + db.flush() + + +def filesystem_mounted(fs): + return subprocess.call(['grep', '-wqs', fs, '/proc/mounts']) == 0 + + +def get_running_osds(): + """Returns a list of the pids of the current running OSD daemons""" + cmd = ['pgrep', 'ceph-osd|crimson-osd'] + try: + result = str(subprocess.check_output(cmd).decode('UTF-8')) + return result.split() + except subprocess.CalledProcessError: + return [] + + +def get_cephfs(service): + """List the Ceph Filesystems that exist. + + :param service: The service name to run the Ceph command under + :returns: list. Returns a list of the Ceph filesystems + """ + if get_version() < 0.86: + # This command wasn't introduced until 0.86 Ceph + return [] + try: + output = str(subprocess + .check_output(["ceph", '--id', service, "fs", "ls"]) + .decode('UTF-8')) + if not output: + return [] + """ + Example subprocess output: + 'name: ip-172-31-23-165, metadata pool: ip-172-31-23-165_metadata, + data pools: [ip-172-31-23-165_data ]\n' + output: filesystems: ['ip-172-31-23-165'] + """ + filesystems = [] + for line in output.splitlines(): + parts = line.split(',') + for part in parts: + if "name" in part: + filesystems.append(part.split(' ')[1]) + return filesystems + except subprocess.CalledProcessError: + return [] + + +def wait_for_all_monitors_to_upgrade(new_version, upgrade_key): + """Fairly self explanatory name. 
This function waits
+    for all monitors in the cluster to upgrade, raising an exception
+    once a ten-minute timeout has expired.
+
+    :param new_version: str of the version to watch
+    :param upgrade_key: the cephx key name to use
+    """
+    done = False
+    start_time = time.time()
+    monitor_list = []
+
+    mon_map = get_mon_map('admin')
+    if mon_map['monmap']['mons']:
+        for mon in mon_map['monmap']['mons']:
+            monitor_list.append(mon['name'])
+    while not done:
+        try:
+            done = all(monitor_key_exists(upgrade_key, "{}_{}_{}_done".format(
+                "mon", mon, new_version
+            )) for mon in monitor_list)
+            current_time = time.time()
+            if current_time > (start_time + 10 * 60):
+                raise Exception
+            else:
+                # Wait 30 seconds and test again if all monitors are upgraded
+                time.sleep(30)
+        except subprocess.CalledProcessError:
+            raise
+
+
+# Edge cases:
+# 1. Previous node dies on upgrade, can we retry?
+def roll_monitor_cluster(new_version, upgrade_key):
+    """This is tricky to get right so here's what we're going to do.
+
+    There are two possible cases: either I'm first in line or I'm not.
+    If I'm not first in line I'll wait a random time between 5-30 seconds
+    and test to see if the previous monitor is upgraded yet.
+
+    :param new_version: str of the version to upgrade to
+    :param upgrade_key: the cephx key name to use when upgrading
+    """
+    log('roll_monitor_cluster called with {}'.format(new_version))
+    my_name = socket.gethostname()
+    monitor_list = []
+    mon_map = get_mon_map('admin')
+    if mon_map['monmap']['mons']:
+        for mon in mon_map['monmap']['mons']:
+            monitor_list.append(mon['name'])
+    else:
+        status_set('blocked', 'Unable to get monitor cluster information')
+        sys.exit(1)
+    log('monitor_list: {}'.format(monitor_list))
+
+    # A sorted list of monitor unit names
+    mon_sorted_list = sorted(monitor_list)
+
+    # Install packages immediately but defer restarts to when it's our time.
+    upgrade_monitor(new_version, restart_daemons=False)
+    try:
+        position = mon_sorted_list.index(my_name)
+        log("upgrade position: {}".format(position))
+        if position == 0:
+            # I'm first! Roll
+            # First set a key to inform others I'm about to roll
+            lock_and_roll(upgrade_key=upgrade_key,
+                          service='mon',
+                          my_name=my_name,
+                          version=new_version)
+        else:
+            # Check if the previous node has finished
+            status_set('waiting',
+                       'Waiting on {} to finish upgrading'.format(
+                           mon_sorted_list[position - 1]))
+            wait_on_previous_node(upgrade_key=upgrade_key,
+                                  service='mon',
+                                  previous_node=mon_sorted_list[position - 1],
+                                  version=new_version)
+            lock_and_roll(upgrade_key=upgrade_key,
+                          service='mon',
+                          my_name=my_name,
+                          version=new_version)
+        # NOTE(jamespage):
+        # Wait until all monitors have upgraded before bootstrapping
+        # the ceph-mgr daemons due to use of new mgr keyring profiles
+        if new_version == 'luminous':
+            wait_for_all_monitors_to_upgrade(new_version=new_version,
+                                             upgrade_key=upgrade_key)
+            bootstrap_manager()
+
+        # NOTE(jmcvaughn):
+        # Nautilus and later binaries use msgr2 by default, but existing
+        # clusters that have been upgraded from pre-Nautilus will not
+        # automatically have msgr2 enabled. Without this, Ceph will show
+        # a warning only (with no impact to operations), but newly added units
+        # will not be able to join the cluster. Therefore, we ensure it is
+        # enabled on upgrade for all versions including and after Nautilus
+        # (to cater for previous charm versions that will not have done this).
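+        # Hedged note: enable_msgr2() lives elsewhere in this module; it is
+        # assumed to wrap 'ceph mon enable-msgr2', which switches existing
+        # monitors over to the v2 wire protocol.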
+ nautilus_or_later = cmp_pkgrevno('ceph-common', '14.0.0') >= 0 + if nautilus_or_later: + wait_for_all_monitors_to_upgrade(new_version=new_version, + upgrade_key=upgrade_key) + enable_msgr2() + except ValueError: + log("Failed to find {} in list {}.".format( + my_name, mon_sorted_list)) + status_set('blocked', 'failed to upgrade monitor') + + +# For E731 we can't assign a lambda, therefore, instead pass this. +def noop(): + pass + + +def upgrade_monitor(new_version, kick_function=None, restart_daemons=True): + """Upgrade the current Ceph monitor to the new version + + :param new_version: String version to upgrade to. + """ + if kick_function is None: + kick_function = noop + current_version = get_version() + status_set("maintenance", "Upgrading monitor") + log("Current Ceph version is {}".format(current_version)) + log("Upgrading to: {}".format(new_version)) + + # Needed to determine if whether to stop/start ceph-mgr + luminous_or_later = cmp_pkgrevno('ceph-common', '12.2.0') >= 0 + # Needed to differentiate between systemd unit names + nautilus_or_later = cmp_pkgrevno('ceph-common', '14.0.0') >= 0 + kick_function() + try: + add_source(config('source'), config('key')) + apt_update(fatal=True) + except subprocess.CalledProcessError as err: + log("Adding the Ceph source failed with message: {}".format( + err)) + status_set("blocked", "Upgrade to {} failed".format(new_version)) + sys.exit(1) + kick_function() + + try: + apt_install(packages=determine_packages(), fatal=True) + rm_packages = determine_packages_to_remove() + if rm_packages: + apt_purge(packages=rm_packages, fatal=True) + except subprocess.CalledProcessError as err: + log("Upgrading packages failed " + "with message: {}".format(err)) + status_set("blocked", "Upgrade to {} failed".format(new_version)) + sys.exit(1) + + if not restart_daemons: + log("Packages upgraded but not restarting daemons yet.") + return + + try: + if systemd(): + if nautilus_or_later: + systemd_unit = 'ceph-mon@{}'.format(socket.gethostname()) + else: + systemd_unit = 'ceph-mon' + service_stop(systemd_unit) + log("restarting ceph-mgr.target maybe: {}" + .format(luminous_or_later)) + if luminous_or_later: + service_stop('ceph-mgr.target') + else: + service_stop('ceph-mon-all') + + kick_function() + + owner = ceph_user() + + # Ensure the files and directories under /var/lib/ceph is chowned + # properly as part of the move to the Jewel release, which moved the + # ceph daemons to running as ceph:ceph instead of root:root. + if new_version == 'jewel': + # Ensure the ownership of Ceph's directories is correct + chownr(path=os.path.join(os.sep, "var", "lib", "ceph"), + owner=owner, + group=owner, + follow_links=True) + + kick_function() + + # Ensure that mon directory is user writable + hostname = socket.gethostname() + path = '/var/lib/ceph/mon/ceph-{}'.format(hostname) + mkdir(path, owner=ceph_user(), group=ceph_user(), + perms=0o755) + + if systemd(): + if nautilus_or_later: + systemd_unit = 'ceph-mon@{}'.format(socket.gethostname()) + else: + systemd_unit = 'ceph-mon' + service_restart(systemd_unit) + log("starting ceph-mgr.target maybe: {}".format(luminous_or_later)) + if luminous_or_later: + # due to BUG: #1849874 we have to force a restart to get it to + # drop the previous version of ceph-manager and start the new + # one. 
+ service_restart('ceph-mgr.target') + else: + service_start('ceph-mon-all') + except subprocess.CalledProcessError as err: + log("Stopping ceph and upgrading packages failed " + "with message: {}".format(err)) + status_set("blocked", "Upgrade to {} failed".format(new_version)) + sys.exit(1) + + +def lock_and_roll(upgrade_key, service, my_name, version): + """Create a lock on the Ceph monitor cluster and upgrade. + + :param upgrade_key: str. The cephx key to use + :param service: str. The cephx id to use + :param my_name: str. The current hostname + :param version: str. The version we are upgrading to + """ + start_timestamp = time.time() + + log('monitor_key_set {}_{}_{}_start {}'.format( + service, + my_name, + version, + start_timestamp)) + monitor_key_set(upgrade_key, "{}_{}_{}_start".format( + service, my_name, version), start_timestamp) + + # alive indication: + alive_function = ( + lambda: monitor_key_set( + upgrade_key, "{}_{}_{}_alive" + .format(service, my_name, version), time.time())) + dog = WatchDog(kick_interval=3 * 60, + kick_function=alive_function) + + log("Rolling") + + # This should be quick + if service == 'osd': + upgrade_osd(version, kick_function=dog.kick_the_dog) + elif service == 'mon': + upgrade_monitor(version, kick_function=dog.kick_the_dog) + else: + log("Unknown service {}. Unable to upgrade".format(service), + level=ERROR) + log("Done") + + stop_timestamp = time.time() + # Set a key to inform others I am finished + log('monitor_key_set {}_{}_{}_done {}'.format(service, + my_name, + version, + stop_timestamp)) + status_set('maintenance', 'Finishing upgrade') + monitor_key_set(upgrade_key, "{}_{}_{}_done".format(service, + my_name, + version), + stop_timestamp) + + +def wait_on_previous_node(upgrade_key, service, previous_node, version): + """A lock that sleeps the current thread while waiting for the previous + node to finish upgrading. + + :param upgrade_key: + :param service: str. the cephx id to use + :param previous_node: str. The name of the previous node to wait on + :param version: str. The version we are upgrading to + :returns: None + """ + log("Previous node is: {}".format(previous_node)) + + previous_node_started_f = ( + lambda: monitor_key_exists( + upgrade_key, + "{}_{}_{}_start".format(service, previous_node, version))) + previous_node_finished_f = ( + lambda: monitor_key_exists( + upgrade_key, + "{}_{}_{}_done".format(service, previous_node, version))) + previous_node_alive_time_f = ( + lambda: monitor_key_get( + upgrade_key, + "{}_{}_{}_alive".format(service, previous_node, version))) + + # wait for 30 minutes until the previous node starts. We don't proceed + # unless we get a start condition. + try: + WatchDog.wait_until(previous_node_started_f, timeout=30 * 60) + except WatchDog.WatchDogTimeoutException: + log("Waited for previous node to start for 30 minutes. " + "It didn't start, so may have a serious issue. Continuing with " + "upgrade of this node.", + level=WARNING) + return + + # keep the time it started from this nodes' perspective. + previous_node_started_at = time.time() + log("Detected that previous node {} has started. Time now: {}" + .format(previous_node, previous_node_started_at)) + + # Now wait for the node to complete. The node may optionally be kicking + # with the *_alive key, which allows this node to wait longer as it 'knows' + # the other node is proceeding. 
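+    # Sketch of the coordination protocol (key names below are examples
+    # built from hypothetical values service='mon', previous_node='ceph-mon-0'
+    # and version='pacific'; see lock_and_roll() above for the real format):
+    #
+    #   mon_ceph-mon-0_pacific_start  -> time.time() when the roll began
+    #   mon_ceph-mon-0_pacific_alive  -> refreshed periodically as a 'kick'
+    #   mon_ceph-mon-0_pacific_done   -> time.time() when the roll finished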
+    try:
+        WatchDog.timed_wait(kicked_at_function=previous_node_alive_time_f,
+                            complete_function=previous_node_finished_f,
+                            wait_time=30 * 60,
+                            compatibility_wait_time=10 * 60,
+                            max_kick_interval=5 * 60)
+    except WatchDog.WatchDogDeadException:
+        # previous node was kicking, but timed out; log this condition and
+        # move on.
+        now = time.time()
+        waited = int((now - previous_node_started_at) / 60)
+        log("Previous node started, but has now not ticked for 5 minutes. "
+            "Waited total of {} mins on node {}. current time: {} > "
+            "previous node start time: {}. "
+            "Continuing with upgrade of this node."
+            .format(waited, previous_node, now, previous_node_started_at),
+            level=WARNING)
+    except WatchDog.WatchDogTimeoutException:
+        # previous node never kicked, or simply took too long; log this
+        # condition and move on.
+        now = time.time()
+        waited = int((now - previous_node_started_at) / 60)
+        log("Previous node is taking too long; assuming it has died. "
+            "Waited {} mins on node {}. current time: {} > "
+            "previous node start time: {}. "
+            "Continuing with upgrade of this node."
+            .format(waited, previous_node, now, previous_node_started_at),
+            level=WARNING)
+
+
+class WatchDog(object):
+    """Watch a dog; basically a kickable timer with a timeout between two
+    async units.
+
+    The idea is that you have an overall timeout and then can kick that
+    timeout with intermediary hits, with a max time between those kicks
+    allowed.
+
+    Note that this watchdog doesn't rely on the clock of the other side; just
+    roughly when it detects when the other side started. All timings are
+    based on the local clock.
+
+    The kicker will not 'kick' more often than a set interval, regardless of
+    how often the kick_the_dog() function is called. The kicker provides a
+    function (lambda: -> None) that is called when the kick interval is
+    reached.
+
+    The waiter calls the static method with a check function
+    (lambda: -> Boolean) that indicates when the wait should be over and the
+    maximum interval to wait. e.g. 30 minutes with a 5 minute kick interval.
+
+    So the waiter calls wait(f, 30, 3) and the kicker sets up a 3 minute kick
+    interval, or however long it is expected for the key to propagate and to
+    allow for other delays.
+
+    There is a compatibility mode where if the other side never kicks, then
+    the waiter simply waits for the compatibility timer.
+    """
+
+    class WatchDogDeadException(Exception):
+        pass
+
+    class WatchDogTimeoutException(Exception):
+        pass
+
+    def __init__(self, kick_interval=3 * 60, kick_function=None):
+        """Initialise a new WatchDog
+
+        :param kick_interval: the interval when this side kicks the other in
+            seconds.
+        :type kick_interval: Int
+        :param kick_function: The function to call that does the kick.
+        :type kick_function: Callable[[], None]
+        """
+        self.start_time = time.time()
+        self.last_run_func = None
+        self.last_kick_at = None
+        self.kick_interval = kick_interval
+        self.kick_f = kick_function
+
+    def kick_the_dog(self):
+        """Might call the kick_function if it's time.
+
+        This function can be called as frequently as needed, but will run the
+        self.kick_function after kick_interval seconds have passed.
+        """
+        now = time.time()
+        if (self.last_run_func is None or
+                (now - self.last_run_func > self.kick_interval)):
+            if self.kick_f is not None:
+                self.kick_f()
+            self.last_run_func = now
+        self.last_kick_at = now
+
+    @staticmethod
+    def wait_until(wait_f, timeout=10 * 60):
+        """Wait for timeout seconds until the passed function returns True.
+
+        :param wait_f: The function to call that will end the wait.
+        :type wait_f: Callable[[], Boolean]
+        :param timeout: The time to wait in seconds.
+        :type timeout: int
+        """
+        start_time = time.time()
+        while not wait_f():
+            now = time.time()
+            if now > start_time + timeout:
+                raise WatchDog.WatchDogTimeoutException()
+            wait_time = random.randrange(5, 30)
+            log('wait_until: waiting for {} seconds'.format(wait_time))
+            time.sleep(wait_time)
+
+    @staticmethod
+    def timed_wait(kicked_at_function,
+                   complete_function,
+                   wait_time=30 * 60,
+                   compatibility_wait_time=10 * 60,
+                   max_kick_interval=5 * 60):
+        """Wait a maximum time with an intermediate 'kick' time.
+
+        This function will wait for max_kick_interval seconds unless the
+        kicked_at_function() call returns a time that is not older than
+        max_kick_interval (in seconds). i.e. the other side can signal that
+        it is still doing things during the max_kick_interval as long as it
+        kicks at least every max_kick_interval seconds.
+
+        The maximum wait is "wait_time", but the other side must keep kicking
+        during this period.
+
+        The "compatibility_wait_time" is used if the other side never kicks
+        (i.e. the kicked_at_function() always returns None). In this case
+        the function waits up to "compatibility_wait_time".
+
+        Note that the type of the return from the kicked_at_function is an
+        Optional[str], not a float. The function will coerce this to a float
+        for the comparison. This represents the return value of
+        time.time() at the "other side". It's a string to simplify the
+        function obtaining the time value from the other side.
+
+        The function raises WatchDogTimeoutException if either the
+        compatibility_wait_time or the wait_time are exceeded.
+
+        The function raises WatchDogDeadException if the max_kick_interval is
+        exceeded.
+
+        Note that it is possible that the first kick interval is extended to
+        compatibility_wait_time if the "other side" doesn't kick immediately.
+        The best solution is for the other side to kick early and often.
+
+        :param kicked_at_function: The function to call to retrieve the time
+            that the other side 'kicked' at. None if the other side hasn't
+            kicked.
+        :type kicked_at_function: Callable[[], Optional[str]]
+        :param complete_function: The callable that returns True when done.
+        :type complete_function: Callable[[], Boolean]
+        :param wait_time: the maximum time to wait, even with kicks, in
+            seconds.
+        :type wait_time: int
+        :param compatibility_wait_time: The time to wait if no kicks are
+            received, in seconds.
+        :type compatibility_wait_time: int
+        :param max_kick_interval: The maximum time allowed between kicks
+            before the wait is over, in seconds.
+        :type max_kick_interval: int
+        :raises: WatchDog.WatchDogTimeoutException,
+                 WatchDog.WatchDogDeadException
+        """
+        start_time = time.time()
+        while True:
+            if complete_function():
+                break
+            # the time when the waiting-for unit last kicked.
+            kicked_at = kicked_at_function()
+            now = time.time()
+            if kicked_at is None:
+                # assume other end doesn't do alive kicks
+                if (now - start_time > compatibility_wait_time):
+                    raise WatchDog.WatchDogTimeoutException()
+            else:
+                # other side is participating in kicks; must kick at least
+                # every 'max_kick_interval' to stay alive.
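+                # Worked example with the defaults (illustrative numbers):
+                # max_kick_interval is 300s, so a kick stamped 301 seconds
+                # ago raises WatchDogDeadException below even though the
+                # overall wait_time (1800s) has not yet expired.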
+ if (now - float(kicked_at) > max_kick_interval): + raise WatchDog.WatchDogDeadException() + if (now - start_time > wait_time): + raise WatchDog.WatchDogTimeoutException() + delay_time = random.randrange(5, 30) + log('waiting for {} seconds'.format(delay_time)) + time.sleep(delay_time) + + +def get_upgrade_position(osd_sorted_list, match_name): + """Return the upgrade position for the given OSD. + + :param osd_sorted_list: OSDs sorted + :type osd_sorted_list: [str] + :param match_name: The OSD name to match + :type match_name: str + :returns: The position of the name + :rtype: int + :raises: ValueError if name is not found + """ + for index, item in enumerate(osd_sorted_list): + if item.name == match_name: + return index + raise ValueError("OSD name '{}' not found in get_upgrade_position list" + .format(match_name)) + + +# Edge cases: +# 1. Previous node dies on upgrade, can we retry? +# 2. This assumes that the OSD failure domain is not set to OSD. +# It rolls an entire server at a time. +def roll_osd_cluster(new_version, upgrade_key): + """This is tricky to get right so here's what we're going to do. + + There's 2 possible cases: Either I'm first in line or not. + If I'm not first in line I'll wait a random time between 5-30 seconds + and test to see if the previous OSD is upgraded yet. + + TODO: If you're not in the same failure domain it's safe to upgrade + 1. Examine all pools and adopt the most strict failure domain policy + Example: Pool 1: Failure domain = rack + Pool 2: Failure domain = host + Pool 3: Failure domain = row + + outcome: Failure domain = host + + :param new_version: str of the version to upgrade to + :param upgrade_key: the cephx key name to use when upgrading + """ + log('roll_osd_cluster called with {}'.format(new_version)) + my_name = socket.gethostname() + osd_tree = get_osd_tree(service=upgrade_key) + # A sorted list of OSD unit names + osd_sorted_list = sorted(osd_tree) + log("osd_sorted_list: {}".format(osd_sorted_list)) + + try: + position = get_upgrade_position(osd_sorted_list, my_name) + log("upgrade position: {}".format(position)) + if position == 0: + # I'm first! Roll + # First set a key to inform others I'm about to roll + lock_and_roll(upgrade_key=upgrade_key, + service='osd', + my_name=my_name, + version=new_version) + else: + # Check if the previous node has finished + status_set('waiting', + 'Waiting on {} to finish upgrading'.format( + osd_sorted_list[position - 1].name)) + wait_on_previous_node( + upgrade_key=upgrade_key, + service='osd', + previous_node=osd_sorted_list[position - 1].name, + version=new_version) + lock_and_roll(upgrade_key=upgrade_key, + service='osd', + my_name=my_name, + version=new_version) + except ValueError: + log("Failed to find name {} in list {}".format( + my_name, osd_sorted_list)) + status_set('blocked', 'failed to upgrade osd') + + +def upgrade_osd(new_version, kick_function=None): + """Upgrades the current OSD + + :param new_version: str. 
The new version to upgrade to + """ + if kick_function is None: + kick_function = noop + + current_version = get_version() + status_set("maintenance", "Upgrading OSD") + log("Current Ceph version is {}".format(current_version)) + log("Upgrading to: {}".format(new_version)) + + try: + add_source(config('source'), config('key')) + apt_update(fatal=True) + except subprocess.CalledProcessError as err: + log("Adding the Ceph sources failed with message: {}".format( + err)) + status_set("blocked", "Upgrade to {} failed".format(new_version)) + sys.exit(1) + + kick_function() + + try: + # Upgrade the packages before restarting the daemons. + status_set('maintenance', 'Upgrading packages to %s' % new_version) + apt_install(packages=determine_packages(), fatal=True) + kick_function() + + # If the upgrade does not need an ownership update of any of the + # directories in the OSD service directory, then simply restart + # all of the OSDs at the same time as this will be the fastest + # way to update the code on the node. + if not dirs_need_ownership_update('osd'): + log('Restarting all OSDs to load new binaries', DEBUG) + with maintain_all_osd_states(): + if systemd(): + service_restart('ceph-osd.target') + else: + service_restart('ceph-osd-all') + return + + # Need to change the ownership of all directories which are not OSD + # directories as well. + # TODO - this should probably be moved to the general upgrade function + # and done before mon/OSD. + update_owner(CEPH_BASE_DIR, recurse_dirs=False) + non_osd_dirs = filter(lambda x: not x == 'osd', + os.listdir(CEPH_BASE_DIR)) + non_osd_dirs = map(lambda x: os.path.join(CEPH_BASE_DIR, x), + non_osd_dirs) + for i, path in enumerate(non_osd_dirs): + if i % 100 == 0: + kick_function() + update_owner(path) + + # Fast service restart wasn't an option because each of the OSD + # directories need the ownership updated for all the files on + # the OSD. Walk through the OSDs one-by-one upgrading the OSD. + for osd_dir in _get_child_dirs(OSD_BASE_DIR): + kick_function() + try: + osd_num = _get_osd_num_from_dirname(osd_dir) + _upgrade_single_osd(osd_num, osd_dir) + except ValueError as ex: + # Directory could not be parsed - junk directory? + log('Could not parse OSD directory %s: %s' % (osd_dir, ex), + WARNING) + continue + + except (subprocess.CalledProcessError, IOError) as err: + log("Stopping Ceph and upgrading packages failed " + "with message: {}".format(err)) + status_set("blocked", "Upgrade to {} failed".format(new_version)) + sys.exit(1) + + +def _upgrade_single_osd(osd_num, osd_dir): + """Upgrades the single OSD directory. + + :param osd_num: the num of the OSD + :param osd_dir: the directory of the OSD to upgrade + :raises CalledProcessError: if an error occurs in a command issued as part + of the upgrade process + :raises IOError: if an error occurs reading/writing to a file as part + of the upgrade process + """ + with maintain_osd_state(osd_num): + stop_osd(osd_num) + disable_osd(osd_num) + update_owner(osd_dir) + enable_osd(osd_num) + start_osd(osd_num) + + +def stop_osd(osd_num): + """Stops the specified OSD number. + + :param osd_num: the OSD number to stop + """ + if systemd(): + service_stop('ceph-osd@{}'.format(osd_num)) + else: + service_stop('ceph-osd', id=osd_num) + + +def start_osd(osd_num): + """Starts the specified OSD number. + + :param osd_num: the OSD number to start. 
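+
+    Example (hypothetical id): start_osd(3) starts 'ceph-osd@3' under
+    systemd, or 'ceph-osd' with id=3 under upstart.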
+ """ + if systemd(): + service_start('ceph-osd@{}'.format(osd_num)) + else: + service_start('ceph-osd', id=osd_num) + + +def disable_osd(osd_num): + """Disables the specified OSD number. + + Ensures that the specified OSD will not be automatically started at the + next reboot of the system. Due to differences between init systems, + this method cannot make any guarantees that the specified OSD cannot be + started manually. + + :param osd_num: the OSD id which should be disabled. + :raises CalledProcessError: if an error occurs invoking the systemd cmd + to disable the OSD + :raises IOError, OSError: if the attempt to read/remove the ready file in + an upstart enabled system fails + """ + if systemd(): + # When running under systemd, the individual ceph-osd daemons run as + # templated units and can be directly addressed by referring to the + # templated service name ceph-osd@. Additionally, systemd + # allows one to disable a specific templated unit by running the + # 'systemctl disable ceph-osd@' command. When disabled, the + # OSD should remain disabled until re-enabled via systemd. + # Note: disabling an already disabled service in systemd returns 0, so + # no need to check whether it is enabled or not. + cmd = ['systemctl', 'disable', 'ceph-osd@{}'.format(osd_num)] + subprocess.check_call(cmd) + else: + # Neither upstart nor the ceph-osd upstart script provides for + # disabling the starting of an OSD automatically. The specific OSD + # cannot be prevented from running manually, however it can be + # prevented from running automatically on reboot by removing the + # 'ready' file in the OSD's root directory. This is due to the + # ceph-osd-all upstart script checking for the presence of this file + # before starting the OSD. + ready_file = os.path.join(OSD_BASE_DIR, 'ceph-{}'.format(osd_num), + 'ready') + if os.path.exists(ready_file): + os.unlink(ready_file) + + +def enable_osd(osd_num): + """Enables the specified OSD number. + + Ensures that the specified osd_num will be enabled and ready to start + automatically in the event of a reboot. + + :param osd_num: the osd id which should be enabled. + :raises CalledProcessError: if the call to the systemd command issued + fails when enabling the service + :raises IOError: if the attempt to write the ready file in an upstart + enabled system fails + """ + if systemd(): + cmd = ['systemctl', 'enable', 'ceph-osd@{}'.format(osd_num)] + subprocess.check_call(cmd) + else: + # When running on upstart, the OSDs are started via the ceph-osd-all + # upstart script which will only start the OSD if it has a 'ready' + # file. Make sure that file exists. + ready_file = os.path.join(OSD_BASE_DIR, 'ceph-{}'.format(osd_num), + 'ready') + with open(ready_file, 'w') as f: + f.write('ready') + + # Make sure the correct user owns the file. It shouldn't be necessary + # as the upstart script should run with root privileges, but its better + # to have all the files matching ownership. + update_owner(ready_file) + + +def update_owner(path, recurse_dirs=True): + """Changes the ownership of the specified path. + + Changes the ownership of the specified path to the new ceph daemon user + using the system's native chown functionality. This may take awhile, + so this method will issue a set_status for any changes of ownership which + recurses into directory structures. 
+def update_owner(path, recurse_dirs=True):
+    """Change the ownership of the specified path.
+
+    Changes the ownership of the specified path to the new ceph daemon user
+    using the system's native chown functionality. This may take a while,
+    so this method calls status_set whenever the change of ownership
+    recurses into a directory structure.
+
+    :param path: the path to recursively change ownership for
+    :param recurse_dirs: boolean indicating whether to recursively change the
+                         ownership of all the files in a path's subtree or to
+                         simply change the ownership of the path.
+    :raises CalledProcessError: if an error occurs issuing the chown system
+                                command
+    """
+    user = ceph_user()
+    user_group = '{ceph_user}:{ceph_user}'.format(ceph_user=user)
+    cmd = ['chown', user_group, path]
+    if os.path.isdir(path) and recurse_dirs:
+        status_set('maintenance', ('Updating ownership of %s to %s' %
+                                   (path, user)))
+        cmd.insert(1, '-R')
+
+    log('Changing ownership of {path} to {user}'.format(
+        path=path, user=user_group), DEBUG)
+    start = datetime.now()
+    subprocess.check_call(cmd)
+    elapsed_time = (datetime.now() - start)
+
+    log('Took {secs} seconds to change the ownership of path: {path}'.format(
+        secs=elapsed_time.total_seconds(), path=path), DEBUG)
+
+
+def get_osd_state(osd_num, osd_goal_state=None):
+    """Get OSD state or loop until OSD state matches OSD goal state.
+
+    If osd_goal_state is None, just return the current OSD state.
+    If osd_goal_state is not None, loop until the current OSD state matches
+    the OSD goal state.
+
+    :param osd_num: the OSD id to get state for
+    :param osd_goal_state: (Optional) string indicating state to wait for
+                           Defaults to None
+    :returns: Returns a str, the OSD state.
+    :rtype: str
+    """
+    while True:
+        asok = "/var/run/ceph/ceph-osd.{}.asok".format(osd_num)
+        cmd = [
+            'ceph',
+            'daemon',
+            asok,
+            'status'
+        ]
+        try:
+            result = json.loads(str(subprocess
+                                    .check_output(cmd)
+                                    .decode('UTF-8')))
+        except (subprocess.CalledProcessError, ValueError) as e:
+            log("{}".format(e), level=DEBUG)
+            # Wait before retrying so a dead or restarting OSD does not
+            # turn this loop into a busy-wait on the admin socket.
+            time.sleep(3)
+            continue
+        osd_state = result['state']
+        log("OSD {} state: {}, goal state: {}".format(
+            osd_num, osd_state, osd_goal_state), level=DEBUG)
+        if not osd_goal_state:
+            return osd_state
+        if osd_state == osd_goal_state:
+            return osd_state
+        time.sleep(3)
+
+
+def get_all_osd_states(osd_goal_states=None):
+    """Get all OSD states or loop until all OSD states match OSD goal states.
+
+    If osd_goal_states is None, just return a dictionary of current OSD
+    states. If osd_goal_states is not None, loop until the current OSD
+    states match the OSD goal states.
+
+    :param osd_goal_states: (Optional) dict indicating states to wait for
+                            Defaults to None
+    :returns: Returns a dictionary of current OSD states.
+    :rtype: dict
+    """
+    osd_states = {}
+    for osd_num in get_local_osd_ids():
+        if not osd_goal_states:
+            osd_states[osd_num] = get_osd_state(osd_num)
+        else:
+            osd_states[osd_num] = get_osd_state(
+                osd_num,
+                osd_goal_state=osd_goal_states[osd_num])
+    return osd_states
+
+
+@contextmanager
+def maintain_osd_state(osd_num):
+    """Ensure the state of an OSD is maintained.
+
+    Ensures the state of an OSD is the same at the end of a block nested
+    in a with statement as it was at the beginning of the block.
+
+    :param osd_num: the OSD id to maintain state for
+    """
+    osd_state = get_osd_state(osd_num)
+    try:
+        yield
+    finally:
+        get_osd_state(osd_num, osd_goal_state=osd_state)
+
+
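+# Illustrative usage (not from the charm): restart an OSD and block until it
+# reports the same state it had before the restart:
+#     with maintain_osd_state(0):
+#         service_restart('ceph-osd@0')
+
+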
+ """ + osd_states = get_all_osd_states() + try: + yield + finally: + get_all_osd_states(osd_goal_states=osd_states) + + +def list_pools(client='admin'): + """This will list the current pools that Ceph has + + :param client: (Optional) client id for Ceph key to use + Defaults to ``admin`` + :type client: str + :returns: Returns a list of available pools. + :rtype: list + :raises: subprocess.CalledProcessError if the subprocess fails to run. + """ + try: + pool_list = [] + pools = subprocess.check_output(['rados', '--id', client, 'lspools'], + universal_newlines=True, + stderr=subprocess.STDOUT) + for pool in pools.splitlines(): + pool_list.append(pool) + return pool_list + except subprocess.CalledProcessError as err: + log("rados lspools failed with error: {}".format(err.output)) + raise + + +def get_pool_param(pool, param, client='admin'): + """Get parameter from pool. + + :param pool: Name of pool to get variable from + :type pool: str + :param param: Name of variable to get + :type param: str + :param client: (Optional) client id for Ceph key to use + Defaults to ``admin`` + :type client: str + :returns: Value of variable on pool or None + :rtype: str or None + :raises: subprocess.CalledProcessError + """ + try: + output = subprocess.check_output( + ['ceph', '--id', client, 'osd', 'pool', 'get', pool, param], + universal_newlines=True, stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as cp: + if cp.returncode == 2 and 'ENOENT: option' in cp.output: + return None + raise + if ':' in output: + return output.split(':')[1].lstrip().rstrip() + + +def get_pool_erasure_profile(pool, client='admin'): + """Get erasure code profile for pool. + + :param pool: Name of pool to get variable from + :type pool: str + :param client: (Optional) client id for Ceph key to use + Defaults to ``admin`` + :type client: str + :returns: Erasure code profile of pool or None + :rtype: str or None + :raises: subprocess.CalledProcessError + """ + try: + return get_pool_param(pool, 'erasure_code_profile', client=client) + except subprocess.CalledProcessError as cp: + if cp.returncode == 13 and 'EACCES: pool' in cp.output: + # Not a Erasure coded pool + return None + raise + + +def get_pool_quota(pool, client='admin'): + """Get pool quota. + + :param pool: Name of pool to get variable from + :type pool: str + :param client: (Optional) client id for Ceph key to use + Defaults to ``admin`` + :type client: str + :returns: Dictionary with quota variables + :rtype: dict + :raises: subprocess.CalledProcessError + """ + output = subprocess.check_output( + ['ceph', '--id', client, 'osd', 'pool', 'get-quota', pool], + universal_newlines=True, stderr=subprocess.STDOUT) + rc = re.compile(r'\s+max\s+(\S+)\s*:\s+(\d+)') + result = {} + for line in output.splitlines(): + m = rc.match(line) + if m: + result.update({'max_{}'.format(m.group(1)): m.group(2)}) + return result + + +def get_pool_applications(pool='', client='admin'): + """Get pool applications. 
+def get_pool_applications(pool='', client='admin'):
+    """Get pool applications.
+
+    :param pool: (Optional) Name of pool to get applications for
+                 Defaults to get for all pools
+    :type pool: str
+    :param client: (Optional) client id for Ceph key to use
+                   Defaults to ``admin``
+    :type client: str
+    :returns: Dictionary with pool name as key
+    :rtype: dict
+    :raises: subprocess.CalledProcessError
+    """
+
+    cmd = ['ceph', '--id', client, 'osd', 'pool', 'application', 'get']
+    if pool:
+        cmd.append(pool)
+    try:
+        output = subprocess.check_output(cmd,
+                                         universal_newlines=True,
+                                         stderr=subprocess.STDOUT)
+    except subprocess.CalledProcessError as cp:
+        if cp.returncode == 2 and 'ENOENT' in cp.output:
+            return {}
+        raise
+    return json.loads(output)
+
+
+def list_pools_detail():
+    """Get detailed information about pools.
+
+    Structure:
+    {'pool_name_1': {'applications': {'application': {}},
+                     'parameters': {'pg_num': '42', 'size': '42'},
+                     'quota': {'max_bytes': '1000',
+                               'max_objects': '10'},
+                     },
+     'pool_name_2': ...
+     }
+
+    :returns: Dictionary with detailed pool information.
+    :rtype: dict
+    :raises: subprocess.CalledProcessError
+    """
+    get_params = ['pg_num', 'size']
+    result = {}
+    applications = get_pool_applications()
+    for pool in list_pools():
+        result[pool] = {
+            'applications': applications.get(pool, {}),
+            'parameters': {},
+            'quota': get_pool_quota(pool),
+        }
+        for param in get_params:
+            result[pool]['parameters'].update({
+                param: get_pool_param(pool, param)})
+        erasure_profile = get_pool_erasure_profile(pool)
+        if erasure_profile:
+            result[pool]['parameters'].update({
+                'erasure_code_profile': erasure_profile})
+    return result
+
+
+def dirs_need_ownership_update(service):
+    """Determine whether directories still need a change of ownership.
+
+    Examines the set of directories under the /var/lib/ceph/{service}
+    directory and determines if they have the correct ownership or not.
+    This is necessary due to the upgrade from Hammer to Jewel, where the
+    daemon user changes from root to ceph.
+
+    :param service: the name of the service folder to check (e.g. 'osd',
+                    'mon')
+    :returns: boolean. True if the directories need a change of ownership,
+              False otherwise.
+    :raises IOError: if an error occurs reading the file stats from one of
+                     the child directories.
+    :raises OSError: if the specified path does not exist or some other error
+    """
+    expected_owner = expected_group = ceph_user()
+    path = os.path.join(CEPH_BASE_DIR, service)
+    for child in _get_child_dirs(path):
+        curr_owner, curr_group = owner(child)
+
+        if (curr_owner == expected_owner) and (curr_group == expected_group):
+            continue
+
+        # NOTE(lathiat): when config_changed runs on reboot, the OSD might not
+        # yet be mounted or started, and the underlying directory the OSD is
+        # mounted to is expected to be owned by root. So skip the check. This
+        # may also happen for OSD directories for OSDs that were removed.
+        if (service == 'osd' and
+                not os.path.exists(os.path.join(child, 'magic'))):
+            continue
+
+        log('Directory "%s" needs its ownership updated' % child, DEBUG)
+        return True
+
+    # All child directories had the expected ownership
+    return False
+
+
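+# Illustrative example (not from the module): after a Hammer -> Jewel package
+# upgrade, /var/lib/ceph/osd/ceph-0 may still be owned by root, in which case
+# dirs_need_ownership_update('osd') returns True and upgrade_osd() takes the
+# slower per-OSD ownership-update path instead of a bulk restart.
+
+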
+# A dict of valid Ceph upgrade paths. Mapping is old -> new
+UPGRADE_PATHS = collections.OrderedDict([
+    ('firefly', 'hammer'),
+    ('hammer', 'jewel'),
+    ('jewel', 'luminous'),
+    ('luminous', 'mimic'),
+    ('mimic', 'nautilus'),
+    ('nautilus', 'octopus'),
+    ('octopus', 'pacific'),
+    ('pacific', 'quincy'),
+    ('quincy', 'reef'),
+    ('reef', 'squid'),
+])
+
+# Map UCA codenames to Ceph codenames
+UCA_CODENAME_MAP = {
+    'icehouse': 'firefly',
+    'juno': 'firefly',
+    'kilo': 'hammer',
+    'liberty': 'hammer',
+    'mitaka': 'jewel',
+    'newton': 'jewel',
+    'ocata': 'jewel',
+    'pike': 'luminous',
+    'queens': 'luminous',
+    'rocky': 'mimic',
+    'stein': 'mimic',
+    'train': 'nautilus',
+    'ussuri': 'octopus',
+    'victoria': 'octopus',
+    'wallaby': 'pacific',
+    'xena': 'pacific',
+    'yoga': 'quincy',
+    'zed': 'quincy',
+    'antelope': 'quincy',
+    'bobcat': 'reef',
+    'caracal': 'squid',
+}
+
+
+def pretty_print_upgrade_paths():
+    """Return the supported Ceph upgrade paths as 'old -> new' strings."""
+    return ["{} -> {}".format(key, value)
+            for key, value in UPGRADE_PATHS.items()]
+
+
+def resolve_ceph_version(source):
+    """Resolve a Ceph release codename from the charm's source
+    configuration, based on Ubuntu Cloud Archive pockets.
+
+    :param source: source configuration option of charm
+    :returns: Ceph release codename or None if not resolvable
+    """
+    os_release = get_os_codename_install_source(source)
+    return UCA_CODENAME_MAP.get(os_release)
+
+
+def get_ceph_pg_stat():
+    """Return the parsed result of 'ceph pg stat'.
+
+    :returns: dict of the JSON tree, or None if no PG state is reported
+    """
+    try:
+        tree = str(subprocess
+                   .check_output(['ceph', 'pg', 'stat', '--format=json'])
+                   .decode('UTF-8'))
+        try:
+            json_tree = json.loads(tree)
+            if not json_tree['num_pg_by_state']:
+                return None
+            return json_tree
+        except ValueError as v:
+            log("Unable to parse ceph pg stat json: {}. Error: {}".format(
+                tree, v))
+            raise
+    except subprocess.CalledProcessError as e:
+        log("ceph pg stat command failed with message: {}".format(e))
+        raise
+
+
+def get_ceph_health():
+    """Return the health of the cluster from a 'ceph status'.
+
+    To get the overall status, use get_ceph_health()['overall_status'].
+
+    :returns: dict tree of ceph status
+    :raises: CalledProcessError if the ceph status command fails
+    """
+    try:
+        tree = str(subprocess
+                   .check_output(['ceph', 'status', '--format=json'])
+                   .decode('UTF-8'))
+        try:
+            json_tree = json.loads(tree)
+            # Make sure children are present in the JSON
+            if not json_tree['overall_status']:
+                return None
+
+            return json_tree
+        except ValueError as v:
+            log("Unable to parse ceph tree json: {}. Error: {}".format(
+                tree, v))
+            raise
+    except subprocess.CalledProcessError as e:
+        log("ceph status command failed with message: {}".format(e))
+        raise
+
+
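+# Illustrative example (not part of the module): resolving a charm source to
+# a Ceph release and looking up its next upgrade step:
+#     resolve_ceph_version('cloud:jammy-caracal')  # -> 'squid'
+#     UPGRADE_PATHS['reef']                        # -> 'squid'
+
+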
+def reweight_osd(osd_num, new_weight):
+    """Change the crush weight of an OSD to the value specified.
+
+    :param osd_num: the OSD id which should be changed
+    :param new_weight: the new weight for the OSD
+    :returns: bool. True if output looks right, else False.
+    :raises CalledProcessError: if an error occurs invoking the ceph command
+    """
+    try:
+        cmd_result = str(subprocess
+                         .check_output(['ceph', 'osd', 'crush',
+                                        'reweight', "osd.{}".format(osd_num),
+                                        new_weight],
+                                       stderr=subprocess.STDOUT)
+                         .decode('UTF-8'))
+        expected_result = "reweighted item id {ID} name \'osd.{ID}\'".format(
+            ID=osd_num) + " to {}".format(new_weight)
+        log(cmd_result)
+        if expected_result in cmd_result:
+            return True
+        return False
+    except subprocess.CalledProcessError as e:
+        log("ceph osd crush reweight command failed"
+            " with message: {}".format(e))
+        raise
+
+
+def determine_packages():
+    """Determine packages for installation.
+
+    :returns: list of Ceph packages
+    """
+    packages = PACKAGES.copy()
+    if CompareHostReleases(lsb_release()['DISTRIB_CODENAME']) >= 'eoan':
+        btrfs_package = 'btrfs-progs'
+    else:
+        btrfs_package = 'btrfs-tools'
+    packages.append(btrfs_package)
+    return packages
+
+
+def determine_packages_to_remove():
+    """Determine packages for removal.
+
+    Note: if in a container, then the CHRONY_PACKAGE is removed.
+
+    :returns: list of packages to be removed
+    :rtype: List[str]
+    """
+    rm_packages = REMOVE_PACKAGES.copy()
+    if is_container():
+        rm_packages.extend(filter_missing_packages([CHRONY_PACKAGE]))
+    return rm_packages
+
+
+def bootstrap_manager():
+    """Create a keyring for the local ceph-mgr daemon and start it.
+
+    If the keyring already exists, the mgr is assumed to be initialized;
+    otherwise the keyring is created, its ownership is fixed up and the
+    ceph-mgr systemd unit is enabled and restarted.
+    """
+    hostname = socket.gethostname()
+    path = '/var/lib/ceph/mgr/ceph-{}'.format(hostname)
+    keyring = os.path.join(path, 'keyring')
+
+    if os.path.exists(keyring):
+        log('bootstrap_manager: mgr already initialized.')
+    else:
+        mkdir(path, owner=ceph_user(), group=ceph_user())
+        subprocess.check_call(['ceph', 'auth', 'get-or-create',
+                               'mgr.{}'.format(hostname), 'mon',
+                               'allow profile mgr', 'osd', 'allow *',
+                               'mds', 'allow *', '--out-file',
+                               keyring])
+        chownr(path, ceph_user(), ceph_user())
+
+        unit = 'ceph-mgr@{}'.format(hostname)
+        subprocess.check_call(['systemctl', 'enable', unit])
+        service_restart(unit)
+
+
+def enable_msgr2():
+    """Enable the msgr2 protocol on the monitors.
+
+    :raises: subprocess.CalledProcessError if the command fails
+    """
+    cmd = ['ceph', 'mon', 'enable-msgr2']
+    subprocess.check_call(cmd)
+
+
+def osd_noout(enable):
+    """Set or unset the cluster-wide 'noout' flag.
+
+    :param enable: bool. True to set noout, False to unset.
+    :returns: bool. True if output looks right.
+    :raises CalledProcessError: if an error occurs invoking the ceph command
+    """
+    operation = {
+        True: 'set',
+        False: 'unset',
+    }
+    try:
+        subprocess.check_call(['ceph', '--id', 'admin',
+                               'osd', operation[enable],
+                               'noout'])
+        log('running ceph osd {} noout'.format(operation[enable]))
+        return True
+    except subprocess.CalledProcessError as e:
+        log(e)
+        raise
+
+
+class OSDConfigSetError(Exception):
+    """Error occurred applying OSD settings."""
+    pass
+
+
+def apply_osd_settings(settings):
+    """Apply the provided OSD settings.
+
+    Apply the provided settings to all local OSDs unless they are already
+    present. Settings stop being applied on encountering an error.
+
+    :param settings: dict. Dictionary of settings to apply.
+    :returns: bool. True if commands ran successfully.
+    :raises: OSDConfigSetError
+    """
+    current_settings = {}
+    base_cmd = 'ceph daemon osd.{osd_id} config --format=json'
+    get_cmd = base_cmd + ' get {key}'
+    set_cmd = base_cmd + ' set {key} {value}'
+
+    def _get_cli_key(key):
+        return key.replace(' ', '_')
+    # Retrieve the current values to check keys are correct and to make this
+    # a noop if the settings are already applied.
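+    # Illustrative call (not from the charm): apply_osd_settings(
+    #     {'osd max backfills': 1}) would, for each local OSD, run roughly
+    # 'ceph daemon osd.<id> config set osd_max_backfills 1', skipping any
+    # OSD whose current value already matches.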
+    for osd_id in get_local_osd_ids():
+        for key, value in sorted(settings.items()):
+            cli_key = _get_cli_key(key)
+            cmd = get_cmd.format(osd_id=osd_id, key=cli_key)
+            out = json.loads(
+                subprocess.check_output(cmd.split()).decode('UTF-8'))
+            if 'error' in out:
+                log("Error retrieving OSD setting: {}".format(out['error']),
+                    level=ERROR)
+                return False
+            current_settings[key] = out[cli_key]
+        settings_diff = {
+            k: v
+            for k, v in settings.items()
+            if str(v) != str(current_settings[k])}
+        for key, value in sorted(settings_diff.items()):
+            log("Setting {} to {}".format(key, value), level=DEBUG)
+            cmd = set_cmd.format(
+                osd_id=osd_id,
+                key=_get_cli_key(key),
+                value=value)
+            out = json.loads(
+                subprocess.check_output(cmd.split()).decode('UTF-8'))
+            if 'error' in out:
+                log("Error applying OSD setting: {}".format(out['error']),
+                    level=ERROR)
+                raise OSDConfigSetError
+    return True
+
+
+def enabled_manager_modules():
+    """Return a list of enabled manager modules.
+
+    :rtype: List[str]
+    """
+    cmd = ['ceph', 'mgr', 'module', 'ls']
+    quincy_or_later = cmp_pkgrevno('ceph-common', '17.1.0') >= 0
+    if quincy_or_later:
+        cmd.append('--format=json')
+    try:
+        modules = subprocess.check_output(cmd).decode('UTF-8')
+    except subprocess.CalledProcessError as e:
+        log("Failed to list ceph modules: {}".format(e), WARNING)
+        return []
+    modules = json.loads(modules)
+    return modules['enabled_modules']
+
+
+def is_mgr_module_enabled(module):
+    """Is a given manager module enabled.
+
+    :param module: the module name to check
+    :type module: str
+    :returns: Whether the named module is enabled
+    :rtype: bool
+    """
+    return module in enabled_manager_modules()
+
+
+is_dashboard_enabled = functools.partial(is_mgr_module_enabled, 'dashboard')
+
+
+def mgr_enable_module(module):
+    """Enable a Ceph Manager Module.
+
+    :param module: The module name to enable
+    :type module: str
+    :returns: True if the module was enabled by this call, False if it
+              was already enabled
+    :rtype: bool
+
+    :raises: subprocess.CalledProcessError
+    """
+    if not is_mgr_module_enabled(module):
+        subprocess.check_call(['ceph', 'mgr', 'module', 'enable', module])
+        return True
+    return False
+
+
+mgr_enable_dashboard = functools.partial(mgr_enable_module, 'dashboard')
+
+
+def mgr_disable_module(module):
+    """Disable a Ceph Manager Module.
+
+    :param module: The module name to disable
+    :type module: str
+    :returns: True if the module was disabled by this call, False if it
+              was already disabled
+    :rtype: bool
+
+    :raises: subprocess.CalledProcessError
+    """
+    if is_mgr_module_enabled(module):
+        subprocess.check_call(['ceph', 'mgr', 'module', 'disable', module])
+        return True
+    return False
+
+
+mgr_disable_dashboard = functools.partial(mgr_disable_module, 'dashboard')
+
+
+def ceph_config_set(name, value, who):
+    """Set a Ceph config option.
+
+    :param name: key to set
+    :type name: str
+    :param value: value corresponding to key
+    :type value: str
+    :param who: Config area the key is associated with (e.g. 'dashboard')
+    :type who: str
+
+    :raises: subprocess.CalledProcessError
+    """
+    subprocess.check_call(['ceph', 'config', 'set', who, name, value])
+
+
+mgr_config_set = functools.partial(ceph_config_set, who='mgr')
+
+
+def ceph_config_get(name, who):
+    """Retrieve the value of a Ceph config option.
+
+    :param name: key to lookup
+    :type name: str
+    :param who: Config area the key is associated with (e.g.
'dashboard') + :type who: str + :returns: Value associated with key + :rtype: str + :raises: subprocess.CalledProcessError + """ + return subprocess.check_output( + ['ceph', 'config', 'get', who, name]).decode('UTF-8') + + +mgr_config_get = functools.partial(ceph_config_get, who='mgr') + + +def _dashboard_set_ssl_artifact(path, artifact_name, hostname=None): + """Set SSL dashboard config option. + + :param path: Path to file + :type path: str + :param artifact_name: Option name for setting the artifact + :type artifact_name: str + :param hostname: If hostname is set artifact will only be associated with + the dashboard on that host. + :type hostname: str + :raises: subprocess.CalledProcessError + """ + cmd = ['ceph', 'dashboard', artifact_name] + if hostname: + cmd.append(hostname) + cmd.extend(['-i', path]) + log(cmd, level=DEBUG) + subprocess.check_call(cmd) + + +dashboard_set_ssl_certificate = functools.partial( + _dashboard_set_ssl_artifact, + artifact_name='set-ssl-certificate') + + +dashboard_set_ssl_certificate_key = functools.partial( + _dashboard_set_ssl_artifact, + artifact_name='set-ssl-certificate-key') diff --git a/ceph-osd/metadata.yaml b/ceph-osd/metadata.yaml new file mode 100644 index 00000000..379f88d1 --- /dev/null +++ b/ceph-osd/metadata.yaml @@ -0,0 +1,53 @@ +name: ceph-osd +summary: Highly scalable distributed storage - Ceph OSD storage +maintainer: OpenStack Charmers +provides: + nrpe-external-master: + interface: nrpe-external-master + scope: container +tags: +- openstack +- storage +- file-servers +- misc +series: +- focal +- jammy +description: | + Ceph is a distributed storage and network file system designed to provide + excellent performance, reliability, and scalability. + . + This charm provides the Ceph OSD personality for expanding storage capacity + within a ceph deployment. +docs: https://discourse.charmhub.io/t/ceph-osd-docs-index/10545 +extra-bindings: + public: + cluster: +requires: + mon: + interface: ceph-osd + secrets-storage: + interface: vault-kv +storage: + osd-devices: + type: block + multiple: + range: 0- + minimum-size: 1G + osd-journals: + type: block + multiple: + range: 0- + bluestore-db: + type: block + multiple: + range: 0- + bluestore-wal: + type: block + multiple: + range: 0- + cache-devices: + type: block + multiple: + range: 0- + minimum-size: 10G diff --git a/ceph-osd/osci.yaml b/ceph-osd/osci.yaml new file mode 100644 index 00000000..26d8e8b8 --- /dev/null +++ b/ceph-osd/osci.yaml @@ -0,0 +1,22 @@ +- project: + templates: + - charm-unit-jobs-py38 + - charm-unit-jobs-py310 + - charm-functional-jobs + vars: + needs_charm_build: true + charm_build_name: ceph-osd + build_type: charmcraft + charmcraft_channel: 2.x/stable + check: + jobs: + - new-install-jammy-caracal +- job: + name: new-install-jammy-caracal + parent: func-target + dependencies: + - osci-lint + - charm-build + - tox-py38 + vars: + tox_extra_args: '-- install:local-jammy-caracal' diff --git a/ceph-osd/rename.sh b/ceph-osd/rename.sh new file mode 100755 index 00000000..d0c35c97 --- /dev/null +++ b/ceph-osd/rename.sh @@ -0,0 +1,13 @@ +#!/bin/bash +charm=$(grep "charm_build_name" osci.yaml | awk '{print $2}') +echo "renaming ${charm}_*.charm to ${charm}.charm" +echo -n "pwd: " +pwd +ls -al +echo "Removing bad downloaded charm maybe?" +if [[ -e "${charm}.charm" ]]; +then + rm "${charm}.charm" +fi +echo "Renaming charm here." 
+mv ${charm}_*.charm ${charm}.charm diff --git a/ceph-osd/requirements.txt b/ceph-osd/requirements.txt new file mode 100644 index 00000000..3b1cb7b1 --- /dev/null +++ b/ceph-osd/requirements.txt @@ -0,0 +1,29 @@ +# This file is managed centrally by release-tools and should not be modified +# within individual charm repos. See the 'global' dir contents for available +# choices of *requirements.txt files for OpenStack Charms: +# https://github.com/openstack-charmers/release-tools +# +# TODO: Distill the func test requirements from the lint/unit test +# requirements. They are intertwined. Also, Zaza itself should specify +# all of its own requirements and if it doesn't, fix it there. +# +pbr==5.6.0 +simplejson>=2.2.0 +netifaces>=0.10.4 + +# NOTE: newer versions of cryptography require a Rust compiler to build, +# see +# * https://github.com/openstack-charmers/zaza/issues/421 +# * https://mail.python.org/pipermail/cryptography-dev/2021-January/001003.html +# +cryptography<3.4 + +# Strange import error with newer netaddr: +netaddr>0.7.16,<0.8.0 + +Jinja2>=2.6 # BSD License (3 clause) +six>=1.9.0 + +dnspython + +psutil>=1.1.1,<2.0.0 diff --git a/ceph-osd/revision b/ceph-osd/revision new file mode 100644 index 00000000..3f10ffe7 --- /dev/null +++ b/ceph-osd/revision @@ -0,0 +1 @@ +15 \ No newline at end of file diff --git a/ceph-osd/setup.cfg b/ceph-osd/setup.cfg new file mode 100644 index 00000000..37083b62 --- /dev/null +++ b/ceph-osd/setup.cfg @@ -0,0 +1,5 @@ +[nosetests] +verbosity=2 +with-coverage=1 +cover-erase=1 +cover-package=hooks diff --git a/ceph-osd/templates/ceph.conf b/ceph-osd/templates/ceph.conf new file mode 100644 index 00000000..81b9ea1a --- /dev/null +++ b/ceph-osd/templates/ceph.conf @@ -0,0 +1,118 @@ +[global] +{%- if old_auth %} +auth supported = {{ auth_supported }} +{%- else %} +auth cluster required = {{ auth_supported }} +auth service required = {{ auth_supported }} +auth client required = {{ auth_supported }} +{%- endif %} + +mon host = {{ mon_hosts }} +fsid = {{ fsid }} + +log to syslog = {{ use_syslog }} +err to syslog = {{ use_syslog }} +clog to syslog = {{ use_syslog }} +debug osd = {{ loglevel }}/5 + +{% if ms_bind_ipv6 %} +ms_bind_ipv6 = true +{%- endif %} +{%- if ms_bind_ipv4 == false %} +ms_bind_ipv4 = false +{% endif %} +{% if ceph_public_network is string %} +public network = {{ ceph_public_network }} +{%- endif %} +{%- if ceph_cluster_network is string %} +cluster network = {{ ceph_cluster_network }} +{%- endif %} +{%- if public_addr %} +public addr = {{ public_addr }} +{%- endif %} +{%- if cluster_addr %} +cluster addr = {{ cluster_addr }} +{%- endif %} +{%- if crush_location %} +crush location = {{crush_location}} +{%- endif %} +{%- if upgrade_in_progress %} +setuser match path = /var/lib/ceph/$type/$cluster-$id +{%- endif %} +{%- if crush_initial_weight is not none %} +osd crush initial weight = {{ crush_initial_weight }} +{%- endif %} +{% if global -%} +# The following are user-provided options provided via the config-flags charm option. 
+# User-provided [global] section config +{% for key in global -%} +{{ key }} = {{ global[key] }} +{% endfor %} +{% endif %} + +{% if bluestore_experimental -%} +enable experimental unrecoverable data corrupting features = bluestore rocksdb +{%- endif %} + +[client.osd-upgrade] +keyring = /var/lib/ceph/osd/ceph.client.osd-upgrade.keyring + +[client.osd-removal] +keyring = /var/lib/ceph/osd/ceph.client.osd-removal.keyring + +[client.crash] +keyring = /var/lib/ceph/osd/ceph.client.crash.keyring + +[mon] +keyring = /var/lib/ceph/mon/$cluster-$id/keyring + +[mds] +keyring = /var/lib/ceph/mds/$cluster-$id/keyring + +[osd] +keyring = /var/lib/ceph/osd/$cluster-$id/keyring + +{% if not bluestore_experimental -%} +osd objectstore = bluestore +{%- endif %} +{% if bluestore_block_wal_size -%} +bluestore block wal size = {{ bluestore_block_wal_size }} +{%- endif %} +{% if bluestore_block_db_size -%} +bluestore block db size = {{ bluestore_block_db_size }} +{%- endif %} +{% include 'section-ceph-bluestore-compression' %} + +bdev enable discard = {{ bdev_discard }} +bdev async discard = {{ bdev_discard }} + +{%- if short_object_len %} +osd max object name len = 256 +osd max object namespace len = 64 +{% endif %} +{%- if osd_max_backfills %} +osd max backfills = {{ osd_max_backfills }} +{%- endif %} +{%- if osd_recovery_max_active %} +osd recovery max active = {{ osd_recovery_max_active }} +{%- endif %} + +{% if osd_from_client -%} +# The following are charm provided options provided via the mon relation. +{% for key in osd_from_client -%} +{{ key }} = {{ osd_from_client[key] }} +{% endfor %} +{% endif %} +{% if osd_from_client_conflict -%} +# The following are charm provided options which conflict with options from +# config-flags. +{% for key in osd_from_client_conflict -%} +# {{ key }} = {{ osd_from_client_conflict[key] }} +{% endfor %} +{% endif %} +{% if osd -%} +# The following are user-provided options provided via the config-flags charm option. +{% for key in osd -%} +{{ key }} = {{ osd[key] }} +{% endfor %} +{% endif %} diff --git a/ceph-osd/templates/hdparm.conf b/ceph-osd/templates/hdparm.conf new file mode 100644 index 00000000..f0a4d59b --- /dev/null +++ b/ceph-osd/templates/hdparm.conf @@ -0,0 +1,7 @@ +{% for uuid,settings in drive_settings.items() %} + /dev/disk/by-uuid/{{ uuid }} { + {% for key, value in settings.items() %} + {{ key }} = {{ value }} + {% endfor %} + } +{% endfor %} \ No newline at end of file diff --git a/ceph-osd/templates/vaultlocker.conf.j2 b/ceph-osd/templates/vaultlocker.conf.j2 new file mode 100644 index 00000000..5679f81b --- /dev/null +++ b/ceph-osd/templates/vaultlocker.conf.j2 @@ -0,0 +1,6 @@ +# vaultlocker configuration from ceph-osd charm +[vault] +url = {{ vault_url }} +approle = {{ role_id }} +backend = {{ secret_backend }} +secret_id = {{ secret_id }} diff --git a/ceph-osd/terraform/README.md b/ceph-osd/terraform/README.md new file mode 100644 index 00000000..fe5edbe4 --- /dev/null +++ b/ceph-osd/terraform/README.md @@ -0,0 +1,94 @@ +# Terraform Manifest Module + +This module reads a yaml configuration file and exports the values into terraform variables that +can be passed down into other modules. It is specifically tailored for working with +modules for charms defined with the +[juju terraform provider](https://registry.terraform.io/providers/juju/juju/latest/docs). It +simplifies having to pass every individual charm input as a variable in the product level +module for a given product. 
+
+## Inputs
+
+| Name       | Type   | Description                                                             | Required |
+|------------|--------|-------------------------------------------------------------------------|----------|
+| `manifest` | string | Absolute path to the yaml file with the config for a Juju application. | true     |
+| `app`      | string | Name of the application to load the config for.                        | true     |
+
+## Outputs
+
+All outputs are under `config` as a map of values below:
+
+| Name          | Description                                                                    |
+|---------------|--------------------------------------------------------------------------------|
+| `app_name`    | Name of the application in Juju.                                               |
+| `base`        | Base to deploy the charm on, e.g. ubuntu@24.04.                                |
+| `channel`     | Channel of the application being deployed.                                     |
+| `config`      | Map of the config for the charm; values can be found under the specific charm. |
+| `constraints` | String of constraints to deploy the charm with, e.g. `cores=2 mem=4069M`.      |
+| `resources`   | List of resources to deploy with the charm.                                    |
+| `revision`    | Specific revision of this charm to deploy.                                     |
+| `units`       | Number of units of the charm to deploy.                                        |
+| `storage`     | Storage configuration of the charm to deploy.                                  |
+
+## Usage
+
+This module is meant to be used as a helper for product modules. It lets the
+user keep a single manifest yaml file holding all the configuration for a
+solution or deployment, so the developer does not have to maintain the
+configuration separately for each charm and for the overall product.
+
+### Defining a `manifest` in terraform
+
+The manifest module has to be defined for each charm in question. Terraform
+loads the config under the app key and outputs the values. If the key is not
+found in the manifest, the module returns `null` and terraform ignores the
+configuration.
+
+```
+module "ceph_osd_config" {
+  source   = "git::https://github.com/canonical/k8s-bundles//terraform/manifest?ref=main"
+  manifest = var.manifest_yaml
+  app      = "ceph_osd"
+}
+```
+
+These values can then be passed into a resource for a specific charm:
+
+```
+module "ceph_osd" {
+  source      = "git::https://github.com/canonical/ceph-charms//ceph-osd/terraform?ref=main"
+  app_name    = module.ceph_osd_config.config.app_name
+  channel     = module.ceph_osd_config.config.channel
+  config      = module.ceph_osd_config.config.config
+  constraints = module.ceph_osd_config.config.constraints
+  model       = var.model
+  resources   = module.ceph_osd_config.config.resources
+  revision    = module.ceph_osd_config.config.revision
+  base        = module.ceph_osd_config.config.base
+  units       = module.ceph_osd_config.config.units
+}
+```
+
+### Defining a manifest.yaml
+
+In the implementation of the product module, the user can specify their
+configuration using a single manifest file similar to the one below:
+
+``` yaml
+ceph_mon:
+  channel: quincy/stable
+  constraints: arch=amd64 cores=2 mem=8192M root-disk=16384M virt-type=virtual-machine
+  units: 1
+  config:
+    monitor-count: 1
+    expected-osd-count: 2
+ceph_osd:
+  channel: quincy/stable
+  constraints: arch=amd64 cores=2 mem=8192M root-disk=16384M virt-type=virtual-machine
+  units: 2
+  storage:
+    osd-devices: 1G,1
+    osd-journals: 1G,1
+```
+
+Using the terraform in the above section, the `units`, `base`, `constraints`,
+and `channel` are forwarded into the `ceph-osd` deployment.
diff --git a/ceph-osd/terraform/main.tf b/ceph-osd/terraform/main.tf
new file mode 100644
index 00000000..e6c5b939
--- /dev/null
+++ b/ceph-osd/terraform/main.tf
@@ -0,0 +1,20 @@
+# Copyright 2025 Canonical Ltd.
+# See LICENSE file for licensing details.
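+
+# The values consumed below are declared in variables.tf and can be
+# populated from a manifest file as described in the module README.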
+ +resource "juju_application" "ceph_osd" { + name = var.app_name + model = var.model + + charm { + name = "ceph-osd" + channel = var.channel + revision = var.revision + base = var.base + } + + config = var.config + constraints = var.constraints + units = var.units + resources = var.resources + storage_directives = var.storage +} diff --git a/ceph-osd/terraform/outputs.tf b/ceph-osd/terraform/outputs.tf new file mode 100644 index 00000000..8b155a32 --- /dev/null +++ b/ceph-osd/terraform/outputs.tf @@ -0,0 +1,13 @@ +# Copyright 2025 Canonical Ltd. +# See LICENSE file for licensing details. + +output "app_name" { + description = "Name of the deployed application." + value = juju_application.ceph_osd.name +} + +output "requires" { + value = { + mon = "mon" + } +} diff --git a/ceph-osd/terraform/variables.tf b/ceph-osd/terraform/variables.tf new file mode 100644 index 00000000..100c3b09 --- /dev/null +++ b/ceph-osd/terraform/variables.tf @@ -0,0 +1,61 @@ +# Copyright 2025 Canonical Ltd. +# See LICENSE file for licensing details. + +variable "app_name" { + description = "Name of the application in the Juju model." + type = string + default = "ceph-osd" +} + +variable "base" { + description = "Ubuntu bases to deploy the charm onto" + type = string + default = "ubuntu@24.04" +} + +variable "channel" { + description = "The channel to use when deploying a charm." + type = string + default = "squid/beta" +} + +variable "resources" { + description = "Resources to use with the application." + type = map(string) + default = {} +} + +variable "revision" { + description = "Revision number of the charm" + type = number + default = null +} + +variable "units" { + description = "Number of units to deploy" + type = number + default = 1 +} + +variable "config" { + description = "Application config. Details about available options can be found at https://charmhub.io/ceph-osd/configurations." + type = map(string) + default = {} +} + +variable "storage" { + description = "Storage configuration for this application." + type = map(string) + default = {} +} + +variable "constraints" { + description = "Juju constraints to apply for this application." + type = string + default = "arch=amd64" +} + +variable "model" { + description = "Reference to a `juju_model`." + type = string +} diff --git a/ceph-osd/terraform/versions.tf b/ceph-osd/terraform/versions.tf new file mode 100644 index 00000000..650fa445 --- /dev/null +++ b/ceph-osd/terraform/versions.tf @@ -0,0 +1,13 @@ +# Copyright 2025 Canonical Ltd. +# See LICENSE file for licensing details. + +terraform { + + required_version = ">= 1.6" + required_providers { + juju = { + source = "juju/juju" + version = ">= 0.14.0, < 1.0.0" + } + } +} diff --git a/ceph-osd/test-requirements.txt b/ceph-osd/test-requirements.txt new file mode 100644 index 00000000..e972406e --- /dev/null +++ b/ceph-osd/test-requirements.txt @@ -0,0 +1,29 @@ +# This file is managed centrally by release-tools and should not be modified +# within individual charm repos. See the 'global' dir contents for available +# choices of *requirements.txt files for OpenStack Charms: +# https://github.com/openstack-charmers/release-tools +# +# TODO: Distill the func test requirements from the lint/unit test +# requirements. They are intertwined. Also, Zaza itself should specify +# all of its own requirements and if it doesn't, fix it there. +# +pyparsing<3.0.0 # aodhclient is pinned in zaza and needs pyparsing < 3.0.0, but cffi also needs it, so pin here. 
+ +requests>=2.18.4 + +stestr>=2.2.0 + +# Dependency of stestr. Workaround for +# https://github.com/mtreinish/stestr/issues/145 +cliff<3.0.0 + +coverage>=4.5.2 +pyudev # for ceph-* charm unit tests (need to fix the ceph-* charm unit tests/mocking) +git+https://github.com/openstack-charmers/zaza.git#egg=zaza +git+https://github.com/openstack-charmers/zaza-openstack-tests.git#egg=zaza.openstack + +# Needed for charm-glance: +git+https://opendev.org/openstack/tempest.git#egg=tempest + +croniter # needed for charm-rabbitmq-server unit tests +psutil diff --git a/ceph-osd/tests/bundles/focal-yoga.yaml b/ceph-osd/tests/bundles/focal-yoga.yaml new file mode 100644 index 00000000..6fe81b77 --- /dev/null +++ b/ceph-osd/tests/bundles/focal-yoga.yaml @@ -0,0 +1,239 @@ +variables: + openstack-origin: &openstack-origin cloud:focal-yoga + +series: focal + +comment: +- 'machines section to decide order of deployment. database sooner = faster' +machines: + '0': + constraints: mem=3072M + '1': + constraints: mem=3072M + '2': + constraints: mem=3072M + '3': + '4': + '5': + '6': + '7': + '8': + '9': + '10': + '11': + '12': + '13': + '14': + '15': + +applications: + + keystone-mysql-router: + charm: ch:mysql-router + channel: 8.0.19/edge + glance-mysql-router: + charm: ch:mysql-router + channel: 8.0.19/edge + cinder-mysql-router: + charm: ch:mysql-router + channel: 8.0.19/edge + nova-cloud-controller-mysql-router: + charm: ch:mysql-router + channel: 8.0.19/edge + placement-mysql-router: + charm: ch:mysql-router + channel: 8.0.19/edge + + mysql-innodb-cluster: + charm: ch:mysql-innodb-cluster + num_units: 3 + options: + source: *openstack-origin + to: + - '0' + - '1' + - '2' + channel: 8.0.19/edge + + ceph-osd: + charm: ch:ceph-osd + channel: quincy/edge + num_units: 3 + storage: + osd-devices: 'cinder,10G,2' + options: + osd-devices: '/dev/test-non-existent' + source: *openstack-origin + aa-profile-mode: enforce + to: + - '3' + - '4' + - '5' + + ceph-mon: + charm: ch:ceph-mon + num_units: 3 + options: + monitor-count: '3' + source: *openstack-origin + to: + - '6' + - '7' + - '8' + channel: quincy/edge + + rabbitmq-server: + charm: ch:rabbitmq-server + num_units: 1 + options: + source: *openstack-origin + to: + - '9' + channel: 3.9/edge + + keystone: + expose: True + charm: ch:keystone + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '10' + channel: yoga/edge + + nova-compute: + charm: ch:nova-compute + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '11' + channel: yoga/edge + + glance: + expose: True + charm: ch:glance + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '12' + channel: yoga/edge + + cinder: + expose: True + charm: ch:cinder + num_units: 1 + options: + openstack-origin: *openstack-origin + block-device: 'None' + glance-api-version: '2' + to: + - '13' + channel: yoga/edge + + cinder-ceph: + charm: ch:cinder-ceph + channel: yoga/edge + + nova-cloud-controller: + expose: True + charm: ch:nova-cloud-controller + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '14' + channel: yoga/edge + + placement: + charm: ch:placement + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '15' + channel: yoga/edge + +relations: + - - 'nova-compute:amqp' + - 'rabbitmq-server:amqp' + + - - 'nova-compute:image-service' + - 'glance:image-service' + + - - 'nova-compute:ceph' + - 'ceph-mon:client' + + - - 'keystone:shared-db' + - 'keystone-mysql-router:shared-db' + - - 'keystone-mysql-router:db-router' + 
- 'mysql-innodb-cluster:db-router' + + - - 'glance:shared-db' + - 'glance-mysql-router:shared-db' + - - 'glance-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'glance:identity-service' + - 'keystone:identity-service' + + - - 'glance:amqp' + - 'rabbitmq-server:amqp' + + - - 'glance:ceph' + - 'ceph-mon:client' + + - - 'cinder:shared-db' + - 'cinder-mysql-router:shared-db' + - - 'cinder-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'cinder:identity-service' + - 'keystone:identity-service' + + - - 'cinder:amqp' + - 'rabbitmq-server:amqp' + + - - 'cinder:image-service' + - 'glance:image-service' + + - - 'cinder-ceph:storage-backend' + - 'cinder:storage-backend' + + - - 'cinder-ceph:ceph' + - 'ceph-mon:client' + + - - 'ceph-osd:mon' + - 'ceph-mon:osd' + + - - 'nova-cloud-controller:shared-db' + - 'nova-cloud-controller-mysql-router:shared-db' + - - 'nova-cloud-controller-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'nova-cloud-controller:identity-service' + - 'keystone:identity-service' + + - - 'nova-cloud-controller:amqp' + - 'rabbitmq-server:amqp' + + - - 'nova-cloud-controller:cloud-compute' + - 'nova-compute:cloud-compute' + + - - 'nova-cloud-controller:image-service' + - 'glance:image-service' + + - - 'placement:shared-db' + - 'placement-mysql-router:shared-db' + - - 'placement-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'placement' + - 'keystone' + + - - 'placement' + - 'nova-cloud-controller' + + - - 'cinder-ceph:ceph-access' + - 'nova-compute:ceph-access' diff --git a/ceph-osd/tests/bundles/jammy-antelope.yaml b/ceph-osd/tests/bundles/jammy-antelope.yaml new file mode 100644 index 00000000..6474006a --- /dev/null +++ b/ceph-osd/tests/bundles/jammy-antelope.yaml @@ -0,0 +1,235 @@ +variables: + openstack-origin: &openstack-origin cloud:jammy-antelope + +series: jammy + +comment: +- 'machines section to decide order of deployment. 
database sooner = faster' +machines: + '0': + constraints: mem=3072M + '1': + constraints: mem=3072M + '2': + constraints: mem=3072M + '3': + '4': + '5': + '6': + '7': + '8': + '9': + '10': + '11': + '12': + '13': + '14': + '15': + +applications: + + keystone-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + glance-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + cinder-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + nova-cloud-controller-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + placement-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + + mysql-innodb-cluster: + charm: ch:mysql-innodb-cluster + num_units: 3 + to: + - '0' + - '1' + - '2' + channel: 8.0/edge + + ceph-osd: + charm: ch:ceph-osd + channel: quincy/edge + num_units: 3 + storage: + osd-devices: 'cinder,10G,2' + options: + osd-devices: '/dev/test-non-existent' + source: *openstack-origin + aa-profile-mode: enforce + to: + - '3' + - '4' + - '5' + + ceph-mon: + charm: ch:ceph-mon + num_units: 3 + options: + monitor-count: '3' + source: *openstack-origin + to: + - '6' + - '7' + - '8' + channel: quincy/edge + + rabbitmq-server: + charm: ch:rabbitmq-server + num_units: 1 + to: + - '9' + channel: 3.9/edge + + keystone: + expose: True + charm: ch:keystone + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '10' + channel: 2023.1/edge + + nova-compute: + charm: ch:nova-compute + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '11' + channel: 2023.1/edge + + glance: + expose: True + charm: ch:glance + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '12' + channel: 2023.1/edge + + cinder: + expose: True + charm: ch:cinder + num_units: 1 + options: + openstack-origin: *openstack-origin + block-device: 'None' + glance-api-version: '2' + to: + - '13' + channel: 2023.1/edge + + cinder-ceph: + charm: ch:cinder-ceph + channel: 2023.1/edge + + nova-cloud-controller: + expose: True + charm: ch:nova-cloud-controller + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '14' + channel: 2023.1/edge + + placement: + charm: ch:placement + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '15' + channel: 2023.1/edge + +relations: + - - 'nova-compute:amqp' + - 'rabbitmq-server:amqp' + + - - 'nova-compute:image-service' + - 'glance:image-service' + + - - 'nova-compute:ceph' + - 'ceph-mon:client' + + - - 'keystone:shared-db' + - 'keystone-mysql-router:shared-db' + - - 'keystone-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'glance:shared-db' + - 'glance-mysql-router:shared-db' + - - 'glance-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'glance:identity-service' + - 'keystone:identity-service' + + - - 'glance:amqp' + - 'rabbitmq-server:amqp' + + - - 'glance:ceph' + - 'ceph-mon:client' + + - - 'cinder:shared-db' + - 'cinder-mysql-router:shared-db' + - - 'cinder-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'cinder:identity-service' + - 'keystone:identity-service' + + - - 'cinder:amqp' + - 'rabbitmq-server:amqp' + + - - 'cinder:image-service' + - 'glance:image-service' + + - - 'cinder-ceph:storage-backend' + - 'cinder:storage-backend' + + - - 'cinder-ceph:ceph' + - 'ceph-mon:client' + + - - 'ceph-osd:mon' + - 'ceph-mon:osd' + + - - 'nova-cloud-controller:shared-db' + - 'nova-cloud-controller-mysql-router:shared-db' + - - 'nova-cloud-controller-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 
'nova-cloud-controller:identity-service' + - 'keystone:identity-service' + + - - 'nova-cloud-controller:amqp' + - 'rabbitmq-server:amqp' + + - - 'nova-cloud-controller:cloud-compute' + - 'nova-compute:cloud-compute' + + - - 'nova-cloud-controller:image-service' + - 'glance:image-service' + + - - 'placement:shared-db' + - 'placement-mysql-router:shared-db' + - - 'placement-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'placement' + - 'keystone' + + - - 'placement' + - 'nova-cloud-controller' + + - - 'cinder-ceph:ceph-access' + - 'nova-compute:ceph-access' diff --git a/ceph-osd/tests/bundles/jammy-bobcat.yaml b/ceph-osd/tests/bundles/jammy-bobcat.yaml new file mode 100644 index 00000000..84bc99de --- /dev/null +++ b/ceph-osd/tests/bundles/jammy-bobcat.yaml @@ -0,0 +1,235 @@ +variables: + openstack-origin: &openstack-origin cloud:jammy-bobcat + +series: jammy + +comment: +- 'machines section to decide order of deployment. database sooner = faster' +machines: + '0': + constraints: mem=3072M + '1': + constraints: mem=3072M + '2': + constraints: mem=3072M + '3': + '4': + '5': + '6': + '7': + '8': + '9': + '10': + '11': + '12': + '13': + '14': + '15': + +applications: + + keystone-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + glance-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + cinder-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + nova-cloud-controller-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + placement-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + + mysql-innodb-cluster: + charm: ch:mysql-innodb-cluster + num_units: 3 + to: + - '0' + - '1' + - '2' + channel: 8.0/edge + + ceph-osd: + charm: ch:ceph-osd + channel: reef/edge + num_units: 3 + storage: + osd-devices: 'cinder,10G,2' + options: + osd-devices: '/dev/test-non-existent' + source: *openstack-origin + aa-profile-mode: enforce + to: + - '3' + - '4' + - '5' + + ceph-mon: + charm: ch:ceph-mon + num_units: 3 + options: + monitor-count: '3' + source: *openstack-origin + to: + - '6' + - '7' + - '8' + channel: reef/edge + + rabbitmq-server: + charm: ch:rabbitmq-server + num_units: 1 + to: + - '9' + channel: 3.9/edge + + keystone: + expose: True + charm: ch:keystone + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '10' + channel: 2023.2/edge + + nova-compute: + charm: ch:nova-compute + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '11' + channel: 2023.2/edge + + glance: + expose: True + charm: ch:glance + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '12' + channel: 2023.2/edge + + cinder: + expose: True + charm: ch:cinder + num_units: 1 + options: + openstack-origin: *openstack-origin + block-device: 'None' + glance-api-version: '2' + to: + - '13' + channel: 2023.2/edge + + cinder-ceph: + charm: ch:cinder-ceph + channel: 2023.2/edge + + nova-cloud-controller: + expose: True + charm: ch:nova-cloud-controller + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '14' + channel: 2023.2/edge + + placement: + charm: ch:placement + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '15' + channel: 2023.2/edge + +relations: + - - 'nova-compute:amqp' + - 'rabbitmq-server:amqp' + + - - 'nova-compute:image-service' + - 'glance:image-service' + + - - 'nova-compute:ceph' + - 'ceph-mon:client' + + - - 'keystone:shared-db' + - 'keystone-mysql-router:shared-db' + - - 'keystone-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 
'glance:shared-db' + - 'glance-mysql-router:shared-db' + - - 'glance-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'glance:identity-service' + - 'keystone:identity-service' + + - - 'glance:amqp' + - 'rabbitmq-server:amqp' + + - - 'glance:ceph' + - 'ceph-mon:client' + + - - 'cinder:shared-db' + - 'cinder-mysql-router:shared-db' + - - 'cinder-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'cinder:identity-service' + - 'keystone:identity-service' + + - - 'cinder:amqp' + - 'rabbitmq-server:amqp' + + - - 'cinder:image-service' + - 'glance:image-service' + + - - 'cinder-ceph:storage-backend' + - 'cinder:storage-backend' + + - - 'cinder-ceph:ceph' + - 'ceph-mon:client' + + - - 'ceph-osd:mon' + - 'ceph-mon:osd' + + - - 'nova-cloud-controller:shared-db' + - 'nova-cloud-controller-mysql-router:shared-db' + - - 'nova-cloud-controller-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'nova-cloud-controller:identity-service' + - 'keystone:identity-service' + + - - 'nova-cloud-controller:amqp' + - 'rabbitmq-server:amqp' + + - - 'nova-cloud-controller:cloud-compute' + - 'nova-compute:cloud-compute' + + - - 'nova-cloud-controller:image-service' + - 'glance:image-service' + + - - 'placement:shared-db' + - 'placement-mysql-router:shared-db' + - - 'placement-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'placement' + - 'keystone' + + - - 'placement' + - 'nova-cloud-controller' + + - - 'cinder-ceph:ceph-access' + - 'nova-compute:ceph-access' diff --git a/ceph-osd/tests/bundles/jammy-caracal.yaml b/ceph-osd/tests/bundles/jammy-caracal.yaml new file mode 100644 index 00000000..5d34f4cd --- /dev/null +++ b/ceph-osd/tests/bundles/jammy-caracal.yaml @@ -0,0 +1,44 @@ +variables: + openstack-origin: &openstack-origin cloud:jammy-caracal + +series: &series jammy + +machines: + '0': + '1': + '2': + '3': + constraints: cores=2 mem=4G root-disk=25G virt-type=virtual-machine + '4': + constraints: cores=2 mem=4G root-disk=25G virt-type=virtual-machine + '5': + constraints: cores=2 mem=4G root-disk=25G virt-type=virtual-machine + +applications: + ceph-mon: + charm: ch:ceph-mon + channel: latest/edge + num_units: 3 + options: + monitor-count: 3 + to: + - '0' + - '1' + - '2' + + ceph-osd: + charm: ch:ceph-osd + channel: latest/edge + num_units: 3 + storage: + osd-devices: 'loop,5G,2' + options: + source: *openstack-origin + to: + - '3' + - '4' + - '5' + +relations: + - - 'ceph-osd:mon' + - 'ceph-mon:osd' diff --git a/ceph-osd/tests/bundles/local-jammy-caracal.yaml b/ceph-osd/tests/bundles/local-jammy-caracal.yaml new file mode 100644 index 00000000..a7f59e04 --- /dev/null +++ b/ceph-osd/tests/bundles/local-jammy-caracal.yaml @@ -0,0 +1,234 @@ +variables: + openstack-origin: &openstack-origin cloud:jammy-caracal + +series: jammy + +comment: +- 'machines section to decide order of deployment. 
database sooner = faster' +machines: + '0': + constraints: mem=3072M + '1': + constraints: mem=3072M + '2': + constraints: mem=3072M + '3': + '4': + '5': + '6': + '7': + '8': + '9': + '10': + '11': + '12': + '13': + '14': + '15': + +applications: + + keystone-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + glance-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + cinder-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + nova-cloud-controller-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + placement-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + + mysql-innodb-cluster: + charm: ch:mysql-innodb-cluster + num_units: 3 + to: + - '0' + - '1' + - '2' + channel: 8.0/edge + + ceph-osd: + charm: ../../ceph-osd.charm + num_units: 3 + storage: + osd-devices: 'cinder,10G,2' + options: + osd-devices: '/dev/test-non-existent' + source: *openstack-origin + aa-profile-mode: enforce + to: + - '3' + - '4' + - '5' + + ceph-mon: + charm: ch:ceph-mon + num_units: 3 + options: + monitor-count: '3' + source: *openstack-origin + to: + - '6' + - '7' + - '8' + channel: quincy/edge + + rabbitmq-server: + charm: ch:rabbitmq-server + num_units: 1 + to: + - '9' + channel: 3.9/edge + + keystone: + expose: True + charm: ch:keystone + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '10' + channel: latest/edge + + nova-compute: + charm: ch:nova-compute + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '11' + channel: latest/edge + + glance: + expose: True + charm: ch:glance + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '12' + channel: latest/edge + + cinder: + expose: True + charm: ch:cinder + num_units: 1 + options: + openstack-origin: *openstack-origin + block-device: 'None' + glance-api-version: '2' + to: + - '13' + channel: latest/edge + + cinder-ceph: + charm: ch:cinder-ceph + channel: latest/edge + + nova-cloud-controller: + expose: True + charm: ch:nova-cloud-controller + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '14' + channel: latest/edge + + placement: + charm: ch:placement + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '15' + channel: latest/edge + +relations: + - - 'nova-compute:amqp' + - 'rabbitmq-server:amqp' + + - - 'nova-compute:image-service' + - 'glance:image-service' + + - - 'nova-compute:ceph' + - 'ceph-mon:client' + + - - 'keystone:shared-db' + - 'keystone-mysql-router:shared-db' + - - 'keystone-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'glance:shared-db' + - 'glance-mysql-router:shared-db' + - - 'glance-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'glance:identity-service' + - 'keystone:identity-service' + + - - 'glance:amqp' + - 'rabbitmq-server:amqp' + + - - 'glance:ceph' + - 'ceph-mon:client' + + - - 'cinder:shared-db' + - 'cinder-mysql-router:shared-db' + - - 'cinder-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'cinder:identity-service' + - 'keystone:identity-service' + + - - 'cinder:amqp' + - 'rabbitmq-server:amqp' + + - - 'cinder:image-service' + - 'glance:image-service' + + - - 'cinder-ceph:storage-backend' + - 'cinder:storage-backend' + + - - 'cinder-ceph:ceph' + - 'ceph-mon:client' + + - - 'ceph-osd:mon' + - 'ceph-mon:osd' + + - - 'nova-cloud-controller:shared-db' + - 'nova-cloud-controller-mysql-router:shared-db' + - - 'nova-cloud-controller-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 
'nova-cloud-controller:identity-service' + - 'keystone:identity-service' + + - - 'nova-cloud-controller:amqp' + - 'rabbitmq-server:amqp' + + - - 'nova-cloud-controller:cloud-compute' + - 'nova-compute:cloud-compute' + + - - 'nova-cloud-controller:image-service' + - 'glance:image-service' + + - - 'placement:shared-db' + - 'placement-mysql-router:shared-db' + - - 'placement-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'placement' + - 'keystone' + + - - 'placement' + - 'nova-cloud-controller' + + - - 'cinder-ceph:ceph-access' + - 'nova-compute:ceph-access' diff --git a/ceph-osd/tests/target.py b/ceph-osd/tests/target.py new file mode 100644 index 00000000..6b9e68f6 --- /dev/null +++ b/ceph-osd/tests/target.py @@ -0,0 +1,922 @@ +# Copyright 2018 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Ceph Testing.""" + +import unittest +from copy import deepcopy +import json +import logging +from os import ( + listdir, + path +) +import re +import tempfile +import urllib3 + +import tenacity + +import zaza.openstack.charm_tests.test_utils as test_utils +import zaza.model as zaza_model +import zaza.openstack.utilities.ceph as zaza_ceph +import zaza.openstack.utilities.exceptions as zaza_exceptions +import zaza.openstack.utilities.generic as zaza_utils +import zaza.openstack.utilities.openstack as zaza_openstack + +# Disable warnings for ssl_verify=false +urllib3.disable_warnings( + urllib3.exceptions.InsecureRequestWarning +) + + +class CephLowLevelTest(test_utils.BaseCharmTest): + """Ceph Low Level Test Class.""" + + @classmethod + def setUpClass(cls): + """Run class setup for running ceph low level tests.""" + super(CephLowLevelTest, cls).setUpClass() + + def test_processes(self): + """Verify Ceph processes. + + Verify that the expected service processes are running + on each ceph unit. + """ + logging.info('Checking ceph-mon and ceph-osd processes...') + # Process name and quantity of processes to expect on each unit + ceph_mon_processes = { + 'ceph-mon': 1, + 'ceph-mgr': 1, + } + + ceph_osd_processes = { + 'ceph-osd': [1, 2, 3] + } + + # Units with process names and PID quantities expected + expected_processes = { + 'ceph-mon/0': ceph_mon_processes, + 'ceph-mon/1': ceph_mon_processes, + 'ceph-mon/2': ceph_mon_processes, + 'ceph-osd/0': ceph_osd_processes, + 'ceph-osd/1': ceph_osd_processes, + 'ceph-osd/2': ceph_osd_processes + } + + actual_pids = zaza_utils.get_unit_process_ids(expected_processes) + ret = zaza_utils.validate_unit_process_ids(expected_processes, + actual_pids) + self.assertTrue(ret) + + def test_services(self): + """Verify the ceph services. + + Verify the expected services are running on the service units. 
+ """ + logging.info('Checking ceph-osd and ceph-mon services...') + services = {} + ceph_services = ['ceph-mon', 'ceph-mgr'] + services['ceph-osd/0'] = ['ceph-osd'] + + services['ceph-mon/0'] = ceph_services + services['ceph-mon/1'] = ceph_services + services['ceph-mon/2'] = ceph_services + + for unit_name, unit_services in services.items(): + zaza_model.block_until_service_status( + unit_name=unit_name, + services=unit_services, + target_status='running' + ) + + @test_utils.skipUntilVersion('ceph-mon', 'ceph', '14.2.0') + def test_pg_tuning(self): + """Verify that auto PG tuning is enabled for Nautilus+.""" + unit_name = 'ceph-mon/0' + cmd = "ceph osd pool autoscale-status --format=json" + result = zaza_model.run_on_unit(unit_name, cmd) + self.assertEqual(result['Code'], '0') + for pool in json.loads(result['Stdout']): + self.assertEqual(pool['pg_autoscale_mode'], 'on') + + +class CephTest(test_utils.BaseCharmTest): + """Ceph common functional tests.""" + + @classmethod + def setUpClass(cls): + """Run the ceph's common class setup.""" + super(CephTest, cls).setUpClass() + + def osd_out_in(self, services): + """Run OSD out and OSD in tests. + + Remove OSDs and then add them back in on a unit checking that services + are in the required state after each action + + :param services: Services expected to be restarted when config_file is + changed. + :type services: list + """ + zaza_model.block_until_service_status( + self.lead_unit, + services, + 'running', + model_name=self.model_name) + zaza_model.block_until_unit_wl_status( + self.lead_unit, + 'active', + model_name=self.model_name) + zaza_model.run_action( + self.lead_unit, + 'osd-out', + model_name=self.model_name) + zaza_model.block_until_unit_wl_status( + self.lead_unit, + 'maintenance', + model_name=self.model_name) + zaza_model.block_until_all_units_idle(model_name=self.model_name) + zaza_model.run_action( + self.lead_unit, + 'osd-in', + model_name=self.model_name) + zaza_model.block_until_unit_wl_status( + self.lead_unit, + 'active', + model_name=self.model_name) + zaza_model.block_until_all_units_idle(model_name=self.model_name) + zaza_model.block_until_service_status( + self.lead_unit, + services, + 'running', + model_name=self.model_name) + + def test_ceph_check_osd_pools(self): + """Check OSD pools. + + Check osd pools on all ceph units, expect them to be + identical, and expect specific pools to be present. + """ + try: + zaza_model.get_application('cinder-ceph') + except KeyError: + raise unittest.SkipTest("Skipping OpenStack dependent test") + logging.info('Checking pools on ceph units...') + + expected_pools = zaza_ceph.get_expected_pools() + results = [] + unit_name = 'ceph-mon/0' + + # Check for presence of expected pools on each unit + logging.debug('Expected pools: {}'.format(expected_pools)) + pools = zaza_ceph.get_ceph_pools(unit_name) + results.append(pools) + + for expected_pool in expected_pools: + if expected_pool not in pools: + msg = ('{} does not have pool: ' + '{}'.format(unit_name, expected_pool)) + raise zaza_exceptions.CephPoolNotFound(msg) + logging.debug('{} has (at least) the expected ' + 'pools.'.format(unit_name)) + + # Check that all units returned the same pool name:id data + for i, result in enumerate(results): + for other in results[i+1:]: + logging.debug('result: {}, other: {}'.format(result, other)) + self.assertEqual(result, other) + + def test_ceph_pool_creation_with_text_file(self): + """Check the creation of a pool and a text file. 
+ + Create a pool, add a text file to it and retrieve its content. + Verify that the content matches the original file. + """ + unit_name = 'ceph-mon/0' + cmd = 'sudo ceph osd pool create test {PG_NUM}; \ + echo 123456789 > /tmp/input.txt; \ + rados put -p test test_input /tmp/input.txt; \ + rados get -p test test_input /dev/stdout' + cmd = cmd.format(PG_NUM=32) + logging.debug('Creating test pool and putting test file in pool...') + result = zaza_model.run_on_unit(unit_name, cmd) + code = result.get('Code') + if code != '0': + raise zaza_model.CommandRunFailed(cmd, result) + output = result.get('Stdout').strip() + logging.debug('Output received: {}'.format(output)) + self.assertEqual(output, '123456789') + + def test_ceph_encryption(self): + """Test Ceph encryption. + + Verify that the new disk is added with encryption by checking for + Ceph's encryption keys directory. + """ + current_release = zaza_openstack.get_os_release(application='ceph-mon') + trusty_mitaka = zaza_openstack.get_os_release('trusty_mitaka') + if current_release >= trusty_mitaka: + logging.warn("Skipping encryption test for Mitaka and higher") + return + unit_name = 'ceph-osd/0' + set_default = { + 'osd-encrypt': 'False', + 'osd-devices': '/dev/vdb /srv/ceph', + } + set_alternate = { + 'osd-encrypt': 'True', + 'osd-devices': '/dev/vdb /srv/ceph /srv/ceph_encrypted', + } + juju_service = 'ceph-osd' + logging.info('Making config change on {}...'.format(juju_service)) + mtime = zaza_model.get_unit_time(unit_name) + + file_mtime = None + + folder_name = '/etc/ceph/dmcrypt-keys/' + with self.config_change(set_default, set_alternate, + application_name=juju_service): + with tempfile.TemporaryDirectory() as tempdir: + # Creating a temp dir to copy keys + temp_folder = '/tmp/dmcrypt-keys' + cmd = 'mkdir {}'.format(temp_folder) + ret = zaza_model.run_on_unit(unit_name, cmd) + logging.debug('Ret for cmd {} is {}'.format(cmd, ret)) + # Copy keys from /etc to /tmp + cmd = 'sudo cp {}* {}'.format(folder_name, temp_folder) + ret = zaza_model.run_on_unit(unit_name, cmd) + logging.debug('Ret for cmd {} is {}'.format(cmd, ret)) + # Changing permissions to be able to SCP the files + cmd = 'sudo chown -R ubuntu:ubuntu {}'.format(temp_folder) + ret = zaza_model.run_on_unit(unit_name, cmd) + logging.debug('Ret for cmd {} is {}'.format(cmd, ret)) + # SCP to retrieve all files in folder + # -p: preserve timestamps + source = '/tmp/dmcrypt-keys/*' + zaza_model.scp_from_unit(unit_name=unit_name, + source=source, + destination=tempdir, + scp_opts='-p') + for elt in listdir(tempdir): + file_path = '/'.join([tempdir, elt]) + if path.isfile(file_path): + file_mtime = path.getmtime(file_path) + if file_mtime: + break + + if not file_mtime: + logging.warn('Could not determine mtime, assuming ' + 'folder does not exist') + raise FileNotFoundError('folder does not exist') + + if file_mtime >= mtime: + logging.info('Folder mtime is newer than provided mtime ' + '(%s >= %s) on %s (OK)' % (file_mtime, + mtime, unit_name)) + else: + logging.warn('Folder mtime is older than provided mtime ' + '(%s < %s) on %s' % (file_mtime, + mtime, unit_name)) + raise Exception('Folder mtime is older than provided mtime') + + def test_blocked_when_non_pristine_disk_appears(self): + """Test blocked state with non-pristine disk. + + Validate that the charm goes into blocked state when it is presented with + new block devices that have foreign data on them. + Instances used in UOSCI have a flavour with ephemeral storage in + addition to the bootable instance storage.
The ephemeral storage + device is partitioned, formatted and mounted early in the boot process + by cloud-init. + As long as the device is mounted the charm will not attempt to use it. + If we unmount it and trigger the config-changed hook the block device + will appear as a new and previously untouched device for the charm. + One of the first steps of device eligibility checks should be to make + sure we are seeing a pristine and empty device before doing any + further processing. + As the ephemeral device will have data on it we can use it to validate + that these checks work as intended. + """ + current_release = zaza_openstack.get_os_release(application='ceph-mon') + focal_ussuri = zaza_openstack.get_os_release('focal_ussuri') + if current_release >= focal_ussuri: + # NOTE(ajkavanagh) - focal (on ServerStack) is broken for /dev/vdb + # and so this test can't pass: LP#1842751 discusses the issue, but + # basically the snapd daemon along with lxcfs results in /dev/vdb + # being mounted in the lxcfs process namespace. If the charm + # 'tries' to umount it, it can (as root), but the mount is still + # 'held' by lxcfs and thus nothing else can be done with it. This + # is only a problem in serverstack with images with a default + # /dev/vdb ephemeral + logging.warn("Skipping pristine disk test for focal and higher") + return + logging.info('Checking behaviour when non-pristine disks appear...') + logging.info('Configuring ephemeral-unmount...') + alternate_conf = { + 'ephemeral-unmount': '/mnt', + 'osd-devices': '/dev/vdb' + } + juju_service = 'ceph-osd' + zaza_model.set_application_config(juju_service, alternate_conf) + ceph_osd_states = { + 'ceph-osd': { + 'workload-status': 'blocked', + 'workload-status-message': 'Non-pristine' + } + } + zaza_model.wait_for_application_states(states=ceph_osd_states) + logging.info('Units now in blocked state, running zap-disk action...') + unit_names = ['ceph-osd/0', 'ceph-osd/1', 'ceph-osd/2'] + for unit_name in unit_names: + zap_disk_params = { + 'devices': '/dev/vdb', + 'i-really-mean-it': True, + } + action_obj = zaza_model.run_action( + unit_name=unit_name, + action_name='zap-disk', + action_params=zap_disk_params + ) + logging.debug('Result of action: {}'.format(action_obj)) + + logging.info('Running add-disk action...') + for unit_name in unit_names: + add_disk_params = { + 'osd-devices': '/dev/vdb', + } + action_obj = zaza_model.run_action( + unit_name=unit_name, + action_name='add-disk', + action_params=add_disk_params + ) + logging.debug('Result of action: {}'.format(action_obj)) + + logging.info('Wait for idle/ready status...') + zaza_model.wait_for_application_states() + + logging.info('OK') + + set_default = { + 'ephemeral-unmount': '', + 'osd-devices': '/dev/vdb', + } + + bionic_train = zaza_openstack.get_os_release('bionic_train') + if current_release < bionic_train: + set_default['osd-devices'] = '/dev/vdb /srv/ceph' + + logging.info('Restoring to default configuration...') + zaza_model.set_application_config(juju_service, set_default) + + zaza_model.wait_for_application_states() + + def test_pause_and_resume(self): + """The services can be paused and resumed.""" + logging.info('Checking pause and resume actions...') + self.pause_resume(['ceph-osd']) + + def get_device_for_blacklist(self, unit): + """Return a device to be used by the blacklist tests.""" + cmd = "mount | grep 'on / ' | awk '{print $1}'" + obj = zaza_model.run_on_unit(unit, cmd) + return obj.get('Stdout').strip() + + def test_blacklist(self): + """Check the blacklist 
action. + + The blacklist actions execute and behave as expected. + """ + logging.info('Checking blacklist-add-disk and ' + 'blacklist-remove-disk actions...') + unit_name = 'ceph-osd/0' + + zaza_model.block_until_unit_wl_status( + unit_name, + 'active' + ) + + # Attempt to add device with non-absolute path should fail + action_obj = zaza_model.run_action( + unit_name=unit_name, + action_name='blacklist-add-disk', + action_params={'osd-devices': 'vda'} + ) + self.assertTrue(action_obj.status != 'completed') + zaza_model.block_until_unit_wl_status( + unit_name, + 'active' + ) + + # Attempt to add device with non-existent path should fail + action_obj = zaza_model.run_action( + unit_name=unit_name, + action_name='blacklist-add-disk', + action_params={'osd-devices': '/non-existent'} + ) + self.assertTrue(action_obj.status != 'completed') + zaza_model.block_until_unit_wl_status( + unit_name, + 'active' + ) + + # Attempt to add device with existent path should succeed + device = self.get_device_for_blacklist(unit_name) + if not device: + raise unittest.SkipTest( + "Skipping test because no device was found") + + action_obj = zaza_model.run_action( + unit_name=unit_name, + action_name='blacklist-add-disk', + action_params={'osd-devices': device} + ) + self.assertEqual('completed', action_obj.status) + zaza_model.block_until_unit_wl_status( + unit_name, + 'active' + ) + + # Attempt to remove listed device should always succeed + action_obj = zaza_model.run_action( + unit_name=unit_name, + action_name='blacklist-remove-disk', + action_params={'osd-devices': device} + ) + self.assertEqual('completed', action_obj.status) + zaza_model.block_until_unit_wl_status( + unit_name, + 'active' + ) + logging.debug('OK') + + def test_list_disks(self): + """Test the list-disks action. + + The list-disks action executes. + """ + logging.info('Checking list-disks action...') + unit_name = 'ceph-osd/0' + + zaza_model.block_until_unit_wl_status( + unit_name, + 'active' + ) + + action_obj = zaza_model.run_action( + unit_name=unit_name, + action_name='list-disks', + ) + self.assertEqual('completed', action_obj.status) + zaza_model.block_until_unit_wl_status( + unit_name, + 'active' + ) + logging.debug('OK') + + def get_local_osd_id(self, unit): + """Get the OSD id for a unit.""" + ret = zaza_model.run_on_unit(unit, + 'ceph-volume lvm list --format=json') + local = list(json.loads(ret['Stdout']))[-1] + return local if local.startswith('osd.') else 'osd.' + local + + def get_num_osds(self, osd, is_up_only=False): + """Compute the number of active OSDs.""" + result = zaza_model.run_on_unit(osd, 'ceph osd stat --format=json') + result = json.loads(result['Stdout']) + if is_up_only: + return int(result['num_up_osds']) + else: + return int(result['num_osds']) + + def get_osd_devices_on_unit(self, unit_name): + """Get information for osd devices present on a particular unit. + + :param unit_name: Unit name to be queried for osd device info. + :type unit_name: str + """ + osd_devices = json.loads( + zaza_model.run_on_unit( + unit_name, 'ceph-volume lvm list --format=json' + ).get('Stdout', '') + ) + + return osd_devices + + def remove_disk_from_osd_unit(self, unit, osd_id, is_purge=False): + """Remove osd device with provided osd_id from unit. + + :param unit: Unit name where the osd device is to be removed from. + :type unit: str + + :param osd_id: osd-id for the osd device to be removed.
+ :type osd_id: str + + :param is_purge: whether to purge the osd device + :type is_purge: bool + """ + action_obj = zaza_model.run_action( + unit_name=unit, + action_name='remove-disk', + action_params={ + 'osd-ids': osd_id, + 'timeout': 10, + 'format': 'json', + 'purge': is_purge + } + ) + zaza_utils.assertActionRanOK(action_obj) + results = json.loads(action_obj.data['results']['message']) + results = results[next(iter(results))] + self.assertEqual(results['osd-ids'], osd_id) + zaza_model.run_on_unit(unit, 'partprobe') + + def remove_one_osd(self, unit, block_devs): + """Remove one device from osd unit. + + :param unit: Unit name where the osd device is to be removed from. + :type unit: str + :param block_devs: list of block devices on the specified unit + :type block_devs: list[str] + """ + # Should have more than one OSD to take one out and test. + self.assertGreater(len(block_devs), 1) + + # Get complete device details for an OSD. + key = list(block_devs)[-1] + device = { + 'osd-id': key if key.startswith('osd.') else 'osd.' + key, + 'block-device': block_devs[key][0]['devices'][0] + } + + self.remove_disk_from_osd_unit(unit, device['osd-id'], is_purge=True) + return device + + def test_cache_device(self): + """Test replacing a disk in use.""" + logging.info('Running add-disk action with a caching device') + mon = next(iter(zaza_model.get_units('ceph-mon'))).entity_id + osds = [x.entity_id for x in zaza_model.get_units('ceph-osd')] + osd_info = dict() + + # Remove one of the two disks. + logging.info('Removing single disk from each OSD') + for unit in osds: + block_devs = self.get_osd_devices_on_unit(unit) + if len(block_devs) < 2: + continue + device_info = self.remove_one_osd(unit, block_devs) + block_dev = device_info['block-device'] + logging.info("Removing device %s from unit %s" % (block_dev, unit)) + osd_info[unit] = device_info + if not osd_info: + raise unittest.SkipTest( + 'Skipping OSD replacement test, no spare devices added') + + logging.debug('Removed OSD Info: {}'.format(osd_info)) + zaza_model.wait_for_application_states() + + logging.info('Recycling previously removed disks') + for unit, device_info in osd_info.items(): + osd_id = device_info['osd-id'] + block_dev = device_info['block-device'] + logging.info("Found device %s on unit %s" % (block_dev, unit)) + self.assertNotEqual(block_dev, None) + action_obj = zaza_model.run_action( + unit_name=unit, + action_name='add-disk', + action_params={'osd-devices': block_dev, + 'osd-ids': osd_id, + 'partition-size': 5} + ) + zaza_utils.assertActionRanOK(action_obj) + zaza_model.wait_for_application_states() + + logging.info('Removing previously added OSDs') + for unit, device_info in osd_info.items(): + osd_id = device_info['osd-id'] + block_dev = device_info['block-device'] + logging.info( + "Removing block device %s from unit %s" % + (block_dev, unit) + ) + self.remove_disk_from_osd_unit(unit, osd_id, is_purge=False) + zaza_model.wait_for_application_states() + + logging.info('Finally adding back OSDs') + for unit, device_info in osd_info.items(): + block_dev = device_info['block-device'] + action_obj = zaza_model.run_action( + unit_name=unit, + action_name='add-disk', + action_params={'osd-devices': block_dev, + 'partition-size': 5} + ) + zaza_utils.assertActionRanOK(action_obj) + zaza_model.wait_for_application_states() + + for attempt in tenacity.Retrying( + wait=tenacity.wait_exponential(multiplier=2, max=32), + reraise=True, stop=tenacity.stop_after_attempt(10), + 
retry=tenacity.retry_if_exception_type(AssertionError) + ): + with attempt: + self.assertEqual( + len(osds) * 2, self.get_num_osds(mon, is_up_only=True) + ) + + +class SecurityTest(unittest.TestCase): + """Ceph Security Tests.""" + + @classmethod + def setUpClass(cls): + """Run class setup for running ceph security tests.""" + super(SecurityTest, cls).setUpClass() + + def test_osd_security_checklist(self): + """Verify expected state with security-checklist.""" + expected_failures = [] + expected_passes = [ + 'validate-file-ownership', + 'validate-file-permissions', + ] + + logging.info('Running `security-checklist` action' + ' on Ceph OSD leader unit') + test_utils.audit_assertions( + zaza_model.run_action_on_leader( + 'ceph-osd', + 'security-checklist', + action_params={}), + expected_passes, + expected_failures, + expected_to_pass=True) + + +class OsdService: + """Simple representation of ceph-osd systemd service.""" + + def __init__(self, id_): + """ + Init service using its ID. + + e.g.: id_=1 -> ceph-osd@1 + """ + self.id = id_ + self.name = 'ceph-osd@{}'.format(id_) + + +async def async_wait_for_service_status(unit_name, services, target_status, + model_name=None, timeout=2700): + """Wait for all services on the unit to be in the desired state. + + Note: This function emulates the + `zaza.model.async_block_until_service_status` function, but it's using + `systemctl is-active` command instead of `pidof/pgrep` of the original + function. + + :param unit_name: Name of unit to run action on + :type unit_name: str + :param services: List of services to check + :type services: List[str] + :param target_status: State services must be in (stopped or running) + :type target_status: str + :param model_name: Name of model to query. + :type model_name: str + :param timeout: Time to wait for status to be achieved + :type timeout: int + """ + async def _check_service(): + services_ok = True + for service in services: + command = r"systemctl is-active '{}'".format(service) + out = await zaza_model.async_run_on_unit( + unit_name, + command, + model_name=model_name, + timeout=timeout) + response = out['Stdout'].strip() + + if target_status == "running" and response == 'active': + continue + elif target_status == "stopped" and response == 'inactive': + continue + else: + services_ok = False + break + + return services_ok + + accepted_states = ('stopped', 'running') + if target_status not in accepted_states: + raise RuntimeError('Invalid target state "{}". 
Accepted states: ' + '{}'.format(target_status, accepted_states)) + + async with zaza_model.run_in_model(model_name): + await zaza_model.async_block_until(_check_service, timeout=timeout) + + +wait_for_service = zaza_model.sync_wrapper(async_wait_for_service_status) + + +class ServiceTest(unittest.TestCase): + """ceph-osd systemd service tests.""" + + TESTED_UNIT = 'ceph-osd/0' # This can be any ceph-osd unit in the model + SERVICE_PATTERN = re.compile(r'ceph-osd@(?P<service_id>\d+)\.service') + + def __init__(self, methodName='runTest'): + """Initialize Test Case.""" + super(ServiceTest, self).__init__(methodName) + self._available_services = None + + @classmethod + def setUpClass(cls): + """Run class setup for running ceph service tests.""" + super(ServiceTest, cls).setUpClass() + + def setUp(self): + """Run test setup.""" + # Skip 'service' action tests on systems without systemd + result = zaza_model.run_on_unit(self.TESTED_UNIT, 'which systemctl') + if not result['Stdout']: + raise unittest.SkipTest("'service' action is not supported on " + "systems without 'systemd'. Skipping " + "tests.") + # Note(mkalcok): This counter reset is needed because ceph-osd service + # is limited to 3 restarts per 30 mins which is insufficient + # when running functional tests for 'service' action. This + # limitation is defined in /lib/systemd/system/ceph-osd@.service + # in section [Service] with options 'StartLimitInterval' and + # 'StartLimitBurst' + reset_counter = 'systemctl reset-failed' + zaza_model.run_on_unit(self.TESTED_UNIT, reset_counter) + + def tearDown(self): + """Start ceph-osd services after each test. + + This ensures that the environment is ready for the next tests. + """ + zaza_model.run_action_on_units([self.TESTED_UNIT, ], 'start', + action_params={'osds': 'all'}, + raise_on_failure=True) + + @property + def available_services(self): + """Return list of all ceph-osd services present on the TESTED_UNIT.""" + if self._available_services is None: + self._available_services = self._fetch_osd_services() + return self._available_services + + def _fetch_osd_services(self): + """Fetch all ceph-osd services present on the TESTED_UNIT.""" + service_list = [] + service_list_cmd = 'systemctl list-units --full --all ' \ + '--no-pager -t service' + result = zaza_model.run_on_unit(self.TESTED_UNIT, service_list_cmd) + for line in result['Stdout'].split('\n'): + service_name = self.SERVICE_PATTERN.search(line) + if service_name: + service_id = int(service_name.group('service_id')) + service_list.append(OsdService(service_id)) + return service_list + + def test_start_stop_all_by_keyword(self): + """Start and Stop all ceph-osd services using keyword 'all'.""" + service_list = [service.name for service in self.available_services] + + logging.info("Running 'service stop=all' action on {} " + "unit".format(self.TESTED_UNIT)) + zaza_model.run_action_on_units([self.TESTED_UNIT], 'stop', + action_params={'osds': 'all'}) + wait_for_service(unit_name=self.TESTED_UNIT, + services=service_list, + target_status='stopped') + + logging.info("Running 'service start=all' action on {} " + "unit".format(self.TESTED_UNIT)) + zaza_model.run_action_on_units([self.TESTED_UNIT, ], 'start', + action_params={'osds': 'all'}) + wait_for_service(unit_name=self.TESTED_UNIT, + services=service_list, + target_status='running') + + def test_start_stop_all_by_list(self): + """Start and Stop all ceph-osd services using explicit list.""" + service_list = [service.name for service in self.available_services] + service_ids = [str(service.id) for 
service in self.available_services] + action_params = ','.join(service_ids) + + logging.info("Running 'service stop={}' action on {} " + "unit".format(action_params, self.TESTED_UNIT)) + zaza_model.run_action_on_units([self.TESTED_UNIT, ], 'stop', + action_params={'osds': action_params}) + wait_for_service(unit_name=self.TESTED_UNIT, + services=service_list, + target_status='stopped') + + logging.info("Running 'service start={}' action on {} " + "unit".format(action_params, self.TESTED_UNIT)) + zaza_model.run_action_on_units([self.TESTED_UNIT, ], 'start', + action_params={'osds': action_params}) + wait_for_service(unit_name=self.TESTED_UNIT, + services=service_list, + target_status='running') + + def test_stop_specific(self): + """Stop only specified ceph-osd service.""" + if len(self.available_services) < 2: + raise unittest.SkipTest('This test can be performed only if ' + 'there\'s more than one ceph-osd service ' + 'present on the tested unit') + + should_run = deepcopy(self.available_services) + to_stop = should_run.pop() + should_run = [service.name for service in should_run] + + logging.info("Running 'service stop={}' on {} " + "unit".format(to_stop.id, self.TESTED_UNIT)) + + zaza_model.run_action_on_units([self.TESTED_UNIT, ], 'stop', + action_params={'osds': to_stop.id}) + + wait_for_service(unit_name=self.TESTED_UNIT, + services=[to_stop.name, ], + target_status='stopped') + wait_for_service(unit_name=self.TESTED_UNIT, + services=should_run, + target_status='running') + + def test_start_specific(self): + """Start only specified ceph-osd service.""" + if len(self.available_services) < 2: + raise unittest.SkipTest('This test can be performed only if ' + 'there\'s more than one ceph-osd service ' + 'present on the tested unit') + + service_names = [service.name for service in self.available_services] + should_stop = deepcopy(self.available_services) + to_start = should_stop.pop() + should_stop = [service.name for service in should_stop] + + # Note: can't stop ceph-osd.target as restarting a single OSD will + # cause this to start all of the OSDs when a single one starts. + logging.info("Stopping all running ceph-osd services") + service_stop_cmd = '; '.join(['systemctl stop {}'.format(service) + for service in service_names]) + zaza_model.run_on_unit(self.TESTED_UNIT, service_stop_cmd) + + wait_for_service(unit_name=self.TESTED_UNIT, + services=service_names, + target_status='stopped') + + logging.info("Running 'service start={}' on {} " + "unit".format(to_start.id, self.TESTED_UNIT)) + + zaza_model.run_action_on_units([self.TESTED_UNIT, ], 'start', + action_params={'osds': to_start.id}) + + wait_for_service(unit_name=self.TESTED_UNIT, + services=[to_start.name, ], + target_status='running') + + wait_for_service(unit_name=self.TESTED_UNIT, + services=should_stop, + target_status='stopped') + + def test_active_after_pristine_block(self): + """Test if we can get back to active state after pristine block. + + Set a non-pristine status, then trigger update-status to see if it + clears. 
+ """ + logging.info('Setting Non-pristine status') + zaza_model.run_on_leader( + "ceph-osd", + "status-set blocked 'Non-pristine'" + ) + ceph_osd_states = { + 'ceph-osd': { + 'workload-status': 'blocked', + 'workload-status-message-prefix': 'Non-pristine' + } + } + zaza_model.wait_for_application_states(states=ceph_osd_states) + logging.info('Running update-status action') + zaza_model.run_on_leader('ceph-osd', 'hooks/update-status') + logging.info('Wait for idle/ready status') + zaza_model.wait_for_application_states() diff --git a/ceph-osd/tests/tests.yaml b/ceph-osd/tests/tests.yaml new file mode 100644 index 00000000..3b2c91b6 --- /dev/null +++ b/ceph-osd/tests/tests.yaml @@ -0,0 +1,17 @@ +charm_name: ceph-osd + +gate_bundles: + - jammy-caracal + +smoke_bundles: + - jammy-caracal + +dev_bundles: + - jammy-caracal + +tests: + - zaza.charm_tests.lifecycle.refresh.CharmRefreshAll + - tests.target.CephLowLevelTest + - tests.target.CephTest + - tests.target.SecurityTest + - tests.target.ServiceTest diff --git a/ceph-osd/tox.ini b/ceph-osd/tox.ini new file mode 100644 index 00000000..44ffd2eb --- /dev/null +++ b/ceph-osd/tox.ini @@ -0,0 +1,144 @@ +# Classic charm (with zaza): ./tox.ini +# This file is managed centrally by release-tools and should not be modified +# within individual charm repos. See the 'global' dir contents for available +# choices of tox.ini for OpenStack Charms: +# https://github.com/openstack-charmers/release-tools +# +# TODO: Distill the func test requirements from the lint/unit test +# requirements. They are intertwined. Also, Zaza itself should specify +# all of its own requirements and if it doesn't, fix it there. +[tox] +envlist = pep8,py3 +# NOTE: Avoid build/test env pollution by not enabling sitepackages. +sitepackages = False +# NOTE: Avoid false positives by not skipping missing interpreters. +skip_missing_interpreters = False + +[testenv] +# We use tox mainly for virtual environment management for test requirements +# and do not install the charm code as a Python package into that environment. +# Ref: https://tox.wiki/en/latest/config.html#skip_install +skip_install = True +setenv = VIRTUAL_ENV={envdir} + PYTHONHASHSEED=0 + TEST_JUJU3=1 + CHARM_DIR={envdir} + CHARMS_ARTIFACT_DIR={toxinidir}/.. + +commands = stestr run --slowest {posargs} +allowlist_externals = + charmcraft + {toxinidir}/rename.sh +passenv = + HOME + TERM + CS_* + OS_* + TEST_* +deps = -r{toxinidir}/test-requirements.txt + +[testenv:build] +basepython = python3 +deps = -r{toxinidir}/build-requirements.txt +# charmcraft clean is done to ensure that +# `tox -e build` always performs a clean, repeatable build. +# For faster rebuilds during development, +# directly run `charmcraft -v pack && ./rename.sh`. 
+commands = + charmcraft clean + charmcraft -v pack + {toxinidir}/rename.sh + charmcraft clean + +[testenv:py36] +basepython = python3.6 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py38] +basepython = python3.8 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py39] +basepython = python3.9 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py310] +basepython = python3.10 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py3] +basepython = python3 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:pep8] +basepython = python3 +deps = flake8 + git+https://github.com/juju/charm-tools.git +commands = flake8 {posargs} hooks unit_tests tests actions lib files + charm-proof + +[testenv:cover] +# Technique based heavily upon +# https://github.com/openstack/nova/blob/master/tox.ini +basepython = python3 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt +setenv = + {[testenv]setenv} + PYTHON=coverage run +commands = + coverage erase + stestr run --slowest {posargs} + coverage combine + coverage html -d cover + coverage xml -o cover/coverage.xml + coverage report + +[coverage:run] +branch = True +concurrency = multiprocessing +parallel = True +source = + . +omit = + .tox/* + */charmhelpers/* + unit_tests/* + +[testenv:venv] +basepython = python3 +commands = {posargs} + +[testenv:func-noop] +basepython = python3 +commands = + functest-run-suite --help + +[testenv:func] +basepython = python3 +commands = + functest-run-suite --keep-model + +[testenv:func-smoke] +basepython = python3 +commands = + functest-run-suite --keep-model --smoke + +[testenv:func-dev] +basepython = python3 +commands = + functest-run-suite --keep-model --dev + +[testenv:func-target] +basepython = python3 +commands = + functest-run-suite --keep-model --bundle {posargs} + +[flake8] +ignore = E402,E226,W503,W504 +exclude = */charmhelpers diff --git a/ceph-osd/unit_tests/__init__.py b/ceph-osd/unit_tests/__init__.py new file mode 100644 index 00000000..f460572a --- /dev/null +++ b/ceph-osd/unit_tests/__init__.py @@ -0,0 +1,36 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +from unittest.mock import MagicMock +from unittest import mock + +sys.path.append('hooks') +sys.path.append('lib') +sys.path.append('actions') +sys.path.append('unit_tests') + +sys.modules["tabulate"] = MagicMock() + +# Patch out lsb_release() and get_platform() as unit tests should be fully +# insulated from the underlying platform. Unit tests assume that the system is +# ubuntu jammy. 
+mock.patch( + 'charmhelpers.osplatform.get_platform', return_value='ubuntu' +).start() +mock.patch( + 'charmhelpers.core.host.lsb_release', + return_value={ + 'DISTRIB_CODENAME': 'jammy' + }).start() diff --git a/ceph-osd/unit_tests/test_actions_add_disk.py b/ceph-osd/unit_tests/test_actions_add_disk.py new file mode 100644 index 00000000..781ba427 --- /dev/null +++ b/ceph-osd/unit_tests/test_actions_add_disk.py @@ -0,0 +1,123 @@ +# Copyright 2020 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from unittest import mock + +from actions import add_disk + +from test_utils import CharmTestCase + + +class AddDiskActionTests(CharmTestCase): + def setUp(self): + super(AddDiskActionTests, self).setUp( + add_disk, ['hookenv', 'kv']) + self.kv.return_value = self.kv + + @mock.patch.object(add_disk.ceph_hooks, 'get_journal_devices') + @mock.patch.object(add_disk.charms_ceph.utils, 'osdize') + def test_add_device(self, mock_osdize, mock_get_journal_devices): + + def fake_config(key): + return { + 'ignore-device-errors': True, + 'osd-encrypt': True, + 'osd-encrypt-keymanager': True, + 'autotune': False, + }.get(key) + + self.hookenv.config.side_effect = fake_config + mock_get_journal_devices.return_value = '' + self.hookenv.relation_ids.return_value = ['ceph:0'] + + db = mock.MagicMock() + self.kv.return_value = db + db.get.side_effect = {'osd-devices': ['/dev/myosddev']}.get + + request = {'ops': []} + add_disk.add_device(request, '/dev/myosddev') + + call = mock.call(relation_id='ceph:0', + relation_settings={'bootstrapped-osds': 1}) + self.hookenv.relation_set.assert_has_calls([call]) + mock_osdize.assert_has_calls([mock.call('/dev/myosddev', + None, '', True, True, + True, None, None)]) + + piter = add_disk.PartitionIter(['/dev/cache'], 100, ['/dev/myosddev']) + mock_create_bcache = mock.MagicMock(side_effect=lambda b: '/dev/cache') + with mock.patch.object(add_disk.PartitionIter, 'create_bcache', + mock_create_bcache) as mock_call: + add_disk.add_device(request, '/dev/myosddev', part_iter=piter) + mock_call.assert_called() + db.set.assert_called_with('osd-aliases', + {'/dev/myosddev': '/dev/cache'}) + + mock_create_bcache.side_effect = lambda b: None + with mock.patch.object(add_disk.PartitionIter, 'create_bcache', + mock_create_bcache) as mock_call: + with self.assertRaises(add_disk.DeviceError): + add_disk.add_device(request, '/dev/myosddev', part_iter=piter) + + def test_get_devices(self): + self.hookenv.action_get.return_value = '/dev/foo bar' + rv = add_disk.get_devices('') + self.assertEqual(rv, ['/dev/foo']) + self.hookenv.action_get.return_value = None + rv = add_disk.get_devices('') + self.assertEqual(rv, []) + + @mock.patch.object(add_disk, 'device_size') + @mock.patch.object(add_disk, 'function_fail') + def test_validate_psize(self, function_fail, device_size): + caches = {'cache1': 100, 'cache2': 200} + device_size.side_effect = lambda c: caches[c] + function_fail.return_value = None + with self.assertRaises(SystemExit): + add_disk.validate_partition_size( + 60, ['a', 
'b', 'c'], list(caches.keys())) + self.assertIsNone(add_disk.validate_partition_size( + 60, ['a', 'b'], list(caches.keys()))) + + def test_cache_storage(self): + self.hookenv.storage_list.return_value = [{'location': 'a', 'key': 1}, + {'location': 'b'}] + self.hookenv.storage_get.side_effect = lambda k, elem: elem.get(k) + rv = add_disk.cache_storage() + self.assertEqual(['a', 'b'], rv) + + def test_validate_osd_id(self): + for elem in ('osd.1', '1', 0, 113): + self.assertTrue(add_disk.validate_osd_id(elem)) + for elem in ('osd.-1', '-3', '???', -100, 3.4, {}): + self.assertFalse(add_disk.validate_osd_id(elem)) + + @mock.patch.object(add_disk.charms_ceph.utils, 'disable_osd') + @mock.patch.object(add_disk.charms_ceph.utils, 'stop_osd') + @mock.patch.object(add_disk.subprocess, 'check_output') + @mock.patch.object(add_disk.subprocess, 'check_call') + @mock.patch.object(add_disk, 'apt_install') + @mock.patch.object(add_disk.shutil, 'copy') + @mock.patch.object(add_disk.os.path, 'exists') + def test_crimson_osd(self, os_path_exists, shcopy, apt_install, + check_call, check_output, stop_osd, disable_osd): + os_path_exists.return_value = False + check_output.return_value = b'{"1": [{"devices": ["/dev/vdc"]}]}' + self.assertIsNone(add_disk.get_osd_from_device("/dev/vda")) + + add_disk.start_crimson_osd(None, '/dev/vdc') + stop_osd.assert_called_with("1") + check_call.assert_any_call(['systemctl', 'start', 'crimson-osd@1']) + shcopy.assert_called() + apt_install.assert_called() diff --git a/ceph-osd/unit_tests/test_actions_blacklist.py b/ceph-osd/unit_tests/test_actions_blacklist.py new file mode 100644 index 00000000..9484ccd5 --- /dev/null +++ b/ceph-osd/unit_tests/test_actions_blacklist.py @@ -0,0 +1,141 @@ +# Copyright 2017 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
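For context on the pattern the BlacklistActionTests that follow exercise: a minimal sketch of a kv-backed blacklist action, assuming the validation order the assertions imply (absolute path checked before existence) and persistence through charmhelpers' unitdata key/value store; the charm's real actions/blacklist.py may differ in detail.

# Illustrative sketch only, not part of the patch.
import os
from charmhelpers.core import hookenv, unitdata

class Error(Exception):
    """Raised when a device fails validation."""

def blacklist_add():
    db = unitdata.kv()
    # Default to an empty list, then persist with set() + flush(),
    # matching the kv calls the tests assert on.
    blacklist = db.get('osd-blacklist', [])
    for device in hookenv.action_get('osd-devices').split():
        if not os.path.isabs(device):
            raise Error('{} is not an absolute path'.format(device))
        if not os.path.exists(device):
            raise Error('{} does not exist'.format(device))
        blacklist.append(device)
    db.set('osd-blacklist', blacklist)
    db.flush()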
+ +from unittest import mock + +from charmhelpers.core import hookenv + +from actions import blacklist + +from test_utils import CharmTestCase + + +class BlacklistActionTests(CharmTestCase): + def setUp(self): + super(BlacklistActionTests, self).setUp( + blacklist, []) + + @mock.patch('os.path.isabs') + @mock.patch('os.path.exists') + @mock.patch('charmhelpers.core.unitdata.kv') + @mock.patch('charmhelpers.core.hookenv.action_get') + def test_add_disk(self, _action_get, _kv, _exists, _isabs): + """Add device with absolute and existent path succeeds""" + _action_get.return_value = '/dev/vda' + _kv.return_value = _kv + _kv.get.return_value = [] + _exists.return_value = True + _isabs.return_value = True + blacklist.blacklist_add() + _exists.assert_called() + _isabs.assert_called() + _kv.get.assert_called_with('osd-blacklist', []) + _kv.set.assert_called_with('osd-blacklist', ['/dev/vda']) + _kv.flush.assert_called() + + @mock.patch('os.path.isabs') + @mock.patch('os.path.exists') + @mock.patch('charmhelpers.core.unitdata.kv') + @mock.patch('charmhelpers.core.hookenv.action_get') + def test_add_disk_nonexistent(self, _action_get, _kv, _exists, _isabs): + """Add device with non-existent path raises exception""" + _action_get.return_value = '/dev/vda' + _kv.return_value = _kv + _kv.get.return_value = [] + _exists.return_value = False + _isabs.return_value = True + with self.assertRaises(blacklist.Error): + blacklist.blacklist_add() + _isabs.assert_called() + _exists.assert_called() + _kv.get.assert_called_with('osd-blacklist', []) + assert not _kv.set.called + assert not _kv.flush.called + + @mock.patch('os.path.isabs') + @mock.patch('os.path.exists') + @mock.patch('charmhelpers.core.unitdata.kv') + @mock.patch('charmhelpers.core.hookenv.action_get') + def test_add_disk_nonabsolute(self, _action_get, _kv, _exists, _isabs): + """Add device with non-absolute path raises exception""" + _action_get.return_value = 'vda' + _kv.return_value = _kv + _kv.get.return_value = [] + _exists.return_value = True + _isabs.return_value = False + with self.assertRaises(blacklist.Error): + blacklist.blacklist_add() + _isabs.assert_called() + _kv.get.assert_called_with('osd-blacklist', []) + assert not _exists.called + assert not _kv.set.called + assert not _kv.flush.called + + @mock.patch('charmhelpers.core.unitdata.kv') + @mock.patch('charmhelpers.core.hookenv.action_get') + def test_remove_disk(self, _action_get, _kv): + """Remove action succeeds, and regardless of existence of device""" + _action_get.return_value = '/nonexistent2' + _kv.return_value = _kv + _kv.get.return_value = ['/nonexistent1', '/nonexistent2'] + blacklist.blacklist_remove() + _kv.get.assert_called_with('osd-blacklist', []) + _kv.set.assert_called_with('osd-blacklist', ['/nonexistent1']) + _kv.flush.assert_called() + + @mock.patch('charmhelpers.core.unitdata.kv') + @mock.patch('charmhelpers.core.hookenv.action_get') + def test_remove_disk_nonlisted(self, _action_get, _kv): + """Remove action raises on removal of device not in list""" + _action_get.return_value = '/nonexistent3' + _kv.return_value = _kv + _kv.get.return_value = ['/nonexistent1', '/nonexistent2'] + with self.assertRaises(blacklist.Error): + blacklist.blacklist_remove() + _kv.get.assert_called_with('osd-blacklist', []) + assert not _kv.set.called + assert not _kv.flush.called + + +class MainTestCase(CharmTestCase): + def setUp(self): + super(MainTestCase, self).setUp(hookenv, ["action_fail"]) + + def test_invokes_action(self): + dummy_calls = [] + + def dummy_action(): + 
dummy_calls.append(True) + + with mock.patch.dict(blacklist.ACTIONS, {"foo": dummy_action}): + blacklist.main(["foo"]) + self.assertEqual(dummy_calls, [True]) + + def test_unknown_action(self): + """Unknown actions aren't a traceback.""" + exit_string = blacklist.main(["foo"]) + self.assertEqual("Action foo undefined", exit_string) + + def test_failing_action(self): + """Actions which traceback trigger action_fail() calls.""" + dummy_calls = [] + + self.action_fail.side_effect = dummy_calls.append + + def dummy_action(): + raise ValueError("uh oh") + + with mock.patch.dict(blacklist.ACTIONS, {"foo": dummy_action}): + blacklist.main(["foo"]) + self.assertEqual(dummy_calls, ["Action foo failed: uh oh"]) diff --git a/ceph-osd/unit_tests/test_actions_get_availability_zone.py b/ceph-osd/unit_tests/test_actions_get_availability_zone.py new file mode 100644 index 00000000..cec7b477 --- /dev/null +++ b/ceph-osd/unit_tests/test_actions_get_availability_zone.py @@ -0,0 +1,119 @@ +# Copyright 2021 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import json + +from actions import get_availability_zone +from lib.charms_ceph.utils import CrushLocation + +from test_utils import CharmTestCase + + +TABULATE_OUTPUT = """ ++-------------+---------+-------------+ +| unit | root | region | ++=============+=========+=============+ +| juju-ceph-0 | default | juju-ceph-0 | ++-------------+---------+-------------+ +| juju-ceph-1 | default | juju-ceph-1 | ++-------------+---------+-------------+ +| juju-ceph-2 | default | juju-ceph-2 | ++-------------+---------+-------------+ +""" + +AVAILABILITY_ZONES = { + "unit": {"root": "default", "host": "juju-ceph-0"}, + "all-units": { + "juju-ceph-0": {"root": "default", "host": "juju-ceph-0"}, + "juju-ceph-1": {"root": "default", "host": "juju-ceph-1"}, + "juju-ceph-2": {"root": "default", "host": "juju-ceph-2"} + } +} + + +class GetAvailabilityZoneActionTests(CharmTestCase): + def setUp(self): + super(GetAvailabilityZoneActionTests, self).setUp( + get_availability_zone, + ["get_osd_tree", "get_unit_hostname", "tabulate"] + ) + self.tabulate.return_value = TABULATE_OUTPUT + self.get_unit_hostname.return_value = "juju-ceph-0" + + def test_get_human_readable(self): + """Test formatting as human readable.""" + table = get_availability_zone._get_human_readable(AVAILABILITY_ZONES) + self.assertTrue(table == TABULATE_OUTPUT) + + def test_get_crush_map(self): + """Test get Crush Map hierarchy from CrushLocation.""" + crush_location = CrushLocation( + name="test", identifier="t1", host="test", rack=None, row=None, + datacenter=None, chassis=None, root="default") + crush_map = get_availability_zone._get_crush_map(crush_location) + self.assertDictEqual(crush_map, {"root": "default", "host": "test"}) + + crush_location = CrushLocation( + name="test", identifier="t1", host="test", rack="AZ", + row="customAZ", datacenter=None, chassis=None, root="default") + crush_map = get_availability_zone._get_crush_map(crush_location) + self.assertDictEqual(crush_map, {"root": "default", 
"row": "customAZ", + "rack": "AZ", "host": "test"}) + + def test_get_availability_zones(self): + """Test function to get information about availability zones.""" + self.get_unit_hostname.return_value = "test_1" + self.get_osd_tree.return_value = [ + CrushLocation(name="test_1", identifier="t1", host="test_1", + rack="AZ1", row="AZ", datacenter=None, + chassis=None, root="default"), + CrushLocation(name="test_2", identifier="t2", host="test_2", + rack="AZ1", row="AZ", datacenter=None, + chassis=None, root="default"), + CrushLocation(name="test_3", identifier="t3", host="test_3", + rack="AZ2", row="AZ", datacenter=None, + chassis=None, root="default"), + CrushLocation(name="test_4", identifier="t4", host="test_4", + rack="AZ2", row="AZ", datacenter=None, + chassis=None, root="default"), + ] + results = get_availability_zone.get_availability_zones() + + self.assertDictEqual(results, { + "unit": dict(root="default", row="AZ", rack="AZ1", host="test_1")}) + + results = get_availability_zone.get_availability_zones(show_all=True) + self.assertDictEqual(results, { + "unit": dict(root="default", row="AZ", rack="AZ1", host="test_1"), + "all-units": { + "test_1": dict(root="default", row="AZ", rack="AZ1", + host="test_1"), + "test_2": dict(root="default", row="AZ", rack="AZ1", + host="test_2"), + "test_3": dict(root="default", row="AZ", rack="AZ2", + host="test_3"), + "test_4": dict(root="default", row="AZ", rack="AZ2", + host="test_4"), + }}) + + def test_format_availability_zones(self): + """Test function to formatted availability zones.""" + # human readable format + results_table = get_availability_zone.format_availability_zones( + AVAILABILITY_ZONES, True) + self.assertEqual(results_table, TABULATE_OUTPUT) + + # json format + results_json = get_availability_zone.format_availability_zones( + AVAILABILITY_ZONES, False) + self.assertDictEqual(json.loads(results_json), AVAILABILITY_ZONES) diff --git a/ceph-osd/unit_tests/test_actions_list_disks.py b/ceph-osd/unit_tests/test_actions_list_disks.py new file mode 100644 index 00000000..358dd663 --- /dev/null +++ b/ceph-osd/unit_tests/test_actions_list_disks.py @@ -0,0 +1,27 @@ +from actions import list_disks + +from test_utils import CharmTestCase + + +class ListDisksActionTests(CharmTestCase): + def setUp(self): + super(ListDisksActionTests, self).setUp( + list_disks, ['hookenv', + 'charms_ceph', + 'utils', + 'os']) + self.charms_ceph.utils.unmounted_disks.return_value = ['/dev/sda', + '/dev/sdm'] + + def test_list_disks_journal_symbol_link(self): + self.utils.get_journal_devices.return_value = {'/dev/disk/ceph/sdm'} + self.os.path.realpath.return_value = '/dev/sdm' + self.charms_ceph.utils.is_active_bluestore_device.return_value = False + self.charms_ceph.utils.is_pristine_disk.return_value = False + self.utils.get_blacklist.return_value = [] + list_disks.list_disk() + self.hookenv.action_set.assert_called_with({ + 'disks': ['/dev/sda'], + 'blacklist': [], + 'non-pristine': ['/dev/sda'] + }) diff --git a/ceph-osd/unit_tests/test_actions_osd_out_in.py b/ceph-osd/unit_tests/test_actions_osd_out_in.py new file mode 100644 index 00000000..5808adfe --- /dev/null +++ b/ceph-osd/unit_tests/test_actions_osd_out_in.py @@ -0,0 +1,166 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import subprocess +import sys + +from unittest import mock + +from test_utils import CharmTestCase + +sys.path.append('hooks') + +import osd_in_out as actions + + +def mock_check_output(cmd, **kwargs): + action, osd_id = cmd[-2:] # get the last two arguments from cmd + return "marked {} osd.{}. \n".format(action, osd_id).encode("utf-8") + + +class OSDOutTestCase(CharmTestCase): + def setUp(self): + super(OSDOutTestCase, self).setUp( + actions, ["check_output", + "get_local_osd_ids", + "assess_status", + "parse_osds_arguments", + "function_fail", + "function_set"]) + + self.check_output.side_effect = mock_check_output + + def test_osd_out(self): + self.get_local_osd_ids.return_value = ["5", "6", "7"] + self.parse_osds_arguments.return_value = {"5"} + actions.osd_out() + self.check_output.assert_called_once_with( + ["ceph", "--id", "osd-upgrade", "osd", "out", "5"], + stderr=subprocess.STDOUT + ) + self.assess_status.assert_called_once_with() + + def test_osd_out_all(self): + self.get_local_osd_ids.return_value = ["5", "6", "7"] + self.parse_osds_arguments.return_value = {"all"} + actions.osd_out() + self.check_output.assert_has_calls( + [mock.call( + ["ceph", "--id", "osd-upgrade", "osd", "out", i], + stderr=subprocess.STDOUT + ) for i in set(["5", "6", "7"])]) + self.assess_status.assert_called_once_with() + + def test_osd_out_not_local(self): + self.get_local_osd_ids.return_value = ["5"] + self.parse_osds_arguments.return_value = {"6", "7", "8"} + actions.osd_out() + self.check_output.assert_not_called() + self.function_fail.assert_called_once_with( + "invalid ceph OSD device id: " + "{}".format(",".join(set(["6", "7", "8"])))) + self.assess_status.assert_not_called() + + +class OSDInTestCase(CharmTestCase): + def setUp(self): + super(OSDInTestCase, self).setUp( + actions, ["check_output", + "get_local_osd_ids", + "assess_status", + "parse_osds_arguments", + "function_fail", + "function_set"]) + + self.check_output.side_effect = mock_check_output + + def test_osd_in(self): + self.get_local_osd_ids.return_value = ["5", "6", "7"] + self.parse_osds_arguments.return_value = {"5"} + actions.osd_in() + self.check_output.assert_called_once_with( + ["ceph", "--id", "osd-upgrade", "osd", "in", "5"], + stderr=subprocess.STDOUT + ) + self.assess_status.assert_called_once_with() + + def test_osd_in_all(self): + self.get_local_osd_ids.return_value = ["5", "6", "7"] + self.parse_osds_arguments.return_value = {"all"} + actions.osd_in() + self.check_output.assert_has_calls( + [mock.call( + ["ceph", "--id", "osd-upgrade", "osd", "in", i], + stderr=subprocess.STDOUT + ) for i in set(["5", "6", "7"])]) + self.assess_status.assert_called_once_with() + + def test_osd_in_not_local(self): + self.get_local_osd_ids.return_value = ["5"] + self.parse_osds_arguments.return_value = {"6"} + actions.osd_in() + self.check_output.assert_not_called() + self.function_fail.assert_called_once_with( + "invalid ceph OSD device id: 6") + self.assess_status.assert_not_called() + + +class OSDMountTestCase(CharmTestCase): + def setUp(self): + super(OSDMountTestCase, self).setUp(actions, []) + + 
@mock.patch('os.path.exists') + @mock.patch('os.listdir') + @mock.patch('charms_ceph.utils.filesystem_mounted') + def test_mounted_osds(self, fs_mounted, listdir, exists): + exists.return_value = True + listdir.return_value = [ + '/var/lib/ceph/osd/ceph-1', '/var/lib/ceph/osd/ceph-2'] + fs_mounted.side_effect = lambda x: x == listdir.return_value[0] + osds = actions.get_local_osd_ids() + self.assertIn(listdir.return_value[0][-1], osds) + self.assertNotIn(listdir.return_value[1][-1], osds) + + +class MainTestCase(CharmTestCase): + def setUp(self): + super(MainTestCase, self).setUp(actions, ["function_fail"]) + + def test_invokes_action(self): + dummy_calls = [] + + def dummy_action(): + dummy_calls.append(True) + + with mock.patch.dict(actions.ACTIONS, {"foo": dummy_action}): + actions.main(["foo"]) + self.assertEqual(dummy_calls, [True]) + + def test_unknown_action(self): + """Unknown actions aren't a traceback.""" + exit_string = actions.main(["foo"]) + self.assertEqual("Action foo undefined", exit_string) + + def test_failing_action(self): + """Actions which traceback trigger function_fail() calls.""" + dummy_calls = [] + + self.function_fail.side_effect = dummy_calls.append + + def dummy_action(): + raise ValueError("uh oh") + + with mock.patch.dict(actions.ACTIONS, {"foo": dummy_action}): + actions.main(["foo"]) + self.assertEqual(dummy_calls, ["Action foo failed: uh oh"]) diff --git a/ceph-osd/unit_tests/test_actions_remove_disk.py b/ceph-osd/unit_tests/test_actions_remove_disk.py new file mode 100644 index 00000000..2fb7148d --- /dev/null +++ b/ceph-osd/unit_tests/test_actions_remove_disk.py @@ -0,0 +1,147 @@ +# Copyright 2021 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
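Stepping back to the osd-out/osd-in tests above: a minimal sketch of the flow they encode, with the ceph invocation, the 'all' keyword handling, and the failure message taken from the test assertions. The helper parameters stand in for the module's own functions; the real hooks/osd_in_out.py may differ in detail.

# Illustrative sketch only, not part of the patch.
import subprocess

def change_osd_state(direction, requested, local_ids,
                     function_fail, assess_status):
    """Mark OSDs 'out' or 'in' using the 'osd-upgrade' ceph user."""
    osds = local_ids if 'all' in requested else requested
    if not set(osds).issubset(local_ids):
        # e.g. "invalid ceph OSD device id: 6,7,8"
        function_fail('invalid ceph OSD device id: '
                      '{}'.format(','.join(requested)))
        return
    for osd_id in osds:
        subprocess.check_output(
            ['ceph', '--id', 'osd-upgrade', 'osd', direction, osd_id],
            stderr=subprocess.STDOUT)
    assess_status()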
+ +from unittest import mock + +from actions import remove_disk + +from test_utils import CharmTestCase + + +class RemoveDiskActionTests(CharmTestCase): + + @mock.patch.object(remove_disk.subprocess, 'check_output') + def test_get_device_map(self, check_output): + check_output.return_value = b''' +{ + "1": [{"devices": ["/dev/sdx1"]}], + "2": [{"devices": ["/dev/sdc2", "/dev/sdc3"]}] +} + ''' + rv = remove_disk.get_device_map() + self.assertEqual(rv[0]['path'], '/dev/sdx1') + self.assertEqual(rv[1]['id'], rv[2]['id']) + + def test_normalize_osd_id(self): + self.assertEqual('osd.1', remove_disk.normalize_osd_id(1)) + self.assertEqual('osd.2', remove_disk.normalize_osd_id('osd.2')) + self.assertEqual('osd.3', remove_disk.normalize_osd_id('3')) + + def test_map_device_id(self): + dev_map = [ + {'id': 'osd.1', 'path': '/dev/sdc1'}, + {'id': 'osd.2', 'path': '/dev/sdd2'}, + {'id': 'osd.2', 'path': '/dev/sdx3'} + ] + self.assertEqual( + 'osd.1', + remove_disk.map_device_to_id(dev_map, '/dev/sdc1')) + self.assertIsNone( + remove_disk.map_device_to_id(dev_map, '/dev/sdx4')) + + self.assertEqual( + '/dev/sdd2', + remove_disk.map_id_to_device(dev_map, 'osd.2')) + self.assertIsNone( + remove_disk.map_id_to_device(dev_map, 'osd.3')) + + @mock.patch.object(remove_disk, 'get_bcache_names') + def test_action_osd_constructor(self, bcache_names): + bcache_names.return_value = ('bcache0', '/dev/bcache0') + dev_map = [ + {'path': '/dev/sdx1', 'id': 'osd.1'} + ] + with self.assertRaises(remove_disk.RemoveException): + remove_disk.ActionOSD(dev_map, dev='/dev/sdx1', osd_id='osd.1') + obj = remove_disk.ActionOSD(dev_map, dev='/dev/sdx1') + self.assertEqual(obj.osd_id, 'osd.1') + obj = remove_disk.ActionOSD(dev_map, osd_id='1') + self.assertEqual(obj.device, '/dev/sdx1') + + @mock.patch.object(remove_disk.charms_ceph.utils, 'disable_osd') + @mock.patch.object(remove_disk, 'device_size') + @mock.patch.object(remove_disk.charms_ceph.utils, 'stop_osd') + @mock.patch.object(remove_disk, 'bcache_remove') + @mock.patch.object(remove_disk.subprocess, 'call') + @mock.patch.object(remove_disk.subprocess, 'check_call') + @mock.patch.object(remove_disk, 'get_bcache_names') + def test_action_osd_remove(self, get_bcache_names, check_call, + call, bcache_remove, stop_osd, device_size, + disable_osd): + call.return_value = 0 + get_bcache_names.return_value = ('/dev/backing', '/dev/caching') + device_size.side_effect = lambda x: 1 if x == '/dev/caching' else 0 + dev_map = [ + {'path': '/dev/bcache0', 'id': 'osd.1'} + ] + prefix_args = ['ceph', '--id', 'osd-removal'] + obj = remove_disk.ActionOSD(dev_map, osd_id='1') + + obj.remove(True, 1, True) + + # Subprocess Call checks + call.assert_any_call( + prefix_args + ['osd', 'safe-to-destroy', 'osd.1'], timeout=300 + ) + check_call.assert_any_call( + prefix_args + ['osd', 'purge', 'osd.1', '--yes-i-really-mean-it'], + timeout=600 + ) + check_call.assert_any_call( + prefix_args + ['osd', 'crush', 'reweight', 'osd.1', '0'], + timeout=300 + ) + + bcache_remove.assert_called_with( + '/dev/bcache0', '/dev/backing', '/dev/caching') + report = obj.report + self.assertIn('/dev/backing', report) + report = report['/dev/backing'] + self.assertIn('osd-ids', report) + self.assertIn('osd.1', report['osd-ids']) + self.assertIn('cache-devices', report) + self.assertIn('partition-size', report) + self.assertEqual('/dev/caching', report['cache-devices']) + self.assertEqual(1, report['partition-size']) + + # Test the timeout check. 
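+        # (with call() pinned to a non-zero return, 'osd safe-to-destroy'
+        # keeps failing, so remove() should give up once the timeout
+        # expires and raise)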
+ with self.assertRaises(remove_disk.RemoveException): + call.return_value = 1 + obj.remove(False, 0, False) + + @mock.patch.object(remove_disk.hookenv, 'local_unit') + @mock.patch.object(remove_disk.hookenv, 'action_set') + def test_write_report(self, action_set, local_unit): + output = {} + local_unit.return_value = 'ceph-osd/0' + action_set.side_effect = lambda x: output.update(x) + report = {'dev@': {'osd-ids': 'osd.1', 'cache-devices': 'cache@', + 'partition-size': 5}} + remove_disk.write_report(report, 'text') + self.assertIn('message', output) + msg = output['message'] + self.assertIn('juju run ceph-osd/0 add-disk', msg) + self.assertIn('osd-devices=dev@', msg) + self.assertIn('osd-ids=osd.1', msg) + self.assertIn('cache-devices=cache@', msg) + self.assertIn('partition-size=5', msg) + + def test_make_same_length(self): + l1, l2 = [1], [] + remove_disk.make_same_length(l1, l2) + self.assertEqual(len(l1), len(l2)) + self.assertIsNone(l2[0]) + prev_len = len(l1) + remove_disk.make_same_length(l1, l2) + self.assertEqual(len(l1), prev_len) diff --git a/ceph-osd/unit_tests/test_actions_service.py b/ceph-osd/unit_tests/test_actions_service.py new file mode 100644 index 00000000..5c3af854 --- /dev/null +++ b/ceph-osd/unit_tests/test_actions_service.py @@ -0,0 +1,195 @@ +# Copyright 2020 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
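+
+# The service action ultimately shells out to systemctl; the assertions
+# below check the exact command shape, e.g. (illustrative):
+#
+#     subprocess.check_call(['systemctl', 'start', 'ceph-osd@1.service'],
+#                           timeout=300)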
+
+import unittest.mock as mock
+from contextlib import contextmanager
+
+from actions import service
+from hooks import utils
+
+from test_utils import CharmTestCase
+
+
+class CompletedProcessMock:
+    def __init__(self, stdout=b'', stderr=b''):
+        self.stdout = stdout
+        self.stderr = stderr
+
+
+class ServiceActionTests(CharmTestCase):
+    _PRESENT_SERVICES = [
+        "ceph-osd@0.service",
+        "ceph-osd@1.service",
+        "ceph-osd@2.service",
+    ]
+
+    _TARGET_ALL = 'ceph-osd.target'
+
+    _CHECK_CALL_TIMEOUT = 300
+
+    def __init__(self, methodName='runTest'):
+        super(ServiceActionTests, self).__init__(methodName)
+
+    def setUp(self, obj=None, patches=None):
+        super(ServiceActionTests, self).setUp(
+            service,
+            ['subprocess', 'function_fail',
+             'log', 'assess_status', 'shutil']
+        )
+        present_services = '\n'.join(self._PRESENT_SERVICES).encode('utf-8')
+
+        self.shutil.which.return_value = '/bin/systemctl'
+        self.subprocess.check_call.return_value = None
+        self.subprocess.run.return_value = CompletedProcessMock(
+            stdout=present_services)
+
+    @contextmanager
+    def func_call_arguments(self, osds=None):
+        with mock.patch("utils.function_get") as mock_function_get:
+            self._func_args = {'osds': osds}
+            mock_function_get.side_effect = \
+                lambda arg: self._func_args.get(arg)
+            yield
+
+    def assert_action_start_fail(self, msg):
+        self.assert_function_fail(service.START, msg)
+
+    def assert_action_stop_fail(self, msg):
+        self.assert_function_fail(service.STOP, msg)
+
+    def assert_function_fail(self, action, msg):
+        expected_error = "Action '{}' failed: {}".format(action, msg)
+        self.function_fail.assert_called_with(expected_error)
+
+    @staticmethod
+    def call_action_start():
+        service.main(['start'])
+
+    @staticmethod
+    def call_action_stop():
+        service.main(['stop'])
+
+    def test_systemctl_execute_all(self):
+        action = 'start'
+        services = utils.ALL
+
+        expected_call = mock.call(['systemctl', action, self._TARGET_ALL],
+                                  timeout=self._CHECK_CALL_TIMEOUT)
+
+        service.systemctl_execute(action, services)
+
+        self.subprocess.check_call.assert_has_calls([expected_call])
+
+    def test_systemctl_execute_specific(self):
+        action = 'start'
+        services = ['ceph-osd@1.service', 'ceph-osd@2.service']
+
+        systemctl_call = ['systemctl', action] + services
+        expected_call = mock.call(systemctl_call,
+                                  timeout=self._CHECK_CALL_TIMEOUT)
+
+        service.systemctl_execute(action, services)
+
+        self.subprocess.check_call.assert_has_calls([expected_call])
+
+    def test_id_translation(self):
+        service_ids = {1, utils.ALL, 2}
+        expected_names = [
+            'ceph-osd@1.service',
+            utils.ALL,
+            'ceph-osd@2.service',
+        ]
+        service_names = service.osd_ids_to_service_names(service_ids)
+        self.assertEqual(sorted(service_names), sorted(expected_names))
+
+    def test_skip_service_presence_check(self):
+        service_list = [utils.ALL]
+
+        service.check_service_is_present(service_list)
+
+        self.subprocess.run.assert_not_called()
+
+    def test_raise_all_missing_services(self):
+        missing_service_id = '99,100'
+        missing_list = []
+        for id_ in missing_service_id.split(','):
+            missing_list.append("ceph-osd@{}.service".format(id_))
+
+        service_list_cmd = ['systemctl', 'list-units', '--full', '--all',
+                            '--no-pager', '-t', 'service']
+
+        err_msg = 'Some services are not present on this ' \
+                  'unit: {}'.format(missing_list)
+
+        with self.assertRaises(RuntimeError, msg=err_msg):
+            service.check_service_is_present(missing_list)
+
+        self.subprocess.run.assert_called_with(service_list_cmd,
+                                               stdout=self.subprocess.PIPE,
+                                               timeout=30)
+
+    def test_fail_execute_unknown_action(self):
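+        """Unknown actions passed to execute_action() raise RuntimeError."""
+        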
action = 'foo' + err_msg = 'Unknown action "{}"'.format(action) + with self.assertRaises(RuntimeError, msg=err_msg): + service.execute_action(action) + + @mock.patch.object(service, 'systemctl_execute') + def test_execute_action(self, _): + with self.func_call_arguments(osds=utils.ALL): + service.execute_action(service.START) + service.systemctl_execute.assert_called_with(service.START, + [utils.ALL]) + + service.execute_action(service.STOP) + service.systemctl_execute.assert_called_with(service.STOP, + [utils.ALL]) + + @mock.patch.object(service, 'execute_action') + def test_action_stop(self, execute_action): + self.call_action_stop() + execute_action.assert_called_with(service.STOP) + + @mock.patch.object(service, 'execute_action') + def test_action_start(self, execute_action): + self.call_action_start() + execute_action.assert_called_with(service.START) + + def test_actions_requires_systemd(self): + """Actions will fail if systemd is not present on the system""" + self.shutil.which.return_value = None + expected_error = 'This action requires systemd' + with self.func_call_arguments(osds='all'): + self.call_action_start() + self.assert_action_start_fail(expected_error) + + self.call_action_stop() + self.assert_action_stop_fail(expected_error) + + self.subprocess.check_call.assert_not_called() + + def test_unknown_action(self): + action = 'foo' + err_msg = 'Action {} undefined'.format(action) + service.main([action]) + self.function_fail.assert_called_with(err_msg) + + @mock.patch.object(service, 'execute_action') + def test_action_failure(self, start_function): + err_msg = 'Test Error' + service.execute_action.side_effect = RuntimeError(err_msg) + + self.call_action_start() + + self.assert_action_start_fail(err_msg) diff --git a/ceph-osd/unit_tests/test_actions_zap_disk.py b/ceph-osd/unit_tests/test_actions_zap_disk.py new file mode 100644 index 00000000..fa7c1eaf --- /dev/null +++ b/ceph-osd/unit_tests/test_actions_zap_disk.py @@ -0,0 +1,238 @@ +# Copyright 2018 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
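+
+# The zap-disk action reads its parameters via hookenv.action_get(); each
+# test emulates that with a side_effect mapping, e.g.:
+#
+#     self.hookenv.action_get.side_effect = lambda arg: {
+#         'devices': '/dev/vdb', 'i-really-mean-it': True}.get(arg)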
+ +from unittest import mock + +from actions import zap_disk + +from test_utils import CharmTestCase + + +class ZapDiskActionTests(CharmTestCase): + def setUp(self): + super(ZapDiskActionTests, self).setUp( + zap_disk, ['hookenv', + 'is_block_device', + 'is_device_mounted', + 'is_active_bluestore_device', + 'is_mapped_luks_device', + 'is_lvm_physical_volume', + 'kv']) + self.is_device_mounted.return_value = False + self.is_block_device.return_value = True + self.is_active_bluestore_device.return_value = False + self.is_mapped_luks_device.return_value = False + self.is_lvm_physical_volume.return_value = False + self.kv.return_value = self.kv + self.hookenv.local_unit.return_value = "ceph-osd-test/0" + + @mock.patch('os.path.exists', mock.MagicMock(return_value=True)) + @mock.patch.object(zap_disk, 'zap_disk') + def test_authorized_zap_single_disk(self, + _zap_disk): + """Will zap disk with extra config set""" + def side_effect(arg): + return { + 'devices': '/dev/vdb', + 'i-really-mean-it': True, + }.get(arg) + + self.hookenv.action_get.side_effect = side_effect + self.kv.get.return_value = ['/dev/vdb', '/dev/vdz'] + zap_disk.zap() + _zap_disk.assert_called_with('/dev/vdb') + self.kv.get.assert_called_with('osd-devices', []) + self.kv.set.assert_called_with('osd-devices', ['/dev/vdz']) + self.hookenv.action_set.assert_called_with({ + 'message': "1 disk(s) have been zapped, to use " + "them as OSDs, run: \njuju " + "run ceph-osd-test/0 add-disk " + "osd-devices=\"/dev/vdb\"" + }) + + @mock.patch('os.path.exists', mock.MagicMock(return_value=True)) + @mock.patch.object(zap_disk, 'zap_disk') + def test_authorized_zap_multiple_disks(self, + _zap_disk): + """Will zap disk with extra config set""" + def side_effect(arg): + return { + 'devices': '/dev/vdb /dev/vdc', + 'i-really-mean-it': True, + }.get(arg) + + self.hookenv.action_get.side_effect = side_effect + self.kv.get.return_value = ['/dev/vdb', '/dev/vdz'] + zap_disk.zap() + _zap_disk.assert_has_calls([ + mock.call('/dev/vdb'), + mock.call('/dev/vdc'), + ]) + self.kv.get.assert_called_with('osd-devices', []) + self.kv.set.assert_called_with('osd-devices', ['/dev/vdz']) + self.hookenv.action_set.assert_called_with({ + 'message': "2 disk(s) have been zapped, to use " + "them as OSDs, run: \njuju " + "run ceph-osd-test/0 add-disk " + "osd-devices=\"/dev/vdb /dev/vdc\"" + }) + + @mock.patch('os.path.exists', mock.MagicMock(return_value=True)) + @mock.patch.object(zap_disk, 'zap_disk') + def test_wont_zap_non_block_device(self, + _zap_disk): + """Will not zap a disk that isn't a block device""" + def side_effect(arg): + return { + 'devices': '/dev/vdb', + 'i-really-mean-it': True, + }.get(arg) + + self.hookenv.action_get.side_effect = side_effect + self.is_block_device.return_value = False + zap_disk.zap() + _zap_disk.assert_not_called() + self.hookenv.action_fail.assert_called_with( + "1 devices are not block devices: /dev/vdb") + + @mock.patch('os.path.exists', mock.MagicMock(return_value=True)) + @mock.patch.object(zap_disk, 'zap_disk') + def test_wont_zap_mounted_block_device(self, + _zap_disk): + """Will not zap a disk that is mounted""" + def side_effect(arg): + return { + 'devices': '/dev/vdb', + 'i-really-mean-it': True, + }.get(arg) + + self.hookenv.action_get.side_effect = side_effect + self.is_device_mounted.return_value = True + zap_disk.zap() + _zap_disk.assert_not_called() + self.hookenv.action_fail.assert_called_with( + "1 devices are mounted: /dev/vdb") + + @mock.patch('os.path.exists', mock.MagicMock(return_value=True)) + 
@mock.patch.object(zap_disk, 'zap_disk')
+    def test_wont_zap_mounted_bluestore_device(self,
+                                               _zap_disk):
+        """Will not zap a disk that is an active bluestore device"""
+        def side_effect(arg):
+            return {
+                'devices': '/dev/vdb',
+                'i-really-mean-it': True,
+            }.get(arg)
+
+        self.hookenv.action_get.side_effect = side_effect
+        self.is_active_bluestore_device.return_value = True
+        zap_disk.zap()
+        _zap_disk.assert_not_called()
+        self.hookenv.action_fail.assert_called_with(
+            "1 devices are mounted: /dev/vdb")
+
+    @mock.patch('os.path.exists', mock.MagicMock(return_value=True))
+    @mock.patch.object(zap_disk, 'zap_disk')
+    def test_wont_zap_mapped_luks_device(self, _zap_disk):
+        """Will not zap a disk that has a LUKS header"""
+        def side_effect(arg):
+            return {
+                'devices': '/dev/vdb',
+                'i-really-mean-it': True,
+            }.get(arg)
+
+        self.hookenv.action_get.side_effect = side_effect
+        self.is_active_bluestore_device.return_value = False
+        self.is_mapped_luks_device.return_value = True
+        zap_disk.zap()
+        _zap_disk.assert_not_called()
+        self.hookenv.action_fail.assert_called_with(
+            "1 devices are mounted: /dev/vdb")
+
+    @mock.patch('os.path.exists', mock.MagicMock(return_value=True))
+    @mock.patch.object(zap_disk, 'zap_disk')
+    def test_zap_luks_not_mapped(self, _zap_disk):
+        """Will zap a LUKS-formatted device that is not currently mapped"""
+        def side_effect(arg):
+            return {
+                'devices': '/dev/vdb',
+                'i-really-mean-it': True,
+            }.get(arg)
+
+        self.is_active_bluestore_device.return_value = False
+        self.is_mapped_luks_device.return_value = False
+
+        self.hookenv.action_get.side_effect = side_effect
+        self.kv.get.return_value = ['/dev/vdb', '/dev/vdz']
+        zap_disk.zap()
+        _zap_disk.assert_called_with('/dev/vdb')
+        self.kv.get.assert_called_with('osd-devices', [])
+        self.kv.set.assert_called_with('osd-devices', ['/dev/vdz'])
+        self.hookenv.action_set.assert_called_with({
+            'message': "1 disk(s) have been zapped, to use "
+                       "them as OSDs, run: \njuju "
+                       "run ceph-osd-test/0 add-disk "
+                       "osd-devices=\"/dev/vdb\""
+        })
+
+    @mock.patch.object(zap_disk, 'zap_disk')
+    def test_wont_zap_non_existent_device(self, _zap_disk):
+        """Won't zap a non-existent disk"""
+        def side_effect(arg):
+            return {
+                'devices': '/dev/not-valid-disk',
+                'i-really-mean-it': True,
+            }.get(arg)
+
+        self.hookenv.action_get.side_effect = side_effect
+        zap_disk.zap()
+        _zap_disk.assert_not_called()
+        self.hookenv.action_fail.assert_called_with(
+            'Failed due to: /dev/not-valid-disk: Device does not exist.')
+        self.hookenv.action_set.assert_not_called()
+
+    @mock.patch.object(zap_disk, 'zap_disk')
+    def test_wont_zap_not_abs_path(self, _zap_disk):
+        """Won't zap a path that is not absolute"""
+        def side_effect(arg):
+            return {
+                'devices': 'not-absolute',
+                'i-really-mean-it': True,
+            }.get(arg)
+
+        self.hookenv.action_get.side_effect = side_effect
+        zap_disk.zap()
+        _zap_disk.assert_not_called()
+        self.hookenv.action_fail.assert_called_with(
+            'Failed due to: not-absolute: Not absolute path.')
+        self.hookenv.action_set.assert_not_called()
+
+    @mock.patch('os.path.exists', mock.MagicMock(return_value=True))
+    @mock.patch.object(zap_disk, 'zap_disk')
+    def test_wont_zap_lvm_device(self, _zap_disk):
+        """Won't zap an LVM physical volume"""
+        def side_effect(arg):
+            return {
+                'devices': '/dev/vdb',
+                'i-really-mean-it': True,
+            }.get(arg)
+
+        self.hookenv.action_get.side_effect = side_effect
+        self.is_lvm_physical_volume.return_value = True
+
+        zap_disk.zap()
+        _zap_disk.assert_not_called()
+        self.hookenv.action_fail.assert_called_with(
+            '1 devices are lvm devices: /dev/vdb')
+        self.hookenv.action_set.assert_not_called()
diff --git a/ceph-osd/unit_tests/test_ceph_hooks.py b/ceph-osd/unit_tests/test_ceph_hooks.py
new file mode 100644
index 00000000..e6d37c23
--- /dev/null
+++ b/ceph-osd/unit_tests/test_ceph_hooks.py
@@ -0,0 +1,1054 @@
+# Copyright 2016 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from collections import OrderedDict
+import copy
+import unittest
+
+from unittest.mock import patch, MagicMock, call
+
+import charmhelpers.contrib.storage.linux.ceph as ceph
+
+with patch('charmhelpers.contrib.hardening.harden.harden') as mock_dec:
+    mock_dec.side_effect = (lambda *dargs, **dkwargs: lambda f:
+                            lambda *args, **kwargs: f(*args, **kwargs))
+    import ceph_hooks
+
+import charms_ceph.utils as ceph_utils
+
+CHARM_CONFIG = {'config-flags': '',
+                'loglevel': 1,
+                'use-syslog': True,
+                'osd-journal-size': 1024,
+                'osd-max-backfills': 1,
+                'osd-recovery-max-active': 2,
+                'tune-osd-memory-target': '',
+                'use-direct-io': True,
+                'osd-format': 'ext4',
+                'prefer-ipv6': False,
+                'customize-failure-domain': False,
+                'bluestore': False,
+                'crush-initial-weight': '0',
+                'bdev-enable-discard': 'enable',
+                'osd-devices': '/dev/vdb',
+                'bluestore-block-wal-size': 0,
+                'bluestore-block-db-size': 0,
+                'bluestore-wal': None,
+                'bluestore-db': None}
+
+
+BLUESTORE_WAL_TEST_SIZE = 128 * 2 ** 20
+BLUESTORE_DB_TEST_SIZE = 2 * 2 ** 30
+
+
+class CephHooksTestCase(unittest.TestCase):
+    maxDiff = None
+
+    def setUp(self):
+        super(CephHooksTestCase, self).setUp()
+
+    @patch.object(ceph_hooks.ch_context, 'CephBlueStoreCompressionContext',
+                  lambda: lambda: {})
+    @patch.object(ceph_hooks.ch_ceph, 'get_osd_settings', lambda *args: {})
+    @patch.object(ceph_hooks, 'get_fsid', lambda *args: '1234')
+    @patch.object(ceph_hooks, 'get_auth', lambda *args: False)
+    @patch.object(ceph_hooks, 'get_public_addr', lambda *args: "10.0.0.1")
+    @patch.object(ceph_hooks, 'get_cluster_addr', lambda *args: "10.1.0.1")
+    @patch.object(ceph_hooks, 'cmp_pkgrevno', lambda *args: 1)
+    @patch.object(ceph_hooks, 'get_mon_hosts', lambda *args: ['10.0.0.1',
+                                                              '10.0.0.2'])
+    @patch.object(ceph_hooks, 'get_networks', lambda *args: "")
+    @patch.object(ceph, 'config')
+    @patch.object(ceph_hooks, 'config')
+    def test_get_ceph_context(self, mock_config, mock_config2):
+        config = copy.deepcopy(CHARM_CONFIG)
+        mock_config.side_effect = lambda key: config[key]
+        mock_config2.side_effect = lambda key: config[key]
+        ctxt = ceph_hooks.get_ceph_context()
+        expected = {'auth_supported': False,
+                    'ceph_cluster_network': '',
+                    'ceph_public_network': '',
+                    'cluster_addr': '10.1.0.1',
+                    'dio': 'true',
+                    'fsid': '1234',
+                    'loglevel': 1,
+                    'mon_hosts': '10.0.0.1 10.0.0.2',
+                    'old_auth': False,
+                    'crush_initial_weight': '0',
+                    'osd_journal_size': 1024,
+                    'osd_max_backfills': 1,
+                    'osd_recovery_max_active': 2,
+                    'osd_from_client': OrderedDict(),
+                    'osd_from_client_conflict': OrderedDict(),
+                    'public_addr': '10.0.0.1',
+                    'short_object_len': True,
+                    'upgrade_in_progress': False,
+                    'use_syslog':
'true', + 'bdev_discard': True, + 'bluestore_experimental': False, + 'bluestore_block_wal_size': 0, + 'bluestore_block_db_size': 0} + self.assertEqual(ctxt, expected) + + @patch.object(ceph_hooks.ch_context, 'CephBlueStoreCompressionContext', + lambda: lambda: {}) + @patch.object(ceph_hooks.ch_ceph, 'get_osd_settings', lambda *args: {}) + @patch.object(ceph_hooks, 'get_fsid', lambda *args: '1234') + @patch.object(ceph_hooks, 'get_auth', lambda *args: False) + @patch.object(ceph_hooks, 'get_public_addr', lambda *args: "10.0.0.1") + @patch.object(ceph_hooks, 'get_cluster_addr', lambda *args: "10.1.0.1") + @patch.object(ceph_hooks, 'cmp_pkgrevno', lambda *args: 1) + @patch.object(ceph_hooks, 'get_mon_hosts', lambda *args: ['10.0.0.1', + '10.0.0.2']) + @patch.object(ceph_hooks, 'get_networks', lambda *args: "") + @patch.object(ceph, 'config') + @patch.object(ceph_hooks, 'config') + def test_get_ceph_context_invalid_bdev_enable_discard(self, mock_config, + mock_config2): + config = copy.deepcopy(CHARM_CONFIG) + config['bdev-enable-discard'] = 'some-invalid-value' + mock_config.side_effect = lambda key: config[key] + mock_config2.side_effect = lambda key: config[key] + ctxt = ceph_hooks.get_ceph_context() + expected = {'auth_supported': False, + 'ceph_cluster_network': '', + 'ceph_public_network': '', + 'cluster_addr': '10.1.0.1', + 'dio': 'true', + 'fsid': '1234', + 'loglevel': 1, + 'mon_hosts': '10.0.0.1 10.0.0.2', + 'old_auth': False, + 'crush_initial_weight': '0', + 'osd_journal_size': 1024, + 'osd_max_backfills': 1, + 'osd_recovery_max_active': 2, + 'osd_from_client': OrderedDict(), + 'osd_from_client_conflict': OrderedDict(), + 'public_addr': '10.0.0.1', + 'short_object_len': True, + 'upgrade_in_progress': False, + 'use_syslog': 'true', + 'bdev_discard': False, + 'bluestore_experimental': False, + 'bluestore_block_wal_size': 0, + 'bluestore_block_db_size': 0} + self.assertEqual(ctxt, expected) + + @patch.object(ceph_hooks.ch_context, 'CephBlueStoreCompressionContext', + lambda: lambda: {}) + @patch.object(ceph_hooks.ch_ceph, 'get_osd_settings', lambda *args: {}) + @patch.object(ceph_hooks, 'get_fsid', lambda *args: '1234') + @patch.object(ceph_hooks, 'get_auth', lambda *args: False) + @patch.object(ceph_hooks, 'get_public_addr', lambda *args: "10.0.0.1") + @patch.object(ceph_hooks, 'get_cluster_addr', lambda *args: "10.1.0.1") + @patch.object(ceph_hooks, 'cmp_pkgrevno', + lambda pkg, ver: -1 if ver == '12.1.0' else 1) + @patch.object(ceph_hooks, 'get_mon_hosts', lambda *args: ['10.0.0.1', + '10.0.0.2']) + @patch.object(ceph_hooks, 'get_networks', lambda *args: "") + @patch.object(ceph, 'config') + @patch.object(ceph_hooks, 'config') + def test_get_ceph_context_filestore_old(self, mock_config, mock_config2): + config = copy.deepcopy(CHARM_CONFIG) + mock_config.side_effect = lambda key: config[key] + mock_config2.side_effect = lambda key: config[key] + ctxt = ceph_hooks.get_ceph_context() + expected = {'auth_supported': False, + 'ceph_cluster_network': '', + 'ceph_public_network': '', + 'cluster_addr': '10.1.0.1', + 'dio': 'true', + 'fsid': '1234', + 'loglevel': 1, + 'mon_hosts': '10.0.0.1 10.0.0.2', + 'old_auth': False, + 'crush_initial_weight': '0', + 'osd_journal_size': 1024, + 'osd_max_backfills': 1, + 'osd_recovery_max_active': 2, + 'osd_from_client': OrderedDict(), + 'osd_from_client_conflict': OrderedDict(), + 'public_addr': '10.0.0.1', + 'short_object_len': True, + 'upgrade_in_progress': False, + 'use_syslog': 'true', + 'bdev_discard': True, + 'bluestore_experimental': True, + 
'bluestore_block_wal_size': 0, + 'bluestore_block_db_size': 0} + self.assertEqual(ctxt, expected) + + @patch.object(ceph_hooks.ch_context, 'CephBlueStoreCompressionContext', + lambda: lambda: {}) + @patch.object(ceph_hooks.ch_ceph, 'get_osd_settings', lambda *args: {}) + @patch.object(ceph_hooks, 'get_fsid', lambda *args: '1234') + @patch.object(ceph_hooks, 'get_auth', lambda *args: False) + @patch.object(ceph_hooks, 'get_public_addr', lambda *args: "10.0.0.1") + @patch.object(ceph_hooks, 'get_cluster_addr', lambda *args: "10.1.0.1") + @patch.object(ceph_hooks, 'cmp_pkgrevno', lambda *args: 1) + @patch.object(ceph_hooks, 'get_mon_hosts', lambda *args: ['10.0.0.1', + '10.0.0.2']) + @patch.object(ceph_hooks, 'get_networks', lambda *args: "") + @patch.object(ceph, 'config') + @patch.object(ceph_hooks, 'config') + def test_get_ceph_context_bluestore(self, mock_config, mock_config2): + config = copy.deepcopy(CHARM_CONFIG) + config['bluestore'] = True + BLUESTORE_WAL = '/dev/sdb /dev/sdc' + BLUESTORE_DB = '/dev/sdb /dev/sdc' + config['bluestore-block-wal-size'] = BLUESTORE_WAL_TEST_SIZE + config['bluestore-block-db-size'] = BLUESTORE_DB_TEST_SIZE + config['bluestore-wal'] = BLUESTORE_WAL + config['bluestore-db'] = BLUESTORE_DB + mock_config.side_effect = lambda key: config[key] + mock_config2.side_effect = lambda key: config[key] + ctxt = ceph_hooks.get_ceph_context() + expected = {'auth_supported': False, + 'ceph_cluster_network': '', + 'ceph_public_network': '', + 'cluster_addr': '10.1.0.1', + 'dio': 'true', + 'fsid': '1234', + 'loglevel': 1, + 'mon_hosts': '10.0.0.1 10.0.0.2', + 'old_auth': False, + 'crush_initial_weight': '0', + 'osd_journal_size': 1024, + 'osd_max_backfills': 1, + 'osd_recovery_max_active': 2, + 'osd_from_client': OrderedDict(), + 'osd_from_client_conflict': OrderedDict(), + 'public_addr': '10.0.0.1', + 'short_object_len': True, + 'upgrade_in_progress': False, + 'use_syslog': 'true', + 'bdev_discard': True, + 'bluestore_experimental': False, + 'bluestore_block_wal_size': BLUESTORE_WAL_TEST_SIZE, + 'bluestore_block_db_size': BLUESTORE_DB_TEST_SIZE} + self.assertEqual(ctxt, expected) + + @patch.object(ceph_hooks.ch_context, 'CephBlueStoreCompressionContext', + lambda: lambda: {}) + @patch.object(ceph_hooks.ch_ceph, 'get_osd_settings', lambda *args: {}) + @patch.object(ceph_hooks, 'get_fsid', lambda *args: '1234') + @patch.object(ceph_hooks, 'get_auth', lambda *args: False) + @patch.object(ceph_hooks, 'get_public_addr', lambda *args: "10.0.0.1") + @patch.object(ceph_hooks, 'get_cluster_addr', lambda *args: "10.1.0.1") + @patch.object(ceph_hooks, 'cmp_pkgrevno', + lambda pkg, ver: -1 if ver == '12.1.0' else 1) + @patch.object(ceph_hooks, 'get_mon_hosts', lambda *args: ['10.0.0.1', + '10.0.0.2']) + @patch.object(ceph_hooks, 'get_networks', lambda *args: "") + @patch.object(ceph, 'config') + @patch.object(ceph_hooks, 'config') + def test_get_ceph_context_bluestore_old(self, mock_config, mock_config2): + self.maxDiff = None + config = copy.deepcopy(CHARM_CONFIG) + config['bluestore-block-wal-size'] = BLUESTORE_WAL_TEST_SIZE + config['bluestore-block-db-size'] = BLUESTORE_DB_TEST_SIZE + mock_config.side_effect = lambda key: config[key] + mock_config2.side_effect = lambda key: config[key] + ctxt = ceph_hooks.get_ceph_context() + expected = {'auth_supported': False, + 'ceph_cluster_network': '', + 'ceph_public_network': '', + 'cluster_addr': '10.1.0.1', + 'dio': 'true', + 'fsid': '1234', + 'loglevel': 1, + 'mon_hosts': '10.0.0.1 10.0.0.2', + 'old_auth': False, + 
'crush_initial_weight': '0', + 'osd_journal_size': 1024, + 'osd_max_backfills': 1, + 'osd_recovery_max_active': 2, + 'osd_from_client': OrderedDict(), + 'osd_from_client_conflict': OrderedDict(), + 'public_addr': '10.0.0.1', + 'short_object_len': True, + 'upgrade_in_progress': False, + 'use_syslog': 'true', + 'bdev_discard': True, + 'bluestore_experimental': True, + 'bluestore_block_wal_size': BLUESTORE_WAL_TEST_SIZE, + 'bluestore_block_db_size': BLUESTORE_DB_TEST_SIZE} + self.assertEqual(ctxt, expected) + + @patch.object(ceph_hooks.ch_context, 'CephBlueStoreCompressionContext', + lambda: lambda: {}) + @patch.object(ceph_hooks.ch_ceph, 'get_osd_settings', lambda *args: {}) + @patch.object(ceph_hooks, 'get_fsid', lambda *args: '1234') + @patch.object(ceph_hooks, 'get_auth', lambda *args: False) + @patch.object(ceph_hooks, 'get_public_addr', lambda *args: "10.0.0.1") + @patch.object(ceph_hooks, 'get_cluster_addr', lambda *args: "10.1.0.1") + @patch.object(ceph_hooks, 'cmp_pkgrevno', lambda *args: 1) + @patch.object(ceph_hooks, 'get_mon_hosts', lambda *args: ['10.0.0.1', + '10.0.0.2']) + @patch.object(ceph_hooks, 'get_networks', lambda *args: "") + @patch.object(ceph, 'config') + @patch.object(ceph_hooks, 'config') + def test_get_ceph_context_w_config_flags(self, mock_config, mock_config2): + config = copy.deepcopy(CHARM_CONFIG) + config['config-flags'] = '{"osd": {"osd max write size": 1024}}' + mock_config.side_effect = lambda key: config[key] + mock_config2.side_effect = lambda key: config[key] + ctxt = ceph_hooks.get_ceph_context() + expected = {'auth_supported': False, + 'ceph_cluster_network': '', + 'ceph_public_network': '', + 'cluster_addr': '10.1.0.1', + 'dio': 'true', + 'fsid': '1234', + 'loglevel': 1, + 'mon_hosts': '10.0.0.1 10.0.0.2', + 'old_auth': False, + 'osd': OrderedDict([('osd max write size', 1024)]), + 'crush_initial_weight': '0', + 'osd_journal_size': 1024, + 'osd_max_backfills': 1, + 'osd_recovery_max_active': 2, + 'osd_from_client': OrderedDict(), + 'osd_from_client_conflict': OrderedDict(), + 'public_addr': '10.0.0.1', + 'short_object_len': True, + 'upgrade_in_progress': False, + 'use_syslog': 'true', + 'bdev_discard': True, + 'bluestore_experimental': False, + 'bluestore_block_wal_size': 0, + 'bluestore_block_db_size': 0} + self.assertEqual(ctxt, expected) + + @patch.object(ceph_hooks.ch_context, 'CephBlueStoreCompressionContext', + lambda: lambda: {}) + @patch.object(ceph_hooks.ch_ceph, 'get_osd_settings', lambda *args: {}) + @patch.object(ceph_hooks, 'get_fsid', lambda *args: '1234') + @patch.object(ceph_hooks, 'get_auth', lambda *args: False) + @patch.object(ceph_hooks, 'get_public_addr', lambda *args: "10.0.0.1") + @patch.object(ceph_hooks, 'get_cluster_addr', lambda *args: "10.1.0.1") + @patch.object(ceph_hooks, 'cmp_pkgrevno', lambda *args: 1) + @patch.object(ceph_hooks, 'get_mon_hosts', lambda *args: ['10.0.0.1', + '10.0.0.2']) + @patch.object(ceph_hooks, 'get_networks', lambda *args: "") + @patch.object(ceph, 'config') + @patch.object(ceph_hooks, 'config') + def test_get_ceph_context_w_config_flags_invalid(self, mock_config, + mock_config2): + config = copy.deepcopy(CHARM_CONFIG) + config['config-flags'] = ('{"osd": {"osd max write size": 1024},' + '"foo": "bar"}') + mock_config.side_effect = lambda key: config[key] + mock_config2.side_effect = lambda key: config[key] + ctxt = ceph_hooks.get_ceph_context() + expected = {'auth_supported': False, + 'ceph_cluster_network': '', + 'ceph_public_network': '', + 'cluster_addr': '10.1.0.1', + 'dio': 'true', + 'fsid': 
'1234', + 'loglevel': 1, + 'mon_hosts': '10.0.0.1 10.0.0.2', + 'old_auth': False, + 'osd': OrderedDict([('osd max write size', 1024)]), + 'crush_initial_weight': '0', + 'osd_journal_size': 1024, + 'osd_max_backfills': 1, + 'osd_recovery_max_active': 2, + 'osd_from_client': OrderedDict(), + 'osd_from_client_conflict': OrderedDict(), + 'public_addr': '10.0.0.1', + 'short_object_len': True, + 'upgrade_in_progress': False, + 'use_syslog': 'true', + 'bdev_discard': True, + 'bluestore_experimental': False, + 'bluestore_block_wal_size': 0, + 'bluestore_block_db_size': 0} + self.assertEqual(ctxt, expected) + + @patch.object(ceph_utils, 'cmp_pkgrevno', lambda *args: 1) + @patch.object(ceph_hooks.ch_context, 'CephBlueStoreCompressionContext') + @patch.object(ceph_hooks.ch_ceph, 'get_osd_settings', lambda *args: {}) + @patch.object(ceph_hooks, 'get_fsid', lambda *args: '1234') + @patch.object(ceph_hooks, 'get_auth', lambda *args: False) + @patch.object(ceph_hooks, 'get_public_addr', lambda *args: "10.0.0.1") + @patch.object(ceph_hooks, 'get_cluster_addr', lambda *args: "10.1.0.1") + @patch.object(ceph_hooks, 'cmp_pkgrevno', lambda *args: 1) + @patch.object(ceph_hooks, 'get_mon_hosts', lambda *args: ['10.0.0.1', + '10.0.0.2']) + @patch.object(ceph_hooks, 'get_networks', lambda *args: "") + @patch.object(ceph_utils, 'config') + @patch.object(ceph, 'config') + @patch.object(ceph_hooks, 'config') + def test_get_ceph_context_bluestore_compression( + self, mock_config, mock_config2, mock_config3, + mock_bluestore_compression): + config = copy.deepcopy(CHARM_CONFIG) + mock_config.side_effect = lambda key: config[key] + mock_config2.side_effect = lambda key: config[key] + mock_config3.side_effect = lambda key: config[key] + mock_bluestore_compression().return_value = { + 'fake-bluestore-compression-key': 'fake-value'} + ctxt = ceph_hooks.get_ceph_context() + expected = {'auth_supported': False, + 'ceph_cluster_network': '', + 'ceph_public_network': '', + 'cluster_addr': '10.1.0.1', + 'dio': 'true', + 'fsid': '1234', + 'loglevel': 1, + 'mon_hosts': '10.0.0.1 10.0.0.2', + 'old_auth': False, + 'crush_initial_weight': '0', + 'osd_journal_size': 1024, + 'osd_max_backfills': 1, + 'osd_recovery_max_active': 2, + 'osd_from_client': OrderedDict(), + 'osd_from_client_conflict': OrderedDict(), + 'public_addr': '10.0.0.1', + 'short_object_len': True, + 'upgrade_in_progress': False, + 'use_syslog': 'true', + 'bdev_discard': True, + 'bluestore_experimental': False, + 'bluestore_block_wal_size': 0, + 'bluestore_block_db_size': 0, + 'fake-bluestore-compression-key': 'fake-value'} + self.assertEqual(ctxt, expected) + + @patch.object(ceph_hooks.ch_context, 'CephBlueStoreCompressionContext', + lambda: lambda: {}) + @patch.object(ceph_hooks, 'get_mon_hosts', + lambda *args: ['2a01:348:2f4:0:685e:5748:ae62:209f', + '2a01:348:2f4:0:685e:5748:ae62:20a0']) + @patch.object(ceph_hooks, 'get_ipv6_addr', + lambda *args: ['2a01:348:2f4:0:685e:5748:ae62:209f']) + @patch.object(ceph_hooks.ch_ceph, 'get_osd_settings', lambda *args: {}) + @patch.object(ceph_hooks, 'get_fsid', lambda *args: '1234') + @patch.object(ceph_hooks, 'get_auth', lambda *args: False) + @patch.object(ceph_hooks, 'cmp_pkgrevno', lambda *args: 1) + @patch.object(ceph_hooks, 'get_networks', lambda *args: "") + @patch.object(ceph, 'config') + @patch.object(ceph_hooks, 'config') + def test_ipv6_only_env_bindings(self, mock_config, mock_config2): + config = copy.deepcopy(CHARM_CONFIG) + config['prefer-ipv6'] = True + mock_config.side_effect = lambda key: config[key] + 
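# config() is patched both in ceph_hooks and in charmhelpers' ceph
+        # module, so the two mocks must serve the same charm config.
+        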
mock_config2.side_effect = lambda key: config[key] + ctxt = ceph_hooks.get_ceph_context() + expected = { + 'auth_supported': False, + 'ceph_cluster_network': '', + 'ceph_public_network': '', + 'dio': 'true', + 'fsid': '1234', + 'loglevel': 1, + 'old_auth': False, + 'crush_initial_weight': '0', + 'osd_journal_size': 1024, + 'osd_max_backfills': 1, + 'osd_recovery_max_active': 2, + 'osd_from_client': OrderedDict(), + 'osd_from_client_conflict': OrderedDict(), + 'short_object_len': True, + 'upgrade_in_progress': False, + 'use_syslog': 'true', + 'bdev_discard': True, + 'bluestore_experimental': False, + 'bluestore_block_wal_size': 0, + 'bluestore_block_db_size': 0, + 'cluster_addr': '2a01:348:2f4:0:685e:5748:ae62:209f', + 'public_addr': '2a01:348:2f4:0:685e:5748:ae62:209f', + 'mon_hosts': '2a01:348:2f4:0:685e:5748:ae62:209f ' + '2a01:348:2f4:0:685e:5748:ae62:20a0', + 'ms_bind_ipv4': False, + 'ms_bind_ipv6': True, + } + self.assertEqual(ctxt, expected) + + @patch.object(ceph_hooks, "kv") + @patch.object(ceph_hooks.glob, "glob") + @patch.object(ceph_hooks, "file_hash") + def test_check_aa_profile_needs_update_True( + self, mock_hash, mock_glob, mock_kv): + mock_glob.return_value = ['file1', 'file2', 'file3'] + mock_hash.side_effect = ['hash1', 'hash2'] + mock_kv.return_value = {'hash:file1': 'hash1', + 'hash:file2': 'hash2_old'} + result = ceph_hooks.check_aa_profile_needs_update() + self.assertTrue(result) + mock_hash.assert_has_calls([call('file1'), call('file2')]) + + @patch.object(ceph_hooks, "kv") + @patch.object(ceph_hooks.glob, "glob") + @patch.object(ceph_hooks, "file_hash") + def test_check_aa_profile_needs_update_False( + self, mock_hash, mock_glob, mock_kv): + mock_glob.return_value = ['file1', 'file2', 'file3'] + mock_hash.side_effect = ['hash1', 'hash2', 'hash3'] + mock_kv.return_value = {'hash:file1': 'hash1', + 'hash:file2': 'hash2', + 'hash:file3': 'hash3'} + result = ceph_hooks.check_aa_profile_needs_update() + self.assertFalse(result) + mock_hash.assert_has_calls( + [call('file1'), call('file2'), call('file3')]) + + @patch.object(ceph_hooks, "kv") + @patch.object(ceph_hooks.glob, "glob") + @patch.object(ceph_hooks, "file_hash") + def test_check_aa_profile_needs_update_never_installed( + self, mock_hash, mock_glob, mock_kv): + mock_glob.return_value = ['file1', 'file2', 'file3'] + mock_kv.return_value = {} + self.assertRaises(ceph_hooks.AppArmorProfileNeverInstalledException, + ceph_hooks.check_aa_profile_needs_update) + mock_hash.assert_not_called() + + @patch.object(ceph_hooks, 'check_aa_profile_needs_update') + @patch.object(ceph_hooks, 'update_apparmor') + @patch.object(ceph_hooks, '_set_pending_apparmor_update_status') + def test_install_apparmor_profile_no_change( + self, mock_set, mock_update, mock_check): + mock_check.return_value = False + ceph_hooks.install_apparmor_profile() + mock_set.assert_not_called() + mock_update.assert_not_called() + + @patch.object(ceph_hooks, 'check_aa_profile_needs_update') + @patch.object(ceph_hooks, 'update_apparmor') + @patch.object(ceph_hooks, '_set_pending_apparmor_update_status') + @patch.object(ceph_hooks, 'config') + def test_install_apparmor_profile_disable( + self, mock_config, mock_set, mock_update, mock_check): + mock_check.return_value = True + mock_config.return_value = 'disable' + ceph_hooks.install_apparmor_profile() + mock_set.assert_not_called() + mock_update.assert_called_once_with() + + @patch.object(ceph_hooks, 'check_aa_profile_needs_update') + @patch.object(ceph_hooks, 'update_apparmor') + @patch.object(ceph_hooks, 
'_set_pending_apparmor_update_status') + @patch.object(ceph_hooks, 'config') + def test_install_apparmor_profile_never_installed( + self, mock_config, mock_set, mock_update, mock_check): + mock_check.side_effect = ( + ceph_hooks.AppArmorProfileNeverInstalledException) + ceph_hooks.install_apparmor_profile() + mock_config.assert_not_called() + mock_set.assert_not_called() + mock_update.assert_called_once_with() + + @patch.object(ceph_hooks, 'check_aa_profile_needs_update') + @patch.object(ceph_hooks, 'update_apparmor') + @patch.object(ceph_hooks, '_set_pending_apparmor_update_status') + @patch.object(ceph_hooks, 'config') + def test_install_apparmor_profile_enforce( + self, mock_config, mock_set, mock_update, mock_check): + mock_check.return_value = True + mock_config.return_value = 'enforce' + ceph_hooks.install_apparmor_profile() + mock_set.assert_called_once_with() + mock_update.assert_not_called() + + @patch.object(ceph_hooks, 'assess_status') + @patch.object(ceph_hooks, 'ceph') + @patch.object(ceph_hooks, 'service_restart') + @patch.object(ceph_hooks, 'service_reload') + @patch.object(ceph_hooks, 'copy_profile_into_place') + @patch.object(ceph_hooks, 'CephOsdAppArmorContext') + @patch.object(ceph_hooks, 'config') + def test_update_apparmor_upstart_config_changed( + self, mock_config, mock_apparmor_context, + mock_copy_profile_into_place, mock_service_reload, + mock_service_restart, mock_ceph, + mock_assess_status): + m_config = MagicMock() + m_config.changed.return_value = True + mock_config.return_value = m_config + m_aa_context = MagicMock() + mock_apparmor_context.return_value = m_aa_context + mock_ceph.systemd.return_value = False + mock_copy_profile_into_place.return_value = False + + ceph_hooks.update_apparmor() + + m_aa_context.setup_aa_profile.assert_called() + mock_copy_profile_into_place.assert_called() + mock_service_restart.assert_called_with('ceph-osd-all') + m_config.changed.assert_called_with('aa-profile-mode') + mock_service_reload.assert_called_with('apparmor') + mock_assess_status.assert_called_once_with() + + @patch.object(ceph_hooks, 'assess_status') + @patch.object(ceph_hooks, 'ceph') + @patch.object(ceph_hooks, 'service_restart') + @patch.object(ceph_hooks, 'service_reload') + @patch.object(ceph_hooks, 'copy_profile_into_place') + @patch.object(ceph_hooks, 'CephOsdAppArmorContext') + @patch.object(ceph_hooks, 'config') + def test_update_apparmor_systemd_profile_changed( + self, mock_config, mock_apparmor_context, + mock_copy_profile_into_place, mock_service_reload, + mock_service_restart, mock_ceph, + mock_assess_status): + m_aa_context = MagicMock() + mock_apparmor_context.return_value = m_aa_context + mock_ceph.systemd.return_value = True + mock_copy_profile_into_place.return_value = True + + ceph_hooks.update_apparmor() + + m_aa_context.setup_aa_profile.assert_called() + mock_copy_profile_into_place.assert_called() + mock_config.changed.assert_not_called() + mock_service_reload.assert_called_with('apparmor') + mock_service_restart.assert_called_once_with('ceph-osd.target') + mock_assess_status.assert_called_once_with() + + @patch.object(ceph_hooks, 'assess_status') + @patch.object(ceph_hooks, 'ceph') + @patch.object(ceph_hooks, 'service_restart') + @patch.object(ceph_hooks, 'service_reload') + @patch.object(ceph_hooks, 'copy_profile_into_place') + @patch.object(ceph_hooks, 'CephOsdAppArmorContext') + @patch.object(ceph_hooks, 'config') + def test_update_apparmor_disable( + self, mock_config, mock_apparmor_context, + mock_copy_profile_into_place, + 
mock_service_reload, mock_service_restart, + mock_ceph, mock_assess_status): + mock_config.return_value = 'disable' + m_aa_context = MagicMock() + mock_apparmor_context.return_value = m_aa_context + mock_ceph.systemd.return_value = True + mock_copy_profile_into_place.return_value = True + + ceph_hooks.update_apparmor() + + m_aa_context.setup_aa_profile.assert_called() + mock_copy_profile_into_place.assert_called() + mock_config.changed.assert_not_called() + mock_service_reload.assert_called_with('apparmor') + mock_service_restart.assert_not_called() + mock_assess_status.assert_not_called() + + @patch.object(ceph_hooks, 'is_block_device') + @patch.object(ceph_hooks, 'storage_list') + @patch.object(ceph_hooks, 'config') + def test_get_devices(self, mock_config, mock_storage_list, + mock_is_block_device): + '''Devices returned as expected''' + config = {'osd-devices': '/dev/vda /dev/vdb'} + mock_config.side_effect = lambda key: config[key] + mock_storage_list.return_value = [] + mock_is_block_device.return_value = True + devices = ceph_hooks.get_devices() + self.assertEqual(devices, ['/dev/vda', '/dev/vdb']) + + @patch.object(ceph_hooks, 'is_block_device') + @patch.object(ceph_hooks, 'get_blacklist') + @patch.object(ceph_hooks, 'storage_list') + @patch.object(ceph_hooks, 'config') + def test_get_devices_blacklist(self, mock_config, mock_storage_list, + mock_get_blacklist, mock_is_block_device): + '''Devices returned as expected when blacklist in effect''' + config = {'osd-devices': '/dev/vda /dev/vdb'} + mock_config.side_effect = lambda key: config[key] + mock_storage_list.return_value = [] + mock_get_blacklist.return_value = ['/dev/vda'] + mock_is_block_device.return_value = True + devices = ceph_hooks.get_devices() + mock_storage_list.assert_called() + mock_get_blacklist.assert_called() + self.assertEqual(devices, ['/dev/vdb']) + + @patch.object(ceph_hooks, 'log') + @patch.object(ceph_hooks, 'config') + @patch('os.environ') + def test_az_info_unset(self, environ, config, log): + config.return_value = None + environ.get.return_value = None + + self.assertEqual(ceph_hooks.az_info(), None) + + config.assert_called_with('availability_zone') + environ.get.assert_called_with('JUJU_AVAILABILITY_ZONE') + + @patch.object(ceph_hooks, 'log') + @patch.object(ceph_hooks, 'config') + @patch('os.environ') + def test_az_info_config(self, environ, config, log): + config.return_value = 'dc-01' + environ.get.return_value = None + + self.assertEqual(ceph_hooks.az_info(), + ' row=dc-01') + + config.assert_called_with('availability_zone') + environ.get.assert_called_with('JUJU_AVAILABILITY_ZONE') + + @patch.object(ceph_hooks, 'log') + @patch.object(ceph_hooks, 'config') + @patch('os.environ') + def test_az_info_juju_az(self, environ, config, log): + config.return_value = 'dc-01' + environ.get.return_value = 'zone1' + + self.assertEqual(ceph_hooks.az_info(), + ' rack=zone1 row=dc-01') + + config.assert_called_with('availability_zone') + environ.get.assert_called_with('JUJU_AVAILABILITY_ZONE') + + @patch.object(ceph_hooks, 'log') + @patch.object(ceph_hooks, 'config') + @patch('os.environ') + def test_az_info_default_remap(self, environ, config, log): + config.return_value = 'default' + environ.get.return_value = 'default' + + self.assertEqual(ceph_hooks.az_info(), + ' rack=default-rack row=default-row') + + config.assert_called_with('availability_zone') + environ.get.assert_called_with('JUJU_AVAILABILITY_ZONE') + + @patch.object(ceph_hooks, 'is_container') + @patch.object(ceph_hooks, 'subprocess') + 
@patch.object(ceph_hooks, 'shutil') + def test_install_udev_rules(self, shutil, subprocess, is_container): + is_container.return_value = False + ceph_hooks.install_udev_rules() + shutil.copy.assert_called_once_with( + 'files/udev/95-charm-ceph-osd.rules', + '/lib/udev/rules.d' + ) + subprocess.check_call.assert_called_once_with( + ['udevadm', 'control', '--reload-rules'] + ) + + @patch.object(ceph_hooks, 'is_container') + @patch.object(ceph_hooks, 'subprocess') + @patch.object(ceph_hooks, 'shutil') + def test_install_udev_rules_container(self, shutil, subprocess, + is_container): + is_container.return_value = True + ceph_hooks.install_udev_rules() + shutil.copy.assert_not_called() + subprocess.check_call.assert_not_called() + + @patch.object(ceph_hooks, 'config') + @patch.object(ceph_hooks, 'cmp_pkgrevno') + def test_use_short_objects(self, mock_cmp_pkgrevno, mock_config): + + def fake_config(key): + return config.get(key, None) + + mock_config.side_effect = fake_config + mock_cmp_pkgrevno.return_value = True + + config = {'osd-devices': '/dev/sdb /dev/sdc', 'osd-format': 'ext4'} + self.assertTrue(ceph_hooks.use_short_objects()) + + config = {'osd-devices': '/dev/sdb /dev/sdc', 'osd-format': 'xfs'} + self.assertFalse(ceph_hooks.use_short_objects()) + + config = {'osd-devices': '/srv/osd', 'osd-format': 'xfs'} + self.assertTrue(ceph_hooks.use_short_objects()) + + config = {'osd-devices': '/srv/osd', 'osd-format': 'ext4'} + self.assertTrue(ceph_hooks.use_short_objects()) + + @patch.object(ceph_hooks, 'write_file') + @patch.object(ceph_hooks.ceph, 'ceph_user') + @patch.object(ceph_hooks, 'install_alternative') + @patch.object(ceph_hooks, 'render_template') + @patch.object(ceph_hooks, 'get_ceph_context') + @patch.object(ceph_hooks, 'service_name') + @patch.object(ceph_hooks, 'mkdir') + def test_emit_ceph_conf(self, mock_mkdir, mock_service_name, + mock_get_ceph_context, mock_render_template, + mock_install_alternative, mock_ceph_user, + mock_write_file): + mock_service_name.return_value = 'testsvc' + mock_ceph_user.return_value = 'ceph' + mock_get_ceph_context.return_value = {} + mock_render_template.return_value = "awesome ceph config" + + ceph_hooks.emit_cephconf() + + self.assertTrue(mock_write_file.called) + self.assertTrue(mock_install_alternative.called) + + @patch.object(ceph_hooks, 'should_enable_discard') + @patch.object(ceph_hooks, 'config') + def test_get_bdev_enable_discard(self, mock_config, + mock_should_enable_discard): + mock_should_enable_discard.return_value = True + config = {'bdev-enable-discard': 'xxx', + 'osd-devices': '/dev/vdb'} + mock_config.side_effect = lambda key: config[key] + self.assertRaises(ValueError, ceph_hooks.get_bdev_enable_discard) + + for value, expected in [('enable', True), + ('enabled', True), + ('disable', False), + ('disabled', False), + ('auto', True)]: + config['bdev-enable-discard'] = value + self.assertEqual(ceph_hooks.get_bdev_enable_discard(), expected) + + @patch.object(ceph_hooks, "get_total_ram") + @patch.object(ceph_hooks, "kv") + @patch.object(ceph_hooks, "log") + def test_warn_memory_bounds( + self, mock_log, mock_kv, mock_total_ram + ): + mock_total_ram.return_value = 16 * 1024 * 1024 * 1024 # 16GB + mock_kv.return_value = {"osd-devices": ["osd1", "osd2"]} + ceph_hooks.warn_if_memory_outside_bounds(5 * 1024 * 1024 * 1024) # 5GB + mock_log.assert_not_called() + + mock_kv.return_value = {"osd-devices": ["osd1", "osd2", "osd3"]} + ceph_hooks.warn_if_memory_outside_bounds(5 * 1024 * 1024 * 1024) # 5GB + mock_log.assert_called_with( + 
"tune-osd-memory-target results in value > 90% of system ram. " + "This is not recommended.", + level=ceph_hooks.WARNING + ) + + mock_kv.return_value = {"osd-devices": ["osd1", "osd2"]} + ceph_hooks.warn_if_memory_outside_bounds(2 * 1024 * 1024 * 1024) # 2GB + mock_log.assert_called_with( + "tune-osd-memory-target results in value < 4GB. " + "This is not recommended.", + level=ceph_hooks.WARNING + ) + + @patch.object(ceph_hooks, "config") + def test_is_tune_osd_memory_target_valid(self, mock_config): + def tune(value): + return lambda k: ( + value if k == "tune-osd-memory-target" else KeyError + ) + + # value, is_valid + scenarios = [ + ("", True), + ("5GB", True), + ("020GB", True), + ("34GB", True), + ("5%", True), + ("05%", True), + ("50%", True), + ("test", False), + (" ", False), + ("5", False), + ("GB", False), + ("%", False), + ("test5GB", False), + ("50%%", False), + ] + for value, expected_valid in scenarios: + mock_config.side_effect = tune(value) + print(f"testing tune-osd-memory-target set to {value}") + self.assertEqual( + ceph_hooks.is_tune_osd_memory_target_valid(), + expected_valid + ) + + @patch.object(ceph_hooks, "config") + @patch.object(ceph_hooks, "get_total_ram") + @patch.object(ceph_hooks, "kv") + @patch.object(ceph_hooks, "log") + def test_get_osd_memory_target_gb( + self, mock_log, mock_kv, mock_total_ram, + mock_config, + ): + mock_total_ram.return_value = 16 * 1024 * 1024 * 1024 # 16GB + mock_kv.return_value = {"osd-devices": ["osd1", "osd2"]} + + def config_func(k): + if k == "tune-osd-memory-target": + return "5GB" + raise ValueError + mock_config.side_effect = config_func + + target = ceph_hooks.get_osd_memory_target() + self.assertEqual(target, str(5 * 1024 * 1024 * 1024)) # 5GB + + @patch.object(ceph_hooks, "config") + @patch.object(ceph_hooks, "get_total_ram") + @patch.object(ceph_hooks, "kv") + @patch.object(ceph_hooks, "log") + def test_get_osd_memory_target_percentage( + self, mock_log, mock_kv, mock_total_ram, + mock_config, + ): + mock_total_ram.return_value = 16 * 1024 * 1024 * 1024 # 16GB + mock_kv.return_value = {"osd-devices": ["osd1", "osd2"]} + + def config_func(k): + if k == "tune-osd-memory-target": + return "50%" + raise ValueError + mock_config.side_effect = config_func + + target = ceph_hooks.get_osd_memory_target() + # should be 50% of 16GB / 2 osd devices = 4GB + self.assertEqual(target, str(4 * 1024 * 1024 * 1024)) # 4GB + + @patch.object(ceph_hooks, "config") + @patch.object(ceph_hooks, "get_total_ram") + @patch.object(ceph_hooks, "kv") + @patch.object(ceph_hooks, "log") + def test_get_osd_memory_target_empty( + self, mock_log, mock_kv, mock_total_ram, + mock_config, + ): + mock_total_ram.return_value = 16 * 1024 * 1024 * 1024 # 16GB + mock_kv.return_value = {"osd-devices": ["osd1", "osd2"]} + + mock_config.side_effect = lambda _: None + + target = ceph_hooks.get_osd_memory_target() + self.assertEqual(target, "") + + @patch.object(ceph_hooks, "config") + @patch.object(ceph_hooks, "get_total_ram") + @patch.object(ceph_hooks, "kv") + @patch.object(ceph_hooks, "log") + def test_get_osd_memory_target_invalid( + self, mock_log, mock_kv, mock_total_ram, + mock_config, + ): + mock_total_ram.return_value = 16 * 1024 * 1024 * 1024 # 16GB + mock_kv.return_value = {"osd-devices": ["osd1", "osd2"]} + + def config_func(k): + if k == "tune-osd-memory-target": + return "foo" + raise ValueError + mock_config.side_effect = config_func + + target = ceph_hooks.get_osd_memory_target() + self.assertEqual(target, "") + mock_log.assert_called_with( + 
"tune-osd-memory-target value invalid," + " leaving the OSD memory target unchanged", + level=ceph_hooks.ERROR, + ) + + @patch.object(ceph_hooks, 'service_restart') + @patch.object(ceph_hooks, 'import_pending_key') + @patch.object(ceph_hooks.os.path, 'exists') + def test_handle_pending_key(self, exists, import_pending_key, + service_restart): + exists.return_value = True + pending_key = '{"0":"some-key"}' + ceph_hooks.handle_pending_key(pending_key) + exists.assert_called_with('/var/lib/ceph/osd/ceph-0') + import_pending_key.assert_called_with('some-key', '0') + service_restart.assert_called_with('ceph-osd@0') + + +@patch.object(ceph_hooks, 'local_unit') +@patch.object(ceph_hooks, 'relation_get') +@patch.object(ceph_hooks, 'relation_set') +@patch.object(ceph_hooks, 'prepare_disks_and_activate') +@patch.object(ceph_hooks, 'get_relation_ip') +@patch.object(ceph_hooks, 'socket') +class SecretsStorageTestCase(unittest.TestCase): + + def test_secrets_storage_relation_joined(self, + _socket, + _get_relation_ip, + _prepare_disks_and_activate, + _relation_set, + _relation_get, + _local_unit): + _local_unit.return_value = 'ceph-osd/0' + _get_relation_ip.return_value = '10.23.1.2' + _socket.gethostname.return_value = 'testhost' + ceph_hooks.secrets_storage_joined() + _get_relation_ip.assert_called_with('secrets-storage') + _relation_set.assert_called_with( + relation_id=None, + secret_backend='charm-vaultlocker', + isolated=True, + access_address='10.23.1.2', + unit_name='ceph-osd/0', + hostname='testhost' + ) + _socket.gethostname.assert_called_once_with() + + def test_secrets_storage_relation_changed(self, + _socket, + _get_relation_ip, + _prepare_disks_and_activate, + _relation_set, + _relation_get, + _local_unit): + _local_unit.return_value = 'ceph-osd/0' + _relation_get.return_value = None + ceph_hooks.secrets_storage_changed() + _prepare_disks_and_activate.assert_called_once_with() + + +@patch.object(ceph_hooks, 'cmp_pkgrevno') +@patch.object(ceph_hooks, 'config') +class VaultLockerTestCase(unittest.TestCase): + + def test_use_vaultlocker(self, _config, _cmp_pkgrevno): + _test_data = { + 'osd-encrypt': True, + 'osd-encrypt-keymanager': 'vault', + } + _config.side_effect = lambda x: _test_data.get(x) + _cmp_pkgrevno.return_value = 1 + self.assertTrue(ceph_hooks.use_vaultlocker()) + + def test_use_vaultlocker_no_encryption(self, _config, _cmp_pkgrevno): + _test_data = { + 'osd-encrypt': False, + 'osd-encrypt-keymanager': 'vault', + } + _config.side_effect = lambda x: _test_data.get(x) + _cmp_pkgrevno.return_value = 1 + self.assertFalse(ceph_hooks.use_vaultlocker()) + + def test_use_vaultlocker_not_vault(self, _config, _cmp_pkgrevno): + _test_data = { + 'osd-encrypt': True, + 'osd-encrypt-keymanager': 'ceph', + } + _config.side_effect = lambda x: _test_data.get(x) + _cmp_pkgrevno.return_value = 1 + self.assertFalse(ceph_hooks.use_vaultlocker()) + + def test_use_vaultlocker_old_version(self, _config, _cmp_pkgrevno): + _test_data = { + 'osd-encrypt': True, + 'osd-encrypt-keymanager': 'vault', + } + _config.side_effect = lambda x: _test_data.get(x) + _cmp_pkgrevno.return_value = -1 + self.assertRaises(ValueError, + ceph_hooks.use_vaultlocker) diff --git a/ceph-osd/unit_tests/test_ceph_networking.py b/ceph-osd/unit_tests/test_ceph_networking.py new file mode 100644 index 00000000..168e82fc --- /dev/null +++ b/ceph-osd/unit_tests/test_ceph_networking.py @@ -0,0 +1,65 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file 
except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import test_utils +import charmhelpers.core.hookenv as hookenv +import utils as ceph_utils + +TO_PATCH_SPACES = [ + 'network_get_primary_address', + 'log', + 'get_host_ip', + 'config', + 'get_network_addrs', + 'cached', +] + + +class CephNetworkSpaceTestCase(test_utils.CharmTestCase): + def setUp(self): + super(CephNetworkSpaceTestCase, self).setUp(ceph_utils, + TO_PATCH_SPACES) + self.config.side_effect = self.test_config.get + + def tearDown(self): + # Reset @cached cache + hookenv.cache = {} + + def test_no_network_space_support(self): + self.get_host_ip.return_value = '192.168.2.1' + self.network_get_primary_address.side_effect = NotImplementedError + self.assertEqual(ceph_utils.get_cluster_addr(), + '192.168.2.1') + self.assertEqual(ceph_utils.get_public_addr(), + '192.168.2.1') + + def test_public_network_space(self): + self.network_get_primary_address.return_value = '10.20.40.2' + self.assertEqual(ceph_utils.get_public_addr(), + '10.20.40.2') + self.network_get_primary_address.assert_called_with('public') + self.config.assert_called_with('ceph-public-network') + + def test_cluster_network_space(self): + self.network_get_primary_address.return_value = '10.20.50.2' + self.assertEqual(ceph_utils.get_cluster_addr(), + '10.20.50.2') + self.network_get_primary_address.assert_called_with('cluster') + self.config.assert_called_with('ceph-cluster-network') + + def test_config_options_in_use(self): + self.get_network_addrs.return_value = ['192.122.20.2'] + self.test_config.set('ceph-cluster-network', '192.122.20.0/24') + self.assertEqual(ceph_utils.get_cluster_addr(), + '192.122.20.2') diff --git a/ceph-osd/unit_tests/test_ceph_utils.py b/ceph-osd/unit_tests/test_ceph_utils.py new file mode 100644 index 00000000..2ae572aa --- /dev/null +++ b/ceph-osd/unit_tests/test_ceph_utils.py @@ -0,0 +1,366 @@ +# Copyright 2016 Canonical Ltd + +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
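+# Editor's sketch: a minimal illustration of the SATA-version probe that the
+# is_sata30orless tests below pin down. It assumes `smartctl -i`-style output
+# of the form shown in the fixtures; the charm's real implementation in
+# utils.py may gather and parse its output differently.
+def _sketch_is_sata30orless(device):
+    import re
+    import subprocess
+    out = subprocess.check_output(['smartctl', '-i', device]).decode()
+    match = re.search(r'SATA Version is:\s+SATA (\d+\.\d+)', out)
+    # Devices that report no SATA version (e.g. NVMe) are treated as modern.
+    return bool(match) and float(match.group(1)) <= 3.0
+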
+
+import unittest
+
+from unittest.mock import patch
+
+with patch('charmhelpers.contrib.hardening.harden.harden') as mock_dec:
+    mock_dec.side_effect = (lambda *dargs, **dkwargs: lambda f:
+                            lambda *args, **kwargs: f(*args, **kwargs))
+    import utils
+
+
+class CephUtilsTestCase(unittest.TestCase):
+    def setUp(self):
+        super(CephUtilsTestCase, self).setUp()
+
+    @patch('os.path.exists')
+    @patch.object(utils, 'storage_list')
+    @patch.object(utils, 'config')
+    def test_get_journal_devices(self, mock_config, mock_storage_list,
+                                 mock_os_path_exists):
+        '''Devices returned as expected'''
+        config = {'osd-journal': '/dev/vda /dev/vdb'}
+        mock_config.side_effect = lambda key: config[key]
+        mock_storage_list.return_value = []
+        mock_os_path_exists.return_value = True
+        devices = utils.get_journal_devices()
+        mock_storage_list.assert_called()
+        mock_os_path_exists.assert_called()
+        self.assertEqual(devices, set(['/dev/vda', '/dev/vdb']))
+
+    @patch('os.path.exists')
+    @patch.object(utils, 'get_blacklist')
+    @patch.object(utils, 'storage_list')
+    @patch.object(utils, 'config')
+    def test_get_journal_devices_blacklist(self, mock_config,
+                                           mock_storage_list,
+                                           mock_get_blacklist,
+                                           mock_os_path_exists):
+        '''Devices returned as expected when blacklist in effect'''
+        config = {'osd-journal': '/dev/vda /dev/vdb'}
+        mock_config.side_effect = lambda key: config[key]
+        mock_storage_list.return_value = []
+        mock_get_blacklist.return_value = ['/dev/vda']
+        mock_os_path_exists.return_value = True
+        devices = utils.get_journal_devices()
+        mock_storage_list.assert_called()
+        mock_os_path_exists.assert_called()
+        mock_get_blacklist.assert_called()
+        self.assertEqual(devices, set(['/dev/vdb']))
+
+    @patch('os.path.exists')
+    @patch.object(utils, 'is_sata30orless')
+    def test_should_enable_discard_yes(self, mock_is_sata30orless,
+                                       mock_os_path_exists):
+        devices = ['/dev/sda', '/dev/vda', '/dev/nvme0n1']
+        mock_os_path_exists.return_value = True
+        mock_is_sata30orless.return_value = False
+        ret = utils.should_enable_discard(devices)
+        mock_os_path_exists.assert_called()
+        mock_is_sata30orless.assert_called()
+        self.assertEqual(ret, True)
+
+    @patch('os.path.exists')
+    @patch.object(utils, 'is_sata30orless')
+    def test_should_enable_discard_no(self, mock_is_sata30orless,
+                                      mock_os_path_exists):
+        devices = ['/dev/sda', '/dev/vda', '/dev/nvme0n1']
+        mock_os_path_exists.return_value = True
+        mock_is_sata30orless.return_value = True
+        ret = utils.should_enable_discard(devices)
+        mock_os_path_exists.assert_called()
+        mock_is_sata30orless.assert_called()
+        self.assertEqual(ret, False)
+
+    @patch('subprocess.check_output')
+    def test_is_sata30orless_sata31(self, mock_subprocess_check_output):
+        extcmd_output = (b'suppressed text\nSATA Version is: '
+                         b'SATA 3.1, 6.0 Gb/s (current: 6.0 Gb/s)\n'
+                         b'suppressed text\n\n')
+        mock_subprocess_check_output.return_value = extcmd_output
+        ret = utils.is_sata30orless('/dev/sda')
+        mock_subprocess_check_output.assert_called()
+        self.assertEqual(ret, False)
+
+    @patch('subprocess.check_output')
+    def test_is_sata30orless_sata30(self, mock_subprocess_check_output):
+        extcmd_output = (b'suppressed text\nSATA Version is: '
+                         b'SATA 3.0, 6.0 Gb/s (current: 6.0 Gb/s)\n'
+                         b'suppressed text\n\n')
+        mock_subprocess_check_output.return_value = extcmd_output
+        ret = utils.is_sata30orless('/dev/sda')
+        mock_subprocess_check_output.assert_called()
+        self.assertEqual(ret, True)
+
+    @patch('subprocess.check_output')
+    def test_is_sata30orless_sata26(self, mock_subprocess_check_output):
+        extcmd_output = (b'suppressed text\nSATA Version is: '
+                         b'SATA 2.6, 3.0 Gb/s (current: 3.0 Gb/s)\n'
+                         b'suppressed text\n\n')
+        mock_subprocess_check_output.return_value = extcmd_output
+        ret = utils.is_sata30orless('/dev/sda')
+        mock_subprocess_check_output.assert_called()
+        self.assertEqual(ret, True)
+
+    @patch.object(utils, "function_get")
+    def test_raise_on_missing_arguments(self, mock_function_get):
+        mock_function_get.return_value = None
+        err_msg = "Action argument \"osds\" is missing"
+        with self.assertRaises(RuntimeError, msg=err_msg):
+            utils.parse_osds_arguments()
+
+    @patch.object(utils, "function_get")
+    def test_parse_service_ids(self, mock_function_get):
+        mock_function_get.return_value = "1,2,3"
+        expected_ids = {"1", "2", "3"}
+
+        parsed = utils.parse_osds_arguments()
+        self.assertEqual(parsed, expected_ids)
+
+    @patch.object(utils, "function_get")
+    def test_parse_service_ids_with_all(self, mock_function_get):
+        mock_function_get.return_value = "1,2,all"
+        expected_id = {utils.ALL}
+
+        parsed = utils.parse_osds_arguments()
+        self.assertEqual(parsed, expected_id)
+
+    @patch('subprocess.check_call')
+    @patch('subprocess.check_output')
+    def test_setup_bcache(self, check_output, check_call):
+        check_output.return_value = b'''
+        {
+          "blockdevices": [
+            {"name":"/dev/nvme0n1",
+             "children": [
+               {"name":"/dev/bcache0"}
+             ]
+            }
+          ]
+        }
+        '''
+        self.assertEqual(utils.setup_bcache('', ''), '/dev/bcache0')
+
+    @patch('subprocess.check_output')
+    def test_get_partition_names(self, check_output):
+        check_output.return_value = b'''
+        {
+          "blockdevices": [
+            {"name":"/dev/sdd",
+             "children": [
+               {"name":"/dev/sdd1"}
+             ]
+            }
+          ]
+        }
+        '''
+        partitions = utils.get_partition_names('')
+        self.assertEqual(partitions, set(['/dev/sdd1']))
+        # Check for a raw device with no partitions.
+ check_output.return_value = b''' + {"blockdevices": [{"name":"/dev/sdd"}]} + ''' + self.assertEqual(set(), utils.get_partition_names('')) + + @patch.object(utils, 'get_partition_names') + @patch('subprocess.check_call') + def test_create_partition(self, check_call, get_partition_names): + first_call = True + + def gpn(dev): + nonlocal first_call + if first_call: + first_call = False + return set() + return set(['/dev/nvm0n1p1']) + get_partition_names.side_effect = gpn + partition_name = utils.create_partition('/dev/nvm0n1', 101, 0) + self.assertEqual(partition_name, '/dev/nvm0n1p1') + args = check_call.call_args[0][0] + self.assertIn('/dev/nvm0n1', args) + self.assertIn('101GB', args) + + @patch('subprocess.check_output') + def test_device_size(self, check_output): + check_output.return_value = b''' + { + "blockdevices": [{"size":800166076416}] + } + ''' + self.assertEqual(745, int(utils.device_size(''))) + + @patch('subprocess.check_output') + @patch.object(utils, 'remove_lvm') + @patch.object(utils, 'wipe_disk') + @patch('os.system') + def test_bcache_remove(self, system, wipe_disk, remove_lvm, check_output): + check_output.return_value = b''' + sb.magic ok + sb.first_sector 8 [match] + sb.csum 63F23B706BA0FE6A [match] + sb.version 3 [cache device] + dev.label (empty) + dev.uuid ca4ce5e1-4cf3-4330-b1c9-2c735b14cd0b + dev.sectors_per_block 1 + dev.sectors_per_bucket 1024 + dev.cache.first_sector 1024 + dev.cache.cache_sectors 1562822656 + dev.cache.total_sectors 1562823680 + dev.cache.ordered yes + dev.cache.discard no + dev.cache.pos 0 + dev.cache.replacement 0 [lru] + cset.uuid 424242 + ''' + utils.bcache_remove('/dev/bcache0', 'backing', 'caching') + system.assert_any_call( + 'echo 1 | sudo tee /sys/block/bcache0/bcache/detach') + system.assert_any_call( + 'echo 1 | sudo tee /sys/block/bcache0/bcache/stop') + system.assert_any_call( + 'echo 1 | sudo tee /sys/fs/bcache/424242/stop') + wipe_disk.assert_any_call('backing', 1) + wipe_disk.assert_any_call('caching', 1) + + @patch('os.listdir') + @patch('os.path.exists') + @patch('subprocess.check_output') + def test_get_bcache_names(self, check_output, exists, listdir): + exists.return_value = True + check_output.return_value = b''' +sb.magic ok +sb.first_sector 8 [match] +sb.csum A71D96D4364343BF [match] +sb.version 1 [backing device] + +dev.label (empty) +dev.uuid cca84a86-3f68-4ffb-8be1-4449c9fb29a8 +dev.sectors_per_block 1 +dev.sectors_per_bucket 1024 +dev.data.first_sector 16 +dev.data.cache_mode 1 [writeback] +dev.data.cache_state 1 [clean] + +cset.uuid 57add9da-e5de-47c6-8f39-3e16aafb8d31 + ''' + listdir.return_value = ['backing', 'caching'] + values = utils.get_bcache_names('/dev/bcache0') + self.assertEqual(2, len(values)) + self.assertEqual(values[0], '/dev/backing') + check_output.return_value = b''' +sb.magic ok +sb.first_sector 8 [match] +sb.csum 6802E76075FF7B77 [match] +sb.version 3 [cache device] + +dev.label (empty) +dev.uuid fb6e9d06-12e2-46ca-b8fd-797ecec1a126 +dev.sectors_per_block 1 +dev.sectors_per_bucket 1024 +dev.cache.first_sector 1024 +dev.cache.cache_sectors 10238976 +dev.cache.total_sectors 10240000 +dev.cache.ordered yes +dev.cache.discard no +dev.cache.pos 0 +dev.cache.replacement 0 [lru] + +cset.uuid 57add9da-e5de-47c6-8f39-3e16aafb8d31 + ''' + values = utils.get_bcache_names('/dev/bcache0') + self.assertEqual(values[0], '/dev/caching') + + @patch('subprocess.check_output') + @patch('subprocess.check_call') + def test_remove_lvm(self, check_call, check_output): + check_output.return_value = b''' +--- 
Physical volume --- + PV Name /dev/bcache0 + VG Name ceph-1 + VG Name ceph-2 + ''' + utils.remove_lvm('/dev/bcache0') + check_call.assert_any_call( + ['sudo', 'vgremove', '-y', 'ceph-1', 'ceph-2']) + check_call.assert_any_call(['sudo', 'pvremove', '-y', '/dev/bcache0']) + + check_call.reset_mock() + + def just_raise(*args): + raise utils.DeviceError() + + check_output.side_effect = just_raise + utils.remove_lvm('') + check_call.assert_not_called() + + @patch.object(utils, 'wipe_disk') + @patch.object(utils, 'bcache_remove') + @patch.object(utils, 'create_partition') + @patch.object(utils, 'setup_bcache') + def test_partition_iter(self, setup_bcache, create_partition, + bcache_remove, wipe_disk): + create_partition.side_effect = \ + lambda c, s, n: c + '|' + str(s) + '|' + str(n) + setup_bcache.side_effect = lambda *args: args + piter = utils.PartitionIter(['/dev/nvm0n1', '/dev/nvm0n2'], + 200, ['dev1', 'dev2', 'dev3']) + piter.create_bcache('dev1') + setup_bcache.assert_called_with('dev1', '/dev/nvm0n1|200|0') + piter.cleanup('dev1') + bcache_remove.assert_called() + setup_bcache.mock_reset() + piter.create_bcache('dev2') + setup_bcache.assert_called_with('dev2', '/dev/nvm0n2|200|0') + piter.create_bcache('dev3') + setup_bcache.assert_called_with('dev3', '/dev/nvm0n1|200|1') + + @patch.object(utils, 'device_size') + @patch.object(utils, 'create_partition') + @patch.object(utils, 'setup_bcache') + def test_partition_iter_no_size(self, setup_bcache, create_partition, + device_size): + device_size.return_value = 300 + piter = utils.PartitionIter(['/dev/nvm0n1'], 0, + ['dev1', 'dev2', 'dev3']) + create_partition.side_effect = lambda c, sz, g: sz + + # 300GB across 3 devices, i.e: 100 for each. + self.assertEqual(100, next(piter)) + self.assertEqual(100, next(piter)) + + @patch.object(utils.subprocess, 'check_output') + def test_parent_device(self, check_output): + check_output.return_value = b''' +{"blockdevices": [ + {"name": "loop1p1", + "children": [ + {"name": "loop1"}] + }] +}''' + self.assertEqual(utils.get_parent_device('/dev/loop1p1'), '/dev/loop1') + + @patch.object(utils.ceph, 'ceph_user') + @patch.object(utils.subprocess, 'check_call') + @patch.object(utils.os.path, 'exists') + def test_import_pending_key(self, exists, check_call, ceph_user): + ceph_user.return_value = 'ceph' + exists.return_value = True + utils.import_pending_key('some-key', '0') + exists.assert_called_with('/var/lib/ceph/osd/ceph-0/keyring') + check_call.assert_called_with(['sudo', '-u', 'ceph', 'ceph-authtool', + '/var/lib/ceph/osd/ceph-0/keyring', + '--name=osd.0', '--add-key=some-key']) diff --git a/ceph-osd/unit_tests/test_config.py b/ceph-osd/unit_tests/test_config.py new file mode 100644 index 00000000..574a3021 --- /dev/null +++ b/ceph-osd/unit_tests/test_config.py @@ -0,0 +1,113 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
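+# Editor's sketch: a minimal illustration of the osd-devices parsing behaviour
+# that the tests below pin down; the charm's real get_devices() lives in
+# ceph_hooks.py and is assumed, not reproduced, here. Entries are separated by
+# whitespace, must be absolute paths, are kept even if they do not (yet)
+# exist, and symlinks are not resolved.
+def _sketch_parse_osd_devices(value, blacklist=()):
+    devices = []
+    for entry in value.split():        # any run of whitespace separates
+        if not entry.startswith('/'):  # relative paths are rejected
+            continue
+        if entry in blacklist:         # blacklisted devices are skipped
+            continue
+        devices.append(entry)          # no existence check, no realpath()
+    return devices
+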
+
+import os.path
+import shutil
+import tempfile
+import sys
+import test_utils
+
+from unittest.mock import patch, MagicMock
+
+# python-apt is not installed as part of test-requirements but is imported by
+# some charmhelpers modules so create a fake import.
+mock_apt = MagicMock()
+sys.modules['apt'] = mock_apt
+mock_apt.apt_pkg = MagicMock()
+
+
+with patch('charmhelpers.contrib.hardening.harden.harden') as mock_dec:
+    mock_dec.side_effect = (lambda *dargs, **dkwargs: lambda f:
+                            lambda *args, **kwargs: f(*args, **kwargs))
+    import ceph_hooks as hooks
+
+TO_PATCH = [
+    'config',
+    'is_block_device',
+    'get_blacklist',
+]
+
+
+class GetDevicesTestCase(test_utils.CharmTestCase):
+
+    def setUp(self):
+        super(GetDevicesTestCase, self).setUp(hooks, TO_PATCH)
+        self.config.side_effect = self.test_config.get
+        self.tmp_dir = tempfile.mkdtemp()
+        self.bd = {
+            os.path.join(self.tmp_dir, "device1"): True,
+            os.path.join(self.tmp_dir, "device2"): True,
+            os.path.join(self.tmp_dir, "link"): True,
+            os.path.join(self.tmp_dir, "device"): True,
+        }
+        self.is_block_device.side_effect = lambda x: self.bd.get(x, False)
+        self.get_blacklist.return_value = []
+        self.addCleanup(shutil.rmtree, self.tmp_dir)
+
+    def test_get_devices_empty(self):
+        """
+        If osd-devices is set to an empty string, get_devices() returns
+        an empty list.
+        """
+        self.test_config.set("osd-devices", "")
+        self.assertEqual([], hooks.get_devices())
+
+    def test_get_devices_non_existing_files(self):
+        """
+        If osd-devices points to a file that doesn't exist, it's still
+        returned by get_devices().
+        """
+        non_existing = os.path.join(self.tmp_dir, "no-such-file")
+        self.test_config.set("osd-devices", non_existing)
+        self.assertEqual([non_existing], hooks.get_devices())
+
+    def test_get_devices_multiple(self):
+        """
+        Multiple devices can be specified in osd-devices by separating
+        them with spaces.
+        """
+        device1 = os.path.join(self.tmp_dir, "device1")
+        device2 = os.path.join(self.tmp_dir, "device2")
+        self.test_config.set("osd-devices", "{} {}".format(device1, device2))
+        self.assertEqual([device1, device2], hooks.get_devices())
+
+    def test_get_devices_extra_spaces(self):
+        """
+        Multiple spaces do not result in additional devices.
+        """
+        device1 = os.path.join(self.tmp_dir, "device1")
+        device2 = os.path.join(self.tmp_dir, "device2")
+        self.test_config.set("osd-devices", "{}  {}".format(device1, device2))
+        self.assertEqual([device1, device2], hooks.get_devices())
+
+    def test_get_devices_non_absolute_path(self):
+        """
+        Charm does not allow relative paths as this may result in a path
+        on the root device/within the charm directory.
+        """
+        device1 = os.path.join(self.tmp_dir, "device1")
+        device2 = "foo"
+        self.test_config.set("osd-devices", "{} {}".format(device1, device2))
+        self.assertEqual([device1], hooks.get_devices())
+
+    def test_get_devices_symlink(self):
+        """
+        If a symlink is specified in osd-devices, get_devices() does not
+        resolve it and returns the symlink provided.
+ """ + device = os.path.join(self.tmp_dir, "device") + link = os.path.join(self.tmp_dir, "link") + os.symlink(device, link) + self.test_config.set("osd-devices", link) + self.assertEqual([link], hooks.get_devices()) diff --git a/ceph-osd/unit_tests/test_status.py b/ceph-osd/unit_tests/test_status.py new file mode 100644 index 00000000..98a87a69 --- /dev/null +++ b/ceph-osd/unit_tests/test_status.py @@ -0,0 +1,135 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from unittest import mock +import test_utils + +from unittest.mock import MagicMock, patch + +with patch('charmhelpers.contrib.hardening.harden.harden') as mock_dec: + mock_dec.side_effect = (lambda *dargs, **dkwargs: lambda f: + lambda *args, **kwargs: f(*args, **kwargs)) + import ceph_hooks as hooks + +TO_PATCH = [ + 'status_set', + 'config', + 'ceph', + 'relation_ids', + 'relation_get', + 'related_units', + 'get_conf', + 'application_version_set', + 'get_upstream_version', + 'vaultlocker', + 'use_vaultlocker', +] + +CEPH_MONS = [ + 'ceph/0', + 'ceph/1', + 'ceph/2', +] + + +@patch.object(hooks, 'get_mon_hosts', new=MagicMock(return_value=['1.1.1.1'])) +class ServiceStatusTestCase(test_utils.CharmTestCase): + + def setUp(self): + super(ServiceStatusTestCase, self).setUp(hooks, TO_PATCH) + self.config.side_effect = self.test_config.get + self.get_upstream_version.return_value = '10.2.2' + self.use_vaultlocker.return_value = False + + def test_assess_status_no_monitor_relation(self): + self.relation_ids.return_value = [] + hooks.assess_status() + self.status_set.assert_called_with('blocked', mock.ANY) + self.application_version_set.assert_called_with('10.2.2') + + def test_assess_status_monitor_relation_incomplete(self): + self.relation_ids.return_value = ['mon:1'] + self.related_units.return_value = CEPH_MONS + self.get_conf.return_value = None + hooks.assess_status() + self.status_set.assert_called_with('waiting', mock.ANY) + self.application_version_set.assert_called_with('10.2.2') + + @patch.object(hooks.ch_context, 'CephBlueStoreCompressionContext', + lambda: MagicMock()) + def test_assess_status_monitor_complete_no_disks(self): + self.relation_ids.return_value = ['mon:1'] + self.related_units.return_value = CEPH_MONS + self.get_conf.return_value = 'monitor-bootstrap-key' + self.ceph.get_running_osds.return_value = [] + hooks.assess_status() + self.status_set.assert_called_with('blocked', mock.ANY) + self.application_version_set.assert_called_with('10.2.2') + + @patch.object(hooks.ch_context, 'CephBlueStoreCompressionContext', + lambda: MagicMock()) + def test_assess_status_monitor_complete_disks(self): + self.relation_ids.return_value = ['mon:1'] + self.related_units.return_value = CEPH_MONS + self.get_conf.return_value = 'monitor-bootstrap-key' + self.ceph.get_running_osds.return_value = ['12345', + '67890'] + self.get_upstream_version.return_value = '12.2.4' + hooks.assess_status() + self.status_set.assert_called_with('active', mock.ANY) + 
self.application_version_set.assert_called_with('12.2.4') + + def test_assess_status_monitor_vault_missing(self): + _test_relations = { + 'mon': ['mon:1'], + } + self.relation_ids.side_effect = lambda x: _test_relations.get(x, []) + self.related_units.return_value = CEPH_MONS + self.vaultlocker.vault_relation_complete.return_value = False + self.use_vaultlocker.return_value = True + self.get_conf.return_value = 'monitor-bootstrap-key' + self.ceph.get_running_osds.return_value = ['12345', + '67890'] + self.get_upstream_version.return_value = '12.2.4' + hooks.assess_status() + self.status_set.assert_called_with('blocked', mock.ANY) + self.application_version_set.assert_called_with('12.2.4') + + def test_assess_status_monitor_vault_incomplete(self): + _test_relations = { + 'mon': ['mon:1'], + 'secrets-storage': ['secrets-storage:6'] + } + self.relation_ids.side_effect = lambda x: _test_relations.get(x, []) + self.related_units.return_value = CEPH_MONS + self.vaultlocker.vault_relation_complete.return_value = False + self.use_vaultlocker.return_value = True + self.get_conf.return_value = 'monitor-bootstrap-key' + self.ceph.get_running_osds.return_value = ['12345', + '67890'] + self.get_upstream_version.return_value = '12.2.4' + hooks.assess_status() + self.status_set.assert_called_with('waiting', mock.ANY) + self.application_version_set.assert_called_with('12.2.4') + + @patch.object(hooks.ch_context, 'CephBlueStoreCompressionContext') + def test_assess_status_invalid_bluestore_compression_options( + self, _bluestore_compression): + self.relation_ids.return_value = ['mon:1'] + self.related_units.return_value = CEPH_MONS + _bluestore_compression().validate.side_effect = ValueError( + 'fake-config is invalid') + hooks.assess_status() + self.status_set.assert_called_with( + 'blocked', 'Invalid configuration: fake-config is invalid') diff --git a/ceph-osd/unit_tests/test_tuning.py b/ceph-osd/unit_tests/test_tuning.py new file mode 100644 index 00000000..92e61d52 --- /dev/null +++ b/ceph-osd/unit_tests/test_tuning.py @@ -0,0 +1,126 @@ +__author__ = 'Chris Holcombe ' +from unittest.mock import patch, call +import test_utils +import charms_ceph.utils as ceph + +TO_PATCH = [ + 'hookenv', + 'status_set', + 'log', +] + + +class PerformanceTestCase(test_utils.CharmTestCase): + def setUp(self): + super(PerformanceTestCase, self).setUp(ceph, TO_PATCH) + + @patch.object(ceph.subprocess, 'check_output') + @patch.object(ceph, 'get_link_speed') + @patch.object(ceph, 'save_sysctls') + def test_tune_nic(self, save_sysctls, get_link_speed, check_output): + get_link_speed.return_value = 10000 + ceph.tune_nic('eth0') + save_sysctls.assert_has_calls([ + call( + save_location='/etc/sysctl.d/51-ceph-osd-charm-eth0.conf', + sysctl_dict={ + 'net.core.rmem_max': 524287, + 'net.core.wmem_max': 524287, + 'net.core.rmem_default': 524287, + 'net.ipv4.tcp_wmem': '10000000 10000000 10000000', + 'net.core.netdev_max_backlog': 300000, + 'net.core.optmem_max': 524287, + 'net.ipv4.tcp_mem': '10000000 10000000 10000000', + 'net.ipv4.tcp_rmem': '10000000 10000000 10000000', + 'net.core.wmem_default': 524287 + }) + ]) + check_output.assert_called_with(['sysctl', '-p', + '/etc/sysctl.d/' + '51-ceph-osd-charm-eth0.conf']) + self.status_set.assert_has_calls([ + call('maintenance', 'Tuning device eth0'), + ]) + + @patch.object(ceph.subprocess, 'check_output') + def test_get_block_uuid(self, check_output): + check_output.return_value = \ + b'UUID=378f3c86-b21a-4172-832d-e2b3d4bc7511\nTYPE=ext2\n' + uuid = ceph.get_block_uuid('/dev/sda1') + 
self.assertEqual(uuid, '378f3c86-b21a-4172-832d-e2b3d4bc7511') + + @patch.object(ceph, 'persist_settings') + @patch.object(ceph, 'set_hdd_read_ahead') + @patch.object(ceph, 'get_max_sectors_kb') + @patch.object(ceph, 'get_max_hw_sectors_kb') + @patch.object(ceph, 'set_max_sectors_kb') + @patch.object(ceph, 'get_block_uuid') + def test_tune_dev(self, + block_uuid, + set_max_sectors_kb, + get_max_hw_sectors_kb, + get_max_sectors_kb, + set_hdd_read_ahead, + persist_settings): + self.hookenv.config.return_value = 712 + block_uuid.return_value = '378f3c86-b21a-4172-832d-e2b3d4bc7511' + set_hdd_read_ahead.return_value = None + get_max_sectors_kb.return_value = 512 + get_max_hw_sectors_kb.return_value = 1024 + ceph.tune_dev('/dev/sda') + # The config value was lower than the hardware value. + # We use the lower value. The user wants 712 but the hw supports + # 1K + set_max_sectors_kb.assert_called_with( + dev_name='sda', max_sectors_size=712 + ) + persist_settings.assert_called_with( + settings_dict={'drive_settings': { + '378f3c86-b21a-4172-832d-e2b3d4bc7511': { + 'read_ahead_sect': 712}}} + ) + self.status_set.assert_has_calls([ + call('maintenance', 'Tuning device /dev/sda'), + call('maintenance', 'Finished tuning device /dev/sda') + ]) + + @patch.object(ceph, 'persist_settings') + @patch.object(ceph, 'set_hdd_read_ahead') + @patch.object(ceph, 'get_max_sectors_kb') + @patch.object(ceph, 'get_max_hw_sectors_kb') + @patch.object(ceph, 'set_max_sectors_kb') + @patch.object(ceph, 'get_block_uuid') + def test_tune_dev_2(self, + block_uuid, + set_max_sectors_kb, + get_max_hw_sectors_kb, + get_max_sectors_kb, + set_hdd_read_ahead, + persist_settings): + self.hookenv.config.return_value = 2048 + block_uuid.return_value = '378f3c86-b21a-4172-832d-e2b3d4bc7511' + set_hdd_read_ahead.return_value = None + get_max_sectors_kb.return_value = 512 + get_max_hw_sectors_kb.return_value = 1024 + ceph.tune_dev('/dev/sda') + # The config value was higher than the hardware value. + # We use the lower value. 
The user wants 2K but the hw only supports 1K
+        set_max_sectors_kb.assert_called_with(
+            dev_name='sda', max_sectors_size=1024
+        )
+        persist_settings.assert_called_with(
+            settings_dict={'drive_settings': {
+                '378f3c86-b21a-4172-832d-e2b3d4bc7511': {
+                    'read_ahead_sect': 1024}}}
+        )
+        self.status_set.assert_has_calls([
+            call('maintenance', 'Tuning device /dev/sda'),
+            call('maintenance', 'Finished tuning device /dev/sda')
+        ])
+
+    @patch.object(ceph.subprocess, 'check_output')
+    def test_set_hdd_read_ahead(self, check_output):
+        ceph.set_hdd_read_ahead(dev_name='/dev/sda')
+        check_output.assert_called_with(
+            ['hdparm', '-a256', '/dev/sda']
+        )
diff --git a/ceph-osd/unit_tests/test_upgrade.py b/ceph-osd/unit_tests/test_upgrade.py
new file mode 100644
index 00000000..1e7be80d
--- /dev/null
+++ b/ceph-osd/unit_tests/test_upgrade.py
@@ -0,0 +1,190 @@
+from unittest.mock import call, patch
+from test_utils import CharmTestCase
+from ceph_hooks import check_for_upgrade, notify_mon_of_upgrade
+
+
+__author__ = 'Chris Holcombe '
+
+
+class UpgradeRollingTestCase(CharmTestCase):
+
+    @patch('ceph_hooks.notify_mon_of_upgrade')
+    @patch('ceph_hooks.ceph.dirs_need_ownership_update')
+    @patch('ceph_hooks.os.path.exists')
+    @patch('ceph_hooks.ceph.resolve_ceph_version')
+    @patch('ceph_hooks.emit_cephconf')
+    @patch('ceph_hooks.hookenv')
+    @patch('ceph_hooks.ceph.roll_osd_cluster')
+    @patch('utils.find_filestore_osds')
+    def test_check_for_upgrade(self, find_filestore_osds,
+                               roll_osd_cluster, hookenv,
+                               emit_cephconf, version, exists,
+                               dirs_need_ownership_update,
+                               notify_mon_of_upgrade):
+        dirs_need_ownership_update.return_value = False
+        exists.return_value = True
+        version_pre = 'firefly'
+        version_post = 'hammer'
+        version.side_effect = [version_pre, version_post]
+
+        self.test_config.set_previous('source', "cloud:trusty-juno")
+        self.test_config.set('source', 'cloud:trusty-kilo')
+        self.test_config.set('key', 'key')
+
+        hookenv.config.side_effect = self.test_config
+        check_for_upgrade()
+
+        roll_osd_cluster.assert_called_with(new_version='hammer',
+                                            upgrade_key='osd-upgrade')
+        emit_cephconf.assert_has_calls([call(upgrading=True),
+                                        call(upgrading=False)])
+        exists.assert_called_with(
+            "/var/lib/ceph/osd/ceph.client.osd-upgrade.keyring")
+        notify_mon_of_upgrade.assert_called_once_with(version_post)
+
+    @patch('ceph_hooks.notify_mon_of_upgrade')
+    @patch('ceph_hooks.ceph.dirs_need_ownership_update')
+    @patch('ceph_hooks.os.path.exists')
+    @patch('ceph_hooks.ceph.resolve_ceph_version')
+    @patch('ceph_hooks.emit_cephconf')
+    @patch('ceph_hooks.hookenv')
+    @patch('ceph_hooks.ceph.roll_osd_cluster')
+    @patch('utils.find_filestore_osds')
+    def test_resume_failed_upgrade(self, find_filestore_osds,
+                                   roll_osd_cluster,
+                                   hookenv, emit_cephconf, version,
+                                   exists,
+                                   dirs_need_ownership_update,
+                                   notify_mon_of_upgrade):
+        dirs_need_ownership_update.return_value = True
+        exists.return_value = True
+        version_pre_and_post = 'jewel'
+        version.side_effect = [version_pre_and_post, version_pre_and_post]
+
+        check_for_upgrade()
+
+        roll_osd_cluster.assert_called_with(new_version='jewel',
+                                            upgrade_key='osd-upgrade')
+        emit_cephconf.assert_has_calls([call(upgrading=True),
+                                        call(upgrading=False)])
+        exists.assert_called_with(
+            "/var/lib/ceph/osd/ceph.client.osd-upgrade.keyring")
+        notify_mon_of_upgrade.assert_called_once_with(version_pre_and_post)
+
+    @patch('ceph_hooks.os.path.exists')
+    @patch('ceph_hooks.ceph.resolve_ceph_version')
+    @patch('ceph_hooks.hookenv')
+    @patch('ceph_hooks.ceph.roll_monitor_cluster')
+    
def test_check_for_upgrade_not_bootstrapped(self, roll_monitor_cluster, + hookenv, + version, exists): + exists.return_value = False + version.side_effect = ['firefly', 'hammer'] + + self.test_config.set_previous('source', "cloud:trusty-juno") + self.test_config.set('source', 'cloud:trusty-kilo') + self.test_config.set('key', 'key') + + hookenv.config.side_effect = self.test_config + check_for_upgrade() + + roll_monitor_cluster.assert_not_called() + exists.assert_called_with( + "/var/lib/ceph/osd/ceph.client.osd-upgrade.keyring") + + @patch('ceph_hooks.os.path.exists') + @patch('ceph_hooks.ceph.dirs_need_ownership_update') + @patch('ceph_hooks.add_source') + @patch('ceph_hooks.ceph.is_bootstrapped') + @patch('ceph_hooks.hookenv') + @patch('ceph_hooks.ceph.roll_monitor_cluster') + @patch('utils.find_filestore_osds') + def test_check_for_upgrade_from_pike_to_queens(self, find_filestore_osds, + roll_monitor_cluster, + hookenv, is_bootstrapped, + add_source, + dirs_need_ownership_update, + exists): + exists.return_value = True + dirs_need_ownership_update.return_value = False + is_bootstrapped.return_value = True + hookenv.config.side_effect = self.test_config + self.test_config.set('key', 'some-key') + self.test_config.set_previous('source', 'cloud:xenial-pike') + self.test_config.set('source', 'cloud:xenial-queens') + check_for_upgrade() + roll_monitor_cluster.assert_not_called() + add_source.assert_called_with('cloud:xenial-queens', 'some-key') + + @patch('ceph_hooks.os.path.exists') + @patch('ceph_hooks.ceph.dirs_need_ownership_update') + @patch('ceph_hooks.add_source') + @patch('ceph_hooks.ceph.is_bootstrapped') + @patch('ceph_hooks.hookenv') + @patch('ceph_hooks.ceph.roll_monitor_cluster') + @patch('utils.find_filestore_osds') + def test_check_for_upgrade_from_rocky_to_stein(self, find_filestore_osds, + roll_monitor_cluster, + hookenv, is_bootstrapped, + add_source, + dirs_need_ownership_update, + exists): + exists.return_value = True + dirs_need_ownership_update.return_value = False + is_bootstrapped.return_value = True + hookenv.config.side_effect = self.test_config + self.test_config.set('key', 'some-key') + self.test_config.set_previous('source', 'cloud:bionic-rocky') + self.test_config.set('source', 'cloud:bionic-stein') + check_for_upgrade() + roll_monitor_cluster.assert_not_called() + add_source.assert_called_with('cloud:bionic-stein', 'some-key') + + @patch('ceph_hooks.os.path.exists') + @patch('ceph_hooks.ceph.dirs_need_ownership_update') + @patch('ceph_hooks.add_source') + @patch('ceph_hooks.ceph.is_bootstrapped') + @patch('ceph_hooks.hookenv') + @patch('ceph_hooks.ceph.roll_monitor_cluster') + @patch('utils.find_filestore_osds') + def test_check_for_upgrade_reef_filestore(self, find_filestore_osds, + roll_monitor_cluster, + hookenv, is_bootstrapped, + add_source, + dirs_need_ownership_update, + exists): + exists.return_value = True + is_bootstrapped.return_value = True + find_filestore_osds.return_value = ['ceph-0'] + hookenv.config.side_effect = self.test_config + self.test_config.set('key', 'some-key') + self.test_config.set_previous('source', 'cloud:jammy-antelope') + self.test_config.set('source', 'cloud:jammy-bobcat') + check_for_upgrade() + roll_monitor_cluster.assert_not_called() + dirs_need_ownership_update.assert_not_called() + + +class UpgradeUtilTestCase(CharmTestCase): + @patch('ceph_hooks.relation_ids') + @patch('ceph_hooks.log') + @patch('ceph_hooks.relation_set') + def test_notify_mon_of_upgrade(self, relation_set, log, relation_ids): + relation_ids_to_check = 
['1', '2', '3'] + relation_ids.return_value = relation_ids_to_check + release = 'luminous' + + notify_mon_of_upgrade(release) + + self.assertEqual(log.call_count, len(relation_ids_to_check)) + relation_ids.assert_called_once_with('mon') + set_dict = dict(ceph_release=release) + relation_set_calls = [ + call(relation_id=relation_ids_to_check[0], + relation_settings=set_dict), + call(relation_id=relation_ids_to_check[1], + relation_settings=set_dict), + call(relation_id=relation_ids_to_check[2], + relation_settings=set_dict), + ] + relation_set.assert_has_calls(relation_set_calls) diff --git a/ceph-osd/unit_tests/test_utils.py b/ceph-osd/unit_tests/test_utils.py new file mode 100644 index 00000000..7d4531b2 --- /dev/null +++ b/ceph-osd/unit_tests/test_utils.py @@ -0,0 +1,166 @@ +# Copyright 2016-2021 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import unittest +import os +import sys +import yaml + +from contextlib import contextmanager +from unittest.mock import patch, MagicMock + +# python-apt is not installed as part of test-requirements but is imported by +# some charmhelpers modules so create a fake import. +mock_apt = MagicMock() +sys.modules['apt'] = mock_apt +mock_apt.apt_pkg = MagicMock() + + +def load_config(): + ''' + Walk backwards from __file__ looking for config.yaml, load and return the + 'options' section' + ''' + config = None + f = __file__ + while config is None: + d = os.path.dirname(f) + if os.path.isfile(os.path.join(d, 'config.yaml')): + config = os.path.join(d, 'config.yaml') + break + f = d + + if not config: + logging.error('Could not find config.yaml in any parent directory ' + 'of {}. '.format(f)) + raise Exception + + return yaml.safe_load(open(config).read())['options'] + + +def get_default_config(): + ''' + Load default charm config from config.yaml return as a dict. + If no default is set in config.yaml, its value is None. 
+ ''' + default_config = {} + config = load_config() + for k, v in config.items(): + if 'default' in v: + default_config[k] = v['default'] + else: + default_config[k] = None + return default_config + + +class CharmTestCase(unittest.TestCase): + + def setUp(self, obj=None, patches=None): + super(CharmTestCase, self).setUp() + self.patches = patches or [] + self.obj = obj or [] + self.test_config = TestConfig() + self.test_relation = TestRelation() + self.patch_all() + + def patch(self, method): + _m = patch.object(self.obj, method) + mock = _m.start() + self.addCleanup(_m.stop) + return mock + + def patch_all(self): + for method in self.patches: + setattr(self, method, self.patch(method)) + + +class TestConfig(object): + + def __init__(self): + self.config = get_default_config() + self.config_changed = {} + self.config_changed.setdefault(False) + self._previous = get_default_config() + + def __call__(self, key=None): + if key: + return self[key] + else: + return self + + def __getitem__(self, item): + return self.config[item] + + def get(self, attr=None): + if not attr: + return self.get_all() + try: + return self.config[attr] + except KeyError: + return None + + def get_all(self): + return self.config + + def set(self, attr, value): + if attr not in self.config: + raise KeyError + self.config[attr] = value + + def changed(self, attr): + return self.config_changed[attr] + + def set_changed(self, attr, changed=True): + self.config_changed[attr] = changed + + def set_previous(self, key, value): + self._previous[key] = value + + def previous(self, key): + return self._previous[key] + + +class TestRelation(object): + + def __init__(self, relation_data={}): + self.relation_data = relation_data + + def set(self, relation_data): + self.relation_data = relation_data + + def get(self, attr=None, unit=None, rid=None): + if attr is None: + return self.relation_data + elif attr in self.relation_data: + return self.relation_data[attr] + return None + + +@contextmanager +def patch_open(): + '''Patch open() to allow mocking both open() itself and the file that is + yielded. 
+
+    Yields the mock for "open" and "file", respectively.'''
+    import io  # the py2 builtin `file` is gone in py3; spec an io type instead
+    mock_open = MagicMock(spec=open)
+    mock_file = MagicMock(spec=io.FileIO)
+
+    @contextmanager
+    def stub_open(*args, **kwargs):
+        mock_open(*args, **kwargs)
+        yield mock_file
+
+    with patch('builtins.open', stub_open):
+        yield mock_open, mock_file
diff --git a/ceph-proxy/.coveragerc b/ceph-proxy/.coveragerc
new file mode 100644
index 00000000..7f7b5be3
--- /dev/null
+++ b/ceph-proxy/.coveragerc
@@ -0,0 +1,7 @@
+[report]
+# Regexes for lines to exclude from consideration
+exclude_lines =
+    if __name__ == .__main__.:
+include=
+    hooks/hooks.py
+    hooks/ceph*.py
diff --git a/ceph-proxy/.gitignore b/ceph-proxy/.gitignore
new file mode 100644
index 00000000..b81658ef
--- /dev/null
+++ b/ceph-proxy/.gitignore
@@ -0,0 +1,10 @@
+bin
+.coverage
+.testrepository
+.tox
+*.sw[nop]
+*.charm
+*.pyc
+.unit-state.db
+.stestr
+__pycache__
diff --git a/ceph-proxy/.gitreview b/ceph-proxy/.gitreview
new file mode 100644
index 00000000..d1e8ee12
--- /dev/null
+++ b/ceph-proxy/.gitreview
@@ -0,0 +1,6 @@
+[gerrit]
+host=review.opendev.org
+port=29418
+project=openstack/charm-ceph-proxy.git
+
+defaultbranch=stable/squid-jammy
diff --git a/ceph-proxy/.project b/ceph-proxy/.project
new file mode 100644
index 00000000..17434fc2
--- /dev/null
+++ b/ceph-proxy/.project
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>ceph-mon</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.python.pydev.PyDevBuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.python.pydev.pythonNature</nature>
+	</natures>
+</projectDescription>
diff --git a/ceph-proxy/.pydevproject b/ceph-proxy/.pydevproject
new file mode 100644
index 00000000..683d89d8
--- /dev/null
+++ b/ceph-proxy/.pydevproject
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?eclipse-pydev version="1.0"?><pydev_project>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
+<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
+<path>/ceph-mon/hooks</path>
+<path>/ceph-mon/unit_tests</path>
+<path>/ceph-mon/tests</path>
+<path>/ceph-mon/actions</path>
+</pydev_pathproperty>
+</pydev_project>
diff --git a/ceph-proxy/.stestr.conf b/ceph-proxy/.stestr.conf
new file mode 100644
index 00000000..5fcccaca
--- /dev/null
+++ b/ceph-proxy/.stestr.conf
@@ -0,0 +1,3 @@
+[DEFAULT]
+test_path=./unit_tests
+top_dir=./
diff --git a/ceph-proxy/.zuul.yaml b/ceph-proxy/.zuul.yaml
new file mode 100644
index 00000000..d6c1104b
--- /dev/null
+++ b/ceph-proxy/.zuul.yaml
@@ -0,0 +1,6 @@
+- project:
+    templates:
+      - openstack-python3-charm-zed-jobs
+      - openstack-python3-charm-yoga-jobs
+      - openstack-python3-charm-jobs
+
diff --git a/ceph-proxy/LICENSE b/ceph-proxy/LICENSE
new file mode 100644
index 00000000..d6456956
--- /dev/null
+++ b/ceph-proxy/LICENSE
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/ceph-proxy/Makefile b/ceph-proxy/Makefile new file mode 100644 index 00000000..09b701f6 --- /dev/null +++ b/ceph-proxy/Makefile @@ -0,0 +1,27 @@ +#!/usr/bin/make +PYTHON := /usr/bin/env python3 + +lint: + @tox -e pep8 + +test: + @echo Starting unit tests... + @tox -e py27 + +functional_test: + @echo Starting Amulet tests... 
+ @tox -e func27 + +bin/charm_helpers_sync.py: + @mkdir -p bin + @curl -o bin/charm_helpers_sync.py https://raw.githubusercontent.com/juju/charm-helpers/master/tools/charm_helpers_sync/charm_helpers_sync.py + +sync: bin/charm_helpers_sync.py + $(PYTHON) bin/charm_helpers_sync.py -c charm-helpers-hooks.yaml + +bin/git_sync.py: + @mkdir -p bin + @wget -O bin/git_sync.py https://raw.githubusercontent.com/CanonicalLtd/git-sync/master/git_sync.py + +ceph-sync: bin/git_sync.py + $(PYTHON) bin/git_sync.py -d lib -s https://github.com/openstack/charms.ceph.git diff --git a/ceph-proxy/README.md b/ceph-proxy/README.md new file mode 100644 index 00000000..b586e27a --- /dev/null +++ b/ceph-proxy/README.md @@ -0,0 +1,78 @@ +# Overview + +[Ceph][ceph-upstream] is a unified, distributed storage system designed for +excellent performance, reliability, and scalability. + +The ceph-proxy charm deploys a proxy that acts as a [ceph-mon][ceph-mon-charm] +application for an external Ceph cluster. It joins a non-charmed Ceph cluster +to a Juju model. + +The charm works with traditional Ceph charm clients (e.g. cinder, glance, +nova-compute) but it also supports the [ceph-radosgw][ceph-radosgw-charm] and +[ceph-fs][ceph-fs-charm] charms. + +# Usage + +## Configuration + +This section covers common and/or important configuration options. See file +`config.yaml` for the full list of options, along with their descriptions and +default values. See the [Juju documentation][juju-docs-config-apps] for details +on configuring applications. + +#### `fsid` + +The `fsid` option supplies the UUID of the external cluster. + +#### `admin-key` + +The `admin-key` option supplies the admin Cephx key of the external cluster. + +#### `monitor-hosts` + +The `monitor-hosts` option supplies the network addresses (and ports) of the +Monitors of the external cluster. + +## Deployment + +Let file ``ceph-proxy.yaml`` contain the deployment configuration: + +```yaml + ceph-proxy: + fsid: a4f1fb08-c83d-11ea-8f4a-635b3b062931 + admin-key: AQCJvBFfWX+GLhAAln5dFd1rZekcGLyMmy58bQ== + monitor-hosts: '10.246.114.21:6789 10.246.114.22:6789 10.246.114.7:6789' +``` + +To deploy: + + juju deploy --config ceph-proxy.yaml ceph-proxy + +Now add relations as you normally would between a ceph-mon application and +another application, except substitute ceph-proxy for ceph-mon. For instance, +to use the external Ceph cluster as the backend for an existing glance +application: + + juju add-relation ceph-proxy:client glance:ceph + +## Actions + +Many of the ceph-mon charm's actions are supported. See file `actions.yaml` for +the full list of actions, along with their descriptions. + +# Bugs + +Please report bugs on [Launchpad][lp-bugs-charm-ceph-proxy]. + +For general charm questions refer to the [OpenStack Charm Guide][cg]. 
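+
+For example, the actions listed in `actions.yaml` can be invoked with
+`juju run-action` (Juju 2.x syntax; the unit name here is illustrative):
+
+    juju run-action --wait ceph-proxy/0 list-pools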
+
+<!-- LINKS -->
+
+[ceph-upstream]: https://ceph.io
+[cg]: https://docs.openstack.org/charm-guide
+[ceph-mon-charm]: https://charmhub.io/ceph-mon
+[ceph-fs-charm]: https://charmhub.io/ceph-fs
+[ceph-radosgw-charm]: https://charmhub.io/ceph-radosgw
+[juju-docs-actions]: https://charmhub.io/docs/actions
+[juju-docs-config-apps]: https://juju.is/docs/configuring-applications
+[lp-bugs-charm-ceph-proxy]: https://bugs.launchpad.net/charm-ceph-proxy/+filebug
diff --git a/ceph-proxy/TODO b/ceph-proxy/TODO
new file mode 100644
index 00000000..22e0889d
--- /dev/null
+++ b/ceph-proxy/TODO
@@ -0,0 +1,6 @@
+Ceph Charm
+==========
+
+ * fix tunables (http://tracker.newdream.net/issues/2210)
+ * more than 192 PGs
+ * fixup data placement in crush to be host not osd driven
diff --git a/ceph-proxy/actions.yaml b/ceph-proxy/actions.yaml
new file mode 100644
index 00000000..18e6a498
--- /dev/null
+++ b/ceph-proxy/actions.yaml
@@ -0,0 +1,216 @@
+pause-health:
+  description: Pause ceph health operations across the entire ceph cluster
+resume-health:
+  description: Resume ceph health operations across the entire ceph cluster
+create-cache-tier:
+  description: Create a new cache tier
+  params:
+    backer-pool:
+      type: string
+      description: |
+        The name of the pool that will back the cache tier. Also known as
+        the cold pool
+    cache-pool:
+      type: string
+      description: |
+        The name of the pool that will be the cache pool. Also known
+        as the hot pool
+    cache-mode:
+      type: string
+      default: writeback
+      enum: [writeback, readonly]
+      description: |
+        The mode of the caching tier. Please refer to the Ceph docs for more
+        information
+  required: [backer-pool, cache-pool]
+  additionalProperties: false
+remove-cache-tier:
+  description: Remove an existing cache tier
+  params:
+    backer-pool:
+      type: string
+      description: |
+        The name of the pool that backs the cache tier. Also known as
+        the cold pool
+    cache-pool:
+      type: string
+      description: |
+        The name of the pool that is the cache pool. Also known
+        as the hot pool
+  required: [backer-pool, cache-pool]
+  additionalProperties: false
+
+create-pool:
+  description: Creates a pool
+  params:
+    name:
+      type: string
+      description: The name of the pool
+    profile-name:
+      type: string
+      description: The crush profile to use for this pool. The ruleset must exist first.
+    pool-type:
+      type: string
+      default: "replicated"
+      enum: [replicated, erasure]
+      description: |
+        The pool type which may either be replicated to recover from lost OSDs by keeping multiple copies of the
+        objects or erasure to get a kind of generalized RAID5 capability.
+    replicas:
+      type: integer
+      default: 3
+      description: |
+        For the replicated pool this is the number of replicas to store of each object.
+    erasure-profile-name:
+      type: string
+      default: default
+      description: |
+        The name of the erasure coding profile to use for this pool. Note this profile must exist
+        before calling create-pool
+  required: [name]
+  additionalProperties: false
+create-erasure-profile:
+  description: Create a new erasure code profile to use on a pool.
+  params:
+    name:
+      type: string
+      description: The name of the profile
+    failure-domain:
+      type: string
+      default: host
+      enum: [chassis, datacenter, host, osd, pdu, pod, rack, region, room, root, row]
+      description: |
+        Setting failure-domain=host creates a CRUSH ruleset that ensures no two chunks are stored on the same host.
+    plugin:
+      type: string
+      default: "jerasure"
+      enum: [jerasure, isa, lrc, shec]
+      description: |
+        The erasure plugin to use for this profile.
+ See http://docs.ceph.com/docs/master/rados/operations/erasure-code-profile/ for more details + data-chunks: + type: integer + default: 3 + description: | + The number of data chunks, i.e. the number of chunks in which the original object is divided. For instance, + if K = 2, a 10KB object will be divided into K objects of 5KB each. + coding-chunks: + type: integer + default: 2 + description: | + The number of coding chunks, i.e. the number of additional chunks computed by the encoding functions. + If there are 2 coding chunks, it means 2 OSDs can be out without losing data. + locality-chunks: + type: integer + description: | + Group the coding and data chunks into sets of size locality. For instance, for k=4 and m=2, when locality=3 + two groups of three are created. Each set can be recovered without reading chunks from another set. + durability-estimator: + type: integer + description: | + The number of parity chunks, each of which includes each data chunk in its calculation range. The number is used + as a durability estimator. For instance, if c=2, 2 OSDs can be down without losing data. + required: [name, data-chunks, coding-chunks] + additionalProperties: false +get-erasure-profile: + description: Display an erasure code profile. + params: + name: + type: string + description: The name of the profile + required: [name] + additionalProperties: false +delete-erasure-profile: + description: Deletes an erasure code profile. + params: + name: + type: string + description: The name of the profile + required: [name] + additionalProperties: false +list-erasure-profiles: + description: List the names of all erasure code profiles + additionalProperties: false +list-pools: + description: List your cluster's pools + additionalProperties: false +set-pool-max-bytes: + description: Set pool quotas for the maximum number of bytes. + params: + max: + type: integer + description: The maximum quota, in bytes, to set on the pool + pool-name: + type: string + description: The name of the pool + required: [pool-name, max] + additionalProperties: false +delete-pool: + description: Deletes the named pool + params: + pool-name: + type: string + description: The name of the pool + required: [pool-name] + additionalProperties: false +rename-pool: + description: Rename a pool + params: + pool-name: + type: string + description: The name of the pool + new-name: + type: string + description: The new name of the pool + required: [pool-name, new-name] + additionalProperties: false +pool-statistics: + description: Show a pool's utilization statistics + additionalProperties: false +snapshot-pool: + description: Snapshot a pool + params: + pool-name: + type: string + description: The name of the pool + snapshot-name: + type: string + description: The name of the snapshot + required: [snapshot-name, pool-name] + additionalProperties: false +remove-pool-snapshot: + description: Remove a pool snapshot + params: + pool-name: + type: string + description: The name of the pool + snapshot-name: + type: string + description: The name of the snapshot + required: [snapshot-name, pool-name] + additionalProperties: false +pool-set: + description: Set a value for the pool + params: + pool-name: + type: string + description: The pool to set this variable on.
+ key: + type: string + description: Any valid Ceph key from http://docs.ceph.com/docs/master/rados/operations/pools/#set-pool-values + value: + type: string + description: The value to set + required: [key, value, pool-name] + additionalProperties: false +pool-get: + description: Get a value for the pool + params: + pool-name: + type: string + description: The pool to get this variable from. + key: + type: string + description: Any valid Ceph key from http://docs.ceph.com/docs/master/rados/operations/pools/#get-pool-values + required: [key, pool-name] + additionalProperties: false diff --git a/ceph-proxy/actions/__init__.py b/ceph-proxy/actions/__init__.py new file mode 100644 index 00000000..ff2381cc --- /dev/null +++ b/ceph-proxy/actions/__init__.py @@ -0,0 +1,3 @@ +__author__ = 'chris' +import sys +sys.path.append('hooks') diff --git a/ceph-proxy/actions/ceph_ops.py b/ceph-proxy/actions/ceph_ops.py new file mode 100755 index 00000000..e70ebc7e --- /dev/null +++ b/ceph-proxy/actions/ceph_ops.py @@ -0,0 +1,103 @@ +__author__ = 'chris' +from subprocess import CalledProcessError, check_output +import sys + +sys.path.append('hooks') + +import rados +from charmhelpers.core.hookenv import log, action_get, action_fail +from charmhelpers.contrib.storage.linux.ceph import pool_set, \ + set_pool_quota, snapshot_pool, remove_pool_snapshot + + +# Connect to Ceph via Librados and return a connection +def connect(): + try: + cluster = rados.Rados(conffile='/etc/ceph/ceph.conf') + cluster.connect() + return cluster + except (rados.IOError, + rados.ObjectNotFound, + rados.NoData, + rados.NoSpace, + rados.PermissionError) as rados_error: + log("librados failed with error: {}".format(str(rados_error))) + + +def create_crush_rule(): + # Shell out + pass + + +def list_pools(): + try: + cluster = connect() + pool_list = cluster.list_pools() + cluster.shutdown() + return pool_list + except (rados.IOError, + rados.ObjectNotFound, + rados.NoData, + rados.NoSpace, + rados.PermissionError) as e: + action_fail(str(e)) + + +def pool_get(): + key = action_get("key") + pool_name = action_get("pool-name") + try: + value = check_output(['ceph', 'osd', 'pool', 'get', pool_name, key]) + return value + except CalledProcessError as e: + action_fail(str(e)) + + +def set_pool(): + key = action_get("key") + value = action_get("value") + pool_name = action_get("pool-name") + pool_set(service='ceph', pool_name=pool_name, key=key, value=value) + + +def pool_stats(): + try: + pool_name = action_get("pool-name") + cluster = connect() + ioctx = cluster.open_ioctx(pool_name) + stats = ioctx.get_stats() + ioctx.close() + cluster.shutdown() + return stats + except (rados.Error, + rados.IOError, + rados.ObjectNotFound, + rados.NoData, + rados.NoSpace, + rados.PermissionError) as e: + action_fail(str(e)) + + +def delete_pool_snapshot(): + pool_name = action_get("pool-name") + snapshot_name = action_get("snapshot-name") + remove_pool_snapshot(service='ceph', + pool_name=pool_name, + snapshot_name=snapshot_name) + + +# Note only one or the other can be set +def set_pool_max_bytes(): + pool_name = action_get("pool-name") + max_bytes = action_get("max") + set_pool_quota(service='ceph', + pool_name=pool_name, + max_bytes=max_bytes) + + +def snapshot_ceph_pool(): + pool_name = action_get("pool-name") + snapshot_name = action_get("snapshot-name") + snapshot_pool(service='ceph', + pool_name=pool_name, + snapshot_name=snapshot_name) diff --git a/ceph-proxy/actions/create-cache-tier b/ceph-proxy/actions/create-cache-tier new file mode
100755 index 00000000..97a1d1ef --- /dev/null +++ b/ceph-proxy/actions/create-cache-tier @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 +__author__ = 'chris' +import os +from subprocess import CalledProcessError +import sys + +_path = os.path.dirname(os.path.realpath(__file__)) +_hooks = os.path.abspath(os.path.join(_path, '../hooks')) +_root = os.path.abspath(os.path.join(_path, '..')) + + +def _add_path(path): + if path not in sys.path: + sys.path.insert(1, path) + +_add_path(_hooks) +_add_path(_root) + + +from charmhelpers.contrib.storage.linux.ceph import Pool, pool_exists +from charmhelpers.core.hookenv import action_get, config, log, action_fail + + +def make_cache_tier(): + backer_pool = action_get("backer-pool") + cache_pool = action_get("cache-pool") + cache_mode = action_get("cache-mode") + user = config('admin-user') + + # Pre flight checks + if not pool_exists(user, backer_pool): + log("Please create {} pool before calling create-cache-tier".format( + backer_pool)) + action_fail("create-cache-tier failed. Backer pool {} must exist " + "before calling this".format(backer_pool)) + + if not pool_exists(user, cache_pool): + log("Please create {} pool before calling create-cache-tier".format( + cache_pool)) + action_fail("create-cache-tier failed. Cache pool {} must exist " + "before calling this".format(cache_pool)) + + pool = Pool(service=user, name=backer_pool) + try: + pool.add_cache_tier(cache_pool=cache_pool, mode=cache_mode) + except CalledProcessError as err: + log("Add cache tier failed with message: {}".format( + str(err))) + action_fail("create-cache-tier failed. Add cache tier failed with " + "message: {}".format(str(err))) + + +if __name__ == '__main__': + make_cache_tier() diff --git a/ceph-proxy/actions/create-erasure-profile b/ceph-proxy/actions/create-erasure-profile new file mode 100755 index 00000000..016862c8 --- /dev/null +++ b/ceph-proxy/actions/create-erasure-profile @@ -0,0 +1,101 @@ +#!/usr/bin/env python3 +import os +from subprocess import CalledProcessError +import sys + +_path = os.path.dirname(os.path.realpath(__file__)) +_hooks = os.path.abspath(os.path.join(_path, '../hooks')) +_root = os.path.abspath(os.path.join(_path, '..')) + + +def _add_path(path): + if path not in sys.path: + sys.path.insert(1, path) + +_add_path(_hooks) +_add_path(_root) + + +from charmhelpers.contrib.storage.linux.ceph import create_erasure_profile +from charmhelpers.core.hookenv import action_get, config, log, action_fail + + +def make_erasure_profile(): + name = action_get("name") + plugin = action_get("plugin") + failure_domain = action_get("failure-domain") + user = config('admin-user') + # jerasure requires k+m + # isa requires k+m + # lrc requires k+m+l + # shec requires k+m+c + + if plugin == "jerasure": + k = action_get("data-chunks") + m = action_get("coding-chunks") + try: + create_erasure_profile(service=user, + erasure_plugin_name=plugin, + profile_name=name, + data_chunks=k, + coding_chunks=m, + failure_domain=failure_domain) + except CalledProcessError as e: + log(str(e)) + action_fail("Create erasure profile failed with " + "message: {}".format(str(e))) + elif plugin == "isa": + k = action_get("data-chunks") + m = action_get("coding-chunks") + try: + create_erasure_profile(service=user, + erasure_plugin_name=plugin, + profile_name=name, + data_chunks=k, + coding_chunks=m, + failure_domain=failure_domain) + except CalledProcessError as e: + log(str(e)) + action_fail("Create erasure profile failed with " + "message: {}".format(str(e))) + elif plugin == "lrc": + k = action_get("data-chunks") + m = action_get("coding-chunks") + locality = action_get("locality-chunks") + try: + create_erasure_profile(service=user, + erasure_plugin_name=plugin, + profile_name=name, + data_chunks=k, + coding_chunks=m, + locality=locality, + failure_domain=failure_domain) + except CalledProcessError as e: + log(str(e)) + action_fail("Create erasure profile failed with " + "message: {}".format(str(e))) + elif plugin == "shec": + k = action_get("data-chunks") + m = action_get("coding-chunks") + c = action_get("durability-estimator") + try: + create_erasure_profile(service=user, + erasure_plugin_name=plugin, + profile_name=name, + data_chunks=k, + coding_chunks=m, + durability_estimator=c, + failure_domain=failure_domain) + except CalledProcessError as e: + log(str(e)) + action_fail("Create erasure profile failed with " + "message: {}".format(str(e))) + else: + # Unknown erasure plugin + action_fail("Unknown erasure-plugin type of {}. " + "Only jerasure, isa, lrc or shec is " + "allowed".format(plugin)) + + +if __name__ == '__main__': + make_erasure_profile() diff --git a/ceph-proxy/actions/create-pool b/ceph-proxy/actions/create-pool new file mode 100755 index 00000000..ee6a7798 --- /dev/null +++ b/ceph-proxy/actions/create-pool @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +import os +import sys + +_path = os.path.dirname(os.path.realpath(__file__)) +_hooks = os.path.abspath(os.path.join(_path, '../hooks')) +_root = os.path.abspath(os.path.join(_path, '..')) + + +def _add_path(path): + if path not in sys.path: + sys.path.insert(1, path) + +_add_path(_hooks) +_add_path(_root) + +from subprocess import CalledProcessError +from charmhelpers.core.hookenv import action_get, config, log, action_fail +from charmhelpers.contrib.storage.linux.ceph import ErasurePool, ReplicatedPool + + +def create_pool(): + pool_name = action_get("name") + pool_type = action_get("pool-type") + user = config('admin-user') + try: + if pool_type == "replicated": + replicas = action_get("replicas") + replicated_pool = ReplicatedPool(name=pool_name, + service=user, + replicas=replicas) + replicated_pool.create() + + elif pool_type == "erasure": + crush_profile_name = action_get("erasure-profile-name") + erasure_pool = ErasurePool(name=pool_name, + erasure_code_profile=crush_profile_name, + service=user) + erasure_pool.create() + else: + log("Unknown pool type of {}. Only erasure or replicated is " + "allowed".format(pool_type)) + action_fail("Unknown pool type of {}. Only erasure or replicated " + "is allowed".format(pool_type)) + except CalledProcessError as e: + action_fail("Pool creation failed because of a failed process. " + "Ret Code: {} Message: {}".format(e.returncode, str(e))) + + +if __name__ == '__main__': + create_pool() diff --git a/ceph-proxy/actions/delete-erasure-profile b/ceph-proxy/actions/delete-erasure-profile new file mode 100755 index 00000000..7df8c445 --- /dev/null +++ b/ceph-proxy/actions/delete-erasure-profile @@ -0,0 +1,35 @@ +#!/usr/bin/env python3 +from subprocess import CalledProcessError + +__author__ = 'chris' +import os +import sys + +_path = os.path.dirname(os.path.realpath(__file__)) +_hooks = os.path.abspath(os.path.join(_path, '../hooks')) +_root = os.path.abspath(os.path.join(_path, '..')) + + +def _add_path(path): + if path not in sys.path: + sys.path.insert(1, path) + +_add_path(_hooks) +_add_path(_root) + +from charmhelpers.contrib.storage.linux.ceph import remove_erasure_profile +from charmhelpers.core.hookenv import action_get, config, log, action_fail + + +def delete_erasure_profile(): + name = action_get("name") + + try: + remove_erasure_profile(service=config('admin-user'), profile_name=name) + except CalledProcessError as e: + action_fail("Remove erasure profile failed with error: {}".format( + str(e))) + + +if __name__ == '__main__': + delete_erasure_profile() diff --git a/ceph-proxy/actions/delete-pool b/ceph-proxy/actions/delete-pool new file mode 100755 index 00000000..68b89b23 --- /dev/null +++ b/ceph-proxy/actions/delete-pool @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 +import os +import sys + +_path = os.path.dirname(os.path.realpath(__file__)) +_hooks = os.path.abspath(os.path.join(_path, '../hooks')) +_root = os.path.abspath(os.path.join(_path, '..')) + + +def _add_path(path): + if path not in sys.path: + sys.path.insert(1, path) + +_add_path(_hooks) +_add_path(_root) + +import rados +from ceph_ops import connect +from charmhelpers.core.hookenv import action_get, log, action_fail + + +def remove_pool(): + try: + pool_name = action_get("name") + cluster = connect() + log("Deleting pool: {}".format(pool_name)) + cluster.delete_pool(str(pool_name)) # Convert from unicode + cluster.shutdown() + except (rados.IOError, + rados.ObjectNotFound, + rados.NoData, + rados.NoSpace, + rados.PermissionError) as e: + log(str(e)) + action_fail(str(e)) + + +if __name__ == '__main__': + remove_pool() diff --git a/ceph-proxy/actions/get-erasure-profile b/ceph-proxy/actions/get-erasure-profile new file mode 100755 index 00000000..1f6b311d --- /dev/null +++ b/ceph-proxy/actions/get-erasure-profile @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 +__author__ = 'chris' +import os +import sys + +_path = os.path.dirname(os.path.realpath(__file__)) +_hooks = os.path.abspath(os.path.join(_path, '../hooks')) +_root = os.path.abspath(os.path.join(_path, '..')) + + +def _add_path(path): + if path not in sys.path: + sys.path.insert(1, path) + +_add_path(_hooks) +_add_path(_root) + +from charmhelpers.contrib.storage.linux.ceph import get_erasure_profile +from charmhelpers.core.hookenv import action_get, action_set, config + + +def show_erasure_profile(): + name = action_get("name") + out = get_erasure_profile(service=config('admin-user'), name=name) + action_set({'message': out}) + + +if __name__ == '__main__': + show_erasure_profile() diff --git a/ceph-proxy/actions/list-erasure-profiles b/ceph-proxy/actions/list-erasure-profiles new file mode 100755 index 00000000..caaa68c4 --- /dev/null +++ b/ceph-proxy/actions/list-erasure-profiles @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 +__author__ = 'chris' +import os +from subprocess import check_output, CalledProcessError +import sys + +_path = 
os.path.dirname(os.path.realpath(__file__)) +_hooks = os.path.abspath(os.path.join(_path, '../hooks')) +_root = os.path.abspath(os.path.join(_path, '..')) + + +def _add_path(path): + if path not in sys.path: + sys.path.insert(1, path) + +_add_path(_hooks) +_add_path(_root) + +from charmhelpers.core.hookenv import log, config, action_set, action_fail + +if __name__ == '__main__': + try: + out = check_output(['ceph', + '--id', config('admin-user'), + 'osd', + 'erasure-code-profile', + 'ls']).decode('UTF-8') + action_set({'message': out}) + except CalledProcessError as e: + log(str(e)) + action_fail("Listing erasure profiles failed with error: {}".format( + str(e))) diff --git a/ceph-proxy/actions/list-pools b/ceph-proxy/actions/list-pools new file mode 100755 index 00000000..401619cd --- /dev/null +++ b/ceph-proxy/actions/list-pools @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +__author__ = 'chris' +import os +from subprocess import check_output, CalledProcessError +import sys + +_path = os.path.dirname(os.path.realpath(__file__)) +_hooks = os.path.abspath(os.path.join(_path, '../hooks')) +_root = os.path.abspath(os.path.join(_path, '..')) + + +def _add_path(path): + if path not in sys.path: + sys.path.insert(1, path) + +_add_path(_hooks) +_add_path(_root) + +from charmhelpers.core.hookenv import log, config, action_set, action_fail + +if __name__ == '__main__': + try: + out = check_output(['ceph', '--id', config('admin-user'), + 'osd', 'lspools']).decode('UTF-8') + action_set({'message': out}) + except CalledProcessError as e: + log(str(e)) + action_fail("List pools failed with error: {}".format(str(e))) diff --git a/ceph-proxy/actions/pause-health b/ceph-proxy/actions/pause-health new file mode 100755 index 00000000..207c4f65 --- /dev/null +++ b/ceph-proxy/actions/pause-health @@ -0,0 +1,6 @@ +#!/bin/bash + +set -eux + +ceph osd set nodown +ceph osd set noout \ No newline at end of file diff --git a/ceph-proxy/actions/pool-get b/ceph-proxy/actions/pool-get new file mode 100755 index 00000000..f1a5077d --- /dev/null +++ b/ceph-proxy/actions/pool-get @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +__author__ = 'chris' +import os +from subprocess import check_output, CalledProcessError +import sys + +_path = os.path.dirname(os.path.realpath(__file__)) +_hooks = os.path.abspath(os.path.join(_path, '../hooks')) +_root = os.path.abspath(os.path.join(_path, '..')) + + +def _add_path(path): + if path not in sys.path: + sys.path.insert(1, path) + +_add_path(_hooks) +_add_path(_root) + +from charmhelpers.core.hookenv import log, config, action_set, action_get, action_fail + +if __name__ == '__main__': + name = action_get('pool-name') + key = action_get('key') + try: + out = check_output(['ceph', '--id', config('admin-user'), + 'osd', 'pool', 'get', name, key]).decode('UTF-8') + action_set({'message': out}) + except CalledProcessError as e: + log(str(e)) + action_fail("Pool get failed with message: {}".format(str(e))) diff --git a/ceph-proxy/actions/pool-set b/ceph-proxy/actions/pool-set new file mode 100755 index 00000000..44874eb2 --- /dev/null +++ b/ceph-proxy/actions/pool-set @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 +import os +from subprocess import CalledProcessError +import sys + +_path = os.path.dirname(os.path.realpath(__file__)) +_hooks = os.path.abspath(os.path.join(_path, '../hooks')) +_root = os.path.abspath(os.path.join(_path, '..')) + + +def _add_path(path): + if path not in sys.path: + sys.path.insert(1, path) + +_add_path(_hooks) +_add_path(_root) + 
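+# The _add_path() calls above put hooks/ and the charm root on sys.path so
+# that the imports below (ceph_broker, charmhelpers) resolve when this action
+# runs as a standalone script.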
+from charmhelpers.core.hookenv import action_get, config, log, action_fail +from ceph_broker import handle_set_pool_value + +if __name__ == '__main__': + name = action_get("pool-name") + key = action_get("key") + value = action_get("value") + request = {'name': name, + 'key': key, + 'value': value} + + try: + handle_set_pool_value(service=config('admin-user'), request=request) + except CalledProcessError as e: + log(str(e)) + action_fail("Setting pool key: {} and value: {} failed with " + "message: {}".format(key, value, str(e))) diff --git a/ceph-proxy/actions/pool-statistics b/ceph-proxy/actions/pool-statistics new file mode 100755 index 00000000..56e56a7a --- /dev/null +++ b/ceph-proxy/actions/pool-statistics @@ -0,0 +1,27 @@ +#!/usr/bin/env python3 +import os +import sys + +_path = os.path.dirname(os.path.realpath(__file__)) +_hooks = os.path.abspath(os.path.join(_path, '../hooks')) +_root = os.path.abspath(os.path.join(_path, '..')) + + +def _add_path(path): + if path not in sys.path: + sys.path.insert(1, path) + +_add_path(_hooks) +_add_path(_root) + +from subprocess import check_output, CalledProcessError +from charmhelpers.core.hookenv import log, config, action_set, action_fail + +if __name__ == '__main__': + try: + out = check_output(['ceph', '--id', config('admin-user'), + 'df']).decode('UTF-8') + action_set({'message': out}) + except CalledProcessError as e: + log(str(e)) + action_fail("ceph df failed with message: {}".format(str(e))) diff --git a/ceph-proxy/actions/remove-cache-tier b/ceph-proxy/actions/remove-cache-tier new file mode 100755 index 00000000..a6f8f2b6 --- /dev/null +++ b/ceph-proxy/actions/remove-cache-tier @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 +import os +from subprocess import CalledProcessError +import sys + +_path = os.path.dirname(os.path.realpath(__file__)) +_hooks = os.path.abspath(os.path.join(_path, '../hooks')) +_root = os.path.abspath(os.path.join(_path, '..')) + + +def _add_path(path): + if path not in sys.path: + sys.path.insert(1, path) + +_add_path(_hooks) +_add_path(_root) + +from charmhelpers.contrib.storage.linux.ceph import Pool, pool_exists +from charmhelpers.core.hookenv import action_get, config, log, action_fail + +__author__ = 'chris' + + +def delete_cache_tier(): + backer_pool = action_get("backer-pool") + cache_pool = action_get("cache-pool") + user = config('admin-user') + # Pre flight checks + if not pool_exists(user, backer_pool): + log("Backer pool {} must exist before calling this".format( + backer_pool)) + action_fail("remove-cache-tier failed. Backer pool {} must exist " + "before calling this".format(backer_pool)) + + if not pool_exists(user, cache_pool): + log("Cache pool {} must exist before calling this".format( + cache_pool)) + action_fail("remove-cache-tier failed. Cache pool {} must exist " + "before calling this".format(cache_pool)) + + pool = Pool(service=user, name=backer_pool) + try: + pool.remove_cache_tier(cache_pool=cache_pool) + except CalledProcessError as err: + log("Removing the cache tier failed with message: {}".format( + str(err))) + action_fail("remove-cache-tier failed. 
Removing the cache tier failed " + "with message: {}".format(str(err))) + + +if __name__ == '__main__': + delete_cache_tier() diff --git a/ceph-proxy/actions/remove-pool-snapshot b/ceph-proxy/actions/remove-pool-snapshot new file mode 100755 index 00000000..7569db5c --- /dev/null +++ b/ceph-proxy/actions/remove-pool-snapshot @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 +import os +import sys + +_path = os.path.dirname(os.path.realpath(__file__)) +_hooks = os.path.abspath(os.path.join(_path, '../hooks')) +_root = os.path.abspath(os.path.join(_path, '..')) + + +def _add_path(path): + if path not in sys.path: + sys.path.insert(1, path) + +_add_path(_hooks) +_add_path(_root) + +from subprocess import CalledProcessError +from charmhelpers.core.hookenv import action_get, config, log, action_fail +from charmhelpers.contrib.storage.linux.ceph import remove_pool_snapshot + +if __name__ == '__main__': + name = action_get("pool-name") + snapname = action_get("snapshot-name") + try: + remove_pool_snapshot(service=config('admin-user'), + pool_name=name, + snapshot_name=snapname) + except CalledProcessError as e: + log(str(e)) + action_fail("Remove pool snapshot failed with message: {}".format( + str(e))) diff --git a/ceph-proxy/actions/rename-pool b/ceph-proxy/actions/rename-pool new file mode 100755 index 00000000..c8508b78 --- /dev/null +++ b/ceph-proxy/actions/rename-pool @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +import os +import sys + +_path = os.path.dirname(os.path.realpath(__file__)) +_hooks = os.path.abspath(os.path.join(_path, '../hooks')) +_root = os.path.abspath(os.path.join(_path, '..')) + + +def _add_path(path): + if path not in sys.path: + sys.path.insert(1, path) + +_add_path(_hooks) +_add_path(_root) + +from subprocess import CalledProcessError +from charmhelpers.core.hookenv import action_get, config, log, action_fail +from charmhelpers.contrib.storage.linux.ceph import rename_pool + +if __name__ == '__main__': + name = action_get("pool-name") + new_name = action_get("new-name") + try: + rename_pool(service=config('admin-user'), old_name=name, new_name=new_name) + except CalledProcessError as e: + log(str(e)) + action_fail("Renaming pool failed with message: {}".format(str(e))) diff --git a/ceph-proxy/actions/resume-health b/ceph-proxy/actions/resume-health new file mode 100755 index 00000000..39d15a1f --- /dev/null +++ b/ceph-proxy/actions/resume-health @@ -0,0 +1,6 @@ +#!/bin/bash + +set -eux + +ceph osd unset nodown +ceph osd unset noout \ No newline at end of file diff --git a/ceph-proxy/actions/set-pool-max-bytes b/ceph-proxy/actions/set-pool-max-bytes new file mode 100755 index 00000000..91196b3e --- /dev/null +++ b/ceph-proxy/actions/set-pool-max-bytes @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +import os +import sys + +_path = os.path.dirname(os.path.realpath(__file__)) +_hooks = os.path.abspath(os.path.join(_path, '../hooks')) +_root = os.path.abspath(os.path.join(_path, '..')) + + +def _add_path(path): + if path not in sys.path: + sys.path.insert(1, path) + +_add_path(_hooks) +_add_path(_root) + +from subprocess import CalledProcessError +from charmhelpers.core.hookenv import action_get, config, log, action_fail +from charmhelpers.contrib.storage.linux.ceph import set_pool_quota + +if __name__ == '__main__': + max_bytes = action_get("max") + name = action_get("pool-name") + try: + set_pool_quota(service=config('admin-user'), pool_name=name, max_bytes=max_bytes) + except CalledProcessError as e: + log(str(e)) + action_fail("Set pool quota failed with message: {}".format(str(e))) 
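The action scripts in this directory all repeat the same sys.path bootstrap before their imports. A possible consolidation (sketched here only under stated assumptions; `actions/path_utils.py` is hypothetical and not part of this change) would hoist that boilerplate into one shared helper:

```python
# actions/path_utils.py (hypothetical): the sys.path bootstrap that every
# action script repeats, hoisted into a single shared helper.
import os
import sys


def add_charm_paths():
    """Make hooks/ and the charm root importable for standalone actions."""
    here = os.path.dirname(os.path.realpath(__file__))
    for entry in (os.path.abspath(os.path.join(here, '../hooks')),
                  os.path.abspath(os.path.join(here, '..'))):
        if entry not in sys.path:
            sys.path.insert(1, entry)
```

Each script would then open with `import path_utils; path_utils.add_charm_paths()` before importing charmhelpers, keeping the per-action files focused on their Ceph logic.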
diff --git a/ceph-proxy/actions/snapshot-pool b/ceph-proxy/actions/snapshot-pool new file mode 100755 index 00000000..3eb6926e --- /dev/null +++ b/ceph-proxy/actions/snapshot-pool @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +import os +import sys + +_path = os.path.dirname(os.path.realpath(__file__)) +_hooks = os.path.abspath(os.path.join(_path, '../hooks')) +_root = os.path.abspath(os.path.join(_path, '..')) + + +def _add_path(path): + if path not in sys.path: + sys.path.insert(1, path) + +_add_path(_hooks) +_add_path(_root) + +from subprocess import CalledProcessError +from charmhelpers.core.hookenv import action_get, config, log, action_fail +from charmhelpers.contrib.storage.linux.ceph import snapshot_pool + +if __name__ == '__main__': + name = action_get("pool-name") + snapname = action_get("snapshot-name") + try: + snapshot_pool(service=config('admin-user'), + pool_name=name, + snapshot_name=snapname) + except CalledProcessError as e: + log(str(e)) + action_fail("Snapshot pool failed with message: {}".format(str(e))) diff --git a/ceph-proxy/bindep.txt b/ceph-proxy/bindep.txt new file mode 100644 index 00000000..bdbe8d56 --- /dev/null +++ b/ceph-proxy/bindep.txt @@ -0,0 +1,3 @@ +libffi-dev [platform:dpkg] +libxml2-dev [platform:dpkg] +libxslt1-dev [platform:dpkg] diff --git a/ceph-proxy/build-requirements.txt b/ceph-proxy/build-requirements.txt new file mode 100644 index 00000000..b6d2452f --- /dev/null +++ b/ceph-proxy/build-requirements.txt @@ -0,0 +1,7 @@ +# NOTES(lourot): +# * We don't install charmcraft via pip anymore because it spins up a +# container and copies the system's charmcraft snap into it anyway, so the +# charmcraft snap is required on the system regardless. +# * `tox -e build` successfully validated with charmcraft 1.2.1 + +cffi==1.14.6; python_version < '3.6' # cffi 1.15.0 drops support for py35. diff --git a/ceph-proxy/charm-helpers-hooks.yaml b/ceph-proxy/charm-helpers-hooks.yaml new file mode 100644 index 00000000..b7a1428b --- /dev/null +++ b/ceph-proxy/charm-helpers-hooks.yaml @@ -0,0 +1,28 @@ +repo: https://github.com/juju/charm-helpers +destination: charmhelpers +include: + - core + - cli + - fetch + - osplatform + - contrib.storage.linux: + - utils + - ceph + - loopback + - lvm + - payload.execd + - contrib.openstack: + - alternatives + - deferred_events + - exceptions + - files + - ha + - ip + - policy_rcd + - utils + - contrib.network.ip + - contrib.charmsupport + - contrib.hardening|inc=* + - contrib.python + - contrib.openstack.policyd + - contrib.hahelpers diff --git a/ceph-proxy/charmcraft.yaml b/ceph-proxy/charmcraft.yaml new file mode 100644 index 00000000..daa66338 --- /dev/null +++ b/ceph-proxy/charmcraft.yaml @@ -0,0 +1,37 @@ +type: charm + +parts: + charm: + plugin: dump + source: . + prime: + - actions/* + - charmhelpers/* + - files/* + - hooks/* + - lib/* + - templates/* + - actions.yaml + - config.yaml + - copyright + - hardening.yaml + - icon.svg + - LICENSE + - Makefile + - metadata.yaml + - README.md + +base: ubuntu@22.04 +platforms: + amd64: + build-on: amd64 + build-for: amd64 + arm64: + build-on: arm64 + build-for: arm64 + ppc64el: + build-on: ppc64el + build-for: ppc64el + s390x: + build-on: s390x + build-for: s390x diff --git a/ceph-proxy/charmhelpers/__init__.py b/ceph-proxy/charmhelpers/__init__.py new file mode 100644 index 00000000..ddf30450 --- /dev/null +++ b/ceph-proxy/charmhelpers/__init__.py @@ -0,0 +1,84 @@ +# Copyright 2014-2015 Canonical Limited.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Bootstrap charm-helpers, installing its dependencies if necessary using +# only standard libraries. +import functools +import inspect +import subprocess + + +try: + import yaml # NOQA:F401 +except ImportError: + subprocess.check_call(['apt-get', 'install', '-y', 'python3-yaml']) + import yaml # NOQA:F401 + + +# Holds a list of mapping of mangled function names that have been deprecated +# using the @deprecate decorator below. This is so that the warning is only +# printed once for each usage of the function. +__deprecated_functions = {} + + +def deprecate(warning, date=None, log=None): + """Add a deprecation warning the first time the function is used. + + The date which is a string in semi-ISO8660 format indicates the year-month + that the function is officially going to be removed. + + usage: + + @deprecate('use core/fetch/add_source() instead', '2017-04') + def contributed_add_source_thing(...): + ... + + And it then prints to the log ONCE that the function is deprecated. + The reason for passing the logging function (log) is so that hookenv.log + can be used for a charm if needed. + + :param warning: String to indicate what is to be used instead. + :param date: Optional string in YYYY-MM format to indicate when the + function will definitely (probably) be removed. + :param log: The log function to call in order to log. If None, logs to + stdout + """ + def wrap(f): + + @functools.wraps(f) + def wrapped_f(*args, **kwargs): + try: + module = inspect.getmodule(f) + file = inspect.getsourcefile(f) + lines = inspect.getsourcelines(f) + f_name = "{}-{}-{}..{}-{}".format( + module.__name__, file, lines[0], lines[-1], f.__name__) + except (IOError, TypeError): + # assume it was local, so just use the name of the function + f_name = f.__name__ + if f_name not in __deprecated_functions: + __deprecated_functions[f_name] = True + s = "DEPRECATION WARNING: Function {} is being removed".format( + f.__name__) + if date: + s = "{} on/around {}".format(s, date) + if warning: + s = "{} : {}".format(s, warning) + if log: + log(s) + else: + print(s) + return f(*args, **kwargs) + return wrapped_f + return wrap diff --git a/ceph-proxy/charmhelpers/cli/__init__.py b/ceph-proxy/charmhelpers/cli/__init__.py new file mode 100644 index 00000000..2b0c4b7a --- /dev/null +++ b/ceph-proxy/charmhelpers/cli/__init__.py @@ -0,0 +1,187 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
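+ +# Functions decorated with @cmdline.subcommand() below are registered as
+# subcommands of the 'chlp' tool (see cli/commands.py); describe_arguments()
+# converts each function's signature into argparse arguments, so e.g. the
+# mounts() helper in cli/host.py is exposed as 'chlp mounts'.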
+ +import inspect +import argparse +import sys + +import charmhelpers.core.unitdata + + +class OutputFormatter(object): + def __init__(self, outfile=sys.stdout): + self.formats = ( + "raw", + "json", + "py", + "yaml", + "csv", + "tab", + ) + self.outfile = outfile + + def add_arguments(self, argument_parser): + formatgroup = argument_parser.add_mutually_exclusive_group() + choices = self.supported_formats + formatgroup.add_argument("--format", metavar='FMT', + help="Select output format for returned data, " + "where FMT is one of: {}".format(choices), + choices=choices, default='raw') + for fmt in self.formats: + fmtfunc = getattr(self, fmt) + formatgroup.add_argument("-{}".format(fmt[0]), + "--{}".format(fmt), action='store_const', + const=fmt, dest='format', + help=fmtfunc.__doc__) + + @property + def supported_formats(self): + return self.formats + + def raw(self, output): + """Output data as raw string (default)""" + if isinstance(output, (list, tuple)): + output = '\n'.join(map(str, output)) + self.outfile.write(str(output)) + + def py(self, output): + """Output data as a nicely-formatted python data structure""" + import pprint + pprint.pprint(output, stream=self.outfile) + + def json(self, output): + """Output data in JSON format""" + import json + json.dump(output, self.outfile) + + def yaml(self, output): + """Output data in YAML format""" + import yaml + yaml.safe_dump(output, self.outfile) + + def csv(self, output): + """Output data as excel-compatible CSV""" + import csv + csvwriter = csv.writer(self.outfile) + csvwriter.writerows(output) + + def tab(self, output): + """Output data in excel-compatible tab-delimited format""" + import csv + csvwriter = csv.writer(self.outfile, dialect=csv.excel_tab) + csvwriter.writerows(output) + + def format_output(self, output, fmt='raw'): + fmtfunc = getattr(self, fmt) + fmtfunc(output) + + +class CommandLine(object): + argument_parser = None + subparsers = None + formatter = None + exit_code = 0 + + def __init__(self): + if not self.argument_parser: + self.argument_parser = argparse.ArgumentParser(description='Perform common charm tasks') + if not self.formatter: + self.formatter = OutputFormatter() + self.formatter.add_arguments(self.argument_parser) + if not self.subparsers: + self.subparsers = self.argument_parser.add_subparsers(help='Commands') + + def subcommand(self, command_name=None): + """ + Decorate a function as a subcommand. Use its arguments as the + command-line arguments""" + def wrapper(decorated): + cmd_name = command_name or decorated.__name__ + subparser = self.subparsers.add_parser(cmd_name, + description=decorated.__doc__) + for args, kwargs in describe_arguments(decorated): + subparser.add_argument(*args, **kwargs) + subparser.set_defaults(func=decorated) + return decorated + return wrapper + + def test_command(self, decorated): + """ + Subcommand is a boolean test function, so bool return values should be + converted to a 0/1 exit code. + """ + decorated._cli_test_command = True + return decorated + + def no_output(self, decorated): + """ + Subcommand is not expected to return a value, so don't print a spurious None. + """ + decorated._cli_no_output = True + return decorated + + def subcommand_builder(self, command_name, description=None): + """ + Decorate a function that builds a subcommand. 
Builders should accept a + single argument (the subparser instance) and return the function to be + run as the command.""" + def wrapper(decorated): + subparser = self.subparsers.add_parser(command_name) + func = decorated(subparser) + subparser.set_defaults(func=func) + subparser.description = description or func.__doc__ + return wrapper + + def run(self): + "Run cli, processing arguments and executing subcommands." + arguments = self.argument_parser.parse_args() + argspec = inspect.getfullargspec(arguments.func) + vargs = [] + for arg in argspec.args: + vargs.append(getattr(arguments, arg)) + if argspec.varargs: + vargs.extend(getattr(arguments, argspec.varargs)) + output = arguments.func(*vargs) + if getattr(arguments.func, '_cli_test_command', False): + self.exit_code = 0 if output else 1 + output = '' + if getattr(arguments.func, '_cli_no_output', False): + output = '' + self.formatter.format_output(output, arguments.format) + if charmhelpers.core.unitdata._KV: + charmhelpers.core.unitdata._KV.flush() + + +cmdline = CommandLine() + + +def describe_arguments(func): + """ + Analyze a function's signature and return a data structure suitable for + passing in as arguments to an argparse parser's add_argument() method.""" + + argspec = inspect.getfullargspec(func) + # we should probably raise an exception somewhere if func includes **kwargs + if argspec.defaults: + positional_args = argspec.args[:-len(argspec.defaults)] + keyword_names = argspec.args[-len(argspec.defaults):] + for arg, default in zip(keyword_names, argspec.defaults): + yield ('--{}'.format(arg),), {'default': default} + else: + positional_args = argspec.args + + for arg in positional_args: + yield (arg,), {} + if argspec.varargs: + yield (argspec.varargs,), {'nargs': '*'} diff --git a/ceph-proxy/charmhelpers/cli/benchmark.py b/ceph-proxy/charmhelpers/cli/benchmark.py new file mode 100644 index 00000000..303af14b --- /dev/null +++ b/ceph-proxy/charmhelpers/cli/benchmark.py @@ -0,0 +1,34 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . 
import cmdline +from charmhelpers.contrib.benchmark import Benchmark + + +@cmdline.subcommand(command_name='benchmark-start') +def start(): + Benchmark.start() + + +@cmdline.subcommand(command_name='benchmark-finish') +def finish(): + Benchmark.finish() + + +@cmdline.subcommand_builder('benchmark-composite', description="Set the benchmark composite score") +def service(subparser): + subparser.add_argument("value", help="The composite score.") + subparser.add_argument("units", help="The units the composite score represents, i.e., 'reads/sec'.") + subparser.add_argument("direction", help="'asc' if a lower score is better, 'desc' if a higher score is better.") + return Benchmark.set_composite_score diff --git a/ceph-proxy/charmhelpers/cli/commands.py b/ceph-proxy/charmhelpers/cli/commands.py new file mode 100644 index 00000000..b9310565 --- /dev/null +++ b/ceph-proxy/charmhelpers/cli/commands.py @@ -0,0 +1,30 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This module loads sub-modules into the python runtime so they can be +discovered via the inspect module. In order to prevent flake8 from (rightfully) +telling us these are unused modules, throw a ' # noqa' at the end of each import +so that the warning is suppressed. +""" + +from . import CommandLine # noqa + +""" +Import the sub-modules which have decorated subcommands to register with chlp. +""" +from . import host # noqa +from . import benchmark # noqa +from . import unitdata # noqa +from . import hookenv # noqa diff --git a/ceph-proxy/charmhelpers/cli/hookenv.py b/ceph-proxy/charmhelpers/cli/hookenv.py new file mode 100644 index 00000000..bd72f448 --- /dev/null +++ b/ceph-proxy/charmhelpers/cli/hookenv.py @@ -0,0 +1,21 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import cmdline +from charmhelpers.core import hookenv + + +cmdline.subcommand('relation-id')(hookenv.relation_id._wrapped) +cmdline.subcommand('service-name')(hookenv.service_name) +cmdline.subcommand('remote-service-name')(hookenv.remote_service_name._wrapped) diff --git a/ceph-proxy/charmhelpers/cli/host.py b/ceph-proxy/charmhelpers/cli/host.py new file mode 100644 index 00000000..40396849 --- /dev/null +++ b/ceph-proxy/charmhelpers/cli/host.py @@ -0,0 +1,29 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import cmdline +from charmhelpers.core import host + + +@cmdline.subcommand() +def mounts(): + "List mounts" + return host.mounts() + + +@cmdline.subcommand_builder('service', description="Control system services") +def service(subparser): + subparser.add_argument("action", help="The action to perform (start, stop, etc...)") + subparser.add_argument("service_name", help="Name of the service to control") + return host.service diff --git a/ceph-proxy/charmhelpers/cli/unitdata.py b/ceph-proxy/charmhelpers/cli/unitdata.py new file mode 100644 index 00000000..acce846f --- /dev/null +++ b/ceph-proxy/charmhelpers/cli/unitdata.py @@ -0,0 +1,46 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import cmdline +from charmhelpers.core import unitdata + + +@cmdline.subcommand_builder('unitdata', description="Store and retrieve data") +def unitdata_cmd(subparser): + nested = subparser.add_subparsers() + + get_cmd = nested.add_parser('get', help='Retrieve data') + get_cmd.add_argument('key', help='Key to retrieve the value of') + get_cmd.set_defaults(action='get', value=None) + + getrange_cmd = nested.add_parser('getrange', help='Retrieve multiple data') + getrange_cmd.add_argument('key', metavar='prefix', + help='Prefix of the keys to retrieve') + getrange_cmd.set_defaults(action='getrange', value=None) + + set_cmd = nested.add_parser('set', help='Store data') + set_cmd.add_argument('key', help='Key to set') + set_cmd.add_argument('value', help='Value to store') + set_cmd.set_defaults(action='set') + + def _unitdata_cmd(action, key, value): + if action == 'get': + return unitdata.kv().get(key) + elif action == 'getrange': + return unitdata.kv().getrange(key) + elif action == 'set': + unitdata.kv().set(key, value) + unitdata.kv().flush() + return '' + return _unitdata_cmd diff --git a/ceph-proxy/charmhelpers/contrib/__init__.py b/ceph-proxy/charmhelpers/contrib/__init__.py new file mode 100644 index 00000000..d7567b86 --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-proxy/charmhelpers/contrib/charmsupport/__init__.py b/ceph-proxy/charmhelpers/contrib/charmsupport/__init__.py new file mode 100644 index 00000000..d7567b86 --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/charmsupport/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-proxy/charmhelpers/contrib/charmsupport/nrpe.py b/ceph-proxy/charmhelpers/contrib/charmsupport/nrpe.py new file mode 100644 index 00000000..ac002bc6 --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/charmsupport/nrpe.py @@ -0,0 +1,576 @@ +# Copyright 2012-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Compatibility with the nrpe-external-master charm""" +# +# Authors: +# Matthew Wedgwood + +import glob +import grp +import json +import os +import pwd +import re +import shlex +import shutil +import subprocess +import yaml + +from charmhelpers.core.hookenv import ( + application_name, + config, + ERROR, + hook_name, + local_unit, + log, + relation_get, + relation_ids, + relation_set, + relations_of_type, +) + +from charmhelpers.core.host import service +from charmhelpers.core import host + +# This module adds compatibility with the nrpe-external-master and plain nrpe +# subordinate charms. To use it in your charm: +# +# 1. Update metadata.yaml +# +# provides: +# (...) +# nrpe-external-master: +# interface: nrpe-external-master +# scope: container +# +# and/or +# +# provides: +# (...) +# local-monitors: +# interface: local-monitors +# scope: container + +# +# 2. Add the following to config.yaml +# +# nagios_context: +# default: "juju" +# type: string +# description: | +# Used by the nrpe subordinate charms. +# A string that will be prepended to instance name to set the host name +# in nagios. So for instance the hostname would be something like: +# juju-myservice-0 +# If you're running multiple environments with the same services in them +# this allows you to differentiate between them. +# nagios_servicegroups: +# default: "" +# type: string +# description: | +# A comma-separated list of nagios servicegroups. +# If left empty, the nagios_context will be used as the servicegroup +# +# 3. Add custom checks (Nagios plugins) to files/nrpe-external-master +# +# 4. Update your hooks.py with something like this: +# +# from charmsupport.nrpe import NRPE +# (...) 
+# def update_nrpe_config(): +# nrpe_compat = NRPE() +# nrpe_compat.add_check( +# shortname = "myservice", +# description = "Check MyService", +# check_cmd = "check_http -w 2 -c 10 http://localhost" +# ) +# nrpe_compat.add_check( +# "myservice_other", +# "Check for widget failures", +# check_cmd = "/srv/myapp/scripts/widget_check" +# ) +# nrpe_compat.write() +# +# def config_changed(): +# (...) +# update_nrpe_config() +# +# def nrpe_external_master_relation_changed(): +# update_nrpe_config() +# +# def local_monitors_relation_changed(): +# update_nrpe_config() +# +# 4.a If your charm is a subordinate charm set primary=False +# +# from charmsupport.nrpe import NRPE +# (...) +# def update_nrpe_config(): +# nrpe_compat = NRPE(primary=False) +# +# 5. ln -s hooks.py nrpe-external-master-relation-changed +# ln -s hooks.py local-monitors-relation-changed + + +class CheckException(Exception): + pass + + +class Check(object): + shortname_re = '[A-Za-z0-9-_.@]+$' + service_template = (""" +#--------------------------------------------------- +# This file is Juju managed +#--------------------------------------------------- +define service {{ + use active-service + host_name {nagios_hostname} + service_description {nagios_hostname}[{shortname}] """ + """{description} + check_command check_nrpe!{command} + servicegroups {nagios_servicegroup} +{service_config_overrides} +}} +""") + + def __init__(self, shortname, description, check_cmd, max_check_attempts=None): + super(Check, self).__init__() + # XXX: could be better to calculate this from the service name + if not re.match(self.shortname_re, shortname): + raise CheckException("shortname must match {}".format( + Check.shortname_re)) + self.shortname = shortname + self.command = "check_{}".format(shortname) + # Note: a set of invalid characters is defined by the + # Nagios server config + # The default is: illegal_object_name_chars=`~!$%^&*"|'<>?,()= + self.description = description + self.check_cmd = self._locate_cmd(check_cmd) + self.max_check_attempts = max_check_attempts + + def _get_check_filename(self): + return os.path.join(NRPE.nrpe_confdir, '{}.cfg'.format(self.command)) + + def _get_service_filename(self, hostname): + return os.path.join(NRPE.nagios_exportdir, + 'service__{}_{}.cfg'.format(hostname, self.command)) + + def _locate_cmd(self, check_cmd): + search_path = ( + '/usr/lib/nagios/plugins', + '/usr/local/lib/nagios/plugins', + ) + parts = shlex.split(check_cmd) + for path in search_path: + if os.path.exists(os.path.join(path, parts[0])): + command = os.path.join(path, parts[0]) + if len(parts) > 1: + safe_args = [shlex.quote(arg) for arg in parts[1:]] + command += " " + " ".join(safe_args) + return command + log('Check command not found: {}'.format(parts[0])) + return '' + + def _remove_service_files(self): + if not os.path.exists(NRPE.nagios_exportdir): + return + for f in os.listdir(NRPE.nagios_exportdir): + if f.endswith('_{}.cfg'.format(self.command)): + os.remove(os.path.join(NRPE.nagios_exportdir, f)) + + def remove(self, hostname): + nrpe_check_file = self._get_check_filename() + if os.path.exists(nrpe_check_file): + os.remove(nrpe_check_file) + self._remove_service_files() + + def write(self, nagios_context, hostname, nagios_servicegroups): + nrpe_check_file = self._get_check_filename() + with open(nrpe_check_file, 'w') as nrpe_check_config: + nrpe_check_config.write("# check {}\n".format(self.shortname)) + if nagios_servicegroups: + nrpe_check_config.write( + "# The following header was added automatically by juju\n") + 
nrpe_check_config.write( + "# Modifying it will affect nagios monitoring and alerting\n") + nrpe_check_config.write( + "# servicegroups: {}\n".format(nagios_servicegroups)) + nrpe_check_config.write("command[{}]={}\n".format( + self.command, self.check_cmd)) + + if not os.path.exists(NRPE.nagios_exportdir): + log('Not writing service config as {} is not accessible'.format( + NRPE.nagios_exportdir)) + else: + self.write_service_config(nagios_context, hostname, + nagios_servicegroups) + + def write_service_config(self, nagios_context, hostname, + nagios_servicegroups): + self._remove_service_files() + + if self.max_check_attempts: + service_config_overrides = ' max_check_attempts {}'.format( + self.max_check_attempts + ) # Note indentation is here rather than in the template to avoid trailing spaces + else: + service_config_overrides = '' # empty string to avoid printing 'None' + templ_vars = { + 'nagios_hostname': hostname, + 'nagios_servicegroup': nagios_servicegroups, + 'description': self.description, + 'shortname': self.shortname, + 'command': self.command, + 'service_config_overrides': service_config_overrides, + } + nrpe_service_text = Check.service_template.format(**templ_vars) + nrpe_service_file = self._get_service_filename(hostname) + with open(nrpe_service_file, 'w') as nrpe_service_config: + nrpe_service_config.write(str(nrpe_service_text)) + + def run(self): + subprocess.call(self.check_cmd) + + +class NRPE(object): + nagios_logdir = '/var/log/nagios' + nagios_exportdir = '/var/lib/nagios/export' + nrpe_confdir = '/etc/nagios/nrpe.d' + homedir = '/var/lib/nagios' # home dir provided by nagios-nrpe-server + + def __init__(self, hostname=None, primary=True): + super(NRPE, self).__init__() + self.config = config() + self.primary = primary + self.nagios_context = self.config['nagios_context'] + if 'nagios_servicegroups' in self.config and self.config['nagios_servicegroups']: + self.nagios_servicegroups = self.config['nagios_servicegroups'] + else: + self.nagios_servicegroups = self.nagios_context + self.unit_name = local_unit().replace('/', '-') + if hostname: + self.hostname = hostname + else: + nagios_hostname = get_nagios_hostname() + if nagios_hostname: + self.hostname = nagios_hostname + else: + self.hostname = "{}-{}".format(self.nagios_context, self.unit_name) + self.checks = [] + # If in an nrpe-external-master relation hook, set primary status + relation = relation_ids('nrpe-external-master') + if relation: + log("Setting charm primary status {}".format(primary)) + for rid in relation: + relation_set(relation_id=rid, relation_settings={'primary': self.primary}) + self.remove_check_queue = set() + + @classmethod + def does_nrpe_conf_dir_exist(cls): + """Return True if the nrpe_confdir directory exists.""" + return os.path.isdir(cls.nrpe_confdir) + + def add_check(self, *args, **kwargs): + shortname = None + if kwargs.get('shortname') is None: + if len(args) > 0: + shortname = args[0] + else: + shortname = kwargs['shortname'] + + self.checks.append(Check(*args, **kwargs)) + try: + self.remove_check_queue.remove(shortname) + except KeyError: + pass + + def remove_check(self, *args, **kwargs): + if kwargs.get('shortname') is None: + raise ValueError('shortname of check must be specified') + + # Use sensible defaults if they're not specified - these are not + # actually used during removal, but they're required for constructing + # the Check object; check_disk is chosen because it's part of the + # nagios-plugins-basic package.
+        if kwargs.get('check_cmd') is None:
+            kwargs['check_cmd'] = 'check_disk'
+        if kwargs.get('description') is None:
+            kwargs['description'] = ''
+
+        check = Check(*args, **kwargs)
+        check.remove(self.hostname)
+        self.remove_check_queue.add(kwargs['shortname'])
+
+    def write(self):
+        try:
+            nagios_uid = pwd.getpwnam('nagios').pw_uid
+            nagios_gid = grp.getgrnam('nagios').gr_gid
+        except Exception:
+            log("Nagios user not set up, nrpe checks not updated")
+            return
+
+        if not os.path.exists(NRPE.nagios_logdir):
+            os.mkdir(NRPE.nagios_logdir)
+            os.chown(NRPE.nagios_logdir, nagios_uid, nagios_gid)
+
+        nrpe_monitors = {}
+        monitors = {"monitors": {"remote": {"nrpe": nrpe_monitors}}}
+
+        # check that the charm can write to the conf dir.  If not, then nagios
+        # probably isn't installed, and we can defer.
+        if not self.does_nrpe_conf_dir_exist():
+            return
+
+        for nrpecheck in self.checks:
+            nrpecheck.write(self.nagios_context, self.hostname,
+                            self.nagios_servicegroups)
+            nrpe_monitors[nrpecheck.shortname] = {
+                "command": nrpecheck.command,
+            }
+            # If we were passed max_check_attempts, add that to the relation data
+            if nrpecheck.max_check_attempts is not None:
+                nrpe_monitors[nrpecheck.shortname]['max_check_attempts'] = nrpecheck.max_check_attempts
+
+        # update-status hooks are configured to fire every 5 minutes by
+        # default. When nagios-nrpe-server is restarted, the nagios server
+        # reports the checks as failing, causing unnecessary alerts. Let's not
+        # restart on update-status hooks.
+        if not hook_name() == 'update-status':
+            service('restart', 'nagios-nrpe-server')
+
+        monitor_ids = relation_ids("local-monitors") + \
+            relation_ids("nrpe-external-master")
+        for rid in monitor_ids:
+            reldata = relation_get(unit=local_unit(), rid=rid)
+            if 'monitors' in reldata:
+                # update the existing set of monitors with the new data
+                old_monitors = yaml.safe_load(reldata['monitors'])
+                old_nrpe_monitors = old_monitors['monitors']['remote']['nrpe']
+                # remove keys that are in the remove_check_queue
+                old_nrpe_monitors = {k: v for k, v in old_nrpe_monitors.items()
+                                     if k not in self.remove_check_queue}
+                # update/add nrpe_monitors
+                old_nrpe_monitors.update(nrpe_monitors)
+                old_monitors['monitors']['remote']['nrpe'] = old_nrpe_monitors
+                # write back to the relation
+                relation_set(relation_id=rid, monitors=yaml.dump(old_monitors))
+            else:
+                # write a brand new set of monitors, as none exist yet.
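+                # (The payload mirrors the structure assembled above, i.e.
+                # {'monitors': {'remote': {'nrpe': {<shortname>: ...}}}}.)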
+                relation_set(relation_id=rid, monitors=yaml.dump(monitors))
+
+        self.remove_check_queue.clear()
+
+
+def get_nagios_hostcontext(relation_name='nrpe-external-master'):
+    """
+    Query relation with nrpe subordinate, return the nagios_host_context
+
+    :param str relation_name: Name of relation nrpe sub joined to
+    """
+    for rel in relations_of_type(relation_name):
+        if 'nagios_host_context' in rel:
+            return rel['nagios_host_context']
+
+
+def get_nagios_hostname(relation_name='nrpe-external-master'):
+    """
+    Query relation with nrpe subordinate, return the nagios_hostname
+
+    :param str relation_name: Name of relation nrpe sub joined to
+    """
+    for rel in relations_of_type(relation_name):
+        if 'nagios_hostname' in rel:
+            return rel['nagios_hostname']
+
+
+def get_nagios_unit_name(relation_name='nrpe-external-master'):
+    """
+    Return the nagios unit name prepended with host_context if needed
+
+    :param str relation_name: Name of relation nrpe sub joined to
+    """
+    host_context = get_nagios_hostcontext(relation_name)
+    if host_context:
+        unit = "%s:%s" % (host_context, local_unit())
+    else:
+        unit = local_unit()
+    return unit
+
+
+def add_init_service_checks(nrpe, services, unit_name, immediate_check=True):
+    """
+    Add checks for each service in the list
+
+    :param NRPE nrpe: NRPE object to add check to
+    :param list services: List of services to check
+    :param str unit_name: Unit name to use in check description
+    :param bool immediate_check: For sysv init, run the service check immediately
+    """
+    # check_haproxy is redundant in the presence of check_crm. See LP Bug#1880601 for details.
+    # just remove check_haproxy if haproxy is added as a lsb resource in hacluster.
+    for rid in relation_ids("ha"):
+        ha_resources = relation_get("json_resources", rid=rid, unit=local_unit())
+        if ha_resources:
+            try:
+                ha_resources_parsed = json.loads(ha_resources)
+            except ValueError as e:
+                log('Could not parse JSON from ha resources. {}'.format(e), level=ERROR)
+                raise
+            if "lsb:haproxy" in ha_resources_parsed.values():
+                if "haproxy" in services:
+                    log("removed check_haproxy. This service will be monitored by check_crm")
+                    services.remove("haproxy")
+    for svc in services:
+        # Don't add a check for these services from neutron-gateway
+        if svc in ['ext-port', 'os-charm-phy-nic-mtu']:
+            continue
+
+        upstart_init = '/etc/init/%s.conf' % svc
+        sysv_init = '/etc/init.d/%s' % svc
+
+        if host.init_is_systemd(service_name=svc):
+            nrpe.add_check(
+                shortname=svc,
+                description='process check {%s}' % unit_name,
+                check_cmd='check_systemd.py %s' % svc
+            )
+        elif os.path.exists(upstart_init):
+            nrpe.add_check(
+                shortname=svc,
+                description='process check {%s}' % unit_name,
+                check_cmd='check_upstart_job %s' % svc
+            )
+        elif os.path.exists(sysv_init):
+            cronpath = '/etc/cron.d/nagios-service-check-%s' % svc
+            checkpath = '%s/service-check-%s.txt' % (nrpe.homedir, svc)
+            croncmd = (
+                '/usr/local/lib/nagios/plugins/check_exit_status.pl '
+                '-e -s /etc/init.d/%s status' % svc
+            )
+            cron_file = '*/5 * * * * root %s > %s\n' % (croncmd, checkpath)
+            f = open(cronpath, 'w')
+            f.write(cron_file)
+            f.close()
+            nrpe.add_check(
+                shortname=svc,
+                description='service check {%s}' % unit_name,
+                check_cmd='check_status_file.py -f %s' % checkpath,
+            )
+            # if /var/lib/nagios doesn't exist open(checkpath, 'w') will fail
+            # (LP: #1670223).
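+            # Hence the isdir() guard below: when the nagios home directory
+            # is missing, the immediate check is simply skipped.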
+ if immediate_check and os.path.isdir(nrpe.homedir): + f = open(checkpath, 'w') + subprocess.call( + croncmd.split(), + stdout=f, + stderr=subprocess.STDOUT + ) + f.close() + os.chmod(checkpath, 0o644) + + +def copy_nrpe_checks(nrpe_files_dir=None): + """ + Copy the nrpe checks into place + + """ + NAGIOS_PLUGINS = '/usr/local/lib/nagios/plugins' + if nrpe_files_dir is None: + # determine if "charmhelpers" is in CHARMDIR or CHARMDIR/hooks + for segment in ['.', 'hooks']: + nrpe_files_dir = os.path.abspath(os.path.join( + os.getenv('CHARM_DIR'), + segment, + 'charmhelpers', + 'contrib', + 'openstack', + 'files')) + if os.path.isdir(nrpe_files_dir): + break + else: + raise RuntimeError("Couldn't find charmhelpers directory") + if not os.path.exists(NAGIOS_PLUGINS): + os.makedirs(NAGIOS_PLUGINS) + for fname in glob.glob(os.path.join(nrpe_files_dir, "check_*")): + if os.path.isfile(fname): + shutil.copy2(fname, + os.path.join(NAGIOS_PLUGINS, os.path.basename(fname))) + + +def add_haproxy_checks(nrpe, unit_name): + """ + Add checks for each service in list + + :param NRPE nrpe: NRPE object to add check to + :param str unit_name: Unit name to use in check description + """ + nrpe.add_check( + shortname='haproxy_servers', + description='Check HAProxy {%s}' % unit_name, + check_cmd='check_haproxy.sh') + nrpe.add_check( + shortname='haproxy_queue', + description='Check HAProxy queue depth {%s}' % unit_name, + check_cmd='check_haproxy_queue_depth.sh') + + +def remove_deprecated_check(nrpe, deprecated_services): + """ + Remove checks for deprecated services in list + + :param nrpe: NRPE object to remove check from + :type nrpe: NRPE + :param deprecated_services: List of deprecated services that are removed + :type deprecated_services: list + """ + for dep_svc in deprecated_services: + log('Deprecated service: {}'.format(dep_svc)) + nrpe.remove_check(shortname=dep_svc) + + +def add_deferred_restarts_check(nrpe): + """ + Add NRPE check for services with deferred restarts. + + :param NRPE nrpe: NRPE object to add check to + """ + unit_name = local_unit().replace('/', '-') + shortname = unit_name + '_deferred_restarts' + check_cmd = 'check_deferred_restarts.py --application {}'.format( + application_name()) + + log('Adding deferred restarts nrpe check: {}'.format(shortname)) + nrpe.add_check( + shortname=shortname, + description='Check deferred service restarts {}'.format(unit_name), + check_cmd=check_cmd) + + +def remove_deferred_restarts_check(nrpe): + """ + Remove NRPE check for services with deferred service restarts. + + :param NRPE nrpe: NRPE object to remove check from + """ + unit_name = local_unit().replace('/', '-') + shortname = unit_name + '_deferred_restarts' + check_cmd = 'check_deferred_restarts.py --application {}'.format( + application_name()) + + log('Removing deferred restarts nrpe check: {}'.format(shortname)) + nrpe.remove_check( + shortname=shortname, + description='Check deferred service restarts {}'.format(unit_name), + check_cmd=check_cmd) diff --git a/ceph-proxy/charmhelpers/contrib/charmsupport/volumes.py b/ceph-proxy/charmhelpers/contrib/charmsupport/volumes.py new file mode 100644 index 00000000..f7c6fbdc --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/charmsupport/volumes.py @@ -0,0 +1,173 @@ +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+'''
+Functions for managing volumes in juju units. One volume is supported per unit.
+Subordinates may have their own storage, provided it is on its own partition.
+
+Configuration stanzas::
+
+  volume-ephemeral:
+    type: boolean
+    default: true
+    description: >
+      If false, a volume is mounted as specified in "volume-map".
+      If true, ephemeral storage will be used, meaning that log data
+      will only exist as long as the machine does. YOU HAVE BEEN WARNED.
+  volume-map:
+    type: string
+    default: {}
+    description: >
+      YAML map of units to device names, e.g.:
+        "{ rsyslog/0: /dev/vdb, rsyslog/1: /dev/vdb }"
+      Service units will raise a configure-error if volume-ephemeral
+      is 'true' and no volume-map value is set. Use 'juju set' to set a
+      value and 'juju resolved' to complete configuration.
+
+Usage::
+
+    from charmsupport.volumes import configure_volume, VolumeConfigurationError
+    from charmsupport.hookenv import log, ERROR
+    def pre_mount_hook():
+        stop_service('myservice')
+    def post_mount_hook():
+        start_service('myservice')
+
+    if __name__ == '__main__':
+        try:
+            configure_volume(before_change=pre_mount_hook,
+                             after_change=post_mount_hook)
+        except VolumeConfigurationError:
+            log('Storage could not be configured', ERROR)
+
+'''
+
+# XXX: Known limitations
+# - fstab is neither consulted nor updated
+
+import os
+from charmhelpers.core import hookenv
+from charmhelpers.core import host
+import yaml
+
+
+MOUNT_BASE = '/srv/juju/volumes'
+
+
+class VolumeConfigurationError(Exception):
+    '''Volume configuration data is missing or invalid'''
+    pass
+
+
+def get_config():
+    '''Gather and sanity-check volume configuration data'''
+    volume_config = {}
+    config = hookenv.config()
+
+    errors = False
+
+    if config.get('volume-ephemeral') in (True, 'True', 'true', 'Yes', 'yes'):
+        volume_config['ephemeral'] = True
+    else:
+        volume_config['ephemeral'] = False
+
+    volume_map = {}
+    try:
+        volume_map = yaml.safe_load(config.get('volume-map', '{}'))
+    except yaml.YAMLError as e:
+        hookenv.log("Error parsing YAML volume-map: {}".format(e),
+                    hookenv.ERROR)
+        errors = True
+    if volume_map is None:
+        # probably an empty string
+        volume_map = {}
+    elif not isinstance(volume_map, dict):
+        hookenv.log("Volume-map should be a dictionary, not {}".format(
+            type(volume_map)))
+        errors = True
+
+    volume_config['device'] = volume_map.get(os.environ['JUJU_UNIT_NAME'])
+    if volume_config['device'] and volume_config['ephemeral']:
+        # asked for ephemeral storage but also defined a volume ID
+        hookenv.log('A volume is defined for this unit, but ephemeral '
+                    'storage was requested', hookenv.ERROR)
+        errors = True
+    elif not volume_config['device'] and not volume_config['ephemeral']:
+        # asked for permanent storage but did not define volume ID
+        hookenv.log('Permanent storage was requested, but there is no volume '
+                    'defined for this unit.', hookenv.ERROR)
+        errors = True
+
+    unit_mount_name = hookenv.local_unit().replace('/', '-')
+    volume_config['mountpoint'] = os.path.join(MOUNT_BASE, unit_mount_name)
+
+    if errors:
+        return None
+    return volume_config
+
+
+def mount_volume(config):
+    if os.path.exists(config['mountpoint']):
if not os.path.isdir(config['mountpoint']): + hookenv.log('Not a directory: {}'.format(config['mountpoint'])) + raise VolumeConfigurationError() + else: + host.mkdir(config['mountpoint']) + if os.path.ismount(config['mountpoint']): + unmount_volume(config) + if not host.mount(config['device'], config['mountpoint'], persist=True): + raise VolumeConfigurationError() + + +def unmount_volume(config): + if os.path.ismount(config['mountpoint']): + if not host.umount(config['mountpoint'], persist=True): + raise VolumeConfigurationError() + + +def managed_mounts(): + '''List of all mounted managed volumes''' + return filter(lambda mount: mount[0].startswith(MOUNT_BASE), host.mounts()) + + +def configure_volume(before_change=lambda: None, after_change=lambda: None): + '''Set up storage (or don't) according to the charm's volume configuration. + Returns the mount point or "ephemeral". before_change and after_change + are optional functions to be called if the volume configuration changes. + ''' + + config = get_config() + if not config: + hookenv.log('Failed to read volume configuration', hookenv.CRITICAL) + raise VolumeConfigurationError() + + if config['ephemeral']: + if os.path.ismount(config['mountpoint']): + before_change() + unmount_volume(config) + after_change() + return 'ephemeral' + else: + # persistent storage + if os.path.ismount(config['mountpoint']): + mounts = dict(managed_mounts()) + if mounts.get(config['mountpoint']) != config['device']: + before_change() + unmount_volume(config) + mount_volume(config) + after_change() + else: + before_change() + mount_volume(config) + after_change() + return config['mountpoint'] diff --git a/ceph-proxy/charmhelpers/contrib/hahelpers/__init__.py b/ceph-proxy/charmhelpers/contrib/hahelpers/__init__.py new file mode 100644 index 00000000..d7567b86 --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/hahelpers/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-proxy/charmhelpers/contrib/hahelpers/apache.py b/ceph-proxy/charmhelpers/contrib/hahelpers/apache.py new file mode 100644 index 00000000..a54702bc --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/hahelpers/apache.py @@ -0,0 +1,90 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# Copyright 2012 Canonical Ltd. 
+#
+# This file is sourced from lp:openstack-charm-helpers
+#
+# Authors:
+#  James Page
+#  Adam Gandelman
+#
+
+import os
+
+from charmhelpers.core import host
+from charmhelpers.core.hookenv import (
+    config as config_get,
+    relation_get,
+    relation_ids,
+    related_units as relation_list,
+    log,
+    INFO,
+)
+
+# This file contains the CA cert from the charm's ssl_ca configuration
+# option; in future the file name should be updated to reflect that.
+CONFIG_CA_CERT_FILE = 'keystone_juju_ca_cert'
+
+
+def get_cert(cn=None):
+    # TODO: deal with multiple https endpoints via charm config
+    cert = config_get('ssl_cert')
+    key = config_get('ssl_key')
+    if not (cert and key):
+        log("Inspecting identity-service relations for SSL certificate.",
+            level=INFO)
+        cert = key = None
+        if cn:
+            ssl_cert_attr = 'ssl_cert_{}'.format(cn)
+            ssl_key_attr = 'ssl_key_{}'.format(cn)
+        else:
+            ssl_cert_attr = 'ssl_cert'
+            ssl_key_attr = 'ssl_key'
+        for r_id in relation_ids('identity-service'):
+            for unit in relation_list(r_id):
+                if not cert:
+                    cert = relation_get(ssl_cert_attr,
+                                        rid=r_id, unit=unit)
+                if not key:
+                    key = relation_get(ssl_key_attr,
+                                       rid=r_id, unit=unit)
+    return (cert, key)
+
+
+def get_ca_cert():
+    ca_cert = config_get('ssl_ca')
+    if ca_cert is None:
+        log("Inspecting identity-service relations for CA SSL certificate.",
+            level=INFO)
+        for r_id in (relation_ids('identity-service') +
+                     relation_ids('identity-credentials')):
+            for unit in relation_list(r_id):
+                if ca_cert is None:
+                    ca_cert = relation_get('ca_cert',
+                                           rid=r_id, unit=unit)
+    return ca_cert
+
+
+def retrieve_ca_cert(cert_file):
+    cert = None
+    if os.path.isfile(cert_file):
+        with open(cert_file, 'rb') as crt:
+            cert = crt.read()
+    return cert
+
+
+def install_ca_cert(ca_cert):
+    host.install_ca_cert(ca_cert, CONFIG_CA_CERT_FILE)
diff --git a/ceph-proxy/charmhelpers/contrib/hahelpers/cluster.py b/ceph-proxy/charmhelpers/contrib/hahelpers/cluster.py
new file mode 100644
index 00000000..7b309256
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hahelpers/cluster.py
@@ -0,0 +1,455 @@
+# Copyright 2014-2021 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Copyright 2012 Canonical Ltd.
+#
+# Authors:
+#  James Page
+#  Adam Gandelman
+#
+
+"""
+Helpers for clustering and determining "cluster leadership" and other
+clustering-related helpers.
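+
+For illustration, a charm hook might combine these helpers roughly as
+follows (the resource name, port and setup helper below are hypothetical,
+not part of this module)::
+
+    from charmhelpers.contrib.hahelpers.cluster import (
+        is_elected_leader,
+        determine_api_port,
+    )
+
+    if is_elected_leader('res_myservice_vip'):
+        run_one_off_setup()  # e.g. database migrations (hypothetical)
+    listen_port = determine_api_port(9292)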
+""" + +import functools +import subprocess +import os +import time + +from socket import gethostname as get_unit_hostname + +from charmhelpers.core.hookenv import ( + log, + relation_ids, + related_units as relation_list, + relation_get, + config as config_get, + INFO, + DEBUG, + WARNING, + unit_get, + is_leader as juju_is_leader, + status_set, +) +from charmhelpers.core.host import ( + modulo_distribution, +) +from charmhelpers.core.decorators import ( + retry_on_exception, +) +from charmhelpers.core.strutils import ( + bool_from_string, +) + +DC_RESOURCE_NAME = 'DC' + + +class HAIncompleteConfig(Exception): + pass + + +class HAIncorrectConfig(Exception): + pass + + +class CRMResourceNotFound(Exception): + pass + + +class CRMDCNotFound(Exception): + pass + + +def is_elected_leader(resource): + """ + Returns True if the charm executing this is the elected cluster leader. + + It relies on two mechanisms to determine leadership: + 1. If juju is sufficiently new and leadership election is supported, + the is_leader command will be used. + 2. If the charm is part of a corosync cluster, call corosync to + determine leadership. + 3. If the charm is not part of a corosync cluster, the leader is + determined as being "the alive unit with the lowest unit number". In + other words, the oldest surviving unit. + """ + try: + return juju_is_leader() + except NotImplementedError: + log('Juju leadership election feature not enabled' + ', using fallback support', + level=WARNING) + + if is_clustered(): + if not is_crm_leader(resource): + log('Deferring action to CRM leader.', level=INFO) + return False + else: + peers = peer_units() + if peers and not oldest_peer(peers): + log('Deferring action to oldest service unit.', level=INFO) + return False + return True + + +def is_clustered(): + for r_id in (relation_ids('ha') or []): + for unit in (relation_list(r_id) or []): + clustered = relation_get('clustered', + rid=r_id, + unit=unit) + if clustered: + return True + return False + + +def is_crm_dc(): + """ + Determine leadership by querying the pacemaker Designated Controller + """ + cmd = ['crm', 'status'] + try: + status = subprocess.check_output( + cmd, stderr=subprocess.STDOUT).decode('utf-8') + except subprocess.CalledProcessError as ex: + raise CRMDCNotFound(str(ex)) + + current_dc = '' + for line in status.split('\n'): + if line.startswith('Current DC'): + # Current DC: juju-lytrusty-machine-2 (168108163) + # - partition with quorum + current_dc = line.split(':')[1].split()[0] + if current_dc == get_unit_hostname(): + return True + elif current_dc == 'NONE': + raise CRMDCNotFound('Current DC: NONE') + + return False + + +@retry_on_exception(5, base_delay=2, + exc_type=(CRMResourceNotFound, CRMDCNotFound)) +def is_crm_leader(resource, retry=False): + """ + Returns True if the charm calling this is the elected corosync leader, + as returned by calling the external "crm" command. + + We allow this operation to be retried to avoid the possibility of getting a + false negative. See LP #1396246 for more info. 
+ """ + if resource == DC_RESOURCE_NAME: + return is_crm_dc() + cmd = ['crm', 'resource', 'show', resource] + try: + status = subprocess.check_output( + cmd, stderr=subprocess.STDOUT).decode('utf-8') + except subprocess.CalledProcessError: + status = None + + if status and get_unit_hostname() in status: + return True + + if status and "resource %s is NOT running" % (resource) in status: + raise CRMResourceNotFound("CRM resource %s not found" % (resource)) + + return False + + +def is_leader(resource): + log("is_leader is deprecated. Please consider using is_crm_leader " + "instead.", level=WARNING) + return is_crm_leader(resource) + + +def peer_units(peer_relation="cluster"): + peers = [] + for r_id in (relation_ids(peer_relation) or []): + for unit in (relation_list(r_id) or []): + peers.append(unit) + return peers + + +def peer_ips(peer_relation='cluster', addr_key='private-address'): + '''Return a dict of peers and their private-address''' + peers = {} + for r_id in relation_ids(peer_relation): + for unit in relation_list(r_id): + peers[unit] = relation_get(addr_key, rid=r_id, unit=unit) + return peers + + +def oldest_peer(peers): + """Determines who the oldest peer is by comparing unit numbers.""" + local_unit_no = int(os.getenv('JUJU_UNIT_NAME').split('/')[1]) + for peer in peers: + remote_unit_no = int(peer.split('/')[1]) + if remote_unit_no < local_unit_no: + return False + return True + + +def eligible_leader(resource): + log("eligible_leader is deprecated. Please consider using " + "is_elected_leader instead.", level=WARNING) + return is_elected_leader(resource) + + +def https(): + ''' + Determines whether enough data has been provided in configuration + or relation data to configure HTTPS + . + returns: boolean + ''' + use_https = config_get('use-https') + if use_https and bool_from_string(use_https): + return True + if config_get('ssl_cert') and config_get('ssl_key'): + return True + # Local import to avoid ciruclar dependency. + import charmhelpers.contrib.openstack.cert_utils as cert_utils + if ( + cert_utils.get_certificate_request() and not + cert_utils.get_requests_for_local_unit("certificates") + ): + return False + for r_id in relation_ids('certificates'): + for unit in relation_list(r_id): + ca = relation_get('ca', rid=r_id, unit=unit) + if ca: + return True + for r_id in relation_ids('identity-service'): + for unit in relation_list(r_id): + # TODO - needs fixing for new helper as ssl_cert/key suffixes with CN + rel_state = [ + relation_get('https_keystone', rid=r_id, unit=unit), + relation_get('ca_cert', rid=r_id, unit=unit), + ] + # NOTE: works around (LP: #1203241) + if (None not in rel_state) and ('' not in rel_state): + return True + return False + + +def determine_api_port(public_port, singlenode_mode=False): + ''' + Determine correct API server listening port based on + existence of HTTPS reverse proxy and/or haproxy. + + public_port: int: standard public port for given service + + singlenode_mode: boolean: Shuffle ports when only a single unit is present + + returns: int: the correct listening port for the API service + ''' + i = 0 + if singlenode_mode: + i += 1 + elif len(peer_units()) > 0 or is_clustered(): + i += 1 + if https(): + i += 1 + return public_port - (i * 10) + + +def determine_apache_port(public_port, singlenode_mode=False): + ''' + Description: Determine correct apache listening port based on public IP + + state of the cluster. 
+
+    public_port: int: standard public port for given service
+
+    singlenode_mode: boolean: Shuffle ports when only a single unit is present
+
+    returns: int: the correct listening port for the HAProxy service
+    '''
+    i = 0
+    if singlenode_mode:
+        i += 1
+    elif len(peer_units()) > 0 or is_clustered():
+        i += 1
+    return public_port - (i * 10)
+
+
+determine_apache_port_single = functools.partial(
+    determine_apache_port, singlenode_mode=True)
+
+
+def get_hacluster_config(exclude_keys=None):
+    '''
+    Obtains all relevant configuration from charm configuration required
+    for initiating a relation to hacluster:
+
+        ha-bindiface, ha-mcastport, vip, os-internal-hostname,
+        os-admin-hostname, os-public-hostname, os-access-hostname
+
+    param: exclude_keys: list of setting key(s) to be excluded.
+    returns: dict: A dict containing settings keyed by setting name.
+    raises: HAIncompleteConfig or HAIncorrectConfig if settings are missing
+            or incorrect.
+    '''
+    settings = ['ha-bindiface', 'ha-mcastport', 'vip', 'os-internal-hostname',
+                'os-admin-hostname', 'os-public-hostname', 'os-access-hostname']
+    conf = {}
+    for setting in settings:
+        if exclude_keys and setting in exclude_keys:
+            continue
+
+        conf[setting] = config_get(setting)
+
+    if not valid_hacluster_config():
+        raise HAIncorrectConfig('Insufficient or incorrect config data to '
+                                'configure hacluster.')
+    return conf
+
+
+def valid_hacluster_config():
+    '''
+    Check that either vip or dns-ha is set. If dns-ha then one of os-*-hostname
+    must be set.
+
+    Note: ha-bindiface and ha-mcastport both have defaults and will always
+    be set. We only care that either vip or dns-ha is set.
+
+    :returns: boolean: valid config returns true.
+        raises: HAIncorrectConfig if settings conflict.
+                HAIncompleteConfig if settings are missing.
+    '''
+    vip = config_get('vip')
+    dns = config_get('dns-ha')
+    if not (bool(vip) ^ bool(dns)):
+        msg = ('HA: Either vip or dns-ha must be set but not both in order to '
+               'use high availability')
+        status_set('blocked', msg)
+        raise HAIncorrectConfig(msg)
+
+    # If dns-ha then one of os-*-hostname must be set
+    if dns:
+        dns_settings = ['os-internal-hostname', 'os-admin-hostname',
+                        'os-public-hostname', 'os-access-hostname']
+        # At this point it is unknown if one or all of the possible
+        # network spaces are in HA. Validate at least one is set which is
+        # the minimum required.
+        for setting in dns_settings:
+            if config_get(setting):
+                log('DNS HA: At least one hostname is set {}: {}'
+                    ''.format(setting, config_get(setting)),
+                    level=DEBUG)
+                return True
+
+        msg = ('DNS HA: At least one os-*-hostname(s) must be set to use '
+               'DNS HA')
+        status_set('blocked', msg)
+        raise HAIncompleteConfig(msg)
+
+    log('VIP HA: VIP is set {}'.format(vip), level=DEBUG)
+    return True
+
+
+def canonical_url(configs, vip_setting='vip'):
+    '''
+    Returns the correct HTTP URL to this host given the state of HTTPS
+    configuration and hacluster.
+
+    :configs    : OSTemplateRenderer: A config templating object to inspect
+                                      for a complete https context.
+
+    :vip_setting:                str: Setting in charm config that specifies
+                                      VIP address.
+    '''
+    scheme = 'http'
+    if 'https' in configs.complete_contexts():
+        scheme = 'https'
+    if is_clustered():
+        addr = config_get(vip_setting)
+    else:
+        addr = unit_get('private-address')
+    return '%s://%s' % (scheme, addr)
+
+
+def distributed_wait(modulo=None, wait=None, operation_name='operation'):
+    ''' Distribute operations by waiting based on modulo_distribution
+
+    If modulo and/or wait are not set, check config_get for those values.
+ If config values are not set, default to modulo=3 and wait=30. + + :param modulo: int The modulo number creates the group distribution + :param wait: int The constant time wait value + :param operation_name: string Operation name for status message + i.e. 'restart' + :side effect: Calls config_get() + :side effect: Calls log() + :side effect: Calls status_set() + :side effect: Calls time.sleep() + ''' + if modulo is None: + modulo = config_get('modulo-nodes') or 3 + if wait is None: + wait = config_get('known-wait') or 30 + if juju_is_leader(): + # The leader should never wait + calculated_wait = 0 + else: + # non_zero_wait=True guarantees the non-leader who gets modulo 0 + # will still wait + calculated_wait = modulo_distribution(modulo=modulo, wait=wait, + non_zero_wait=True) + msg = "Waiting {} seconds for {} ...".format(calculated_wait, + operation_name) + log(msg, DEBUG) + status_set('maintenance', msg) + time.sleep(calculated_wait) + + +def get_managed_services_and_ports(services, external_ports, + external_services=None, + port_conv_f=determine_apache_port_single): + """Get the services and ports managed by this charm. + + Return only the services and corresponding ports that are managed by this + charm. This excludes haproxy when there is a relation with hacluster. This + is because this charm passes responsibility for stopping and starting + haproxy to hacluster. + + Similarly, if a relation with hacluster exists then the ports returned by + this method correspond to those managed by the apache server rather than + haproxy. + + :param services: List of services. + :type services: List[str] + :param external_ports: List of ports managed by external services. + :type external_ports: List[int] + :param external_services: List of services to be removed if ha relation is + present. + :type external_services: List[str] + :param port_conv_f: Function to apply to ports to calculate the ports + managed by services controlled by this charm. + :type port_convert_func: f() + :returns: A tuple containing a list of services first followed by a list of + ports. + :rtype: Tuple[List[str], List[int]] + """ + if external_services is None: + external_services = ['haproxy'] + if relation_ids('ha'): + for svc in external_services: + try: + services.remove(svc) + except ValueError: + pass + external_ports = [port_conv_f(p) for p in external_ports] + return services, external_ports diff --git a/ceph-proxy/charmhelpers/contrib/hardening/README.hardening.md b/ceph-proxy/charmhelpers/contrib/hardening/README.hardening.md new file mode 100644 index 00000000..91280c03 --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/hardening/README.hardening.md @@ -0,0 +1,38 @@ +# Juju charm-helpers hardening library + +## Description + +This library provides multiple implementations of system and application +hardening that conform to the standards of http://hardening.io/. + +Current implementations include: + + * OS + * SSH + * MySQL + * Apache + +## Requirements + +* Juju Charms + +## Usage + +1. Synchronise this library into your charm and add the harden() decorator + (from contrib.hardening.harden) to any functions or methods you want to use + to trigger hardening of your application/system. + +2. Add a config option called 'harden' to your charm config.yaml and set it to + a space-delimited list of hardening modules you want to run e.g. "os ssh" + +3. 
Override any config defaults (contrib.hardening.defaults) by adding a file
+   called hardening.yaml to your charm root containing the name(s) of the
+   modules whose settings you want to override at root level and then any
+   settings with overrides e.g.
+
+   os:
+     general:
+       desktop_enable: True
+
+4. Now just run your charm as usual and hardening will be applied each time the
+   hook runs.
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/__init__.py b/ceph-proxy/charmhelpers/contrib/hardening/__init__.py
new file mode 100644
index 00000000..30a3e943
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/apache/__init__.py b/ceph-proxy/charmhelpers/contrib/hardening/apache/__init__.py
new file mode 100644
index 00000000..58bebd84
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/apache/__init__.py
@@ -0,0 +1,17 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from os import path
+
+TEMPLATES_DIR = path.join(path.dirname(__file__), 'templates')
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/apache/checks/__init__.py b/ceph-proxy/charmhelpers/contrib/hardening/apache/checks/__init__.py
new file mode 100644
index 00000000..3bc2ebd4
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/apache/checks/__init__.py
@@ -0,0 +1,29 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +from charmhelpers.core.hookenv import ( + log, + DEBUG, +) +from charmhelpers.contrib.hardening.apache.checks import config + + +def run_apache_checks(): + log("Starting Apache hardening checks.", level=DEBUG) + checks = config.get_audits() + for check in checks: + log("Running '%s' check" % (check.__class__.__name__), level=DEBUG) + check.ensure_compliance() + + log("Apache hardening checks complete.", level=DEBUG) diff --git a/ceph-proxy/charmhelpers/contrib/hardening/apache/checks/config.py b/ceph-proxy/charmhelpers/contrib/hardening/apache/checks/config.py new file mode 100644 index 00000000..e81a5f0b --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/hardening/apache/checks/config.py @@ -0,0 +1,101 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import re +import subprocess + + +from charmhelpers.core.hookenv import ( + log, + INFO, +) +from charmhelpers.contrib.hardening.audits.file import ( + FilePermissionAudit, + DirectoryPermissionAudit, + NoReadWriteForOther, + TemplatedFile, + DeletedFile +) +from charmhelpers.contrib.hardening.audits.apache import DisabledModuleAudit +from charmhelpers.contrib.hardening.apache import TEMPLATES_DIR +from charmhelpers.contrib.hardening import utils + + +def get_audits(): + """Get Apache hardening config audits. + + :returns: dictionary of audits + """ + if subprocess.call(['which', 'apache2'], stdout=subprocess.PIPE) != 0: + log("Apache server does not appear to be installed on this node - " + "skipping apache hardening", level=INFO) + return [] + + context = ApacheConfContext() + settings = utils.get_settings('apache') + audits = [ + FilePermissionAudit(paths=os.path.join( + settings['common']['apache_dir'], 'apache2.conf'), + user='root', group='root', mode=0o0640), + + TemplatedFile(os.path.join(settings['common']['apache_dir'], + 'mods-available/alias.conf'), + context, + TEMPLATES_DIR, + mode=0o0640, + user='root', + service_actions=[{'service': 'apache2', + 'actions': ['restart']}]), + + TemplatedFile(os.path.join(settings['common']['apache_dir'], + 'conf-enabled/99-hardening.conf'), + context, + TEMPLATES_DIR, + mode=0o0640, + user='root', + service_actions=[{'service': 'apache2', + 'actions': ['restart']}]), + + DirectoryPermissionAudit(settings['common']['apache_dir'], + user='root', + group='root', + mode=0o0750), + + DisabledModuleAudit(settings['hardening']['modules_to_disable']), + + NoReadWriteForOther(settings['common']['apache_dir']), + + DeletedFile(['/var/www/html/index.html']) + ] + + return audits + + +class ApacheConfContext(object): + """Defines the set of key/value pairs to set in a apache config file. + + This context, when called, will return a dictionary containing the + key/value pairs of setting to specify in the + /etc/apache/conf-enabled/hardening.conf file. 
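+
+    For illustration, the rendered context might look roughly like this
+    (exact values depend on the stored hardening settings and the local
+    Apache installation)::
+
+        {'traceenable': 'Off', 'servertokens': 'Prod',
+         'apache_version': '2.4.52',
+         'apache_icondir': '/usr/share/apache2/icons/'}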
+ """ + def __call__(self): + settings = utils.get_settings('apache') + ctxt = settings['hardening'] + + out = subprocess.check_output(['apache2', '-v']).decode('utf-8') + ctxt['apache_version'] = re.search(r'.+version: Apache/(.+?)\s.+', + out).group(1) + ctxt['apache_icondir'] = '/usr/share/apache2/icons/' + return ctxt diff --git a/ceph-proxy/charmhelpers/contrib/hardening/apache/templates/99-hardening.conf b/ceph-proxy/charmhelpers/contrib/hardening/apache/templates/99-hardening.conf new file mode 100644 index 00000000..22b68041 --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/hardening/apache/templates/99-hardening.conf @@ -0,0 +1,32 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### + + + + # http://httpd.apache.org/docs/2.4/upgrading.html + {% if apache_version > '2.2' -%} + Require all granted + {% else -%} + Order Allow,Deny + Deny from all + {% endif %} + + + + + Options -Indexes -FollowSymLinks + AllowOverride None + + + + Options -Indexes -FollowSymLinks + AllowOverride None + + +TraceEnable {{ traceenable }} +ServerTokens {{ servertokens }} + +SSLHonorCipherOrder {{ honor_cipher_order }} +SSLCipherSuite {{ cipher_suite }} diff --git a/ceph-proxy/charmhelpers/contrib/hardening/apache/templates/__init__.py b/ceph-proxy/charmhelpers/contrib/hardening/apache/templates/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ceph-proxy/charmhelpers/contrib/hardening/apache/templates/alias.conf b/ceph-proxy/charmhelpers/contrib/hardening/apache/templates/alias.conf new file mode 100644 index 00000000..e46a58a3 --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/hardening/apache/templates/alias.conf @@ -0,0 +1,31 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### + + # + # Aliases: Add here as many aliases as you need (with no limit). The format is + # Alias fakename realname + # + # Note that if you include a trailing / on fakename then the server will + # require it to be present in the URL. So "/icons" isn't aliased in this + # example, only "/icons/". If the fakename is slash-terminated, then the + # realname must also be slash terminated, and if the fakename omits the + # trailing slash, the realname must also omit it. + # + # We include the /icons/ alias for FancyIndexed directory listings. If + # you do not use FancyIndexing, you may comment this out. + # + Alias /icons/ "{{ apache_icondir }}/" + + + Options -Indexes -MultiViews -FollowSymLinks + AllowOverride None +{% if apache_version == '2.4' -%} + Require all granted +{% else -%} + Order allow,deny + Allow from all +{% endif %} + + diff --git a/ceph-proxy/charmhelpers/contrib/hardening/audits/__init__.py b/ceph-proxy/charmhelpers/contrib/hardening/audits/__init__.py new file mode 100644 index 00000000..6dd5b05f --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/hardening/audits/__init__.py @@ -0,0 +1,54 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+class BaseAudit(object):  # NO-QA
+    """Base class for hardening checks.
+
+    The lifecycle of a hardening check is to first check to see if the system
+    is in compliance for the specified check. If it is not in compliance, the
+    audit is expected to bring the system into compliance via its
+    ensure_compliance() implementation.
+    """
+    def __init__(self, *args, **kwargs):
+        self.unless = kwargs.get('unless', None)
+        super(BaseAudit, self).__init__()
+
+    def ensure_compliance(self):
+        """Checks to see if the current hardening check is in compliance or
+        not.
+
+        If the check that is performed is not in compliance, then an exception
+        should be raised.
+        """
+        pass
+
+    def _take_action(self):
+        """Determines whether to perform the action or not.
+
+        Checks whether or not an action should be taken. This is determined by
+        the truthy value for the unless parameter. If unless is a callback
+        method, it will be invoked with no parameters in order to determine
+        whether or not the action should be taken. Otherwise, the truthy value
+        of the unless attribute will determine if the action should be
+        performed.
+        """
+        # Do the action if there isn't an unless override.
+        if self.unless is None:
+            return True
+
+        # Invoke the callback if there is one.
+        if hasattr(self.unless, '__call__'):
+            return not self.unless()
+
+        return not self.unless
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/audits/apache.py b/ceph-proxy/charmhelpers/contrib/hardening/audits/apache.py
new file mode 100644
index 00000000..31db8f62
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/audits/apache.py
@@ -0,0 +1,101 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+import subprocess
+
+from charmhelpers.core.hookenv import (
+    log,
+    INFO,
+    ERROR,
+)
+
+from charmhelpers.contrib.hardening.audits import BaseAudit
+
+
+class DisabledModuleAudit(BaseAudit):
+    """Audits Apache2 modules.
+
+    Determines if the apache2 modules are enabled. If a module is enabled
+    then it is disabled during ensure_compliance.
+    """
+    def __init__(self, modules):
+        if modules is None:
+            self.modules = []
+        elif isinstance(modules, str):
+            self.modules = [modules]
+        else:
+            self.modules = modules
+
+    def ensure_compliance(self):
+        """Ensures that the modules are not loaded."""
+        if not self.modules:
+            return
+
+        try:
+            loaded_modules = self._get_loaded_modules()
+            non_compliant_modules = []
+            for module in self.modules:
+                if module in loaded_modules:
+                    log("Module '%s' is enabled but should not be."
% + (module), level=INFO) + non_compliant_modules.append(module) + + if len(non_compliant_modules) == 0: + return + + for module in non_compliant_modules: + self._disable_module(module) + self._restart_apache() + except subprocess.CalledProcessError as e: + log('Error occurred auditing apache module compliance. ' + 'This may have been already reported. ' + 'Output is: %s' % e.output, level=ERROR) + + @staticmethod + def _get_loaded_modules(): + """Returns the modules which are enabled in Apache.""" + output = subprocess.check_output(['apache2ctl', '-M']).decode('utf-8') + modules = [] + for line in output.splitlines(): + # Each line of the enabled module output looks like: + # module_name (static|shared) + # Plus a header line at the top of the output which is stripped + # out by the regex. + matcher = re.search(r'^ (\S*)_module (\S*)', line) + if matcher: + modules.append(matcher.group(1)) + return modules + + @staticmethod + def _disable_module(module): + """Disables the specified module in Apache.""" + try: + subprocess.check_call(['a2dismod', module]) + except subprocess.CalledProcessError as e: + # Note: catch error here to allow the attempt of disabling + # multiple modules in one go rather than failing after the + # first module fails. + log('Error occurred disabling module %s. ' + 'Output is: %s' % (module, e.output), level=ERROR) + + @staticmethod + def _restart_apache(): + """Restarts the apache process""" + subprocess.check_output(['service', 'apache2', 'restart']) + + @staticmethod + def is_ssl_enabled(): + """Check if SSL module is enabled or not""" + return 'ssl' in DisabledModuleAudit._get_loaded_modules() diff --git a/ceph-proxy/charmhelpers/contrib/hardening/audits/apt.py b/ceph-proxy/charmhelpers/contrib/hardening/audits/apt.py new file mode 100644 index 00000000..1b22925b --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/hardening/audits/apt.py @@ -0,0 +1,101 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+from charmhelpers.fetch import (
+    apt_cache,
+    apt_purge
+)
+from charmhelpers.core.hookenv import (
+    log,
+    DEBUG,
+    WARNING,
+)
+from charmhelpers.contrib.hardening.audits import BaseAudit
+from charmhelpers.fetch import ubuntu_apt_pkg as apt_pkg
+
+
+class AptConfig(BaseAudit):
+
+    def __init__(self, config, **kwargs):
+        self.config = config
+
+    def verify_config(self):
+        apt_pkg.init()
+        for cfg in self.config:
+            value = apt_pkg.config.get(cfg['key'], cfg.get('default', ''))
+            if value and value != cfg['expected']:
+                log("APT config '%s' has unexpected value '%s' "
+                    "(expected='%s')" %
+                    (cfg['key'], value, cfg['expected']), level=WARNING)
+
+    def ensure_compliance(self):
+        self.verify_config()
+
+
+class RestrictedPackages(BaseAudit):
+    """Class used to audit restricted packages on the system."""
+
+    def __init__(self, pkgs, **kwargs):
+        super(RestrictedPackages, self).__init__(**kwargs)
+        if isinstance(pkgs, str) or not hasattr(pkgs, '__iter__'):
+            self.pkgs = pkgs.split()
+        else:
+            self.pkgs = pkgs
+
+    def ensure_compliance(self):
+        cache = apt_cache()
+
+        for p in self.pkgs:
+            if p not in cache:
+                continue
+
+            pkg = cache[p]
+            if not self.is_virtual_package(pkg):
+                if not pkg.current_ver:
+                    log("Package '%s' is not installed." % pkg.name,
+                        level=DEBUG)
+                    continue
+                else:
+                    log("Restricted package '%s' is installed" % pkg.name,
+                        level=WARNING)
+                    self.delete_package(cache, pkg)
+            else:
+                log("Checking restricted virtual package '%s' provides" %
+                    pkg.name, level=DEBUG)
+                self.delete_package(cache, pkg)
+
+    def delete_package(self, cache, pkg):
+        """Deletes the package from the system.
+
+        Deletes the package from the system, properly handling virtual
+        packages.
+
+        :param cache: the apt cache
+        :param pkg: the package to remove
+        """
+        if self.is_virtual_package(pkg):
+            log("Package '%s' appears to be virtual - purging provides" %
+                pkg.name, level=DEBUG)
+            for _p in pkg.provides_list:
+                self.delete_package(cache, _p[2].parent_pkg)
+        elif not pkg.current_ver:
+            log("Package '%s' not installed" % pkg.name, level=DEBUG)
+            return
+        else:
+            log("Purging package '%s'" % pkg.name, level=DEBUG)
+            apt_purge(pkg.name)
+
+    def is_virtual_package(self, pkg):
+        return (pkg.get('has_provides', False) and
+                not pkg.get('has_versions', False))
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/audits/file.py b/ceph-proxy/charmhelpers/contrib/hardening/audits/file.py
new file mode 100644
index 00000000..84cc2494
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/audits/file.py
@@ -0,0 +1,549 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import grp
+import os
+import pwd
+import re
+
+from subprocess import (
+    CalledProcessError,
+    check_output,
+    check_call,
+)
+from traceback import format_exc
+from stat import (
+    S_ISGID,
+    S_ISUID
+)
+
+from charmhelpers.core.hookenv import (
+    log,
+    DEBUG,
+    INFO,
+    WARNING,
+    ERROR,
+)
+from charmhelpers.core import unitdata
+from charmhelpers.core.host import file_hash
+from charmhelpers.contrib.hardening.audits import BaseAudit
+from charmhelpers.contrib.hardening.templating import (
+    get_template_path,
+    render_and_write,
+)
+from charmhelpers.contrib.hardening import utils
+
+
+class BaseFileAudit(BaseAudit):
+    """Base class for file audits.
+
+    Provides the API stubs for the compliance-check flow that must be
+    implemented by any class deriving from this one.
+    """
+
+    def __init__(self, paths, always_comply=False, *args, **kwargs):
+        """
+        :param paths: string path or list of paths of files to apply the
+                      compliance criteria to.
+        :param always_comply: if True, compliance criteria are always applied;
+                              otherwise compliance is skipped for non-existent
+                              paths.
+        """
+        super(BaseFileAudit, self).__init__(*args, **kwargs)
+        self.always_comply = always_comply
+        if isinstance(paths, str) or not hasattr(paths, '__iter__'):
+            self.paths = [paths]
+        else:
+            self.paths = paths
+
+    def ensure_compliance(self):
+        """Ensure that all registered files comply with the registered
+        criteria.
+        """
+        for p in self.paths:
+            if os.path.exists(p):
+                if self.is_compliant(p):
+                    continue
+
+                log('File %s is not in compliance.' % p, level=INFO)
+            else:
+                if not self.always_comply:
+                    log("Non-existent path '%s' - skipping compliance check"
+                        % (p), level=INFO)
+                    continue
+
+            if self._take_action():
+                log("Applying compliance criteria to '%s'" % (p), level=INFO)
+                self.comply(p)
+
+    def is_compliant(self, path):
+        """Audits the path to see if it is in compliance.
+
+        :param path: the path to the file that should be checked.
+        """
+        raise NotImplementedError
+
+    def comply(self, path):
+        """Enforces the compliance of a path.
+
+        :param path: the path to the file that should be enforced.
+        """
+        raise NotImplementedError
+
+    @classmethod
+    def _get_stat(cls, path):
+        """Returns the Posix st_stat information for the specified file path.
+
+        :param path: the path to get the st_stat information for.
+        :returns: an st_stat object for the path or None if the path doesn't
+                  exist.
+        """
+        return os.stat(path)
+
+
+class FilePermissionAudit(BaseFileAudit):
+    """Implements an audit for file permissions and ownership for a user.
+
+    This class implements functionality that ensures that a specific user/group
+    will own the file(s) specified and that the permissions specified are
+    applied properly to the file.
+    """
+    def __init__(self, paths, user, group=None, mode=0o600, **kwargs):
+        self.user = user
+        self.group = group
+        self.mode = mode
+        super(FilePermissionAudit, self).__init__(paths, user, group, mode,
+                                                  **kwargs)
+
+    @property
+    def user(self):
+        return self._user
+
+    @user.setter
+    def user(self, name):
+        try:
+            user = pwd.getpwnam(name)
+        except KeyError:
+            log('Unknown user %s' % name, level=ERROR)
+            user = None
+        self._user = user
+
+    @property
+    def group(self):
+        return self._group
+
+    @group.setter
+    def group(self, name):
+        try:
+            group = None
+            if name:
+                group = grp.getgrnam(name)
+            else:
+                group = grp.getgrgid(self.user.pw_gid)
+        except KeyError:
+            log('Unknown group %s' % name, level=ERROR)
+        self._group = group
+
+    def is_compliant(self, path):
+        """Checks if the path is in compliance.
+
+        Used to determine if the path specified meets the necessary
+        requirements to be in compliance with the check itself.
+
+        :param path: the file path to check
+        :returns: True if the path is compliant, False otherwise.
+        """
+        stat = self._get_stat(path)
+        user = self.user
+        group = self.group
+
+        compliant = True
+        if stat.st_uid != user.pw_uid or stat.st_gid != group.gr_gid:
+            log('File %s is not owned by %s:%s.' % (path, user.pw_name,
+                                                    group.gr_name),
+                level=INFO)
+            compliant = False
+
+        # POSIX refers to the st_mode bits as corresponding to both the
+        # file type and file permission bits, where the least significant 12
+        # bits (o7777) are the suid (11), sgid (10), sticky bits (9), and the
+        # file permission bits (8-0)
+        perms = stat.st_mode & 0o7777
+        if perms != self.mode:
+            log('File %s has incorrect permissions, currently set to %s' %
+                (path, oct(stat.st_mode & 0o7777)), level=INFO)
+            compliant = False
+
+        return compliant
+
+    def comply(self, path):
+        """Issues a chown and chmod to the file paths specified."""
+        utils.ensure_permissions(path, self.user.pw_name, self.group.gr_name,
+                                 self.mode)
+
+
+class DirectoryPermissionAudit(FilePermissionAudit):
+    """Performs a permission check for the specified directory path."""
+
+    def __init__(self, paths, user, group=None, mode=0o600,
+                 recursive=True, **kwargs):
+        super(DirectoryPermissionAudit, self).__init__(paths, user, group,
+                                                       mode, **kwargs)
+        self.recursive = recursive
+
+    def is_compliant(self, path):
+        """Checks if the directory is compliant.
+
+        Used to determine if the path specified and all of its children
+        directories are in compliance with the check itself.
+
+        :param path: the directory path to check
+        :returns: True if the directory tree is compliant, otherwise False.
+        """
+        if not os.path.isdir(path):
+            log('Path specified %s is not a directory.' % path, level=ERROR)
+            raise ValueError("%s is not a directory." % path)
+
+        if not self.recursive:
+            return super(DirectoryPermissionAudit, self).is_compliant(path)
+
+        compliant = True
+        for root, dirs, _ in os.walk(path):
+            if len(dirs) > 0:
+                continue
+
+            if not super(DirectoryPermissionAudit, self).is_compliant(root):
+                compliant = False
+                continue
+
+        return compliant
+
+    def comply(self, path):
+        for root, dirs, _ in os.walk(path):
+            if len(dirs) > 0:
+                super(DirectoryPermissionAudit, self).comply(root)
+
+
+class ReadOnly(BaseFileAudit):
+    """Audits that files and folders are read only."""
+    def __init__(self, paths, *args, **kwargs):
+        super(ReadOnly, self).__init__(paths=paths, *args, **kwargs)
+
+    def is_compliant(self, path):
+        try:
+            output = check_output(['find', path, '-perm', '-go+w',
+                                   '-type', 'f']).strip()
+
+            # The find above will find any files which have permission sets
+            # which allow too broad of write access. As such, the path is
+            # compliant if there is no output.
+            if output:
+                return False
+
+            return True
+        except CalledProcessError as e:
+            log('Error occurred finding writable files for %s. '
+                'Error information is: command %s failed with returncode '
+                '%d and output %s.\n%s' % (path, e.cmd, e.returncode, e.output,
+                                           format_exc(e)), level=ERROR)
+            return False
+
+    def comply(self, path):
+        try:
+            check_output(['chmod', 'go-w', '-R', path])
+        except CalledProcessError as e:
+            log('Error occurred removing writeable permissions for %s. '
+                'Error information is: command %s failed with returncode '
+                '%d and output %s.\n%s' % (path, e.cmd, e.returncode, e.output,
+                                           format_exc(e)), level=ERROR)
+
+
+class NoReadWriteForOther(BaseFileAudit):
+    """Ensures that the files found under the base path are not readable or
+    writable by anyone other than the owner or the group.
+    """
+    def __init__(self, paths):
+        super(NoReadWriteForOther, self).__init__(paths)
+
+    def is_compliant(self, path):
+        try:
+            cmd = ['find', path, '-perm', '-o+r', '-type', 'f', '-o',
+                   '-perm', '-o+w', '-type', 'f']
+            output = check_output(cmd).strip()
+
+            # The find above here will find any files which have read or
+            # write permissions for other, meaning there is too broad of access
+            # to read/write the file. As such, the path is compliant if there's
+            # no output.
+            if output:
+                return False
+
+            return True
+        except CalledProcessError as e:
+            log('Error occurred while finding files which are readable or '
+                'writable to the world in %s. '
+                'Command output is: %s.' % (path, e.output), level=ERROR)
+
+    def comply(self, path):
+        try:
+            check_output(['chmod', '-R', 'o-rw', path])
+        except CalledProcessError as e:
+            log('Error occurred attempting to change modes of files under '
+                'path %s. Output of command is: %s' % (path, e.output))
+
+
+class NoSUIDSGIDAudit(BaseFileAudit):
+    """Audits that specified files do not have SUID/SGID bits set."""
+    def __init__(self, paths, *args, **kwargs):
+        super(NoSUIDSGIDAudit, self).__init__(paths=paths, *args, **kwargs)
+
+    def is_compliant(self, path):
+        stat = self._get_stat(path)
+        if (stat.st_mode & (S_ISGID | S_ISUID)) != 0:
+            return False
+
+        return True
+
+    def comply(self, path):
+        try:
+            log('Removing suid/sgid from %s.' % path, level=DEBUG)
+            check_output(['chmod', '-s', path])
+        except CalledProcessError as e:
+            log('Error occurred removing suid/sgid from %s. '
+                'Error information is: command %s failed with returncode '
+                '%d and output %s.\n%s' % (path, e.cmd, e.returncode, e.output,
+                                           format_exc(e)), level=ERROR)
+
+
+class TemplatedFile(BaseFileAudit):
+    """A TemplatedFile audit checks the contents of a templated file.
+
+    This audit renders a file from a template, sets the appropriate file
+    permissions, then generates a hashsum with which to check whether the
+    content has changed.
+    """
+    def __init__(self, path, context, template_dir, mode, user='root',
+                 group='root', service_actions=None, **kwargs):
+        self.context = context
+        self.user = user
+        self.group = group
+        self.mode = mode
+        self.template_dir = template_dir
+        self.service_actions = service_actions
+        super(TemplatedFile, self).__init__(paths=path, always_comply=True,
+                                            **kwargs)
+
+    def is_compliant(self, path):
+        """Determines if the templated file is compliant.
+
+        A templated file is only compliant if it has not changed (as
+        determined by its sha256 hashsum) AND its file permissions are set
+        appropriately.
+
+        :param path: the path to check compliance.
+
+        """
+        same_templates = self.templates_match(path)
+        same_content = self.contents_match(path)
+        same_permissions = self.permissions_match(path)
+
+        if same_content and same_permissions and same_templates:
+            return True
+
+        return False
+
+    def run_service_actions(self):
+        """Run any actions on services requested."""
+        if not self.service_actions:
+            return
+
+        for svc_action in self.service_actions:
+            name = svc_action['service']
+            actions = svc_action['actions']
+            log("Running service '%s' actions '%s'" % (name, actions),
+                level=DEBUG)
+            for action in actions:
+                cmd = ['service', name, action]
+                try:
+                    check_call(cmd)
+                except CalledProcessError as exc:
+                    log("Service name='%s' action='%s' failed - %s" %
+                        (name, action, exc), level=WARNING)
+
+    def comply(self, path):
+        """Ensures the contents and the permissions of the file.
+
+        :param path: the path to correct
+        """
+        dirname = os.path.dirname(path)
+        if not os.path.exists(dirname):
+            os.makedirs(dirname)
+
+        self.pre_write()
+        render_and_write(self.template_dir, path, self.context())
+        utils.ensure_permissions(path, self.user, self.group, self.mode)
+        self.run_service_actions()
+        self.save_checksum(path)
+        self.post_write()
+
+    def pre_write(self):
+        """Invoked prior to writing the template."""
+        pass
+
+    def post_write(self):
+        """Invoked after writing the template."""
+        pass
+
+    def templates_match(self, path):
+        """Determines if the template files are the same.
+
+        Template equality is determined by the hashsum of the template
+        files themselves. If there is no stored hashsum, the content cannot
+        be known to be the same, so treat the template as having changed.
+        Otherwise, return whether or not the hashsums are the same.
+
+        :param path: the path to check
+        :returns: boolean
+        """
+        template_path = get_template_path(self.template_dir, path)
+        key = 'hardening:template:%s' % template_path
+        template_checksum = file_hash(template_path)
+        kv = unitdata.kv()
+        stored_tmplt_checksum = kv.get(key)
+        if not stored_tmplt_checksum:
+            kv.set(key, template_checksum)
+            kv.flush()
+            log('Saved template checksum for %s.' % template_path,
+                level=DEBUG)
+            # Since we don't have a template checksum, then assume it doesn't
+            # match and return that the template is different.
+            return False
+        elif stored_tmplt_checksum != template_checksum:
+            kv.set(key, template_checksum)
+            kv.flush()
+            log('Updated template checksum for %s.' % template_path,
+                level=DEBUG)
+            return False
+
+        # Here the template hasn't changed based upon the calculated
+        # checksum of the template and what was previously stored.
+        return True
+
+    def contents_match(self, path):
+        """Determines if the file content is the same.
+
+        This is determined by comparing the hashsum of the file contents
+        with the saved hashsum. If there is no saved hashsum, the content
+        cannot be known to be the same, so treat the contents as having
+        changed. Otherwise, return True if the hashsums are the same,
+        False if they are not the same.
+
+        :param path: the file to check.
+        """
+        checksum = file_hash(path)
+
+        kv = unitdata.kv()
+        stored_checksum = kv.get('hardening:%s' % path)
+        if not stored_checksum:
+            # If the checksum hasn't been generated, return False to ensure
+            # the file is written and the checksum stored.
+            log('Checksum for %s has not been calculated.' % path, level=DEBUG)
+            return False
+        elif stored_checksum != checksum:
+            log('Checksum mismatch for %s.' % path, level=DEBUG)
+            return False
+
+        return True
+
+    def permissions_match(self, path):
+        """Determines if the file owner and permissions match.
+
+        :param path: the path to check.
+        """
+        audit = FilePermissionAudit(path, self.user, self.group, self.mode)
+        return audit.is_compliant(path)
+
+    def save_checksum(self, path):
+        """Calculates and saves the checksum for the path specified.
+
+        :param path: the path of the file to save the checksum.
+        """
+        checksum = file_hash(path)
+        kv = unitdata.kv()
+        kv.set('hardening:%s' % path, checksum)
+        kv.flush()
+
+
+class DeletedFile(BaseFileAudit):
+    """Audit to ensure that a file is deleted."""
+    def __init__(self, paths):
+        super(DeletedFile, self).__init__(paths)
+
+    def is_compliant(self, path):
+        return not os.path.exists(path)
+
+    def comply(self, path):
+        os.remove(path)
+
+
+class FileContentAudit(BaseFileAudit):
+    """Audit the contents of a file."""
+    def __init__(self, paths, cases, **kwargs):
+        # Cases we expect to pass
+        self.pass_cases = cases.get('pass', [])
+        # Cases we expect to fail
+        self.fail_cases = cases.get('fail', [])
+        super(FileContentAudit, self).__init__(paths, **kwargs)
+
+    def is_compliant(self, path):
+        """
+        Given a set of content matching cases, i.e. regexes expected either
+        to match ('pass') or not match ('fail') the contents of the file,
+        check that all cases behave as expected. Cases can be expected to
+        pass or fail.
+
+        :param path: Path of file to check.
+        :returns: Boolean value representing whether or not all cases are
+                  found to be compliant.
+        """
+        log("Auditing contents of file '%s'" % (path), level=DEBUG)
+        with open(path, 'r') as fd:
+            contents = fd.read()
+
+        matches = 0
+        for pattern in self.pass_cases:
+            key = re.compile(pattern, flags=re.MULTILINE)
+            results = re.search(key, contents)
+            if results:
+                matches += 1
+            else:
+                log("Pattern '%s' was expected to pass but instead it failed"
+                    % (pattern), level=WARNING)
+
+        for pattern in self.fail_cases:
+            key = re.compile(pattern, flags=re.MULTILINE)
+            results = re.search(key, contents)
+            if not results:
+                matches += 1
+            else:
+                log("Pattern '%s' was expected to fail but instead it passed"
+                    % (pattern), level=WARNING)
+
+        total = len(self.pass_cases) + len(self.fail_cases)
+        log("Checked %s cases and %s passed" % (total, matches), level=DEBUG)
+        return matches == total
+
+    def comply(self, *args, **kwargs):
+        """NOOP since we only issue warnings. This is to avoid the
+        NotImplementedError.
+        """
+        log("Not applying any compliance criteria, only checks.", level=INFO)
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/defaults/__init__.py b/ceph-proxy/charmhelpers/contrib/hardening/defaults/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/defaults/apache.yaml b/ceph-proxy/charmhelpers/contrib/hardening/defaults/apache.yaml
new file mode 100644
index 00000000..0f940d4c
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/defaults/apache.yaml
@@ -0,0 +1,16 @@
+# NOTE: this file contains the default configuration for the 'apache' hardening
+#       code. If you want to override any settings you must add them to a file
+#       called hardening.yaml in the root directory of your charm using the
+#       name 'apache' as the root key followed by any of the following with new
+#       values.
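+#
+#       For example, a charm's hardening.yaml might contain (the values
+#       shown are purely illustrative):
+#
+#           apache:
+#             hardening:
+#               traceenable: 'on'
+#               servertokens: 'OS'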
+
+common:
+  apache_dir: '/etc/apache2'
+
+hardening:
+  traceenable: 'off'
+  allowed_http_methods: "GET POST"
+  modules_to_disable: [ cgi, cgid ]
+  servertokens: 'Prod'
+  honor_cipher_order: 'on'
+  cipher_suite: 'ALL:+MEDIUM:+HIGH:!LOW:!MD5:!RC4:!eNULL:!aNULL:!3DES'
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/defaults/apache.yaml.schema b/ceph-proxy/charmhelpers/contrib/hardening/defaults/apache.yaml.schema
new file mode 100644
index 00000000..c112137c
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/defaults/apache.yaml.schema
@@ -0,0 +1,12 @@
+# NOTE: this schema must contain all valid keys from its associated defaults
+#       file. It is used to validate user-provided overrides.
+common:
+  apache_dir:
+  traceenable:
+
+hardening:
+  allowed_http_methods:
+  modules_to_disable:
+  servertokens:
+  honor_cipher_order:
+  cipher_suite:
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/defaults/mysql.yaml b/ceph-proxy/charmhelpers/contrib/hardening/defaults/mysql.yaml
new file mode 100644
index 00000000..682d22bf
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/defaults/mysql.yaml
@@ -0,0 +1,38 @@
+# NOTE: this file contains the default configuration for the 'mysql' hardening
+#       code. If you want to override any settings you must add them to a file
+#       called hardening.yaml in the root directory of your charm using the
+#       name 'mysql' as the root key followed by any of the following with new
+#       values.
+
+hardening:
+  mysql-conf: /etc/mysql/my.cnf
+  hardening-conf: /etc/mysql/conf.d/hardening.cnf
+
+security:
+  # @see http://www.symantec.com/connect/articles/securing-mysql-step-step
+  # @see http://dev.mysql.com/doc/refman/5.7/en/server-options.html#option_mysqld_chroot
+  chroot: None
+
+  # @see http://dev.mysql.com/doc/refman/5.7/en/server-options.html#option_mysqld_safe-user-create
+  safe-user-create: 1
+
+  # @see http://dev.mysql.com/doc/refman/5.7/en/server-options.html#option_mysqld_secure-auth
+  secure-auth: 1
+
+  # @see http://dev.mysql.com/doc/refman/5.7/en/server-options.html#option_mysqld_symbolic-links
+  skip-symbolic-links: 1
+
+  # @see http://dev.mysql.com/doc/refman/5.7/en/server-options.html#option_mysqld_skip-show-database
+  skip-show-database: True
+
+  # @see http://dev.mysql.com/doc/refman/5.7/en/server-system-variables.html#sysvar_local_infile
+  local-infile: 0
+
+  # @see https://dev.mysql.com/doc/refman/5.7/en/server-options.html#option_mysqld_allow-suspicious-udfs
+  allow-suspicious-udfs: 0
+
+  # @see https://dev.mysql.com/doc/refman/5.7/en/server-system-variables.html#sysvar_automatic_sp_privileges
+  automatic-sp-privileges: 0
+
+  # @see https://dev.mysql.com/doc/refman/5.7/en/server-options.html#option_mysqld_secure-file-priv
+  secure-file-priv: /tmp
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/defaults/mysql.yaml.schema b/ceph-proxy/charmhelpers/contrib/hardening/defaults/mysql.yaml.schema
new file mode 100644
index 00000000..2edf325c
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/defaults/mysql.yaml.schema
@@ -0,0 +1,15 @@
+# NOTE: this schema must contain all valid keys from its associated defaults
+#       file. It is used to validate user-provided overrides.
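+#
+#       A minimal sketch of an override that this schema would accept
+#       (the value shown is illustrative only):
+#
+#           mysql:
+#             security:
+#               local-infile: 1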
+hardening:
+  mysql-conf:
+  hardening-conf:
+security:
+  chroot:
+  safe-user-create:
+  secure-auth:
+  skip-symbolic-links:
+  skip-show-database:
+  local-infile:
+  allow-suspicious-udfs:
+  automatic-sp-privileges:
+  secure-file-priv:
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/defaults/os.yaml b/ceph-proxy/charmhelpers/contrib/hardening/defaults/os.yaml
new file mode 100644
index 00000000..9a8627b5
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/defaults/os.yaml
@@ -0,0 +1,68 @@
+# NOTE: this file contains the default configuration for the 'os' hardening
+#       code. If you want to override any settings you must add them to a file
+#       called hardening.yaml in the root directory of your charm using the
+#       name 'os' as the root key followed by any of the following with new
+#       values.
+
+general:
+  desktop_enable: False  # (type:boolean)
+
+environment:
+  extra_user_paths: []
+  umask: 027
+  root_path: /
+
+auth:
+  pw_max_age: 60
+  # discourage password cycling
+  pw_min_age: 7
+  retries: 5
+  lockout_time: 600
+  timeout: 60
+  allow_homeless: False  # (type:boolean)
+  pam_passwdqc_enable: True  # (type:boolean)
+  pam_passwdqc_options: 'min=disabled,disabled,16,12,8'
+  root_ttys:
+    console
+    tty1
+    tty2
+    tty3
+    tty4
+    tty5
+    tty6
+  uid_min: 1000
+  gid_min: 1000
+  sys_uid_min: 100
+  sys_uid_max: 999
+  sys_gid_min: 100
+  sys_gid_max: 999
+  chfn_restrict:
+
+security:
+  users_allow: []
+  suid_sgid_enforce: True  # (type:boolean)
+  # user-defined blacklist and whitelist
+  suid_sgid_blacklist: []
+  suid_sgid_whitelist: []
+  # if this is True, remove any suid/sgid bits from files that were not in the whitelist
+  suid_sgid_dry_run_on_unknown: False  # (type:boolean)
+  suid_sgid_remove_from_unknown: False  # (type:boolean)
+  # remove packages with known issues
+  packages_clean: True  # (type:boolean)
+  packages_list:
+    xinetd
+    inetd
+    ypserv
+    telnet-server
+    rsh-server
+    rsync
+  kernel_enable_module_loading: True  # (type:boolean)
+  kernel_enable_core_dump: False  # (type:boolean)
+  ssh_tmout: 300
+
+sysctl:
+  kernel_secure_sysrq: 244  # 4 + 16 + 32 + 64 + 128
+  kernel_enable_sysrq: False  # (type:boolean)
+  forwarding: False  # (type:boolean)
+  ipv6_enable: False  # (type:boolean)
+  arp_restricted: True  # (type:boolean)
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/defaults/os.yaml.schema b/ceph-proxy/charmhelpers/contrib/hardening/defaults/os.yaml.schema
new file mode 100644
index 00000000..cc3b9c20
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/defaults/os.yaml.schema
@@ -0,0 +1,43 @@
+# NOTE: this schema must contain all valid keys from its associated defaults
+#       file. It is used to validate user-provided overrides.
+general:
+  desktop_enable:
+environment:
+  extra_user_paths:
+  umask:
+  root_path:
+auth:
+  pw_max_age:
+  pw_min_age:
+  retries:
+  lockout_time:
+  timeout:
+  allow_homeless:
+  pam_passwdqc_enable:
+  pam_passwdqc_options:
+  root_ttys:
+  uid_min:
+  gid_min:
+  sys_uid_min:
+  sys_uid_max:
+  sys_gid_min:
+  sys_gid_max:
+  chfn_restrict:
+security:
+  users_allow:
+  suid_sgid_enforce:
+  suid_sgid_blacklist:
+  suid_sgid_whitelist:
+  suid_sgid_dry_run_on_unknown:
+  suid_sgid_remove_from_unknown:
+  packages_clean:
+  packages_list:
+  kernel_enable_module_loading:
+  kernel_enable_core_dump:
+  ssh_tmout:
+sysctl:
+  kernel_secure_sysrq:
+  kernel_enable_sysrq:
+  forwarding:
+  ipv6_enable:
+  arp_restricted:
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/defaults/ssh.yaml b/ceph-proxy/charmhelpers/contrib/hardening/defaults/ssh.yaml
new file mode 100644
index 00000000..cd529bca
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/defaults/ssh.yaml
@@ -0,0 +1,49 @@
+# NOTE: this file contains the default configuration for the 'ssh' hardening
+#       code. If you want to override any settings you must add them to a file
+#       called hardening.yaml in the root directory of your charm using the
+#       name 'ssh' as the root key followed by any of the following with new
+#       values.
+
+common:
+  service_name: 'ssh'
+  network_ipv6_enable: False  # (type:boolean)
+  ports: [22]
+  remote_hosts: []
+
+client:
+  package: 'openssh-client'
+  cbc_required: False  # (type:boolean)
+  weak_hmac: False  # (type:boolean)
+  weak_kex: False  # (type:boolean)
+  roaming: False
+  password_authentication: 'no'
+
+server:
+  host_key_files: ['/etc/ssh/ssh_host_rsa_key', '/etc/ssh/ssh_host_dsa_key',
+                   '/etc/ssh/ssh_host_ecdsa_key']
+  cbc_required: False  # (type:boolean)
+  weak_hmac: False  # (type:boolean)
+  weak_kex: False  # (type:boolean)
+  allow_root_with_key: False  # (type:boolean)
+  allow_tcp_forwarding: 'no'
+  allow_agent_forwarding: 'no'
+  allow_x11_forwarding: 'no'
+  use_privilege_separation: 'sandbox'
+  listen_to: ['0.0.0.0']
+  use_pam: 'no'
+  package: 'openssh-server'
+  password_authentication: 'no'
+  alive_interval: '600'
+  alive_count: '3'
+  sftp_enable: False  # (type:boolean)
+  sftp_group: 'sftponly'
+  sftp_chroot: '/home/%u'
+  deny_users: []
+  allow_users: []
+  deny_groups: []
+  allow_groups: []
+  print_motd: 'no'
+  print_last_log: 'no'
+  use_dns: 'no'
+  max_auth_tries: 2
+  max_sessions: 10
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/defaults/ssh.yaml.schema b/ceph-proxy/charmhelpers/contrib/hardening/defaults/ssh.yaml.schema
new file mode 100644
index 00000000..d05e054b
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/defaults/ssh.yaml.schema
@@ -0,0 +1,42 @@
+# NOTE: this schema must contain all valid keys from its associated defaults
+#       file. It is used to validate user-provided overrides.
+common:
+  service_name:
+  network_ipv6_enable:
+  ports:
+  remote_hosts:
+client:
+  package:
+  cbc_required:
+  weak_hmac:
+  weak_kex:
+  roaming:
+  password_authentication:
+server:
+  host_key_files:
+  cbc_required:
+  weak_hmac:
+  weak_kex:
+  allow_root_with_key:
+  allow_tcp_forwarding:
+  allow_agent_forwarding:
+  allow_x11_forwarding:
+  use_privilege_separation:
+  listen_to:
+  use_pam:
+  package:
+  password_authentication:
+  alive_interval:
+  alive_count:
+  sftp_enable:
+  sftp_group:
+  sftp_chroot:
+  deny_users:
+  allow_users:
+  deny_groups:
+  allow_groups:
+  print_motd:
+  print_last_log:
+  use_dns:
+  max_auth_tries:
+  max_sessions:
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/harden.py b/ceph-proxy/charmhelpers/contrib/hardening/harden.py
new file mode 100644
index 00000000..45ad076d
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/harden.py
@@ -0,0 +1,93 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from collections import OrderedDict
+
+from charmhelpers.core.hookenv import (
+    config,
+    log,
+    DEBUG,
+    WARNING,
+)
+from charmhelpers.contrib.hardening.host.checks import run_os_checks
+from charmhelpers.contrib.hardening.ssh.checks import run_ssh_checks
+from charmhelpers.contrib.hardening.mysql.checks import run_mysql_checks
+from charmhelpers.contrib.hardening.apache.checks import run_apache_checks
+
+_DISABLE_HARDENING_FOR_UNIT_TEST = False
+
+
+def harden(overrides=None):
+    """Hardening decorator.
+
+    This is the main entry point for running the hardening stack. In order to
+    run modules of the stack you must add this decorator to charm hook(s) and
+    ensure that your charm config.yaml contains the 'harden' option set to
+    one or more of the supported modules. Setting these will cause the
+    corresponding hardening code to be run when the hook fires.
+
+    This decorator can and should be applied to more than one hook or function
+    such that hardening modules are called multiple times. This is because
+    subsequent calls will perform auditing checks that will report any changes
+    to resources hardened by the first run (and possibly perform compliance
+    actions as a result of any detected infractions).
+
+    :param overrides: Optional list of stack modules used to override those
+                      provided with 'harden' config.
+    :returns: Returns the value returned by the decorated function once it
+              has executed.
+    """
+    if overrides is None:
+        overrides = []
+
+    def _harden_inner1(f):
+        _logged = False
+
+        def _harden_inner2(*args, **kwargs):
+            # knock out hardening via a config var; normally it won't get
+            # disabled.
+ nonlocal _logged + if _DISABLE_HARDENING_FOR_UNIT_TEST: + return f(*args, **kwargs) + if not _logged: + log("Hardening function '%s'" % (f.__name__), level=DEBUG) + _logged = True + RUN_CATALOG = OrderedDict([('os', run_os_checks), + ('ssh', run_ssh_checks), + ('mysql', run_mysql_checks), + ('apache', run_apache_checks)]) + + enabled = overrides[:] or (config("harden") or "").split() + if enabled: + modules_to_run = [] + # modules will always be performed in the following order + for module, func in RUN_CATALOG.items(): + if module in enabled: + enabled.remove(module) + modules_to_run.append(func) + + if enabled: + log("Unknown hardening modules '%s' - ignoring" % + (', '.join(enabled)), level=WARNING) + + for hardener in modules_to_run: + log("Executing hardening module '%s'" % + (hardener.__name__), level=DEBUG) + hardener() + else: + log("No hardening applied to '%s'" % (f.__name__), level=DEBUG) + + return f(*args, **kwargs) + return _harden_inner2 + + return _harden_inner1 diff --git a/ceph-proxy/charmhelpers/contrib/hardening/host/__init__.py b/ceph-proxy/charmhelpers/contrib/hardening/host/__init__.py new file mode 100644 index 00000000..58bebd84 --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/hardening/host/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from os import path + +TEMPLATES_DIR = path.join(path.dirname(__file__), 'templates') diff --git a/ceph-proxy/charmhelpers/contrib/hardening/host/checks/__init__.py b/ceph-proxy/charmhelpers/contrib/hardening/host/checks/__init__.py new file mode 100644 index 00000000..0e7e409f --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/hardening/host/checks/__init__.py @@ -0,0 +1,48 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
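+
+# NOTE: run_os_checks() below is what the @harden decorator dispatches to
+# when 'os' is enabled via the charm's 'harden' config option. A minimal
+# usage sketch (the hook function name is illustrative):
+#
+#     from charmhelpers.contrib.hardening.harden import harden
+#
+#     @harden()
+#     def config_changed():
+#         ...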
+
+from charmhelpers.core.hookenv import (
+    log,
+    DEBUG,
+)
+from charmhelpers.contrib.hardening.host.checks import (
+    apt,
+    limits,
+    login,
+    minimize_access,
+    pam,
+    profile,
+    securetty,
+    suid_sgid,
+    sysctl
+)
+
+
+def run_os_checks():
+    log("Starting OS hardening checks.", level=DEBUG)
+    checks = apt.get_audits()
+    checks.extend(limits.get_audits())
+    checks.extend(login.get_audits())
+    checks.extend(minimize_access.get_audits())
+    checks.extend(pam.get_audits())
+    checks.extend(profile.get_audits())
+    checks.extend(securetty.get_audits())
+    checks.extend(suid_sgid.get_audits())
+    checks.extend(sysctl.get_audits())
+
+    for check in checks:
+        log("Running '%s' check" % (check.__class__.__name__), level=DEBUG)
+        check.ensure_compliance()
+
+    log("OS hardening checks complete.", level=DEBUG)
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/host/checks/apt.py b/ceph-proxy/charmhelpers/contrib/hardening/host/checks/apt.py
new file mode 100644
index 00000000..7ce41b00
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/host/checks/apt.py
@@ -0,0 +1,37 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from charmhelpers.contrib.hardening.utils import get_settings
+from charmhelpers.contrib.hardening.audits.apt import (
+    AptConfig,
+    RestrictedPackages,
+)
+
+
+def get_audits():
+    """Get OS hardening apt audits.
+
+    :returns: list of audits
+    """
+    audits = [AptConfig([{'key': 'APT::Get::AllowUnauthenticated',
+                          'expected': 'false'}])]
+
+    settings = get_settings('os')
+    clean_packages = settings['security']['packages_clean']
+    if clean_packages:
+        security_packages = settings['security']['packages_list']
+        if security_packages:
+            audits.append(RestrictedPackages(security_packages))
+
+    return audits
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/host/checks/limits.py b/ceph-proxy/charmhelpers/contrib/hardening/host/checks/limits.py
new file mode 100644
index 00000000..e94f5ebe
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/host/checks/limits.py
@@ -0,0 +1,53 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from charmhelpers.contrib.hardening.audits.file import (
+    DirectoryPermissionAudit,
+    TemplatedFile,
+)
+from charmhelpers.contrib.hardening.host import TEMPLATES_DIR
+from charmhelpers.contrib.hardening import utils
+
+
+def get_audits():
+    """Get OS hardening security limits audits.
+
+    :returns: list of audits
+    """
+    audits = []
+    settings = utils.get_settings('os')
+
+    # Ensure that the /etc/security/limits.d directory is only writable
+    # by the root user, but others can execute and read.
+    audits.append(DirectoryPermissionAudit('/etc/security/limits.d',
+                                           user='root', group='root',
+                                           mode=0o755))
+
+    # If core dumps are not enabled, then don't allow core dumps to be
+    # created as they may contain sensitive information.
+    if not settings['security']['kernel_enable_core_dump']:
+        audits.append(TemplatedFile('/etc/security/limits.d/10.hardcore.conf',
+                                    SecurityLimitsContext(),
+                                    template_dir=TEMPLATES_DIR,
+                                    user='root', group='root', mode=0o0440))
+    return audits
+
+
+class SecurityLimitsContext(object):
+
+    def __call__(self):
+        settings = utils.get_settings('os')
+        ctxt = {'disable_core_dump':
+                not settings['security']['kernel_enable_core_dump']}
+        return ctxt
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/host/checks/login.py b/ceph-proxy/charmhelpers/contrib/hardening/host/checks/login.py
new file mode 100644
index 00000000..fd500c8b
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/host/checks/login.py
@@ -0,0 +1,63 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from charmhelpers.contrib.hardening.audits.file import TemplatedFile
+from charmhelpers.contrib.hardening.host import TEMPLATES_DIR
+from charmhelpers.contrib.hardening import utils
+
+
+def get_audits():
+    """Get OS hardening login.defs audits.
+
+    :returns: list of audits
+    """
+    audits = [TemplatedFile('/etc/login.defs', LoginContext(),
+                            template_dir=TEMPLATES_DIR,
+                            user='root', group='root', mode=0o0444)]
+    return audits
+
+
+class LoginContext(object):
+
+    def __call__(self):
+        settings = utils.get_settings('os')
+
+        # Octal numbers in yaml end up being turned into decimal,
+        # so check if the umask is entered as a string (e.g. '027')
+        # or as an octal umask as we know it (e.g. 002). If it's not
+        # a string, assume it to be octal and turn it into an octal
+        # string.
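+        # For example (illustrative): a YAML value of 027 is parsed as the
+        # octal integer 0o27 (decimal 23), so it is converted back to an
+        # octal string below, while a quoted value such as '027' is passed
+        # through to the template unchanged.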
+        umask = settings['environment']['umask']
+        if not isinstance(umask, str):
+            umask = '%s' % oct(umask)
+
+        ctxt = {
+            'additional_user_paths':
+            settings['environment']['extra_user_paths'],
+            'umask': umask,
+            'pwd_max_age': settings['auth']['pw_max_age'],
+            'pwd_min_age': settings['auth']['pw_min_age'],
+            'uid_min': settings['auth']['uid_min'],
+            'sys_uid_min': settings['auth']['sys_uid_min'],
+            'sys_uid_max': settings['auth']['sys_uid_max'],
+            'gid_min': settings['auth']['gid_min'],
+            'sys_gid_min': settings['auth']['sys_gid_min'],
+            'sys_gid_max': settings['auth']['sys_gid_max'],
+            'login_retries': settings['auth']['retries'],
+            'login_timeout': settings['auth']['timeout'],
+            'chfn_restrict': settings['auth']['chfn_restrict'],
+            'allow_login_without_home': settings['auth']['allow_homeless']
+        }
+
+        return ctxt
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/host/checks/minimize_access.py b/ceph-proxy/charmhelpers/contrib/hardening/host/checks/minimize_access.py
new file mode 100644
index 00000000..6e64be00
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/host/checks/minimize_access.py
@@ -0,0 +1,50 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from charmhelpers.contrib.hardening.audits.file import (
+    FilePermissionAudit,
+    ReadOnly,
+)
+from charmhelpers.contrib.hardening import utils
+
+
+def get_audits():
+    """Get OS hardening access audits.
+
+    :returns: list of audits
+    """
+    audits = []
+    settings = utils.get_settings('os')
+
+    # Remove write permissions from $PATH folders for all regular users.
+    # This prevents normal users from changing system-wide commands.
+    path_folders = {'/usr/local/sbin',
+                    '/usr/local/bin',
+                    '/usr/sbin',
+                    '/usr/bin',
+                    '/bin'}
+    extra_user_paths = settings['environment']['extra_user_paths']
+    path_folders.update(extra_user_paths)
+    audits.append(ReadOnly(path_folders))
+
+    # Only allow the root user to have access to the shadow file.
+    audits.append(FilePermissionAudit('/etc/shadow', 'root', 'root', 0o0600))
+
+    if 'change_user' not in settings['security']['users_allow']:
+        # su should only be accessible to user and group root, unless it is
+        # expressly defined to allow users to change to root via the
+        # security_users_allow config option.
+        audits.append(FilePermissionAudit('/bin/su', 'root', 'root', 0o750))
+
+    return audits
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/host/checks/pam.py b/ceph-proxy/charmhelpers/contrib/hardening/host/checks/pam.py
new file mode 100644
index 00000000..9b38d5f0
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/host/checks/pam.py
@@ -0,0 +1,132 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from subprocess import (
+    check_output,
+    CalledProcessError,
+)
+
+from charmhelpers.core.hookenv import (
+    log,
+    DEBUG,
+    ERROR,
+)
+from charmhelpers.fetch import (
+    apt_install,
+    apt_purge,
+    apt_update,
+)
+from charmhelpers.contrib.hardening.audits.file import (
+    TemplatedFile,
+    DeletedFile,
+)
+from charmhelpers.contrib.hardening import utils
+from charmhelpers.contrib.hardening.host import TEMPLATES_DIR
+
+
+def get_audits():
+    """Get OS hardening PAM authentication audits.
+
+    :returns: list of audits
+    """
+    audits = []
+
+    settings = utils.get_settings('os')
+
+    if settings['auth']['pam_passwdqc_enable']:
+        audits.append(PasswdqcPAM('/etc/passwdqc.conf'))
+
+    if settings['auth']['retries']:
+        audits.append(Tally2PAM('/usr/share/pam-configs/tally2'))
+    else:
+        audits.append(DeletedFile('/usr/share/pam-configs/tally2'))
+
+    return audits
+
+
+class PasswdqcPAMContext(object):
+
+    def __call__(self):
+        ctxt = {}
+        settings = utils.get_settings('os')
+
+        ctxt['auth_pam_passwdqc_options'] = \
+            settings['auth']['pam_passwdqc_options']
+
+        return ctxt
+
+
+class PasswdqcPAM(TemplatedFile):
+    """The PAM audit verifies the Linux PAM passwdqc settings."""
+    def __init__(self, path):
+        super(PasswdqcPAM, self).__init__(path=path,
+                                          template_dir=TEMPLATES_DIR,
+                                          context=PasswdqcPAMContext(),
+                                          user='root',
+                                          group='root',
+                                          mode=0o0640)
+
+    def pre_write(self):
+        # Always remove?
+        for pkg in ['libpam-ccreds', 'libpam-cracklib']:
+            log("Purging package '%s'" % pkg, level=DEBUG)
+            apt_purge(pkg)
+
+        apt_update(fatal=True)
+        for pkg in ['libpam-passwdqc']:
+            log("Installing package '%s'" % pkg, level=DEBUG)
+            apt_install(pkg)
+
+    def post_write(self):
+        """Updates the PAM configuration after the file has been written"""
+        try:
+            check_output(['pam-auth-update', '--package'])
+        except CalledProcessError as e:
+            log('Error calling pam-auth-update: %s' % e, level=ERROR)
+
+
+class Tally2PAMContext(object):
+
+    def __call__(self):
+        ctxt = {}
+        settings = utils.get_settings('os')
+
+        ctxt['auth_lockout_time'] = settings['auth']['lockout_time']
+        ctxt['auth_retries'] = settings['auth']['retries']
+
+        return ctxt
+
+
+class Tally2PAM(TemplatedFile):
+    """The PAM audit verifies the Linux PAM tally2 settings."""
+    def __init__(self, path):
+        super(Tally2PAM, self).__init__(path=path,
+                                        template_dir=TEMPLATES_DIR,
+                                        context=Tally2PAMContext(),
+                                        user='root',
+                                        group='root',
+                                        mode=0o0640)
+
+    def pre_write(self):
+        # Always remove?
+        apt_purge('libpam-ccreds')
+        apt_update(fatal=True)
+        apt_install('libpam-modules')
+
+    def post_write(self):
+        """Updates the PAM configuration after the file has been written"""
+        try:
+            check_output(['pam-auth-update', '--package'])
+        except CalledProcessError as e:
+            log('Error calling pam-auth-update: %s' % e, level=ERROR)
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/host/checks/profile.py b/ceph-proxy/charmhelpers/contrib/hardening/host/checks/profile.py
new file mode 100644
index 00000000..2727428d
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/host/checks/profile.py
@@ -0,0 +1,49 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from charmhelpers.contrib.hardening.audits.file import TemplatedFile
+from charmhelpers.contrib.hardening.host import TEMPLATES_DIR
+from charmhelpers.contrib.hardening import utils
+
+
+def get_audits():
+    """Get OS hardening profile audits.
+
+    :returns: list of audits
+    """
+    audits = []
+
+    settings = utils.get_settings('os')
+    # If core dumps are not enabled, then don't allow core dumps to be
+    # created as they may contain sensitive information.
+    if not settings['security']['kernel_enable_core_dump']:
+        audits.append(TemplatedFile('/etc/profile.d/pinerolo_profile.sh',
+                                    ProfileContext(),
+                                    template_dir=TEMPLATES_DIR,
+                                    mode=0o0755, user='root', group='root'))
+    if settings['security']['ssh_tmout']:
+        audits.append(TemplatedFile('/etc/profile.d/99-hardening.sh',
+                                    ProfileContext(),
+                                    template_dir=TEMPLATES_DIR,
+                                    mode=0o0644, user='root', group='root'))
+    return audits
+
+
+class ProfileContext(object):
+
+    def __call__(self):
+        settings = utils.get_settings('os')
+        ctxt = {'ssh_tmout':
+                settings['security']['ssh_tmout']}
+        return ctxt
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/host/checks/securetty.py b/ceph-proxy/charmhelpers/contrib/hardening/host/checks/securetty.py
new file mode 100644
index 00000000..34cd0217
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/host/checks/securetty.py
@@ -0,0 +1,37 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from charmhelpers.contrib.hardening.audits.file import TemplatedFile
+from charmhelpers.contrib.hardening.host import TEMPLATES_DIR
+from charmhelpers.contrib.hardening import utils
+
+
+def get_audits():
+    """Get OS hardening Secure TTY audits.
+
+    :returns: list of audits
+    """
+    audits = []
+    audits.append(TemplatedFile('/etc/securetty', SecureTTYContext(),
+                                template_dir=TEMPLATES_DIR,
+                                mode=0o0400, user='root', group='root'))
+    return audits
+
+
+class SecureTTYContext(object):
+
+    def __call__(self):
+        settings = utils.get_settings('os')
+        ctxt = {'ttys': settings['auth']['root_ttys']}
+        return ctxt
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/host/checks/suid_sgid.py b/ceph-proxy/charmhelpers/contrib/hardening/host/checks/suid_sgid.py
new file mode 100644
index 00000000..bcbe3fde
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/host/checks/suid_sgid.py
@@ -0,0 +1,129 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import subprocess
+
+from charmhelpers.core.hookenv import (
+    log,
+    INFO,
+)
+from charmhelpers.contrib.hardening.audits.file import NoSUIDSGIDAudit
+from charmhelpers.contrib.hardening import utils
+
+
+BLACKLIST = ['/usr/bin/rcp', '/usr/bin/rlogin', '/usr/bin/rsh',
+             '/usr/libexec/openssh/ssh-keysign',
+             '/usr/lib/openssh/ssh-keysign',
+             '/sbin/netreport',
+             '/usr/sbin/usernetctl',
+             '/usr/sbin/userisdnctl',
+             '/usr/sbin/pppd',
+             '/usr/bin/lockfile',
+             '/usr/bin/mail-lock',
+             '/usr/bin/mail-unlock',
+             '/usr/bin/mail-touchlock',
+             '/usr/bin/dotlockfile',
+             '/usr/bin/arping',
+             '/usr/sbin/uuidd',
+             '/usr/bin/mtr',
+             '/usr/lib/evolution/camel-lock-helper-1.2',
+             '/usr/lib/pt_chown',
+             '/usr/lib/eject/dmcrypt-get-device',
+             '/usr/lib/mc/cons.saver']
+
+WHITELIST = ['/bin/mount', '/bin/ping', '/bin/su', '/bin/umount',
+             '/sbin/pam_timestamp_check', '/sbin/unix_chkpwd', '/usr/bin/at',
+             '/usr/bin/gpasswd', '/usr/bin/locate', '/usr/bin/newgrp',
+             '/usr/bin/passwd', '/usr/bin/ssh-agent',
+             '/usr/libexec/utempter/utempter', '/usr/sbin/lockdev',
+             '/usr/sbin/sendmail.sendmail', '/usr/bin/expiry',
+             '/bin/ping6', '/usr/bin/traceroute6.iputils',
+             '/sbin/mount.nfs', '/sbin/umount.nfs',
+             '/sbin/mount.nfs4', '/sbin/umount.nfs4',
+             '/usr/bin/crontab',
+             '/usr/bin/wall', '/usr/bin/write',
+             '/usr/bin/screen',
+             '/usr/bin/mlocate',
+             '/usr/bin/chage', '/usr/bin/chfn', '/usr/bin/chsh',
+             '/bin/fusermount',
+             '/usr/bin/pkexec',
+             '/usr/bin/sudo', '/usr/bin/sudoedit',
+             '/usr/sbin/postdrop', '/usr/sbin/postqueue',
+             '/usr/sbin/suexec',
+             '/usr/lib/squid/ncsa_auth', '/usr/lib/squid/pam_auth',
+             '/usr/kerberos/bin/ksu',
+             '/usr/sbin/ccreds_validate',
+             '/usr/bin/Xorg',
+             '/usr/bin/X',
+             '/usr/lib/dbus-1.0/dbus-daemon-launch-helper',
+             '/usr/lib/vte/gnome-pty-helper',
+             '/usr/lib/libvte9/gnome-pty-helper',
+             '/usr/lib/libvte-2.90-9/gnome-pty-helper']
+
+
+def get_audits():
+    """Get OS hardening suid/sgid audits.
+
+    :returns: list of audits
+    """
+    checks = []
+    settings = utils.get_settings('os')
+    if not settings['security']['suid_sgid_enforce']:
+        log("Skipping suid/sgid hardening", level=INFO)
+        return checks
+
+    # Build the blacklist and whitelist of files for suid/sgid checks.
+    # There are a total of 4 lists:
+    #   1. the system blacklist
+    #   2. the system whitelist
+    #   3. the user blacklist
+    #   4. the user whitelist
+    #
+    # The blacklist is the set of paths which should NOT have the suid/sgid
+    # bit set and the whitelist is the set of paths which MAY have the
+    # suid/sgid bit set. The user whitelist/blacklist effectively override
+    # the system whitelist/blacklist.
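+    # For example (illustrative): listing '/usr/bin/at' in the user
+    # blacklist removes it from the system whitelist below, so it is no
+    # longer exempt when scanning for unknown suid/sgid files.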
+    u_b = settings['security']['suid_sgid_blacklist']
+    u_w = settings['security']['suid_sgid_whitelist']
+
+    blacklist = set(BLACKLIST) - set(u_w + u_b)
+    whitelist = set(WHITELIST) - set(u_b + u_w)
+
+    checks.append(NoSUIDSGIDAudit(blacklist))
+
+    dry_run = settings['security']['suid_sgid_dry_run_on_unknown']
+
+    if settings['security']['suid_sgid_remove_from_unknown'] or dry_run:
+        # If the policy is a dry_run (e.g. complain only) or remove unknown
+        # suid/sgid bits then find all of the paths which have the suid/sgid
+        # bit set and then remove the whitelisted paths.
+        root_path = settings['environment']['root_path']
+        unknown_paths = find_paths_with_suid_sgid(root_path) - set(whitelist)
+        checks.append(NoSUIDSGIDAudit(unknown_paths, unless=dry_run))
+
+    return checks
+
+
+def find_paths_with_suid_sgid(root_path):
+    """Finds all paths/files which have an suid/sgid bit enabled.
+
+    Starting with the root_path, this will recursively find all paths which
+    have an suid or sgid bit set.
+    """
+    cmd = ['find', root_path, '-perm', '-4000', '-o', '-perm', '-2000',
+           '-type', 'f', '!', '-path', '/proc/*', '-print']
+
+    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    out, _ = p.communicate()
+    # communicate() returns bytes; decode before splitting into paths.
+    return set(out.decode('utf-8').split('\n'))
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/host/checks/sysctl.py b/ceph-proxy/charmhelpers/contrib/hardening/host/checks/sysctl.py
new file mode 100644
index 00000000..8a57d83d
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/host/checks/sysctl.py
@@ -0,0 +1,208 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import platform
+import re
+import subprocess
+
+from charmhelpers.core.hookenv import (
+    log,
+    INFO,
+    WARNING,
+)
+from charmhelpers.contrib.hardening import utils
+from charmhelpers.contrib.hardening.audits.file import (
+    FilePermissionAudit,
+    TemplatedFile,
+)
+from charmhelpers.contrib.hardening.host import TEMPLATES_DIR
+
+
+SYSCTL_DEFAULTS = """net.ipv4.ip_forward=%(net_ipv4_ip_forward)s
+net.ipv6.conf.all.forwarding=%(net_ipv6_conf_all_forwarding)s
+net.ipv4.conf.all.rp_filter=1
+net.ipv4.conf.default.rp_filter=1
+net.ipv4.icmp_echo_ignore_broadcasts=1
+net.ipv4.icmp_ignore_bogus_error_responses=1
+net.ipv4.icmp_ratelimit=100
+net.ipv4.icmp_ratemask=88089
+net.ipv6.conf.all.disable_ipv6=%(net_ipv6_conf_all_disable_ipv6)s
+net.ipv4.tcp_timestamps=%(net_ipv4_tcp_timestamps)s
+net.ipv4.conf.all.arp_ignore=%(net_ipv4_conf_all_arp_ignore)s
+net.ipv4.conf.all.arp_announce=%(net_ipv4_conf_all_arp_announce)s
+net.ipv4.tcp_rfc1337=1
+net.ipv4.tcp_syncookies=1
+net.ipv4.conf.all.shared_media=1
+net.ipv4.conf.default.shared_media=1
+net.ipv4.conf.all.accept_source_route=0
+net.ipv4.conf.default.accept_source_route=0
+net.ipv4.conf.all.accept_redirects=0
+net.ipv4.conf.default.accept_redirects=0
+net.ipv6.conf.all.accept_redirects=0
+net.ipv6.conf.default.accept_redirects=0
+net.ipv4.conf.all.secure_redirects=0
+net.ipv4.conf.default.secure_redirects=0
+net.ipv4.conf.all.send_redirects=0
+net.ipv4.conf.default.send_redirects=0
+net.ipv4.conf.all.log_martians=0
+net.ipv6.conf.default.router_solicitations=0
+net.ipv6.conf.default.accept_ra_rtr_pref=0
+net.ipv6.conf.default.accept_ra_pinfo=0
+net.ipv6.conf.default.accept_ra_defrtr=0
+net.ipv6.conf.default.autoconf=0
+net.ipv6.conf.default.dad_transmits=0
+net.ipv6.conf.default.max_addresses=1
+net.ipv6.conf.all.accept_ra=0
+net.ipv6.conf.default.accept_ra=0
+kernel.modules_disabled=%(kernel_modules_disabled)s
+kernel.sysrq=%(kernel_sysrq)s
+fs.suid_dumpable=%(fs_suid_dumpable)s
+kernel.randomize_va_space=2
+"""
+
+
+def get_audits():
+    """Get OS hardening sysctl audits.
+
+    :returns: list of audits
+    """
+    audits = []
+    settings = utils.get_settings('os')
+
+    # Apply the configured sysctl settings.
+    audits.append(SysctlConf())
+    # Make sure that only root has access to the sysctl.conf file, and
+    # that it is read-only.
+    audits.append(FilePermissionAudit('/etc/sysctl.conf',
+                                      user='root',
+                                      group='root', mode=0o0440))
+    # If module loading is not enabled, then ensure that the modules
+    # file has the appropriate permissions and rebuild the initramfs
+    if not settings['security']['kernel_enable_module_loading']:
+        audits.append(ModulesTemplate())
+
+    return audits
+
+
+class ModulesContext(object):
+
+    def __call__(self):
+        settings = utils.get_settings('os')
+        with open('/proc/cpuinfo', 'r') as fd:
+            cpuinfo = fd.readlines()
+
+        vendor = None
+        for line in cpuinfo:
+            match = re.search(r"^vendor_id\s+:\s+(.+)", line)
+            if match:
+                vendor = match.group(1)
+
+        if vendor == "GenuineIntel":
+            vendor = "intel"
+        elif vendor == "AuthenticAMD":
+            vendor = "amd"
+
+        ctxt = {'arch': platform.processor(),
+                'cpuVendor': vendor,
+                'desktop_enable': settings['general']['desktop_enable']}
+
+        return ctxt
+
+
+class ModulesTemplate(TemplatedFile):
+
+    def __init__(self):
+        super(ModulesTemplate, self).__init__('/etc/initramfs-tools/modules',
+                                              ModulesContext(),
+                                              template_dir=TEMPLATES_DIR,
+                                              user='root', group='root',
+                                              mode=0o0440)
+
+    def post_write(self):
+        subprocess.check_call(['update-initramfs', '-u'])
+
+
+class SysCtlHardeningContext(object):
+    def __call__(self):
+        settings = utils.get_settings('os')
+        ctxt = {'sysctl': {}}
+
+        log("Applying sysctl settings", level=INFO)
+        extras = {'net_ipv4_ip_forward': 0,
+                  'net_ipv6_conf_all_forwarding': 0,
+                  'net_ipv6_conf_all_disable_ipv6': 1,
+                  'net_ipv4_tcp_timestamps': 0,
+                  'net_ipv4_conf_all_arp_ignore': 0,
+                  'net_ipv4_conf_all_arp_announce': 0,
+                  'kernel_sysrq': 0,
+                  'fs_suid_dumpable': 0,
+                  'kernel_modules_disabled': 1}
+
+        if settings['sysctl']['ipv6_enable']:
+            extras['net_ipv6_conf_all_disable_ipv6'] = 0
+
+        if settings['sysctl']['forwarding']:
+            extras['net_ipv4_ip_forward'] = 1
+            extras['net_ipv6_conf_all_forwarding'] = 1
+
+        if settings['sysctl']['arp_restricted']:
+            extras['net_ipv4_conf_all_arp_ignore'] = 1
+            extras['net_ipv4_conf_all_arp_announce'] = 2
+
+        if settings['security']['kernel_enable_module_loading']:
+            extras['kernel_modules_disabled'] = 0
+
+        if settings['sysctl']['kernel_enable_sysrq']:
+            sysrq_val = settings['sysctl']['kernel_secure_sysrq']
+            extras['kernel_sysrq'] = sysrq_val
+
+        if settings['security']['kernel_enable_core_dump']:
+            extras['fs_suid_dumpable'] = 1
+
+        settings.update(extras)
+        for d in (SYSCTL_DEFAULTS % settings).split():
+            d = d.strip().partition('=')
+            key = d[0].strip()
+            path = os.path.join('/proc/sys', key.replace('.', '/'))
+            if not os.path.exists(path):
+                log("Skipping '%s' since '%s' does not exist" % (key, path),
+                    level=WARNING)
+                continue
+
+            ctxt['sysctl'][key] = d[2] or None
+
+        return {
+            'sysctl_settings': [(k, v) for k, v in ctxt['sysctl'].items()]
+        }
+
+
+class SysctlConf(TemplatedFile):
+    """An audit check for sysctl settings."""
+    def __init__(self):
+        self.conffile = '/etc/sysctl.d/99-juju-hardening.conf'
+        super(SysctlConf, self).__init__(self.conffile,
+                                         SysCtlHardeningContext(),
+                                         template_dir=TEMPLATES_DIR,
+                                         user='root', group='root',
+                                         mode=0o0440)
+
+    def post_write(self):
+        try:
+            subprocess.check_call(['sysctl', '-p', self.conffile])
+        except subprocess.CalledProcessError as e:
+            # NOTE: on some systems if sysctl cannot apply all settings it
+            # will return non-zero as well.
+            log("sysctl command returned an error (maybe some "
+                "keys could not be set) - %s" % (e),
+                level=WARNING)
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/host/templates/10.hardcore.conf b/ceph-proxy/charmhelpers/contrib/hardening/host/templates/10.hardcore.conf
new file mode 100644
index 00000000..0014191f
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/host/templates/10.hardcore.conf
@@ -0,0 +1,8 @@
+###############################################################################
+# WARNING: This configuration file is maintained by Juju. Local changes may
+#          be overwritten.
+###############################################################################
+{% if disable_core_dump -%}
+# Prevent core dumps for all users. These are usually only needed by developers and may contain sensitive information.
+* hard core 0
+{% endif %}
\ No newline at end of file
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/host/templates/99-hardening.sh b/ceph-proxy/charmhelpers/contrib/hardening/host/templates/99-hardening.sh
new file mode 100644
index 00000000..616cef46
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/host/templates/99-hardening.sh
@@ -0,0 +1,5 @@
+TMOUT={{ ssh_tmout }}
+readonly TMOUT
+export TMOUT
+
+readonly HISTFILE
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/host/templates/99-juju-hardening.conf b/ceph-proxy/charmhelpers/contrib/hardening/host/templates/99-juju-hardening.conf
new file mode 100644
index 00000000..101f1e1d
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/host/templates/99-juju-hardening.conf
@@ -0,0 +1,7 @@
+###############################################################################
+# WARNING: This configuration file is maintained by Juju. Local changes may
+#          be overwritten.
+###############################################################################
+{% for key, value in sysctl_settings -%}
+{{ key }}={{ value }}
+{% endfor -%}
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/host/templates/__init__.py b/ceph-proxy/charmhelpers/contrib/hardening/host/templates/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/host/templates/login.defs b/ceph-proxy/charmhelpers/contrib/hardening/host/templates/login.defs
new file mode 100644
index 00000000..7d107637
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/host/templates/login.defs
@@ -0,0 +1,349 @@
+###############################################################################
+# WARNING: This configuration file is maintained by Juju. Local changes may
+#          be overwritten.
+###############################################################################
+#
+# /etc/login.defs - Configuration control definitions for the login package.
+#
+# Three items must be defined: MAIL_DIR, ENV_SUPATH, and ENV_PATH.
+# If unspecified, some arbitrary (and possibly incorrect) value will
+# be assumed. All other items are optional - if not specified then
+# the described action or option will be inhibited.
+#
+# Comment lines (lines beginning with "#") and blank lines are ignored.
+#
+# Modified for Linux. --marekm
+
+# REQUIRED for useradd/userdel/usermod
+#   Directory where mailboxes reside, _or_ name of file, relative to the
+#   home directory. If you _do_ define MAIL_DIR and MAIL_FILE,
+#   MAIL_DIR takes precedence.
+#
+#   Essentially:
+#      - MAIL_DIR defines the location of users' mail spool files
+#        (for mbox use) by appending the username to MAIL_DIR as defined
+#        below.
+#      - MAIL_FILE defines the location of the users' mail spool files as the
+#        fully-qualified filename obtained by prepending the user home
+#        directory before $MAIL_FILE
+#
+# NOTE: This is no longer used for setting up users' MAIL environment variable
+#       which is, starting from shadow 4.0.12-1 in Debian, entirely the
+#       job of the pam_mail PAM module
+#       See default PAM configuration files provided for
+#       login, su, etc.
+#
+# This is a temporary situation: setting these variables will soon
+# move to /etc/default/useradd and the variables will then no longer
+# be supported
+MAIL_DIR        /var/mail
+#MAIL_FILE      .mail
+
+#
+# Enable logging and display of /var/log/faillog login failure info.
+# This option conflicts with the pam_tally PAM module.
+#
+FAILLOG_ENAB            yes
+
+#
+# Enable display of unknown usernames when login failures are recorded.
+#
+# WARNING: Unknown usernames may become world readable.
+# See #290803 and #298773 for details about how this could become a security
+# concern
+LOG_UNKFAIL_ENAB        no
+
+#
+# Enable logging of successful logins
+#
+LOG_OK_LOGINS           yes
+
+#
+# Enable "syslog" logging of su activity - in addition to sulog file logging.
+# SYSLOG_SG_ENAB does the same for newgrp and sg.
+#
+SYSLOG_SU_ENAB          yes
+SYSLOG_SG_ENAB          yes
+
+#
+# If defined, all su activity is logged to this file.
+#
+#SULOG_FILE     /var/log/sulog
+
+#
+# If defined, file which maps tty line to TERM environment parameter.
+# Each line of the file is in a format something like "vt100 tty01".
+#
+#TTYTYPE_FILE   /etc/ttytype
+
+#
+# If defined, login failures will be logged here in a utmp format
+# last, when invoked as lastb, will read /var/log/btmp, so...
+#
+FTMP_FILE       /var/log/btmp
+
+#
+# If defined, the command name to display when running "su -". For
+# example, if this is defined as "su" then a "ps" will display the
+# command is "-su". If not defined, then "ps" would display the
+# name of the shell actually being run, e.g. something like "-sh".
+#
+SU_NAME         su
+
+#
+# If defined, file which inhibits all the usual chatter during the login
+# sequence. If a full pathname, then hushed mode will be enabled if the
+# user's name or shell are found in the file. If not a full pathname, then
+# hushed mode will be enabled if the file exists in the user's home directory.
+#
+HUSHLOGIN_FILE  .hushlogin
+#HUSHLOGIN_FILE /etc/hushlogins
+
+#
+# *REQUIRED* The default PATH settings, for superuser and normal users.
+#
+# (they are minimal, add the rest in the shell startup files)
+ENV_SUPATH      PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+ENV_PATH        PATH=/usr/local/bin:/usr/bin:/bin{% if additional_user_paths %}{{ additional_user_paths }}{% endif %}
+
+#
+# Terminal permissions
+#
+#       TTYGROUP        Login tty will be assigned this group ownership.
+#       TTYPERM         Login tty will be set to this permission.
+#
+# If you have a "write" program which is "setgid" to a special group
+# which owns the terminals, define TTYGROUP to the group number and
+# TTYPERM to 0620. Otherwise leave TTYGROUP commented out and assign
+# TTYPERM to either 622 or 600.
+#
+# In Debian /usr/bin/bsd-write or similar programs are setgid tty
+# However, the default and recommended value for TTYPERM is still 0600
+# to not allow anyone to write to anyone else's console or terminal
+
+# Users can still allow other people to write them by issuing
+# the "mesg y" command.
+
+TTYGROUP        tty
+TTYPERM         0600
+
+#
+# Login configuration initializations:
+#
+#       ERASECHAR       Terminal ERASE character ('\010' = backspace).
+#   KILLCHAR    Terminal KILL character ('\025' = CTRL/U).
+#   UMASK       Default "umask" value.
+#
+# The ERASECHAR and KILLCHAR are used only on System V machines.
+#
+# UMASK is the default umask value for pam_umask and is used by
+# useradd and newusers to set the mode of the new home directories.
+# 022 is the "historical" value in Debian for UMASK
+# 027, or even 077, could be considered better for privacy
+# There is no One True Answer here: each sysadmin must make up his/her
+# mind.
+#
+# If USERGROUPS_ENAB is set to "yes", that will modify this UMASK default value
+# for private user groups, i. e. the uid is the same as gid, and username is
+# the same as the primary group name: for these, the user permissions will be
+# used as group permissions, e. g. 022 will become 002.
+#
+# Prefix these values with "0" to get octal, "0x" to get hexadecimal.
+#
+ERASECHAR   0177
+KILLCHAR    025
+UMASK       {{ umask }}
+
+# Enable setting of the umask group bits to be the same as owner bits (examples: 022 -> 002, 077 -> 007) for non-root users, if the uid is the same as gid, and username is the same as the primary group name.
+# If set to yes, userdel will remove the user's group if it contains no more members, and useradd will create by default a group with the name of the user.
+USERGROUPS_ENAB yes
+
+#
+# Password aging controls:
+#
+#   PASS_MAX_DAYS   Maximum number of days a password may be used.
+#   PASS_MIN_DAYS   Minimum number of days allowed between password changes.
+#   PASS_WARN_AGE   Number of days warning given before a password expires.
+#
+PASS_MAX_DAYS   {{ pwd_max_age }}
+PASS_MIN_DAYS   {{ pwd_min_age }}
+PASS_WARN_AGE   7
+
+#
+# Min/max values for automatic uid selection in useradd
+#
+UID_MIN         {{ uid_min }}
+UID_MAX         60000
+# System accounts
+SYS_UID_MIN     {{ sys_uid_min }}
+SYS_UID_MAX     {{ sys_uid_max }}
+
+# Min/max values for automatic gid selection in groupadd
+GID_MIN         {{ gid_min }}
+GID_MAX         60000
+# System accounts
+SYS_GID_MIN     {{ sys_gid_min }}
+SYS_GID_MAX     {{ sys_gid_max }}
+
+#
+# Max number of login retries if password is bad. This will most likely be
+# overridden by PAM, since the default pam_unix module has its own built-in
+# limit of 3 retries. However, this is a safe fallback in case you are using
+# an authentication module that does not enforce PAM_MAXTRIES.
+#
+LOGIN_RETRIES   {{ login_retries }}
+
+#
+# Max time in seconds for login
+#
+LOGIN_TIMEOUT   {{ login_timeout }}
+
+#
+# Which fields may be changed by regular users using chfn - use
+# any combination of letters "frwh" (full name, room number, work
+# phone, home phone). If not defined, no changes are allowed.
+# For backward compatibility, "yes" = "rwh" and "no" = "frwh".
+#
+{% if chfn_restrict %}
+CHFN_RESTRICT   {{ chfn_restrict }}
+{% endif %}
+
+#
+# Should login be allowed if we can't cd to the home directory?
+# Default is no.
+#
+DEFAULT_HOME    {% if allow_login_without_home %} yes {% else %} no {% endif %}
+
+#
+# If defined, this command is run when removing a user.
+# It should remove any at/cron/print jobs etc. owned by
+# the user to be removed (passed as the first argument).
+#
+#USERDEL_CMD    /usr/sbin/userdel_local
+
+#
+# Enable setting of the umask group bits to be the same as owner bits
+# (examples: 022 -> 002, 077 -> 007) for non-root users, if the uid is
+# the same as gid, and username is the same as the primary group name.
+#
+# If set to yes, userdel will remove the user's group if it contains no
+# more members, and useradd will create by default a group with the name
+# of the user.
+#
+USERGROUPS_ENAB yes
+
+#
+# Instead of the real user shell, the program specified by this parameter
+# will be launched, although its visible name (argv[0]) will be the shell's.
+# The program may do whatever it wants (logging, additional authentication,
+# banner, ...) before running the actual shell.
+#
+# FAKE_SHELL /bin/fakeshell
+
+#
+# If defined, either full pathname of a file containing device names or
+# a ":" delimited list of device names. Root logins will be allowed only
+# upon these devices.
+#
+# This variable is used by login and su.
+#
+#CONSOLE        /etc/consoles
+#CONSOLE        console:tty01:tty02:tty03:tty04
+
+#
+# List of groups to add to the user's supplementary group set
+# when logging in on the console (as determined by the CONSOLE
+# setting). Default is none.
+#
+# Use with caution - it is possible for users to gain permanent
+# access to these groups, even when not logged in on the console.
+# How to do it is left as an exercise for the reader...
+#
+# This variable is used by login and su.
+#
+#CONSOLE_GROUPS     floppy:audio:cdrom
+
+#
+# If set to "yes", new passwords will be encrypted using the MD5-based
+# algorithm compatible with the one used by recent releases of FreeBSD.
+# It supports passwords of unlimited length and longer salt strings.
+# Set to "no" if you need to copy encrypted passwords to other systems
+# which don't understand the new algorithm. Default is "no".
+#
+# This variable is deprecated. You should use ENCRYPT_METHOD.
+#
+MD5_CRYPT_ENAB no
+
+#
+# If set to MD5, the MD5-based algorithm will be used for encrypting passwords
+# If set to SHA256, the SHA256-based algorithm will be used for encrypting passwords
+# If set to SHA512, the SHA512-based algorithm will be used for encrypting passwords
+# If set to DES, the DES-based algorithm will be used for encrypting passwords (default)
+# Overrides the MD5_CRYPT_ENAB option
+#
+# Note: It is recommended to use a value consistent with
+# the PAM modules configuration.
+#
+ENCRYPT_METHOD SHA512
+
+#
+# Only used if ENCRYPT_METHOD is set to SHA256 or SHA512.
+#
+# Define the number of SHA rounds.
+# With a lot of rounds, it is more difficult to brute-force the password.
+# But note also that more CPU resources will be needed to authenticate
+# users.
+#
+# If not specified, the libc will choose the default number of rounds (5000).
+# The values must be inside the 1000-999999999 range.
+# If only one of the MIN or MAX values is set, then this value will be used.
+# If MIN > MAX, the highest value will be used.
+#
+# SHA_CRYPT_MIN_ROUNDS 5000
+# SHA_CRYPT_MAX_ROUNDS 5000
+
+################# OBSOLETED BY PAM ##############
+#                                               #
+# These options are now handled by PAM. Please  #
+# edit the appropriate file in /etc/pam.d/ to   #
+# enable the equivalents of them.               #
+#                                               #
+#################################################
+
+#MOTD_FILE
+#DIALUPS_CHECK_ENAB
+#LASTLOG_ENAB
+#MAIL_CHECK_ENAB
+#OBSCURE_CHECKS_ENAB
+#PORTTIME_CHECKS_ENAB
+#SU_WHEEL_ONLY
+#CRACKLIB_DICTPATH
+#PASS_CHANGE_TRIES
+#PASS_ALWAYS_WARN
+#ENVIRON_FILE
+#NOLOGINS_FILE
+#ISSUE_FILE
+#PASS_MIN_LEN
+#PASS_MAX_LEN
+#ULIMIT
+#ENV_HZ
+#CHFN_AUTH
+#CHSH_AUTH
+#FAIL_DELAY
+
+################# OBSOLETED #######################
+#                                                 #
+# These options are no longer handled by shadow.  #
+#                                                 #
+# Shadow utilities will display a warning if they
+# still appear.
# +# # +################################################### + +# CLOSE_SESSIONS +# LOGIN_STRING +# NO_PASSWORD_CONSOLE +# QMAIL_DIR + + + diff --git a/ceph-proxy/charmhelpers/contrib/hardening/host/templates/modules b/ceph-proxy/charmhelpers/contrib/hardening/host/templates/modules new file mode 100644 index 00000000..ef0354ee --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/hardening/host/templates/modules @@ -0,0 +1,117 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### +# /etc/modules: kernel modules to load at boot time. +# +# This file contains the names of kernel modules that should be loaded +# at boot time, one per line. Lines beginning with "#" are ignored. +# Parameters can be specified after the module name. + +# Arch +# ---- +# +# Modules for certains builds, contains support modules and some CPU-specific optimizations. + +{% if arch == "x86_64" -%} +# Optimize for x86_64 cryptographic features +twofish-x86_64-3way +twofish-x86_64 +aes-x86_64 +salsa20-x86_64 +blowfish-x86_64 +{% endif -%} + +{% if cpuVendor == "intel" -%} +# Intel-specific optimizations +ghash-clmulni-intel +aesni-intel +kvm-intel +{% endif -%} + +{% if cpuVendor == "amd" -%} +# AMD-specific optimizations +kvm-amd +{% endif -%} + +kvm + + +# Crypto +# ------ + +# Some core modules which comprise strong cryptography. +blowfish_common +blowfish_generic +ctr +cts +lrw +lzo +rmd160 +rmd256 +rmd320 +serpent +sha512_generic +twofish_common +twofish_generic +xts +zlib + + +# Drivers +# ------- + +# Basics +lp +rtc +loop + +# Filesystems +ext2 +btrfs + +{% if desktop_enable -%} +# Desktop +psmouse +snd +snd_ac97_codec +snd_intel8x0 +snd_page_alloc +snd_pcm +snd_timer +soundcore +usbhid +{% endif -%} + +# Lib +# --- +xz + + +# Net +# --- + +# All packets needed for netfilter rules (ie iptables, ebtables). +ip_tables +x_tables +iptable_filter +iptable_nat + +# Targets +ipt_LOG +ipt_REJECT + +# Modules +xt_connlimit +xt_tcpudp +xt_recent +xt_limit +xt_conntrack +nf_conntrack +nf_conntrack_ipv4 +nf_defrag_ipv4 +xt_state +nf_nat + +# Addons +xt_pknock \ No newline at end of file diff --git a/ceph-proxy/charmhelpers/contrib/hardening/host/templates/passwdqc.conf b/ceph-proxy/charmhelpers/contrib/hardening/host/templates/passwdqc.conf new file mode 100644 index 00000000..f98d14e5 --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/hardening/host/templates/passwdqc.conf @@ -0,0 +1,11 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### +Name: passwdqc password strength enforcement +Default: yes +Priority: 1024 +Conflicts: cracklib +Password-Type: Primary +Password: + requisite pam_passwdqc.so {{ auth_pam_passwdqc_options }} diff --git a/ceph-proxy/charmhelpers/contrib/hardening/host/templates/pinerolo_profile.sh b/ceph-proxy/charmhelpers/contrib/hardening/host/templates/pinerolo_profile.sh new file mode 100644 index 00000000..fd2de791 --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/hardening/host/templates/pinerolo_profile.sh @@ -0,0 +1,8 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. 
Local changes may +# be overwritten. +############################################################################### +# Disable core dumps via soft limits for all users. Compliance to this setting +# is voluntary and can be modified by users up to a hard limit. This setting is +# a sane default. +ulimit -S -c 0 > /dev/null 2>&1 diff --git a/ceph-proxy/charmhelpers/contrib/hardening/host/templates/securetty b/ceph-proxy/charmhelpers/contrib/hardening/host/templates/securetty new file mode 100644 index 00000000..15b18d4e --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/hardening/host/templates/securetty @@ -0,0 +1,11 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### +# A list of TTYs, from which root can log in +# see `man securetty` for reference +{% if ttys -%} +{% for tty in ttys -%} +{{ tty }} +{% endfor -%} +{% endif -%} diff --git a/ceph-proxy/charmhelpers/contrib/hardening/host/templates/tally2 b/ceph-proxy/charmhelpers/contrib/hardening/host/templates/tally2 new file mode 100644 index 00000000..d9620299 --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/hardening/host/templates/tally2 @@ -0,0 +1,14 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### +Name: tally2 lockout after failed attempts enforcement +Default: yes +Priority: 1024 +Conflicts: cracklib +Auth-Type: Primary +Auth-Initial: + required pam_tally2.so deny={{ auth_retries }} onerr=fail unlock_time={{ auth_lockout_time }} +Account-Type: Primary +Account-Initial: + required pam_tally2.so diff --git a/ceph-proxy/charmhelpers/contrib/hardening/mysql/__init__.py b/ceph-proxy/charmhelpers/contrib/hardening/mysql/__init__.py new file mode 100644 index 00000000..58bebd84 --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/hardening/mysql/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from os import path + +TEMPLATES_DIR = path.join(path.dirname(__file__), 'templates') diff --git a/ceph-proxy/charmhelpers/contrib/hardening/mysql/checks/__init__.py b/ceph-proxy/charmhelpers/contrib/hardening/mysql/checks/__init__.py new file mode 100644 index 00000000..1990d851 --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/hardening/mysql/checks/__init__.py @@ -0,0 +1,29 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from charmhelpers.core.hookenv import (
+    log,
+    DEBUG,
+)
+from charmhelpers.contrib.hardening.mysql.checks import config
+
+
+def run_mysql_checks():
+    log("Starting MySQL hardening checks.", level=DEBUG)
+    checks = config.get_audits()
+    for check in checks:
+        log("Running '%s' check" % (check.__class__.__name__), level=DEBUG)
+        check.ensure_compliance()
+
+    log("MySQL hardening checks complete.", level=DEBUG)
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/mysql/checks/config.py b/ceph-proxy/charmhelpers/contrib/hardening/mysql/checks/config.py
new file mode 100644
index 00000000..8bf9f36c
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/mysql/checks/config.py
@@ -0,0 +1,86 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import subprocess
+
+from charmhelpers.core.hookenv import (
+    log,
+    WARNING,
+)
+from charmhelpers.contrib.hardening.audits.file import (
+    FilePermissionAudit,
+    DirectoryPermissionAudit,
+    TemplatedFile,
+)
+from charmhelpers.contrib.hardening.mysql import TEMPLATES_DIR
+from charmhelpers.contrib.hardening import utils
+
+
+def get_audits():
+    """Get MySQL hardening config audits.
+
+    :returns: list of audits
+    """
+    if subprocess.call(['which', 'mysql'], stdout=subprocess.PIPE) != 0:
+        log("MySQL does not appear to be installed on this node - "
+            "skipping mysql hardening", level=WARNING)
+        return []
+
+    settings = utils.get_settings('mysql')
+    hardening_settings = settings['hardening']
+    my_cnf = hardening_settings['mysql-conf']
+
+    audits = [
+        FilePermissionAudit(paths=[my_cnf], user='root',
+                            group='root', mode=0o0600),
+
+        TemplatedFile(hardening_settings['hardening-conf'],
+                      MySQLConfContext(),
+                      TEMPLATES_DIR,
+                      mode=0o0750,
+                      user='mysql',
+                      group='root',
+                      service_actions=[{'service': 'mysql',
+                                        'actions': ['restart']}]),
+
+        # MySQL and Percona charms do not allow configuration of the
+        # data directory, so use the default.
+        DirectoryPermissionAudit('/var/lib/mysql',
+                                 user='mysql',
+                                 group='mysql',
+                                 recursive=False,
+                                 mode=0o755),
+
+        DirectoryPermissionAudit('/etc/mysql',
+                                 user='root',
+                                 group='root',
+                                 recursive=False,
+                                 mode=0o700),
+    ]
+
+    return audits
+
+
+class MySQLConfContext(object):
+    """Defines the set of key/value pairs to set in a mysql config file.
+
+    This context, when called, will return a dictionary containing the
+    key/value settings to specify in the
+    /etc/mysql/conf.d/hardening.cnf file.
+    """
+    def __call__(self):
+        settings = utils.get_settings('mysql')
+        return {
+            'mysql_settings': [(k, v) for k, v in settings['security'].items()]
+        }
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/mysql/templates/__init__.py b/ceph-proxy/charmhelpers/contrib/hardening/mysql/templates/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/mysql/templates/hardening.cnf b/ceph-proxy/charmhelpers/contrib/hardening/mysql/templates/hardening.cnf
new file mode 100644
index 00000000..8242586c
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/mysql/templates/hardening.cnf
@@ -0,0 +1,12 @@
+###############################################################################
+# WARNING: This configuration file is maintained by Juju. Local changes may
+# be overwritten.
+###############################################################################
+[mysqld]
+{% for setting, value in mysql_settings -%}
+{% if value == 'True' -%}
+{{ setting }}
+{% elif value != 'None' and value != None -%}
+{{ setting }} = {{ value }}
+{% endif -%}
+{% endfor -%}
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/ssh/__init__.py b/ceph-proxy/charmhelpers/contrib/hardening/ssh/__init__.py
new file mode 100644
index 00000000..58bebd84
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/ssh/__init__.py
@@ -0,0 +1,17 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from os import path
+
+TEMPLATES_DIR = path.join(path.dirname(__file__), 'templates')
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/ssh/checks/__init__.py b/ceph-proxy/charmhelpers/contrib/hardening/ssh/checks/__init__.py
new file mode 100644
index 00000000..edaf484b
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/ssh/checks/__init__.py
@@ -0,0 +1,41 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
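+#
+# Illustrative usage note (an assumption about the typical wiring, not part
+# of the upstream module): charms normally do not call run_ssh_checks()
+# directly. The @harden() decorator from charmhelpers.contrib.hardening.harden
+# dispatches to it when 'ssh' is listed in the charm's 'harden' config
+# option, e.g.:
+#
+#     from charmhelpers.contrib.hardening.harden import harden
+#
+#     @harden()
+#     def config_changed():
+#         ...  # hook body runs after the enabled hardening checks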
+ +from charmhelpers.core.hookenv import ( + log, + DEBUG, +) +from charmhelpers.contrib.hardening.ssh.checks import config + + +def run_ssh_checks(): + log("Starting SSH hardening checks.", level=DEBUG) + checks = config.get_audits() + for check in checks: + log("Running '%s' check" % (check.__class__.__name__), level=DEBUG) + check.ensure_compliance() + + log("SSH hardening checks complete.", level=DEBUG) diff --git a/ceph-proxy/charmhelpers/contrib/hardening/ssh/checks/config.py b/ceph-proxy/charmhelpers/contrib/hardening/ssh/checks/config.py new file mode 100644 index 00000000..41bed2d1 --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/hardening/ssh/checks/config.py @@ -0,0 +1,435 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from charmhelpers.contrib.network.ip import ( + get_address_in_network, + get_iface_addr, + is_ip, +) +from charmhelpers.core.hookenv import ( + log, + DEBUG, +) +from charmhelpers.fetch import ( + apt_install, + apt_update, +) +from charmhelpers.core.host import ( + lsb_release, + CompareHostReleases, +) +from charmhelpers.contrib.hardening.audits.file import ( + TemplatedFile, + FileContentAudit, +) +from charmhelpers.contrib.hardening.ssh import TEMPLATES_DIR +from charmhelpers.contrib.hardening import utils + + +def get_audits(): + """Get SSH hardening config audits. 
+
+    :returns: list of audits
+    """
+    audits = [SSHConfig(), SSHDConfig(), SSHConfigFileContentAudit(),
+              SSHDConfigFileContentAudit()]
+    return audits
+
+
+class SSHConfigContext(object):
+
+    type = 'client'
+
+    def get_macs(self, allow_weak_mac):
+        if allow_weak_mac:
+            weak_macs = 'weak'
+        else:
+            weak_macs = 'default'
+
+        default = 'hmac-sha2-512,hmac-sha2-256,hmac-ripemd160'
+        macs = {'default': default,
+                'weak': default + ',hmac-sha1'}
+
+        default = ('hmac-sha2-512-etm@openssh.com,'
+                   'hmac-sha2-256-etm@openssh.com,'
+                   'hmac-ripemd160-etm@openssh.com,umac-128-etm@openssh.com,'
+                   'hmac-sha2-512,hmac-sha2-256,hmac-ripemd160')
+        macs_66 = {'default': default,
+                   'weak': default + ',hmac-sha1'}
+
+        # Use newer MACs on Ubuntu Trusty and above
+        _release = lsb_release()['DISTRIB_CODENAME'].lower()
+        if CompareHostReleases(_release) >= 'trusty':
+            log("Detected Ubuntu 14.04 or newer, using new MACs", level=DEBUG)
+            macs = macs_66
+
+        return macs[weak_macs]
+
+    def get_kexs(self, allow_weak_kex):
+        if allow_weak_kex:
+            weak_kex = 'weak'
+        else:
+            weak_kex = 'default'
+
+        default = 'diffie-hellman-group-exchange-sha256'
+        weak = (default + ',diffie-hellman-group14-sha1,'
+                'diffie-hellman-group-exchange-sha1,'
+                'diffie-hellman-group1-sha1')
+        kex = {'default': default,
+               'weak': weak}
+
+        default = ('curve25519-sha256@libssh.org,'
+                   'diffie-hellman-group-exchange-sha256')
+        weak = (default + ',diffie-hellman-group14-sha1,'
+                'diffie-hellman-group-exchange-sha1,'
+                'diffie-hellman-group1-sha1')
+        kex_66 = {'default': default,
+                  'weak': weak}
+
+        # Use newer kex on Ubuntu Trusty and above
+        _release = lsb_release()['DISTRIB_CODENAME'].lower()
+        if CompareHostReleases(_release) >= 'trusty':
+            log('Detected Ubuntu 14.04 or newer, using new key exchange '
+                'algorithms', level=DEBUG)
+            kex = kex_66
+
+        return kex[weak_kex]
+
+    def get_ciphers(self, cbc_required):
+        if cbc_required:
+            weak_ciphers = 'weak'
+        else:
+            weak_ciphers = 'default'
+
+        default = 'aes256-ctr,aes192-ctr,aes128-ctr'
+        cipher = {'default': default,
+                  'weak': default + ',aes256-cbc,aes192-cbc,aes128-cbc'}
+
+        default = ('chacha20-poly1305@openssh.com,aes256-gcm@openssh.com,'
+                   'aes128-gcm@openssh.com,aes256-ctr,aes192-ctr,aes128-ctr')
+        ciphers_66 = {'default': default,
+                      'weak': default + ',aes256-cbc,aes192-cbc,aes128-cbc'}
+
+        # Use newer ciphers on Ubuntu Trusty and above
+        _release = lsb_release()['DISTRIB_CODENAME'].lower()
+        if CompareHostReleases(_release) >= 'trusty':
+            log('Detected Ubuntu 14.04 or newer, using new ciphers',
+                level=DEBUG)
+            cipher = ciphers_66
+
+        return cipher[weak_ciphers]
+
+    def get_listening(self, listen=['0.0.0.0']):
+        """Returns a list of addresses SSH can listen on
+
+        Turns input into a sensible list of IPs SSH can listen on. Input
+        must be a python list of interface names, IPs and/or CIDRs.
+
+        :param listen: list of IPs, CIDRs, interface names
+
+        :returns: list of IPs available on the host
+        """
+        if listen == ['0.0.0.0']:
+            return listen
+
+        value = []
+        for network in listen:
+            try:
+                ip = get_address_in_network(network=network, fatal=True)
+            except ValueError:
+                if is_ip(network):
+                    ip = network
+                else:
+                    try:
+                        ip = get_iface_addr(iface=network, fatal=False)[0]
+                    except IndexError:
+                        continue
+            value.append(ip)
+        if value == []:
+            return ['0.0.0.0']
+        return value
+
+    def __call__(self):
+        settings = utils.get_settings('ssh')
+        if settings['common']['network_ipv6_enable']:
+            addr_family = 'any'
+        else:
+            addr_family = 'inet'
+
+        ctxt = {
+            'addr_family': addr_family,
+            'remote_hosts': settings['common']['remote_hosts'],
+            'password_auth_allowed':
+            settings['client']['password_authentication'],
+            'ports': settings['common']['ports'],
+            'ciphers': self.get_ciphers(settings['client']['cbc_required']),
+            'macs': self.get_macs(settings['client']['weak_hmac']),
+            'kexs': self.get_kexs(settings['client']['weak_kex']),
+            'roaming': settings['client']['roaming'],
+        }
+        return ctxt
+
+
+class SSHConfig(TemplatedFile):
+    def __init__(self):
+        path = '/etc/ssh/ssh_config'
+        super(SSHConfig, self).__init__(path=path,
+                                        template_dir=TEMPLATES_DIR,
+                                        context=SSHConfigContext(),
+                                        user='root',
+                                        group='root',
+                                        mode=0o0644)
+
+    def pre_write(self):
+        settings = utils.get_settings('ssh')
+        apt_update(fatal=True)
+        apt_install(settings['client']['package'])
+        if not os.path.exists('/etc/ssh'):
+            os.makedirs('/etc/ssh')
+            # NOTE: don't recurse
+            utils.ensure_permissions('/etc/ssh', 'root', 'root', 0o0755,
+                                     maxdepth=0)
+
+    def post_write(self):
+        # NOTE: don't recurse
+        utils.ensure_permissions('/etc/ssh', 'root', 'root', 0o0755,
+                                 maxdepth=0)
+
+
+class SSHDConfigContext(SSHConfigContext):
+
+    type = 'server'
+
+    def __call__(self):
+        settings = utils.get_settings('ssh')
+        if settings['common']['network_ipv6_enable']:
+            addr_family = 'any'
+        else:
+            addr_family = 'inet'
+
+        ctxt = {
+            'ssh_ip': self.get_listening(settings['server']['listen_to']),
+            'password_auth_allowed':
+            settings['server']['password_authentication'],
+            'ports': settings['common']['ports'],
+            'addr_family': addr_family,
+            'ciphers': self.get_ciphers(settings['server']['cbc_required']),
+            'macs': self.get_macs(settings['server']['weak_hmac']),
+            'kexs': self.get_kexs(settings['server']['weak_kex']),
+            'host_key_files': settings['server']['host_key_files'],
+            'allow_root_with_key': settings['server']['allow_root_with_key'],
+            'password_authentication':
+            settings['server']['password_authentication'],
+            'use_priv_sep': settings['server']['use_privilege_separation'],
+            'use_pam': settings['server']['use_pam'],
+            'allow_x11_forwarding': settings['server']['allow_x11_forwarding'],
+            'print_motd': settings['server']['print_motd'],
+            'print_last_log': settings['server']['print_last_log'],
+            'client_alive_interval':
+            settings['server']['alive_interval'],
+            'client_alive_count': settings['server']['alive_count'],
+            'allow_tcp_forwarding': settings['server']['allow_tcp_forwarding'],
+            'allow_agent_forwarding':
+            settings['server']['allow_agent_forwarding'],
+            'deny_users': settings['server']['deny_users'],
+            'allow_users': settings['server']['allow_users'],
+            'deny_groups': settings['server']['deny_groups'],
+            'allow_groups': settings['server']['allow_groups'],
+            'use_dns': settings['server']['use_dns'],
+            'sftp_enable': settings['server']['sftp_enable'],
+            'sftp_group': settings['server']['sftp_group'],
+            'sftp_chroot':
settings['server']['sftp_chroot'],
+            'max_auth_tries': settings['server']['max_auth_tries'],
+            'max_sessions': settings['server']['max_sessions'],
+        }
+        return ctxt
+
+
+class SSHDConfig(TemplatedFile):
+    def __init__(self):
+        path = '/etc/ssh/sshd_config'
+        super(SSHDConfig, self).__init__(path=path,
+                                         template_dir=TEMPLATES_DIR,
+                                         context=SSHDConfigContext(),
+                                         user='root',
+                                         group='root',
+                                         mode=0o0600,
+                                         service_actions=[{'service': 'ssh',
+                                                           'actions':
+                                                           ['restart']}])
+
+    def pre_write(self):
+        settings = utils.get_settings('ssh')
+        apt_update(fatal=True)
+        apt_install(settings['server']['package'])
+        if not os.path.exists('/etc/ssh'):
+            os.makedirs('/etc/ssh')
+            # NOTE: don't recurse
+            utils.ensure_permissions('/etc/ssh', 'root', 'root', 0o0755,
+                                     maxdepth=0)
+
+    def post_write(self):
+        # NOTE: don't recurse
+        utils.ensure_permissions('/etc/ssh', 'root', 'root', 0o0755,
+                                 maxdepth=0)
+
+
+class SSHConfigFileContentAudit(FileContentAudit):
+    def __init__(self):
+        self.path = '/etc/ssh/ssh_config'
+        super(SSHConfigFileContentAudit, self).__init__(self.path, {})
+
+    def is_compliant(self, *args, **kwargs):
+        self.pass_cases = []
+        self.fail_cases = []
+        settings = utils.get_settings('ssh')
+
+        _release = lsb_release()['DISTRIB_CODENAME'].lower()
+        if CompareHostReleases(_release) >= 'trusty':
+            # NOTE: this audit covers the client config, so use the
+            # 'client' settings throughout.
+            if not settings['client']['weak_hmac']:
+                self.pass_cases.append(r'^MACs.+,hmac-ripemd160$')
+            else:
+                self.pass_cases.append(r'^MACs.+,hmac-sha1$')
+
+            if settings['client']['weak_kex']:
+                self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha256[,\s]?')  # noqa
+                self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group14-sha1[,\s]?')  # noqa
+                self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha1[,\s]?')  # noqa
+                self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group1-sha1[,\s]?')  # noqa
+            else:
+                self.pass_cases.append(r'^KexAlgorithms.+,diffie-hellman-group-exchange-sha256$')  # noqa
+                self.fail_cases.append(r'^KexAlgorithms.*diffie-hellman-group14-sha1[,\s]?')  # noqa
+
+            if settings['client']['cbc_required']:
+                self.pass_cases.append(r'^Ciphers\s.*-cbc[,\s]?')
+                self.fail_cases.append(r'^Ciphers\s.*aes128-ctr[,\s]?')
+                self.fail_cases.append(r'^Ciphers\s.*aes192-ctr[,\s]?')
+                self.fail_cases.append(r'^Ciphers\s.*aes256-ctr[,\s]?')
+            else:
+                self.fail_cases.append(r'^Ciphers\s.*-cbc[,\s]?')
+                self.pass_cases.append(r'^Ciphers\schacha20-poly1305@openssh.com,.+')  # noqa
+                self.pass_cases.append(r'^Ciphers\s.*aes128-ctr$')
+                self.pass_cases.append(r'^Ciphers\s.*aes192-ctr[,\s]?')
+                self.pass_cases.append(r'^Ciphers\s.*aes256-ctr[,\s]?')
+        else:
+            if not settings['client']['weak_hmac']:
+                self.fail_cases.append(r'^MACs.+,hmac-sha1$')
+            else:
+                self.pass_cases.append(r'^MACs.+,hmac-sha1$')
+
+            if settings['client']['weak_kex']:
+                self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha256[,\s]?')  # noqa
+                self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group14-sha1[,\s]?')  # noqa
+                self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha1[,\s]?')  # noqa
+                self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group1-sha1[,\s]?')  # noqa
+            else:
+                self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha256$')  # noqa
+                self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group14-sha1[,\s]?')  # noqa
+                self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha1[,\s]?')  # noqa
+                self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group1-sha1[,\s]?')  # noqa
+
+            if
settings['client']['cbc_required']: + self.pass_cases.append(r'^Ciphers\s.*-cbc[,\s]?') + self.fail_cases.append(r'^Ciphers\s.*aes128-ctr[,\s]?') + self.fail_cases.append(r'^Ciphers\s.*aes192-ctr[,\s]?') + self.fail_cases.append(r'^Ciphers\s.*aes256-ctr[,\s]?') + else: + self.fail_cases.append(r'^Ciphers\s.*-cbc[,\s]?') + self.pass_cases.append(r'^Ciphers\s.*aes128-ctr[,\s]?') + self.pass_cases.append(r'^Ciphers\s.*aes192-ctr[,\s]?') + self.pass_cases.append(r'^Ciphers\s.*aes256-ctr[,\s]?') + + if settings['client']['roaming']: + self.pass_cases.append(r'^UseRoaming yes$') + else: + self.fail_cases.append(r'^UseRoaming yes$') + + return super(SSHConfigFileContentAudit, self).is_compliant(*args, + **kwargs) + + +class SSHDConfigFileContentAudit(FileContentAudit): + def __init__(self): + self.path = '/etc/ssh/sshd_config' + super(SSHDConfigFileContentAudit, self).__init__(self.path, {}) + + def is_compliant(self, *args, **kwargs): + self.pass_cases = [] + self.fail_cases = [] + settings = utils.get_settings('ssh') + + _release = lsb_release()['DISTRIB_CODENAME'].lower() + if CompareHostReleases(_release) >= 'trusty': + if not settings['server']['weak_hmac']: + self.pass_cases.append(r'^MACs.+,hmac-ripemd160$') + else: + self.pass_cases.append(r'^MACs.+,hmac-sha1$') + + if settings['server']['weak_kex']: + self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha256[,\s]?') # noqa + self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group14-sha1[,\s]?') # noqa + self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha1[,\s]?') # noqa + self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group1-sha1[,\s]?') # noqa + else: + self.pass_cases.append(r'^KexAlgorithms.+,diffie-hellman-group-exchange-sha256$') # noqa + self.fail_cases.append(r'^KexAlgorithms.*diffie-hellman-group14-sha1[,\s]?') # noqa + + if settings['server']['cbc_required']: + self.pass_cases.append(r'^Ciphers\s.*-cbc[,\s]?') + self.fail_cases.append(r'^Ciphers\s.*aes128-ctr[,\s]?') + self.fail_cases.append(r'^Ciphers\s.*aes192-ctr[,\s]?') + self.fail_cases.append(r'^Ciphers\s.*aes256-ctr[,\s]?') + else: + self.fail_cases.append(r'^Ciphers\s.*-cbc[,\s]?') + self.pass_cases.append(r'^Ciphers\schacha20-poly1305@openssh.com,.+') # noqa + self.pass_cases.append(r'^Ciphers\s.*aes128-ctr$') + self.pass_cases.append(r'^Ciphers\s.*aes192-ctr[,\s]?') + self.pass_cases.append(r'^Ciphers\s.*aes256-ctr[,\s]?') + else: + if not settings['server']['weak_hmac']: + self.pass_cases.append(r'^MACs.+,hmac-ripemd160$') + else: + self.pass_cases.append(r'^MACs.+,hmac-sha1$') + + if settings['server']['weak_kex']: + self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha256[,\s]?') # noqa + self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group14-sha1[,\s]?') # noqa + self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha1[,\s]?') # noqa + self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group1-sha1[,\s]?') # noqa + else: + self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha256$') # noqa + self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group14-sha1[,\s]?') # noqa + self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha1[,\s]?') # noqa + self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group1-sha1[,\s]?') # noqa + + if settings['server']['cbc_required']: + self.pass_cases.append(r'^Ciphers\s.*-cbc[,\s]?') + self.fail_cases.append(r'^Ciphers\s.*aes128-ctr[,\s]?') + 
self.fail_cases.append(r'^Ciphers\s.*aes192-ctr[,\s]?')
+                self.fail_cases.append(r'^Ciphers\s.*aes256-ctr[,\s]?')
+            else:
+                self.fail_cases.append(r'^Ciphers\s.*-cbc[,\s]?')
+                self.pass_cases.append(r'^Ciphers\s.*aes128-ctr[,\s]?')
+                self.pass_cases.append(r'^Ciphers\s.*aes192-ctr[,\s]?')
+                self.pass_cases.append(r'^Ciphers\s.*aes256-ctr[,\s]?')
+
+        if settings['server']['sftp_enable']:
+            self.pass_cases.append(r'^Subsystem\ssftp')
+        else:
+            self.fail_cases.append(r'^Subsystem\ssftp')
+
+        return super(SSHDConfigFileContentAudit, self).is_compliant(*args,
+                                                                    **kwargs)
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/ssh/templates/__init__.py b/ceph-proxy/charmhelpers/contrib/hardening/ssh/templates/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/ssh/templates/ssh_config b/ceph-proxy/charmhelpers/contrib/hardening/ssh/templates/ssh_config
new file mode 100644
index 00000000..9742d8e2
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/ssh/templates/ssh_config
@@ -0,0 +1,77 @@
+###############################################################################
+# WARNING: This configuration file is maintained by Juju. Local changes may
+# be overwritten.
+###############################################################################
+# This is the ssh client system-wide configuration file. See
+# ssh_config(5) for more information. This file provides defaults for
+# users, and the values can be changed in per-user configuration files
+# or on the command line.
+
+# Configuration data is parsed as follows:
+#  1. command line options
+#  2. user-specific file
+#  3. system-wide file
+# Any configuration value is only changed the first time it is set.
+# Thus, host-specific definitions should be at the beginning of the
+# configuration file, and defaults at the end.
+
+# Site-wide defaults for some commonly used options. For a comprehensive
+# list of available options, their meanings and defaults, please see the
+# ssh_config(5) man page.
+
+# Restrict the following configuration to the remote hosts listed here, if any.
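+#
+# For illustration (assuming the hardening overrides set remote_hosts to
+# ['10.0.0.1', '10.0.0.2']), the Jinja2 block below would render as:
+#
+#     Host 10.0.0.1 10.0.0.2
+#
+# With no remote_hosts configured, the options below apply to all hosts.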
+{% if remote_hosts -%} +Host {{ ' '.join(remote_hosts) }} +{% endif %} +ForwardAgent no +ForwardX11 no +ForwardX11Trusted yes +RhostsRSAAuthentication no +RSAAuthentication yes +PasswordAuthentication {{ password_auth_allowed }} +HostbasedAuthentication no +GSSAPIAuthentication no +GSSAPIDelegateCredentials no +GSSAPIKeyExchange no +GSSAPITrustDNS no +BatchMode no +CheckHostIP yes +AddressFamily {{ addr_family }} +ConnectTimeout 0 +StrictHostKeyChecking ask +IdentityFile ~/.ssh/identity +IdentityFile ~/.ssh/id_rsa +IdentityFile ~/.ssh/id_dsa +# The port at the destination should be defined +{% for port in ports -%} +Port {{ port }} +{% endfor %} +Protocol 2 +Cipher 3des +{% if ciphers -%} +Ciphers {{ ciphers }} +{%- endif %} +{% if macs -%} +MACs {{ macs }} +{%- endif %} +{% if kexs -%} +KexAlgorithms {{ kexs }} +{%- endif %} +EscapeChar ~ +Tunnel no +TunnelDevice any:any +PermitLocalCommand no +VisualHostKey no +RekeyLimit 1G 1h +SendEnv LANG LC_* +HashKnownHosts yes +{% if roaming -%} +UseRoaming {{ roaming }} +{% endif %} diff --git a/ceph-proxy/charmhelpers/contrib/hardening/ssh/templates/sshd_config b/ceph-proxy/charmhelpers/contrib/hardening/ssh/templates/sshd_config new file mode 100644 index 00000000..5f87298a --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/hardening/ssh/templates/sshd_config @@ -0,0 +1,159 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### +# Package generated configuration file +# See the sshd_config(5) manpage for details + +# What ports, IPs and protocols we listen for +{% for port in ports -%} +Port {{ port }} +{% endfor -%} +AddressFamily {{ addr_family }} +# Use these options to restrict which interfaces/protocols sshd will bind to +{% if ssh_ip -%} +{% for ip in ssh_ip -%} +ListenAddress {{ ip }} +{% endfor %} +{%- else -%} +ListenAddress :: +ListenAddress 0.0.0.0 +{% endif -%} +Protocol 2 +{% if ciphers -%} +Ciphers {{ ciphers }} +{% endif -%} +{% if macs -%} +MACs {{ macs }} +{% endif -%} +{% if kexs -%} +KexAlgorithms {{ kexs }} +{% endif -%} +# HostKeys for protocol version 2 +{% for keyfile in host_key_files -%} +HostKey {{ keyfile }} +{% endfor -%} + +# Privilege Separation is turned on for security +{% if use_priv_sep -%} +UsePrivilegeSeparation {{ use_priv_sep }} +{% endif -%} + +# Lifetime and size of ephemeral version 1 server key +KeyRegenerationInterval 3600 +ServerKeyBits 1024 + +# Logging +SyslogFacility AUTH +LogLevel VERBOSE + +# Authentication: +LoginGraceTime 30s +{% if allow_root_with_key -%} +PermitRootLogin without-password +{% else -%} +PermitRootLogin no +{% endif %} +PermitTunnel no +PermitUserEnvironment no +StrictModes yes + +RSAAuthentication yes +PubkeyAuthentication yes +AuthorizedKeysFile %h/.ssh/authorized_keys + +# Don't read the user's ~/.rhosts and ~/.shosts files +IgnoreRhosts yes +# For this to work you will also need host keys in /etc/ssh_known_hosts +RhostsRSAAuthentication no +# similar for protocol version 2 +HostbasedAuthentication no +# Uncomment if you don't trust ~/.ssh/known_hosts for RhostsRSAAuthentication +IgnoreUserKnownHosts yes + +# To enable empty passwords, change to yes (NOT RECOMMENDED) +PermitEmptyPasswords no + +# Change to yes to enable challenge-response passwords (beware issues with +# some PAM modules and threads) +ChallengeResponseAuthentication no + +# Change to no to disable 
tunnelled clear text passwords
+PasswordAuthentication {{ password_authentication }}
+
+# Kerberos options
+KerberosAuthentication no
+KerberosGetAFSToken no
+KerberosOrLocalPasswd no
+KerberosTicketCleanup yes
+
+# GSSAPI options
+GSSAPIAuthentication no
+GSSAPICleanupCredentials yes
+
+X11Forwarding {{ allow_x11_forwarding }}
+X11DisplayOffset 10
+X11UseLocalhost yes
+GatewayPorts no
+PrintMotd {{ print_motd }}
+PrintLastLog {{ print_last_log }}
+TCPKeepAlive no
+UseLogin no
+
+ClientAliveInterval {{ client_alive_interval }}
+ClientAliveCountMax {{ client_alive_count }}
+AllowTcpForwarding {{ allow_tcp_forwarding }}
+AllowAgentForwarding {{ allow_agent_forwarding }}
+
+MaxStartups 10:30:100
+#Banner /etc/issue.net
+
+# Allow client to pass locale environment variables
+AcceptEnv LANG LC_*
+
+# Set this to 'yes' to enable PAM authentication, account processing,
+# and session processing. If this is enabled, PAM authentication will
+# be allowed through the ChallengeResponseAuthentication and
+# PasswordAuthentication. Depending on your PAM configuration,
+# PAM authentication via ChallengeResponseAuthentication may bypass
+# the setting of "PermitRootLogin without-password".
+# If you just want the PAM account and session checks to run without
+# PAM authentication, then enable this but set PasswordAuthentication
+# and ChallengeResponseAuthentication to 'no'.
+UsePAM {{ use_pam }}
+
+{% if deny_users -%}
+DenyUsers {{ deny_users }}
+{% endif -%}
+{% if allow_users -%}
+AllowUsers {{ allow_users }}
+{% endif -%}
+{% if deny_groups -%}
+DenyGroups {{ deny_groups }}
+{% endif -%}
+{% if allow_groups -%}
+AllowGroups {{ allow_groups }}
+{% endif -%}
+UseDNS {{ use_dns }}
+MaxAuthTries {{ max_auth_tries }}
+MaxSessions {{ max_sessions }}
+
+{% if sftp_enable -%}
+# Configuration, in case SFTP is used
+## override default of no subsystems
+## Subsystem sftp /opt/app/openssh5/libexec/sftp-server
+Subsystem sftp internal-sftp -l VERBOSE
+
+## These lines must appear at the *end* of sshd_config
+Match Group {{ sftp_group }}
+ForceCommand internal-sftp -l VERBOSE
+ChrootDirectory {{ sftp_chroot }}
+{% else -%}
+# Configuration, in case SFTP is used
+## override default of no subsystems
+## Subsystem sftp /opt/app/openssh5/libexec/sftp-server
+## These lines must appear at the *end* of sshd_config
+Match Group sftponly
+ForceCommand internal-sftp -l VERBOSE
+ChrootDirectory /sftpchroot/home/%u
+{% endif %}
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/templating.py b/ceph-proxy/charmhelpers/contrib/hardening/templating.py
new file mode 100644
index 00000000..4dee5465
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/templating.py
@@ -0,0 +1,80 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
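+#
+# Minimal usage sketch (hypothetical template directory and context, for
+# illustration only; the TemplatedFile audits in this package are the real
+# callers):
+#
+#     from charmhelpers.contrib.hardening.templating import render_and_write
+#
+#     # Renders <template_dir>/sshd_config and writes it to the target path.
+#     render_and_write('/usr/share/hardening-templates',  # template_dir
+#                      '/etc/ssh/sshd_config',            # target path
+#                      {'ports': [22]})                   # Jinja2 context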
+
+import os
+
+from charmhelpers.core.hookenv import (
+    log,
+    DEBUG,
+    WARNING,
+)
+
+try:
+    from jinja2 import FileSystemLoader, Environment
+except ImportError:
+    from charmhelpers.fetch import apt_install
+    from charmhelpers.fetch import apt_update
+    apt_update(fatal=True)
+    apt_install('python3-jinja2', fatal=True)
+    from jinja2 import FileSystemLoader, Environment
+
+
+# NOTE: function separated from main rendering code to facilitate easier
+#       mocking in unit tests.
+def write(path, data):
+    with open(path, 'wb') as out:
+        out.write(data)
+
+
+def get_template_path(template_dir, path):
+    """Returns the template file which would be used to render the path.
+
+    The path to the template file is returned.
+    :param template_dir: the directory the templates are located in
+    :param path: the file path to be written to.
+    :returns: path to the template file
+    """
+    return os.path.join(template_dir, os.path.basename(path))
+
+
+def render_and_write(template_dir, path, context):
+    """Renders the specified template into the file.
+
+    :param template_dir: the directory to load the template from
+    :param path: the path to write the templated contents to
+    :param context: the parameters to pass to the rendering engine
+    """
+    env = Environment(loader=FileSystemLoader(template_dir))
+    template_file = os.path.basename(path)
+    template = env.get_template(template_file)
+    log('Rendering from template: %s' % template.name, level=DEBUG)
+    rendered_content = template.render(context)
+    if not rendered_content:
+        log("Render returned empty content - skipping '%s'" % path,
+            level=WARNING)
+        return
+
+    write(path, rendered_content.encode('utf-8').strip())
+    log('Wrote template %s' % path, level=DEBUG)
diff --git a/ceph-proxy/charmhelpers/contrib/hardening/utils.py b/ceph-proxy/charmhelpers/contrib/hardening/utils.py
new file mode 100644
index 00000000..f93851a9
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/hardening/utils.py
@@ -0,0 +1,154 @@
+# Copyright 2016-2021 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import glob
+import grp
+import os
+import pwd
+import yaml
+
+from charmhelpers.core.hookenv import (
+    log,
+    DEBUG,
+    INFO,
+    WARNING,
+    ERROR,
+)
+
+
+# Global settings cache. Since each hook fire entails a fresh module import it
+# is safe to hold this in memory and not risk missing config changes (since
+# they will result in a new hook fire and thus re-import).
+__SETTINGS__ = {}
+
+
+def _get_defaults(modules):
+    """Load the default config for the provided modules.
+
+    :param modules: stack modules config defaults to lookup.
+    :returns: modules default config dictionary.
+    """
+    default = os.path.join(os.path.dirname(__file__),
+                           'defaults/%s.yaml' % (modules))
+    return yaml.safe_load(open(default))
+
+
+def _get_schema(modules):
+    """Load the config schema for the provided modules.
+
+    NOTE: this schema is intended to have a 1-1 relationship with the keys in
+    the default config and is used as a means to verify valid overrides
+    provided by the user.
+
+    :param modules: stack modules config schema to lookup.
+    :returns: modules default schema dictionary.
+    """
+    schema = os.path.join(os.path.dirname(__file__),
+                          'defaults/%s.yaml.schema' % (modules))
+    return yaml.safe_load(open(schema))
+
+
+def _get_user_provided_overrides(modules):
+    """Load user-provided config overrides.
+
+    :param modules: stack modules to lookup in user overrides yaml file.
+    :returns: overrides dictionary.
+    """
+    overrides = os.path.join(os.environ['JUJU_CHARM_DIR'],
+                             'hardening.yaml')
+    if os.path.exists(overrides):
+        log("Found user-provided config overrides file '%s'" %
+            (overrides), level=DEBUG)
+        settings = yaml.safe_load(open(overrides))
+        if settings and settings.get(modules):
+            log("Applying '%s' overrides" % (modules), level=DEBUG)
+            return settings.get(modules)
+
+        log("No overrides found for '%s'" % (modules), level=DEBUG)
+    else:
+        log("No hardening config overrides file '%s' found in charm "
+            "root dir" % (overrides), level=DEBUG)
+
+    return {}
+
+
+def _apply_overrides(settings, overrides, schema):
+    """Get overrides config overlaid onto modules defaults.
+
+    :param settings: modules default config to be overlaid.
+    :returns: dictionary of modules config with user overrides applied.
+    """
+    if overrides:
+        for k, v in overrides.items():
+            if k in schema:
+                if schema[k] is None:
+                    settings[k] = v
+                elif type(schema[k]) is dict:
+                    settings[k] = _apply_overrides(settings[k], overrides[k],
+                                                   schema[k])
+                else:
+                    raise Exception("Unexpected type found in schema '%s'" %
+                                    type(schema[k]))
+            else:
+                log("Unknown override key '%s' - ignoring" % (k), level=INFO)
+
+    return settings
+
+
+def get_settings(modules):
+    global __SETTINGS__
+    if modules in __SETTINGS__:
+        return __SETTINGS__[modules]
+
+    schema = _get_schema(modules)
+    settings = _get_defaults(modules)
+    overrides = _get_user_provided_overrides(modules)
+    __SETTINGS__[modules] = _apply_overrides(settings, overrides, schema)
+    return __SETTINGS__[modules]
+
+
+def ensure_permissions(path, user, group, permissions, maxdepth=-1):
+    """Ensure permissions for path.
+
+    If path is a file, apply to file and return. If path is a directory,
+    apply recursively (if required) to directory contents and return.
+
+    :param user: user name
+    :param group: group name
+    :param permissions: octal permissions
+    :param maxdepth: maximum recursion depth. A negative maxdepth allows
+                     infinite recursion and maxdepth=0 means no recursion.
+    :returns: None
+    """
+    if not os.path.exists(path):
+        log("File '%s' does not exist - cannot set permissions" % (path),
+            level=WARNING)
+        return
+
+    _user = pwd.getpwnam(user)
+    os.chown(path, _user.pw_uid, grp.getgrnam(group).gr_gid)
+    os.chmod(path, permissions)
+
+    if maxdepth == 0:
+        log("Max recursion depth reached - skipping further recursion",
+            level=DEBUG)
+        return
+    elif maxdepth > 0:
+        maxdepth -= 1
+
+    if os.path.isdir(path):
+        contents = glob.glob("%s/*" % (path))
+        for c in contents:
+            ensure_permissions(c, user=user, group=group,
+                               permissions=permissions, maxdepth=maxdepth)
diff --git a/ceph-proxy/charmhelpers/contrib/network/__init__.py b/ceph-proxy/charmhelpers/contrib/network/__init__.py
new file mode 100644
index 00000000..d7567b86
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/network/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-proxy/charmhelpers/contrib/network/ip.py b/ceph-proxy/charmhelpers/contrib/network/ip.py new file mode 100644 index 00000000..f3b4864f --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/network/ip.py @@ -0,0 +1,628 @@ +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import glob +import re +import subprocess +import socket +import ssl + +from functools import partial + +from charmhelpers.fetch import apt_install, apt_update +from charmhelpers.core.hookenv import ( + config, + log, + network_get_primary_address, + unit_get, + WARNING, + NoNetworkBinding, +) + +from charmhelpers.core.host import ( + lsb_release, + CompareHostReleases, +) + +try: + import netifaces +except ImportError: + apt_update(fatal=True) + apt_install('python3-netifaces', fatal=True) + import netifaces + +try: + import netaddr +except ImportError: + apt_update(fatal=True) + apt_install('python3-netaddr', fatal=True) + import netaddr + + +def _validate_cidr(network): + try: + netaddr.IPNetwork(network) + except (netaddr.core.AddrFormatError, ValueError): + raise ValueError("Network (%s) is not in CIDR presentation format" % + network) + + +def no_ip_found_error_out(network): + errmsg = ("No IP address found in network(s): %s" % network) + raise ValueError(errmsg) + + +def _get_ipv6_network_from_address(address): + """Get an netaddr.IPNetwork for the given IPv6 address + :param address: a dict as returned by netifaces.ifaddresses + :returns netaddr.IPNetwork: None if the address is a link local or loopback + address + """ + if address['addr'].startswith('fe80') or address['addr'] == "::1": + return None + + prefix = address['netmask'].split("/") + if len(prefix) > 1: + netmask = prefix[1] + else: + netmask = address['netmask'] + return netaddr.IPNetwork("%s/%s" % (address['addr'], + netmask)) + + +def get_address_in_network(network, fallback=None, fatal=False): + """Get an IPv4 or IPv6 address within the network from the host. + + :param network (str): CIDR presentation format. For example, + '192.168.1.0/24'. Supports multiple networks as a space-delimited list. + :param fallback (str): If no address is found, return fallback. + :param fatal (boolean): If no address is found, fallback is not + set and fatal is True then exit(1). 
+ """ + if network is None: + if fallback is not None: + return fallback + + if fatal: + no_ip_found_error_out(network) + else: + return None + + networks = network.split() or [network] + for network in networks: + _validate_cidr(network) + network = netaddr.IPNetwork(network) + for iface in netifaces.interfaces(): + try: + addresses = netifaces.ifaddresses(iface) + except ValueError: + # If an instance was deleted between + # netifaces.interfaces() run and now, its interfaces are gone + continue + if network.version == 4 and netifaces.AF_INET in addresses: + for addr in addresses[netifaces.AF_INET]: + cidr = netaddr.IPNetwork("%s/%s" % (addr['addr'], + addr['netmask'])) + if cidr in network: + return str(cidr.ip) + + if network.version == 6 and netifaces.AF_INET6 in addresses: + for addr in addresses[netifaces.AF_INET6]: + cidr = _get_ipv6_network_from_address(addr) + if cidr and cidr in network: + return str(cidr.ip) + + if fallback is not None: + return fallback + + if fatal: + no_ip_found_error_out(network) + + return None + + +def is_ipv6(address): + """Determine whether provided address is IPv6 or not.""" + try: + address = netaddr.IPAddress(address) + except netaddr.AddrFormatError: + # probably a hostname - so not an address at all! + return False + + return address.version == 6 + + +def is_address_in_network(network, address): + """ + Determine whether the provided address is within a network range. + + :param network (str): CIDR presentation format. For example, + '192.168.1.0/24'. + :param address: An individual IPv4 or IPv6 address without a net + mask or subnet prefix. For example, '192.168.1.1'. + :returns boolean: Flag indicating whether address is in network. + """ + try: + network = netaddr.IPNetwork(network) + except (netaddr.core.AddrFormatError, ValueError): + raise ValueError("Network (%s) is not in CIDR presentation format" % + network) + + try: + address = netaddr.IPAddress(address) + except (netaddr.core.AddrFormatError, ValueError): + raise ValueError("Address (%s) is not in correct presentation format" % + address) + + if address in network: + return True + else: + return False + + +def _get_for_address(address, key): + """Retrieve an attribute of or the physical interface that + the IP address provided could be bound to. + + :param address (str): An individual IPv4 or IPv6 address without a net + mask or subnet prefix. For example, '192.168.1.1'. + :param key: 'iface' for the physical interface name or an attribute + of the configured interface, for example 'netmask'. + :returns str: Requested attribute or None if address is not bindable. 
+ """ + address = netaddr.IPAddress(address) + for iface in netifaces.interfaces(): + addresses = netifaces.ifaddresses(iface) + if address.version == 4 and netifaces.AF_INET in addresses: + addr = addresses[netifaces.AF_INET][0]['addr'] + netmask = addresses[netifaces.AF_INET][0]['netmask'] + network = netaddr.IPNetwork("%s/%s" % (addr, netmask)) + cidr = network.cidr + if address in cidr: + if key == 'iface': + return iface + else: + return addresses[netifaces.AF_INET][0][key] + + if address.version == 6 and netifaces.AF_INET6 in addresses: + for addr in addresses[netifaces.AF_INET6]: + network = _get_ipv6_network_from_address(addr) + if not network: + continue + + cidr = network.cidr + if address in cidr: + if key == 'iface': + return iface + elif key == 'netmask' and cidr: + return str(cidr).split('/')[1] + else: + return addr[key] + return None + + +get_iface_for_address = partial(_get_for_address, key='iface') + + +get_netmask_for_address = partial(_get_for_address, key='netmask') + + +def resolve_network_cidr(ip_address): + ''' + Resolves the full address cidr of an ip_address based on + configured network interfaces + ''' + netmask = get_netmask_for_address(ip_address) + return str(netaddr.IPNetwork("%s/%s" % (ip_address, netmask)).cidr) + + +def format_ipv6_addr(address): + """If address is IPv6, wrap it in '[]' otherwise return None. + + This is required by most configuration files when specifying IPv6 + addresses. + """ + if is_ipv6(address): + return "[%s]" % address + + return None + + +def is_ipv6_disabled(): + try: + result = subprocess.check_output( + ['sysctl', 'net.ipv6.conf.all.disable_ipv6'], + stderr=subprocess.STDOUT, + universal_newlines=True) + except subprocess.CalledProcessError: + return True + + return "net.ipv6.conf.all.disable_ipv6 = 1" in result + + +def get_iface_addr(iface='eth0', inet_type='AF_INET', inc_aliases=False, + fatal=True, exc_list=None): + """Return the assigned IP address for a given interface, if any. + + :param iface: network interface on which address(es) are expected to + be found. + :param inet_type: inet address family + :param inc_aliases: include alias interfaces in search + :param fatal: if True, raise exception if address not found + :param exc_list: list of addresses to ignore + :return: list of ip addresses + """ + # Extract nic if passed /dev/ethX + if '/' in iface: + iface = iface.split('/')[-1] + + if not exc_list: + exc_list = [] + + try: + inet_num = getattr(netifaces, inet_type) + except AttributeError: + raise Exception("Unknown inet type '%s'" % str(inet_type)) + + interfaces = netifaces.interfaces() + if inc_aliases: + ifaces = [] + for _iface in interfaces: + if iface == _iface or _iface.split(':')[0] == iface: + ifaces.append(_iface) + + if fatal and not ifaces: + raise Exception("Invalid interface '%s'" % iface) + + ifaces.sort() + else: + if iface not in interfaces: + if fatal: + raise Exception("Interface '%s' not found " % (iface)) + else: + return [] + + else: + ifaces = [iface] + + addresses = [] + for netiface in ifaces: + net_info = netifaces.ifaddresses(netiface) + if inet_num in net_info: + for entry in net_info[inet_num]: + if 'addr' in entry and entry['addr'] not in exc_list: + addresses.append(entry['addr']) + + if fatal and not addresses: + raise Exception("Interface '%s' doesn't have any %s addresses." 
% + (iface, inet_type)) + + return sorted(addresses) + + +get_ipv4_addr = partial(get_iface_addr, inet_type='AF_INET') + + +def get_iface_from_addr(addr): + """Work out on which interface the provided address is configured.""" + for iface in netifaces.interfaces(): + addresses = netifaces.ifaddresses(iface) + for inet_type in addresses: + for _addr in addresses[inet_type]: + _addr = _addr['addr'] + # link local + ll_key = re.compile("(.+)%.*") + raw = re.match(ll_key, _addr) + if raw: + _addr = raw.group(1) + + if _addr == addr: + log("Address '%s' is configured on iface '%s'" % + (addr, iface)) + return iface + + msg = "Unable to infer net iface on which '%s' is configured" % (addr) + raise Exception(msg) + + +def sniff_iface(f): + """Ensure decorated function is called with a value for iface. + + If no iface provided, inject net iface inferred from unit private address. + """ + def iface_sniffer(*args, **kwargs): + if not kwargs.get('iface', None): + kwargs['iface'] = get_iface_from_addr(unit_get('private-address')) + + return f(*args, **kwargs) + + return iface_sniffer + + +@sniff_iface +def get_ipv6_addr(iface=None, inc_aliases=False, fatal=True, exc_list=None, + dynamic_only=True): + """Get assigned IPv6 address for a given interface. + + Returns list of addresses found. If no address found, returns empty list. + + If iface is None, we infer the current primary interface by doing a reverse + lookup on the unit private-address. + + We currently only support scope global IPv6 addresses i.e. non-temporary + addresses. If no global IPv6 address is found, return the first one found + in the ipv6 address list. + + :param iface: network interface on which ipv6 address(es) are expected to + be found. + :param inc_aliases: include alias interfaces in search + :param fatal: if True, raise exception if address not found + :param exc_list: list of addresses to ignore + :param dynamic_only: only recognise dynamic addresses + :return: list of ipv6 addresses + """ + addresses = get_iface_addr(iface=iface, inet_type='AF_INET6', + inc_aliases=inc_aliases, fatal=fatal, + exc_list=exc_list) + + if addresses: + global_addrs = [] + for addr in addresses: + key_scope_link_local = re.compile("^fe80::..(.+)%(.+)") + m = re.match(key_scope_link_local, addr) + if m: + eui_64_mac = m.group(1) + iface = m.group(2) + else: + global_addrs.append(addr) + + if global_addrs: + # Make sure any found global addresses are not temporary + cmd = ['ip', 'addr', 'show', iface] + out = subprocess.check_output( + cmd).decode('UTF-8', errors='replace') + if dynamic_only: + key = re.compile("inet6 (.+)/[0-9]+ scope global.* dynamic.*") + else: + key = re.compile("inet6 (.+)/[0-9]+ scope global.*") + + addrs = [] + for line in out.split('\n'): + line = line.strip() + m = re.match(key, line) + if m and 'temporary' not in line: + # Return the first valid address we find + for addr in global_addrs: + if m.group(1) == addr: + if not dynamic_only or \ + m.group(1).endswith(eui_64_mac): + addrs.append(addr) + + if addrs: + return addrs + + if fatal: + raise Exception("Interface '%s' does not have a scope global " + "non-temporary ipv6 address." 
% iface) + + return [] + + +def get_bridges(vnic_dir='/sys/devices/virtual/net'): + """Return a list of bridges on the system.""" + b_regex = "%s/*/bridge" % vnic_dir + return [x.replace(vnic_dir, '').split('/')[1] for x in glob.glob(b_regex)] + + +def get_bridge_nics(bridge, vnic_dir='/sys/devices/virtual/net'): + """Return a list of nics comprising a given bridge on the system.""" + brif_regex = "%s/%s/brif/*" % (vnic_dir, bridge) + return [x.split('/')[-1] for x in glob.glob(brif_regex)] + + +def is_bridge_member(nic): + """Check if a given nic is a member of a bridge.""" + for bridge in get_bridges(): + if nic in get_bridge_nics(bridge): + return True + + return False + + +def is_ip(address): + """ + Returns True if address is a valid IP address. + """ + try: + # Test to see if already an IPv4/IPv6 address + address = netaddr.IPAddress(address) + return True + except (netaddr.AddrFormatError, ValueError): + return False + + +def ns_query(address): + try: + import dns.resolver + except ImportError: + apt_install('python3-dnspython', fatal=True) + import dns.resolver + + if isinstance(address, dns.name.Name): + rtype = 'PTR' + elif isinstance(address, str): + rtype = 'A' + else: + return None + + try: + answers = dns.resolver.query(address, rtype) + except (dns.resolver.NXDOMAIN, dns.resolver.NoNameservers): + return None + + if answers: + return str(answers[0]) + return None + + +def get_host_ip(hostname, fallback=None): + """ + Resolves the IP for a given hostname, or returns + the input if it is already an IP. + """ + if is_ip(hostname): + return hostname + + ip_addr = ns_query(hostname) + if not ip_addr: + try: + ip_addr = socket.gethostbyname(hostname) + except Exception: + log("Failed to resolve hostname '%s'" % (hostname), + level=WARNING) + return fallback + return ip_addr + + +def get_hostname(address, fqdn=True): + """ + Resolves hostname for given IP, or returns the input + if it is already a hostname. + """ + if is_ip(address): + try: + import dns.reversename + except ImportError: + apt_install("python3-dnspython", fatal=True) + import dns.reversename + + rev = dns.reversename.from_address(address) + result = ns_query(rev) + + if not result: + try: + result = socket.gethostbyaddr(address)[0] + except Exception: + return None + else: + result = address + + if fqdn: + # strip trailing . + if result.endswith('.'): + return result[:-1] + else: + return result + else: + return result.split('.')[0] + + +class SSLPortCheckInfo(object): + + def __init__(self, key, cert, ca_cert, check_hostname=False): + self.key = key + self.cert = cert + self.ca_cert = ca_cert + # NOTE: by default we do not check hostname since the port check is + # typically performed using 0.0.0.0 which will not match the + # certificate. Hence the default for this is False. + self.check_hostname = check_hostname + + @property + def ssl_context(self): + context = ssl.create_default_context() + context.check_hostname = self.check_hostname + context.load_cert_chain(self.cert, self.key) + context.load_verify_locations(self.ca_cert) + return context + + +def port_has_listener(address, port, sslinfo=None): + """ + Returns True if the address:port is open and being listened to, + else False. By default uses netcat to check ports but if sslinfo is + provided will use an SSL connection instead. + + @param address: an IP address or hostname + @param port: integer port + @param sslinfo: optional SSLPortCheckInfo object. + If provided, the check is performed using an ssl + connection. 
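+ + Illustrative usage (host, port and file paths are hypothetical):: + + port_has_listener('10.0.0.1', 80) # plain TCP check via nc + sslinfo = SSLPortCheckInfo('/etc/ssl/key.pem', '/etc/ssl/cert.pem', + '/etc/ssl/ca.pem') + port_has_listener('10.0.0.1', 443, sslinfo=sslinfo) # TLS check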
+ + Note: calls 'nc' (netcat) via a subprocess + """ + if not sslinfo: + cmd = ['nc', '-z', address, str(port)] + result = subprocess.call(cmd) + return not (bool(result)) + + try: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0) as sock: + ssock = sslinfo.ssl_context.wrap_socket(sock, + server_hostname=address) + ssock.connect((address, port)) + # this bit is crucial to ensure tls close_notify is sent + ssock.unwrap() + + return True + except ConnectionRefusedError: + return False + + +def assert_charm_supports_ipv6(): + """Check whether the charm is able to support IPv6.""" + release = lsb_release()['DISTRIB_CODENAME'].lower() + if CompareHostReleases(release) < "trusty": + raise Exception("IPv6 is not supported in the charms for Ubuntu " + "versions less than Trusty 14.04") + + +def get_relation_ip(interface, cidr_network=None): + """Return this unit's IP for the given interface. + + Allow for an arbitrary interface to use with network-get to select an IP. + Handle all address selection options including passed cidr network and + IPv6. + + Usage: get_relation_ip('amqp', cidr_network='10.0.0.0/8') + + @param interface: string name of the relation. + @param cidr_network: string CIDR Network to select an address from. + @raises Exception if prefer-ipv6 is configured but IPv6 unsupported. + @returns IPv6 or IPv4 address + """ + # Select the interface address first + # For possible use as a fallback below with get_address_in_network + try: + # Get the interface specific IP + address = network_get_primary_address(interface) + except NotImplementedError: + # If network-get is not available + address = get_host_ip(unit_get('private-address')) + except NoNetworkBinding: + log("No network binding for {}".format(interface), WARNING) + address = get_host_ip(unit_get('private-address')) + + if config('prefer-ipv6'): + # Currently IPv6 has priority, eventually we want IPv6 to just be + # another network space. + assert_charm_supports_ipv6() + return get_ipv6_addr()[0] + elif cidr_network: + # If a specific CIDR network is passed get the address from that + # network. + return get_address_in_network(cidr_network, address) + + # Return the interface address + return address diff --git a/ceph-proxy/charmhelpers/contrib/openstack/__init__.py b/ceph-proxy/charmhelpers/contrib/openstack/__init__.py new file mode 100644 index 00000000..d7567b86 --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/openstack/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-proxy/charmhelpers/contrib/openstack/alternatives.py b/ceph-proxy/charmhelpers/contrib/openstack/alternatives.py new file mode 100644 index 00000000..547de09c --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/openstack/alternatives.py @@ -0,0 +1,44 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +''' Helper for managing alternatives for file conflict resolution ''' + +import subprocess +import shutil +import os + + +def install_alternative(name, target, source, priority=50): + ''' Install alternative configuration ''' + if (os.path.exists(target) and not os.path.islink(target)): + # Move existing file/directory away before installing + shutil.move(target, '{}.bak'.format(target)) + cmd = [ + 'update-alternatives', '--force', '--install', + target, name, source, str(priority) + ] + subprocess.check_call(cmd) + + +def remove_alternative(name, source): + """Remove an installed alternative configuration file + + :param name: string name of the alternative to remove + :param source: string full path to alternative to remove + """ + cmd = [ + 'update-alternatives', '--remove', + name, source + ] + subprocess.check_call(cmd) diff --git a/ceph-proxy/charmhelpers/contrib/openstack/deferred_events.py b/ceph-proxy/charmhelpers/contrib/openstack/deferred_events.py new file mode 100644 index 00000000..4c46e41a --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/openstack/deferred_events.py @@ -0,0 +1,418 @@ +# Copyright 2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module for managing deferred service events. + +This module is used to manage deferred service events from both charm actions +and package actions. +""" + +import datetime +import glob +import yaml +import os +import time +import uuid + +import charmhelpers.contrib.openstack.policy_rcd as policy_rcd +import charmhelpers.core.hookenv as hookenv +import charmhelpers.core.host as host +import charmhelpers.core.unitdata as unitdata + +import subprocess + + +# Deferred events generated from the charm are stored alongside those +# generated from packaging.
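+# Each event is stored as its own YAML file named, for example, +# charm-<application-name>-<uuid>.deferred (see get_event_record_file +# below).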
+DEFERRED_EVENTS_DIR = policy_rcd.POLICY_DEFERRED_EVENTS_DIR + + +class ServiceEvent(): + + def __init__(self, timestamp, service, reason, action, + policy_requestor_name=None, policy_requestor_type=None): + self.timestamp = timestamp + self.service = service + self.reason = reason + self.action = action + if policy_requestor_name: + self.policy_requestor_name = policy_requestor_name + else: + self.policy_requestor_name = hookenv.service_name() + if policy_requestor_type: + self.policy_requestor_type = policy_requestor_type + else: + self.policy_requestor_type = 'charm' + + def __eq__(self, other): + for attr in vars(self): + if getattr(self, attr) != getattr(other, attr): + return False + return True + + def matching_request(self, other): + for attr in ['service', 'action', 'reason']: + if getattr(self, attr) != getattr(other, attr): + return False + return True + + @classmethod + def from_dict(cls, data): + return cls( + data['timestamp'], + data['service'], + data['reason'], + data['action'], + data.get('policy_requestor_name'), + data.get('policy_requestor_type')) + + +def deferred_events_files(): + """Deferred event files + + Deferred event files that were generated by service_name() policy. + + :returns: Deferred event files + :rtype: List[str] + """ + return glob.glob('{}/*.deferred'.format(DEFERRED_EVENTS_DIR)) + + +def read_event_file(file_name): + """Read a file and return the corresponding objects. + + :param file_name: Name of file to read. + :type file_name: str + :returns: ServiceEvent from file. + :rtype: ServiceEvent + """ + with open(file_name, 'r') as f: + contents = yaml.safe_load(f) + event = ServiceEvent( + contents['timestamp'], + contents['service'], + contents['reason'], + contents['action'], + policy_requestor_name=contents.get('policy_requestor_name'), + policy_requestor_type=contents.get('policy_requestor_type')) + return event + + +def deferred_events(): + """Get list of deferred events. + + List of deferred events. Events are represented by dicts of the form: + + { + action: restart, + policy_requestor_name: neutron-openvswitch, + policy_requestor_type: charm, + reason: 'Pkg update', + service: openvswitch-switch, + time: 1614328743} + + :returns: List of deferred events. + :rtype: List[ServiceEvent] + """ + events = [] + for defer_file in deferred_events_files(): + event = read_event_file(defer_file) + if event.policy_requestor_name == hookenv.service_name(): + events.append((defer_file, event)) + return events + + +def duplicate_event_files(event): + """Get list of event files that have equivalent deferred events. + + :param event: Event to compare + :type event: ServiceEvent + :returns: List of event files + :rtype: List[str] + """ + duplicates = [] + for event_file, existing_event in deferred_events(): + if event.matching_request(existing_event): + duplicates.append(event_file) + return duplicates + + +def get_event_record_file(policy_requestor_type, policy_requestor_name): + """Generate filename for storing a new event. + + :param policy_requestor_type: System that blocked event + :type policy_requestor_type: str + :param policy_requestor_name: Name of application that blocked event + :type policy_requestor_name: str + :returns: File name + :rtype: str + """ + file_name = '{}/{}-{}-{}.deferred'.format( + DEFERRED_EVENTS_DIR, + policy_requestor_type, + policy_requestor_name, + uuid.uuid1()) + return file_name + + +def save_event(event): + """Write deferred events to backend. 
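+ + A minimal illustrative call (the service name is hypothetical):: + + save_event(ServiceEvent( + timestamp=round(time.time()), + service='openvswitch-switch', + reason='Pkg update', + action='restart'))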
+ + :param event: Event to save + :type event: ServiceEvent + """ + requestor_name = hookenv.service_name() + requestor_type = 'charm' + init_policy_log_dir() + if duplicate_event_files(event): + hookenv.log( + "Not writing new event, existing event found. {} {} {}".format( + event.service, + event.action, + event.reason), + level="DEBUG") + else: + record_file = get_event_record_file( + policy_requestor_type=requestor_type, + policy_requestor_name=requestor_name) + + with open(record_file, 'w') as f: + data = { + 'timestamp': event.timestamp, + 'service': event.service, + 'action': event.action, + 'reason': event.reason, + 'policy_requestor_type': requestor_type, + 'policy_requestor_name': requestor_name} + yaml.dump(data, f) + + +def clear_deferred_events(svcs, action): + """Remove any outstanding deferred events. + + Remove a deferred event if its service is in the services list and its + action matches. + + :param svcs: List of services to remove. + :type svcs: List[str] + :param action: Action to remove + :type action: str + """ + # XXX This function is not currently processing the action. It needs to + # match the action and also take account of try-restart and the + # equivalence of stop-start and restart. + for defer_file in deferred_events_files(): + deferred_event = read_event_file(defer_file) + if deferred_event.service in svcs: + os.remove(defer_file) + + +def init_policy_log_dir(): + """Ensure directory to store events exists.""" + if not os.path.exists(DEFERRED_EVENTS_DIR): + os.mkdir(DEFERRED_EVENTS_DIR) + + +def get_deferred_events(): + """Return a list of deferred events requested by the charm and packages. + + :returns: List of deferred events + :rtype: List[ServiceEvent] + """ + events = [] + for _, event in deferred_events(): + events.append(event) + return events + + +def get_deferred_restarts(): + """List of deferred restart events requested by the charm and packages. + + :returns: List of deferred restarts + :rtype: List[ServiceEvent] + """ + return [e for e in get_deferred_events() if e.action == 'restart'] + + +def clear_deferred_restarts(services): + """Clear deferred restart events targeted at `services`. + + :param services: Services with deferred actions to clear. + :type services: List[str] + """ + clear_deferred_events(services, 'restart') + + +def process_svc_restart(service): + """Respond to a service restart having occurred. + + :param service: Service that the action was performed against. + :type service: str + """ + clear_deferred_restarts([service]) + + +def is_restart_permitted(): + """Check whether restarts are permitted. + + :returns: Whether restarts are permitted + :rtype: bool + """ + if hookenv.config('enable-auto-restarts') is None: + return True + return hookenv.config('enable-auto-restarts') + + +def check_and_record_restart_request(service, changed_files): + """Check if restarts are permitted; if they are not, log the request. + + :param service: Service to be restarted + :type service: str + :param changed_files: Files that have changed to trigger restarts.
+ :type changed_files: List[str] + :returns: Whether restarts are permitted + :rtype: bool + """ + changed_files = sorted(list(set(changed_files))) + permitted = is_restart_permitted() + if not permitted: + save_event(ServiceEvent( + timestamp=round(time.time()), + service=service, + reason='File(s) changed: {}'.format( + ', '.join(changed_files)), + action='restart')) + return permitted + + +def deferrable_svc_restart(service, reason=None): + """Restart the service if permitted; if not, defer it. + + :param service: Service to be restarted + :type service: str + :param reason: Reason for restart + :type reason: Union[str, None] + """ + if is_restart_permitted(): + host.service_restart(service) + else: + save_event(ServiceEvent( + timestamp=round(time.time()), + service=service, + reason=reason, + action='restart')) + + +def configure_deferred_restarts(services): + """Set up deferred restarts. + + :param services: Services to block restarts of. + :type services: List[str] + """ + policy_rcd.install_policy_rcd() + if is_restart_permitted(): + policy_rcd.remove_policy_file() + else: + blocked_actions = ['stop', 'restart', 'try-restart'] + for svc in services: + policy_rcd.add_policy_block(svc, blocked_actions) + + +def get_service_start_time(service): + """Find point in time when the systemd unit transitioned to active state. + + :param service: Service to check timestamp of. + :type service: str + """ + start_time = None + out = subprocess.check_output( + [ + 'systemctl', + 'show', + service, + '--property=ActiveEnterTimestamp']) + str_time = out.decode().rstrip().replace('ActiveEnterTimestamp=', '') + if str_time: + start_time = datetime.datetime.strptime( + str_time, + '%a %Y-%m-%d %H:%M:%S %Z') + return start_time + + +def check_restart_timestamps(): + """Check deferred restarts against systemd unit start times. + + Check if a service has a deferred event and clear it if it has been + subsequently restarted. + """ + for event in get_deferred_restarts(): + start_time = get_service_start_time(event.service) + deferred_restart_time = datetime.datetime.fromtimestamp( + event.timestamp) + if start_time and start_time < deferred_restart_time: + hookenv.log( + ("Restart still required, {} was started at {}, restart was " + "requested after that at {}").format( + event.service, + start_time, + deferred_restart_time), + level='DEBUG') + else: + clear_deferred_restarts([event.service]) + + +def set_deferred_hook(hookname): + """Record that a hook has been deferred. + + :param hookname: Name of hook that was deferred. + :type hookname: str + """ + with unitdata.HookData()() as t: + kv = t[0] + deferred_hooks = kv.get('deferred-hooks', []) + if hookname not in deferred_hooks: + deferred_hooks.append(hookname) + kv.set('deferred-hooks', sorted(list(set(deferred_hooks)))) + + +def get_deferred_hooks(): + """Get a list of deferred hooks. + + :returns: List of hook names. + :rtype: List[str] + """ + with unitdata.HookData()() as t: + kv = t[0] + return kv.get('deferred-hooks', []) + + +def clear_deferred_hooks(): + """Clear any deferred hooks.""" + with unitdata.HookData()() as t: + kv = t[0] + kv.set('deferred-hooks', []) + + +def clear_deferred_hook(hookname): + """Clear a specific deferred hook. + + :param hookname: Name of hook to remove.
+ :type hookname: str + """ + with unitdata.HookData()() as t: + kv = t[0] + deferred_hooks = kv.get('deferred-hooks', []) + if hookname in deferred_hooks: + deferred_hooks.remove(hookname) + kv.set('deferred-hooks', deferred_hooks) diff --git a/ceph-proxy/charmhelpers/contrib/openstack/exceptions.py b/ceph-proxy/charmhelpers/contrib/openstack/exceptions.py new file mode 100644 index 00000000..b2330637 --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/openstack/exceptions.py @@ -0,0 +1,26 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class OSContextError(Exception): + """Raised when an error occurs during context generation. + + This exception is principally used in contrib.openstack.context + """ + pass + + +class ServiceActionError(Exception): + """Raised when a service action (stop/start/etc.) failed.""" + pass diff --git a/ceph-proxy/charmhelpers/contrib/openstack/files/__init__.py b/ceph-proxy/charmhelpers/contrib/openstack/files/__init__.py new file mode 100644 index 00000000..9df5f746 --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/openstack/files/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# dummy __init__.py to fool syncer into thinking this is a syncable python +# module diff --git a/ceph-proxy/charmhelpers/contrib/openstack/files/check_deferred_restarts.py b/ceph-proxy/charmhelpers/contrib/openstack/files/check_deferred_restarts.py new file mode 100755 index 00000000..5f392b3c --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/openstack/files/check_deferred_restarts.py @@ -0,0 +1,128 @@ +#!/usr/bin/python3 + +# Copyright 2014-2022 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Checks for services with deferred restarts. + +This Nagios check will parse /var/lib/policy-rc.d/ +to find any restarts that are currently deferred.
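+ +Illustrative invocation (the application name is hypothetical): + + ./check_deferred_restarts.py --application rabbitmq-server + +Exits 0 (OK) when no restarts are deferred and 1 (CRITICAL) otherwise.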
+""" + +import argparse +import glob +import sys +import yaml + + +DEFERRED_EVENTS_DIR = '/var/lib/policy-rc.d' + + +def get_deferred_events(): + """Return a list of deferred events dicts from policy-rc.d files. + + Events are read from DEFERRED_EVENTS_DIR and are of the form: + { + action: restart, + policy_requestor_name: rabbitmq-server, + policy_requestor_type: charm, + reason: 'Pkg update', + service: rabbitmq-server, + time: 1614328743 + } + + :raises OSError: Raised in case of a system error while reading a policy file + :raises yaml.YAMLError: Raised if parsing a policy file fails + + :returns: List of deferred event dictionaries + :rtype: list + """ + deferred_events_files = glob.glob( + '{}/*.deferred'.format(DEFERRED_EVENTS_DIR)) + + deferred_events = [] + for event_file in deferred_events_files: + with open(event_file, 'r') as f: + event = yaml.safe_load(f) + deferred_events.append(event) + + return deferred_events + + +def get_deferred_restart_services(application=None): + """Returns a list of services with deferred restarts. + + :param str application: Name of the application that blocked the service restart. + If application is None, all services with deferred restarts + are returned. Services which are blocked by a non-charm + requestor are always returned. + + :raises OSError: Raised in case of a system error while reading a policy file + :raises yaml.YAMLError: Raised if parsing a policy file fails + + :returns: List of services with deferred restarts belonging to application. + :rtype: list + """ + + deferred_restart_events = filter( + lambda e: e['action'] == 'restart', get_deferred_events()) + + deferred_restart_services = set() + for restart_event in deferred_restart_events: + if application: + if ( + restart_event['policy_requestor_type'] != 'charm' or + restart_event['policy_requestor_type'] == 'charm' and + restart_event['policy_requestor_name'] == application + ): + deferred_restart_services.add(restart_event['service']) + else: + deferred_restart_services.add(restart_event['service']) + + return list(deferred_restart_services) + + +def main(): + """Check for services with deferred restarts.""" + parser = argparse.ArgumentParser( + description='Check for services with deferred restarts') + parser.add_argument( + '--application', help='Check services belonging to this application only') + + args = parser.parse_args() + + services = set(get_deferred_restart_services(args.application)) + + if len(services) == 0: + print('OK: No deferred service restarts.') + sys.exit(0) + else: + print( + 'CRITICAL: Restarts are deferred for services: {}.'.format(', '.join(services))) + sys.exit(1) + + +if __name__ == '__main__': + try: + main() + except OSError as e: + print('CRITICAL: A system error occurred: {} ({})'.format(e.errno, e.strerror)) + sys.exit(1) + except yaml.YAMLError as e: + print('CRITICAL: Failed to parse a policy file: {}'.format(str(e))) + sys.exit(1) + except Exception as e: + print('CRITICAL: An unknown error occurred: {}'.format(str(e))) + sys.exit(1) diff --git a/ceph-proxy/charmhelpers/contrib/openstack/files/policy_rc_d_script.py b/ceph-proxy/charmhelpers/contrib/openstack/files/policy_rc_d_script.py new file mode 100755 index 00000000..431e972b --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/openstack/files/policy_rc_d_script.py @@ -0,0 +1,196 @@ +#!/usr/bin/env python3 + +"""This script is an implementation of policy-rc.d + +For further information on policy-rc.d see *1 + +*1 https://people.debian.org/~hmh/invokerc.d-policyrc.d-specification.txt 
+""" +import collections +import glob +import os +import logging +import sys +import time +import uuid +import yaml + + +SystemPolicy = collections.namedtuple( + 'SystemPolicy', + [ + 'policy_requestor_name', + 'policy_requestor_type', + 'service', + 'blocked_actions']) + +DEFAULT_POLICY_CONFIG_DIR = '/etc/policy-rc.d' +DEFAULT_POLICY_LOG_DIR = '/var/lib/policy-rc.d' + + +def read_policy_file(policy_file): + """Return system policies from given file. + + :param file_name: Name of file to read. + :type file_name: str + :returns: Policy + :rtype: List[SystemPolicy] + """ + policies = [] + if os.path.exists(policy_file): + with open(policy_file, 'r') as f: + policy = yaml.safe_load(f) + for service, actions in policy['blocked_actions'].items(): + service = service.replace('.service', '') + policies.append(SystemPolicy( + policy_requestor_name=policy['policy_requestor_name'], + policy_requestor_type=policy['policy_requestor_type'], + service=service, + blocked_actions=actions)) + return policies + + +def get_policies(policy_config_dir): + """Return all system policies in policy_config_dir. + + :param policy_config_dir: Name of file to read. + :type policy_config_dir: str + :returns: Policy + :rtype: List[SystemPolicy] + """ + _policy = [] + for f in glob.glob('{}/*.policy'.format(policy_config_dir)): + _policy.extend(read_policy_file(f)) + return _policy + + +def record_blocked_action(service, action, blocking_policies, policy_log_dir): + """Record that an action was requested but deniedl + + :param service: Service that was blocked + :type service: str + :param action: Action that was blocked. + :type action: str + :param blocking_policies: Policies that blocked the action on the service. + :type blocking_policies: List[SystemPolicy] + :param policy_log_dir: Directory to place the blocking action record. + :type policy_log_dir: str + """ + if not os.path.exists(policy_log_dir): + os.mkdir(policy_log_dir) + seconds = round(time.time()) + for policy in blocking_policies: + if not os.path.exists(policy_log_dir): + os.mkdir(policy_log_dir) + file_name = '{}/{}-{}-{}.deferred'.format( + policy_log_dir, + policy.policy_requestor_type, + policy.policy_requestor_name, + uuid.uuid1()) + with open(file_name, 'w') as f: + data = { + 'timestamp': seconds, + 'service': service, + 'action': action, + 'reason': 'Package update', + 'policy_requestor_type': policy.policy_requestor_type, + 'policy_requestor_name': policy.policy_requestor_name} + yaml.dump(data, f) + + +def get_blocking_policies(service, action, policy_config_dir): + """Record that an action was requested but deniedl + + :param service: Service that action is requested against. + :type service: str + :param action: Action that is requested. + :type action: str + :param policy_config_dir: Directory that stores policy files. + :type policy_config_dir: str + :returns: Policies + :rtype: List[SystemPolicy] + """ + service = service.replace('.service', '') + blocking_policies = [ + policy + for policy in get_policies(policy_config_dir) + if policy.service == service and action in policy.blocked_actions] + return blocking_policies + + +def process_action_request(service, action, policy_config_dir, policy_log_dir): + """Take the requested action against service and check if it is permitted. + + :param service: Service that action is requested against. + :type service: str + :param action: Action that is requested. + :type action: str + :param policy_config_dir: Directory that stores policy files. 
+ :type policy_config_dir: str + :param policy_log_dir: Directory that stores policy files. + :type policy_log_dir: str + :returns: Tuple of whether the action is permitted and explanation. + :rtype: (boolean, str) + """ + blocking_policies = get_blocking_policies( + service, + action, + policy_config_dir) + if blocking_policies: + policy_msg = [ + '{} {}'.format(p.policy_requestor_type, p.policy_requestor_name) + for p in sorted(blocking_policies)] + message = '{} of {} blocked by {}'.format( + action, + service, + ', '.join(policy_msg)) + record_blocked_action( + service, + action, + blocking_policies, + policy_log_dir) + action_permitted = False + else: + message = "Permitting {} {}".format(service, action) + action_permitted = True + return action_permitted, message + + +def main(): + logging.basicConfig( + filename='/var/log/policy-rc.d.log', + level=logging.DEBUG, + format='%(asctime)s %(message)s') + + service = sys.argv[1] + action = sys.argv[2] + + permitted, message = process_action_request( + service, + action, + DEFAULT_POLICY_CONFIG_DIR, + DEFAULT_POLICY_LOG_DIR) + logging.info(message) + + # https://people.debian.org/~hmh/invokerc.d-policyrc.d-specification.txt + # Exit status codes: + # 0 - action allowed + # 1 - unknown action (therefore, undefined policy) + # 100 - unknown initscript id + # 101 - action forbidden by policy + # 102 - subsystem error + # 103 - syntax error + # 104 - [reserved] + # 105 - behaviour uncertain, policy undefined. + # 106 - action not allowed. Use the returned fallback actions + # (which are implied to be "allowed") instead. + + if permitted: + return 0 + else: + return 101 + + +if __name__ == "__main__": + rc = main() + sys.exit(rc) diff --git a/ceph-proxy/charmhelpers/contrib/openstack/ha/__init__.py b/ceph-proxy/charmhelpers/contrib/openstack/ha/__init__.py new file mode 100644 index 00000000..9b088de8 --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/openstack/ha/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-proxy/charmhelpers/contrib/openstack/ha/utils.py b/ceph-proxy/charmhelpers/contrib/openstack/ha/utils.py new file mode 100644 index 00000000..b4912c42 --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/openstack/ha/utils.py @@ -0,0 +1,377 @@ +# Copyright 2014-2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# Copyright 2016 Canonical Ltd. +# +# Authors: +# Openstack Charmers < +# + +""" +Helpers for high availability. 
+""" + +import hashlib +import json +import os + +import re + +from charmhelpers.core.hookenv import ( + expected_related_units, + log, + relation_set, + charm_name, + config, + status_set, + DEBUG, + application_name, +) + +from charmhelpers.core.host import ( + lsb_release +) + +from charmhelpers.contrib.openstack.ip import ( + resolve_address, + is_ipv6, +) + +from charmhelpers.contrib.network.ip import ( + get_iface_for_address, + get_netmask_for_address, +) + +from charmhelpers.contrib.hahelpers.cluster import ( + get_hacluster_config +) + +JSON_ENCODE_OPTIONS = dict( + sort_keys=True, + allow_nan=False, + indent=None, + separators=(',', ':'), +) + +VIP_GROUP_NAME = 'grp_{service}_vips' +DNSHA_GROUP_NAME = 'grp_{service}_hostnames' +HAPROXY_DASHBOARD_RESOURCE = "haproxy-dashboard" + + +class DNSHAException(Exception): + """Raised when an error occurs setting up DNS HA + """ + + pass + + +def update_dns_ha_resource_params(resources, resource_params, + relation_id=None, + crm_ocf='ocf:maas:dns'): + """ Configure DNS-HA resources based on provided configuration and + update resource dictionaries for the HA relation. + + @param resources: Pointer to dictionary of resources. + Usually instantiated in ha_joined(). + @param resource_params: Pointer to dictionary of resource parameters. + Usually instantiated in ha_joined() + @param relation_id: Relation ID of the ha relation + @param crm_ocf: Corosync Open Cluster Framework resource agent to use for + DNS HA + """ + _relation_data = {'resources': {}, 'resource_params': {}} + update_hacluster_dns_ha(charm_name(), + _relation_data, + crm_ocf) + resources.update(_relation_data['resources']) + resource_params.update(_relation_data['resource_params']) + relation_set(relation_id=relation_id, groups=_relation_data['groups']) + + +def assert_charm_supports_dns_ha(): + """Validate prerequisites for DNS HA + The MAAS client is only available on Xenial or greater + + :raises DNSHAException: if release is < 16.04 + """ + if lsb_release().get('DISTRIB_RELEASE') < '16.04': + msg = ('DNS HA is only supported on 16.04 and greater ' + 'versions of Ubuntu.') + status_set('blocked', msg) + raise DNSHAException(msg) + return True + + +def expect_ha(): + """ Determine if the unit expects to be in HA + + Check juju goal-state if ha relation is expected, check for VIP or dns-ha + settings which indicate the unit should expect to be related to hacluster. + + @returns boolean + """ + ha_related_units = [] + try: + ha_related_units = list(expected_related_units(reltype='ha')) + except (NotImplementedError, KeyError): + pass + return len(ha_related_units) > 0 or config('vip') or config('dns-ha') + + +def generate_ha_relation_data(service, + extra_settings=None, + haproxy_enabled=True): + """ Generate relation data for ha relation + + Based on configuration options and unit interfaces, generate a json + encoded dict of relation data items for the hacluster relation, + providing configuration for DNS HA or VIP's + haproxy clone sets. 
+ + Example of supplying additional settings:: + + COLO_CONSOLEAUTH = 'inf: res_nova_consoleauth grp_nova_vips' + AGENT_CONSOLEAUTH = 'ocf:openstack:nova-consoleauth' + AGENT_CA_PARAMS = 'op monitor interval="5s"' + + ha_console_settings = { + 'colocations': {'vip_consoleauth': COLO_CONSOLEAUTH}, + 'init_services': {'res_nova_consoleauth': 'nova-consoleauth'}, + 'resources': {'res_nova_consoleauth': AGENT_CONSOLEAUTH}, + 'resource_params': {'res_nova_consoleauth': AGENT_CA_PARAMS}} + generate_ha_relation_data('nova', extra_settings=ha_console_settings) + + + @param service: Name of the service being configured + @param extra_settings: Dict of additional resource data + @returns dict: json encoded data for use with relation_set + """ + _relation_data = {'resources': {}, 'resource_params': {}} + + if haproxy_enabled: + _meta = 'meta migration-threshold="INFINITY" failure-timeout="5s"' + _haproxy_res = 'res_{}_haproxy'.format(service) + _relation_data['resources'] = {_haproxy_res: 'lsb:haproxy'} + _relation_data['resource_params'] = { + _haproxy_res: '{} op monitor interval="5s"'.format(_meta) + } + _relation_data['init_services'] = {_haproxy_res: 'haproxy'} + _relation_data['clones'] = { + 'cl_{}_haproxy'.format(service): _haproxy_res + } + + if extra_settings: + for k, v in extra_settings.items(): + if _relation_data.get(k): + _relation_data[k].update(v) + else: + _relation_data[k] = v + + if config('dns-ha'): + update_hacluster_dns_ha(service, _relation_data) + else: + update_hacluster_vip(service, _relation_data) + + return { + 'json_{}'.format(k): json.dumps(v, **JSON_ENCODE_OPTIONS) + for k, v in _relation_data.items() if v + } + + +def update_hacluster_dns_ha(service, relation_data, + crm_ocf='ocf:maas:dns'): + """ Configure DNS-HA resources based on provided configuration + + @param service: Name of the service being configured + @param relation_data: Pointer to dictionary of relation data. + @param crm_ocf: Corosync Open Cluster Framework resource agent to use for + DNS HA + """ + # Validate the charm environment for DNS HA + assert_charm_supports_dns_ha() + + settings = ['os-admin-hostname', 'os-internal-hostname', + 'os-public-hostname', 'os-access-hostname'] + + # Check which DNS settings are set and update dictionaries + hostname_group = [] + for setting in settings: + hostname = config(setting) + if hostname is None: + log('DNS HA: Hostname setting {} is None. Ignoring.' + ''.format(setting), + DEBUG) + continue + m = re.search('os-(.+?)-hostname', setting) + if m: + endpoint_type = m.group(1) + # resolve_address's ADDRESS_MAP uses 'int' not 'internal' + if endpoint_type == 'internal': + endpoint_type = 'int' + else: + msg = ('Unexpected DNS hostname setting: {}. ' + 'Cannot determine endpoint_type name' + ''.format(setting)) + status_set('blocked', msg) + raise DNSHAException(msg) + + hostname_key = 'res_{}_{}_hostname'.format(service, endpoint_type) + if hostname_key in hostname_group: + log('DNS HA: Resource {}: {} already exists in ' + 'hostname group - skipping'.format(hostname_key, hostname), + DEBUG) + continue + + hostname_group.append(hostname_key) + relation_data['resources'][hostname_key] = crm_ocf + relation_data['resource_params'][hostname_key] = ( + 'params fqdn="{}" ip_address="{}"' + .format(hostname, resolve_address(endpoint_type=endpoint_type, + override=False))) + + if len(hostname_group) >= 1: + log('DNS HA: Hostname group is set with {} as members. 
' + 'Informing the ha relation'.format(' '.join(hostname_group)), + DEBUG) + relation_data['groups'] = { + DNSHA_GROUP_NAME.format(service=service): ' '.join(hostname_group) + } + else: + msg = 'DNS HA: Hostname group has no members.' + status_set('blocked', msg) + raise DNSHAException(msg) + + +def get_vip_settings(vip): + """Calculate which nic is on the correct network for the given vip. + + If nic or netmask discovery fails then fall back to using charm-supplied + config. If fallback is used this is indicated via the fallback variable. + + @param vip: VIP to lookup nic and cidr for. + @returns (str, str, bool): eg (iface, netmask, fallback) + """ + iface = get_iface_for_address(vip) + netmask = get_netmask_for_address(vip) + fallback = False + if iface is None: + iface = config('vip_iface') + fallback = True + if netmask is None: + netmask = config('vip_cidr') + fallback = True + return iface, netmask, fallback + + +def update_hacluster_vip(service, relation_data): + """ Configure VIP resources based on provided configuration + + @param service: Name of the service being configured + @param relation_data: Pointer to dictionary of relation data. + """ + cluster_config = get_hacluster_config() + vip_group = [] + vips_to_delete = [] + for vip in cluster_config['vip'].split(): + if is_ipv6(vip): + res_vip = 'ocf:heartbeat:IPv6addr' + vip_params = 'ipv6addr' + else: + res_vip = 'ocf:heartbeat:IPaddr2' + vip_params = 'ip' + + iface, netmask, fallback = get_vip_settings(vip) + + vip_monitoring = 'op monitor timeout="20s" interval="10s" depth="0"' + if iface is not None: + # NOTE(jamespage): Delete old VIP resources + # Old style naming encoding iface in name + # does not work well in environments where + # interface/subnet wiring is not consistent + vip_key = 'res_{}_{}_vip'.format(service, iface) + if vip_key in vips_to_delete: + vip_key = '{}_{}'.format(vip_key, vip_params) + vips_to_delete.append(vip_key) + + vip_key = 'res_{}_{}_vip'.format( + service, + hashlib.sha1(vip.encode('UTF-8')).hexdigest()[:7]) + + relation_data['resources'][vip_key] = res_vip + # NOTE(jamespage): + # Use option-provided vip params if these were used + # instead of auto-detected values + if fallback: + relation_data['resource_params'][vip_key] = ( + 'params {ip}="{vip}" cidr_netmask="{netmask}" ' + 'nic="{iface}" {vip_monitoring}'.format( + ip=vip_params, + vip=vip, + iface=iface, + netmask=netmask, + vip_monitoring=vip_monitoring)) + else: + # NOTE(jamespage): + # let heartbeat figure out which interface and + # netmask to configure, which works nicely + # when network interface naming is not + # consistent across units. + relation_data['resource_params'][vip_key] = ( + 'params {ip}="{vip}" {vip_monitoring}'.format( + ip=vip_params, + vip=vip, + vip_monitoring=vip_monitoring)) + + vip_group.append(vip_key) + + if vips_to_delete: + try: + relation_data['delete_resources'].extend(vips_to_delete) + except KeyError: + relation_data['delete_resources'] = vips_to_delete + + if len(vip_group) >= 1: + key = VIP_GROUP_NAME.format(service=service) + try: + relation_data['groups'][key] = ' '.join(vip_group) + except KeyError: + relation_data['groups'] = { + key: ' '.join(vip_group) + } + + +def render_grafana_dashboard(prometheus_app_name, haproxy_dashboard): + """Load grafana dashboard json model and insert prometheus datasource.
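+ + Illustrative call (application name and dashboard path are + hypothetical):: + + model = render_grafana_dashboard( + 'prometheus', 'files/grafana/haproxy-dashboard.json')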
+ + :param prometheus_app_name: name of the 'prometheus' application that will + be used as datasource in grafana dashboard + :type prometheus_app_name: str + :param haproxy_dashboard: path to haproxy dashboard + :type haproxy_dashboard: str + :return: Grafana dashboard json model as a str. + :rtype: str + """ + from charmhelpers.contrib.templating import jinja + + dashboard_template = os.path.basename(haproxy_dashboard) + dashboard_template_dir = os.path.dirname(haproxy_dashboard) + app_name = application_name() + datasource = "{} - Juju generated source".format(prometheus_app_name) + return jinja.render(dashboard_template, + {"datasource": datasource, + "app_name": app_name, + "prometheus_app_name": prometheus_app_name}, + template_dir=dashboard_template_dir, + jinja_env_args={"variable_start_string": "<< ", + "variable_end_string": " >>"}) diff --git a/ceph-proxy/charmhelpers/contrib/openstack/ip.py b/ceph-proxy/charmhelpers/contrib/openstack/ip.py new file mode 100644 index 00000000..2afad369 --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/openstack/ip.py @@ -0,0 +1,260 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from charmhelpers.core.hookenv import ( + NoNetworkBinding, + config, + unit_get, + service_name, + network_get_primary_address, +) +from charmhelpers.contrib.network.ip import ( + get_address_in_network, + is_address_in_network, + is_ipv6, + get_ipv6_addr, + resolve_network_cidr, + get_iface_for_address +) +from charmhelpers.contrib.hahelpers.cluster import is_clustered + +PUBLIC = 'public' +INTERNAL = 'int' +ADMIN = 'admin' +ACCESS = 'access' + +# TODO: reconcile 'int' vs 'internal' binding names +ADDRESS_MAP = { + PUBLIC: { + 'binding': 'public', + 'config': 'os-public-network', + 'fallback': 'public-address', + 'override': 'os-public-hostname', + }, + INTERNAL: { + 'binding': 'internal', + 'config': 'os-internal-network', + 'fallback': 'private-address', + 'override': 'os-internal-hostname', + }, + ADMIN: { + 'binding': 'admin', + 'config': 'os-admin-network', + 'fallback': 'private-address', + 'override': 'os-admin-hostname', + }, + ACCESS: { + 'binding': 'access', + 'config': 'access-network', + 'fallback': 'private-address', + 'override': 'os-access-hostname', + }, + # Note (thedac) bridge to begin the reconciliation between 'int' vs + # 'internal' binding names + 'internal': { + 'binding': 'internal', + 'config': 'os-internal-network', + 'fallback': 'private-address', + 'override': 'os-internal-hostname', + }, +} + + +def canonical_url(configs, endpoint_type=PUBLIC): + """Returns the correct HTTP URL to this host given the state of HTTPS + configuration, hacluster and charm configuration. + + :param configs: OSTemplateRenderer config templating object to inspect + for a complete https context. + :param endpoint_type: str endpoint type to resolve. + :returns: str base URL for services on the current service unit.
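+ + Illustrative results (addresses are hypothetical):: + + canonical_url(configs, PUBLIC) # e.g. 'https://10.5.0.10' + canonical_url(configs, INTERNAL) # e.g. 'http://[2001:db8::1]'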
+ """ + scheme = _get_scheme(configs) + + address = resolve_address(endpoint_type) + if is_ipv6(address): + address = "[{}]".format(address) + + return '%s://%s' % (scheme, address) + + +def _get_scheme(configs): + """Returns the scheme to use for the url (either http or https) + depending upon whether https is in the configs value. + + :param configs: OSTemplateRenderer config templating object to inspect + for a complete https context. + :returns: either 'http' or 'https' depending on whether https is + configured within the configs context. + """ + scheme = 'http' + if configs and 'https' in configs.complete_contexts(): + scheme = 'https' + return scheme + + +def _get_address_override(endpoint_type=PUBLIC): + """Returns any address overrides that the user has defined based on the + endpoint type. + + Note: this function allows for the service name to be inserted into the + address if the user specifies {service_name}.somehost.org. + + :param endpoint_type: the type of endpoint to retrieve the override + value for. + :returns: any endpoint address or hostname that the user has overridden + or None if an override is not present. + """ + override_key = ADDRESS_MAP[endpoint_type]['override'] + addr_override = config(override_key) + if not addr_override: + return None + else: + return addr_override.format(service_name=service_name()) + + +def local_address(unit_get_fallback='public-address'): + """Return a network address for this unit. + + Attempt to retrieve a 'default' IP address for this unit + from network-get. If this is running with an old version of Juju then + fallback to unit_get. + + Note on juju < 2.9 the binding to juju-info may not exist, so fall back to + the unit-get. + + :param unit_get_fallback: Either 'public-address' or 'private-address'. + Only used with old versions of Juju. + :type unit_get_fallback: str + :returns: IP Address + :rtype: str + """ + try: + return network_get_primary_address('juju-info') + except (NotImplementedError, NoNetworkBinding): + return unit_get(unit_get_fallback) + + +def get_invalid_vips(): + """Check if any of the provided vips are invalid. + A vip is invalid if it doesn't belong to the subnet in any interface. + If all vips are valid, this returns an empty list. + + :returns: A list of strings, where each string is an invalid vip address. + :rtype: list + """ + + clustered = is_clustered() + vips = config('vip') + if vips: + vips = vips.split() + invalid_vips = [] + + if clustered and vips: + for vip in vips: + iface_for_vip = get_iface_for_address(vip) + if iface_for_vip is None: + invalid_vips.append(vip) + + return invalid_vips + + +def resolve_address(endpoint_type=PUBLIC, override=True): + """Return unit address depending on net config. + + If unit is clustered with vip(s) and has net splits defined, return vip on + correct network. If clustered with no nets defined, return primary vip. + + If not clustered, return unit address ensuring address is on configured net + split if one is configured, or a Juju 2.0 extra-binding has been used. 
+ + :param endpoint_type: Network endpoint type + :param override: Accept hostname overrides or not + """ + resolved_address = None + if override: + resolved_address = _get_address_override(endpoint_type) + if resolved_address: + return resolved_address + + vips = config('vip') + if vips: + vips = vips.split() + + net_type = ADDRESS_MAP[endpoint_type]['config'] + net_addr = config(net_type) + net_fallback = ADDRESS_MAP[endpoint_type]['fallback'] + binding = ADDRESS_MAP[endpoint_type]['binding'] + clustered = is_clustered() + + if clustered and vips: + if net_addr: + for vip in vips: + if is_address_in_network(net_addr, vip): + resolved_address = vip + break + else: + # NOTE: endeavour to check vips against network space + # bindings + try: + bound_cidr = resolve_network_cidr( + network_get_primary_address(binding) + ) + for vip in vips: + if is_address_in_network(bound_cidr, vip): + resolved_address = vip + break + except (NotImplementedError, NoNetworkBinding): + # If no net-splits are configured and there is no support + # for extra bindings/network spaces, expect a single vip + resolved_address = vips[0] + else: + if config('prefer-ipv6'): + fallback_addr = get_ipv6_addr(exc_list=vips)[0] + else: + fallback_addr = local_address(unit_get_fallback=net_fallback) + + if net_addr: + resolved_address = get_address_in_network(net_addr, fallback_addr) + else: + # NOTE: only try to use extra bindings if legacy network + # configuration is not in use + try: + resolved_address = network_get_primary_address(binding) + except (NotImplementedError, NoNetworkBinding): + resolved_address = fallback_addr + + if resolved_address is None: + raise ValueError("Unable to resolve a suitable IP address based on " + "charm state and configuration. (net_type=%s, " + "clustered=%s)" % (net_type, clustered)) + + return resolved_address + + +def get_vip_in_network(network): + """Return the first configured VIP that falls within `network`, if any.""" + matching_vip = None + vips = config('vip') + if vips: + for vip in vips.split(): + if is_address_in_network(network, vip): + matching_vip = vip + return matching_vip + + +def get_default_api_bindings(): + """Return the list of default API binding names.""" + _default_bindings = [] + for binding in [INTERNAL, ADMIN, PUBLIC]: + _default_bindings.append(ADDRESS_MAP[binding]['binding']) + return _default_bindings diff --git a/ceph-proxy/charmhelpers/contrib/openstack/policy_rcd.py b/ceph-proxy/charmhelpers/contrib/openstack/policy_rcd.py new file mode 100644 index 00000000..ecffbc68 --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/openstack/policy_rcd.py @@ -0,0 +1,173 @@ +# Copyright 2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module for managing policy-rc.d script and associated files. + +This module manages the installation of /usr/sbin/policy-rc.d, the +policy files and the event files. When a package update occurs the +packaging system calls: + +policy-rc.d [options] <initscript ID> <actions> + +The return code of the script determines if the packaging system +will perform that action on the given service.
+The policy-rc.d implementation installed by this module checks if an
+action is permitted by checking policy files placed in /etc/policy-rc.d.
+If a policy file exists which denies the requested action then
+this is recorded in an event file which is placed in
+/var/lib/policy-rc.d.
+"""
+
+import os
+import shutil
+import tempfile
+import yaml
+
+import charmhelpers.contrib.openstack.files as os_files
+import charmhelpers.contrib.openstack.alternatives as alternatives
+import charmhelpers.core.hookenv as hookenv
+import charmhelpers.core.host as host
+
+POLICY_HEADER = """# Managed by juju\n"""
+POLICY_DEFERRED_EVENTS_DIR = '/var/lib/policy-rc.d'
+POLICY_CONFIG_DIR = '/etc/policy-rc.d'
+
+
+def get_policy_file_name():
+    """Get the name of the policy file for this application.
+
+    :returns: Policy file name
+    :rtype: str
+    """
+    application_name = hookenv.service_name()
+    return '{}/charm-{}.policy'.format(POLICY_CONFIG_DIR, application_name)
+
+
+def read_default_policy_file():
+    """Return the policy from the on-disk policy file (empty if absent).
+
+    A policy is in the form:
+        blocked_actions:
+            neutron-dhcp-agent: [restart, stop, try-restart]
+            neutron-l3-agent: [restart, stop, try-restart]
+            neutron-metadata-agent: [restart, stop, try-restart]
+            neutron-openvswitch-agent: [restart, stop, try-restart]
+            openvswitch-switch: [restart, stop, try-restart]
+            ovs-vswitchd: [restart, stop, try-restart]
+            ovs-vswitchd-dpdk: [restart, stop, try-restart]
+            ovsdb-server: [restart, stop, try-restart]
+        policy_requestor_name: neutron-openvswitch
+        policy_requestor_type: charm
+
+    :returns: Policy
+    :rtype: Dict[str, Union[str, Dict[str, List[str]]]]
+    """
+    policy = {}
+    policy_file = get_policy_file_name()
+    if os.path.exists(policy_file):
+        with open(policy_file, 'r') as f:
+            policy = yaml.safe_load(f)
+    return policy
+
+
+def write_policy_file(policy_file, policy):
+    """Write policy to disk.
+
+    :param policy_file: Name of policy file
+    :type policy_file: str
+    :param policy: Policy
+    :type policy: Dict[str, Union[str, Dict[str, List[str]]]]
+    """
+    with tempfile.NamedTemporaryFile('w', delete=False) as f:
+        f.write(POLICY_HEADER)
+        yaml.dump(policy, f)
+        tmp_file_name = f.name
+    shutil.move(tmp_file_name, policy_file)
+
+
+def remove_policy_file():
+    """Remove policy file."""
+    try:
+        os.remove(get_policy_file_name())
+    except FileNotFoundError:
+        pass
+
+
+def install_policy_rcd():
+    """Install policy-rc.d components."""
+    source_file_dir = os.path.dirname(os.path.abspath(os_files.__file__))
+    policy_rcd_exec = "/var/lib/charm/{}/policy-rc.d".format(
+        hookenv.service_name())
+    host.mkdir(os.path.dirname(policy_rcd_exec))
+    shutil.copy2(
+        '{}/policy_rc_d_script.py'.format(source_file_dir),
+        policy_rcd_exec)
+    # policy-rc.d must be installed via the alternatives system:
+    # https://people.debian.org/~hmh/invokerc.d-policyrc.d-specification.txt
+    if not os.path.exists('/usr/sbin/policy-rc.d'):
+        alternatives.install_alternative(
+            'policy-rc.d',
+            '/usr/sbin/policy-rc.d',
+            policy_rcd_exec)
+    host.mkdir(POLICY_CONFIG_DIR)
+
+
+def get_default_policy():
+    """Return the default policy structure.
+
+    :returns: Policy
+    :rtype: Dict[str, Union[str, Dict[str, List[str]]]]
+    """
+    policy = {
+        'policy_requestor_name': hookenv.service_name(),
+        'policy_requestor_type': 'charm',
+        'blocked_actions': {}}
+    return policy
+
+
+def add_policy_block(service, blocked_actions):
+    """Update a policy file with a new list of actions.
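+
+    A minimal illustrative call (the service and action names are
+    hypothetical) might be:
+
+        add_policy_block('neutron-l3-agent', ['restart', 'stop'])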
+
+    :param service: Service name
+    :type service: str
+    :param blocked_actions: Actions to block
+    :type blocked_actions: List[str]
+    """
+    policy = read_default_policy_file() or get_default_policy()
+    policy_file = get_policy_file_name()
+    if policy['blocked_actions'].get(service):
+        policy['blocked_actions'][service].extend(blocked_actions)
+    else:
+        policy['blocked_actions'][service] = blocked_actions
+    policy['blocked_actions'][service] = sorted(
+        list(set(policy['blocked_actions'][service])))
+    write_policy_file(policy_file, policy)
+
+
+def remove_policy_block(service, unblocked_actions):
+    """Remove list of actions from policy file.
+
+    :param service: Service name
+    :type service: str
+    :param unblocked_actions: Actions to unblock
+    :type unblocked_actions: List[str]
+    """
+    policy_file = get_policy_file_name()
+    policy = read_default_policy_file()
+    for action in unblocked_actions:
+        try:
+            policy['blocked_actions'][service].remove(action)
+        except (KeyError, ValueError):
+            continue
+    write_policy_file(policy_file, policy)
diff --git a/ceph-proxy/charmhelpers/contrib/openstack/policyd.py b/ceph-proxy/charmhelpers/contrib/openstack/policyd.py
new file mode 100644
index 00000000..767943c2
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/openstack/policyd.py
@@ -0,0 +1,763 @@
+# Copyright 2019-2021 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import collections
+import contextlib
+import os
+import shutil
+import yaml
+import zipfile
+
+import charmhelpers
+import charmhelpers.core.hookenv as hookenv
+import charmhelpers.core.host as ch_host
+
+# Features provided by this module:
+
+"""
+Policy.d helper functions
+=========================
+
+The functions in this module are designed, as a set, to provide an easy-to-use
+set of hooks for classic charms to add override YAML files in the
+/etc/<service-name>/policy.d/ directory.
+
+(For charms.openstack charms, a mixin class is provided for this
+functionality).
+
+In order to "hook" this functionality into a (classic) charm, two functions
+are provided:
+
+    maybe_do_policyd_overrides(openstack_release,
+                               service,
+                               blacklist_paths=None,
+                               blacklist_keys=None,
+                               template_function=None,
+                               restart_handler=None)
+
+    maybe_do_policyd_overrides_on_config_changed(openstack_release,
+                                                 service,
+                                                 blacklist_paths=None,
+                                                 blacklist_keys=None,
+                                                 template_function=None,
+                                                 restart_handler=None)
+
+(See the docstrings for details on the parameters)
+
+The functions should be called from the install and upgrade hooks in the
+charm. The `maybe_do_policyd_overrides_on_config_changed` function is
+designed to be called on the config-changed hook; it does an additional
+check to ensure that an override already applied in an install or upgrade
+hook isn't repeated.
+
+In order to *enable* this functionality, the charm's install, config_changed,
+and upgrade_charm hooks need to be modified, and a new config option (see
+below) needs to be added. The README for the charm should also be updated.
+ +Examples from the keystone charm are: + +@hooks.hook('install.real') +@harden() +def install(): + ... + # call the policy overrides handler which will install any policy overrides + maybe_do_policyd_overrides(os_release('keystone'), 'keystone') + + +@hooks.hook('config-changed') +@restart_on_change(restart_map(), restart_functions=restart_function_map()) +@harden() +def config_changed(): + ... + # call the policy overrides handler which will install any policy overrides + maybe_do_policyd_overrides_on_config_changed(os_release('keystone'), + 'keystone') + +@hooks.hook('upgrade-charm') +@restart_on_change(restart_map(), stopstart=True) +@harden() +def upgrade_charm(): + ... + # call the policy overrides handler which will install any policy overrides + maybe_do_policyd_overrides(os_release('keystone'), 'keystone') + +Status Line +=========== + +The workload status code in charm-helpers has been modified to detect if +policy.d override code has been incorporated into the charm by checking for the +new config variable (in the config.yaml). If it has been, then the workload +status line will automatically show "PO:" at the beginning of the workload +status for that unit/service if the config option is set. If the policy +override is broken, the "PO (broken):" will be shown. No changes to the charm +(apart from those already mentioned) are needed to enable this functionality. +(charms.openstack charms also get this functionality, but please see that +library for further details). +""" + +# The config.yaml for the charm should contain the following for the config +# option: + +""" + use-policyd-override: + type: boolean + default: False + description: | + If True then use the resource file named 'policyd-override' to install + override YAML files in the service's policy.d directory. The resource + file should be a ZIP file containing at least one yaml file with a .yaml + or .yml extension. If False then remove the overrides. +""" + +# The metadata.yaml for the charm should contain the following: +""" +resources: + policyd-override: + type: file + filename: policyd-override.zip + description: The policy.d overrides file +""" + +# The README for the charm should contain the following: +""" +Policy Overrides +---------------- + +This feature allows for policy overrides using the `policy.d` directory. This +is an **advanced** feature and the policies that the OpenStack service supports +should be clearly and unambiguously understood before trying to override, or +add to, the default policies that the service uses. The charm also has some +policy defaults. They should also be understood before being overridden. + +> **Caution**: It is possible to break the system (for tenants and other + services) if policies are incorrectly applied to the service. + +Policy overrides are YAML files that contain rules that will add to, or +override, existing policy rules in the service. The `policy.d` directory is +a place to put the YAML override files. This charm owns the +`/etc/keystone/policy.d` directory, and as such, any manual changes to it will +be overwritten on charm upgrades. + +Overrides are provided to the charm using a Juju resource called +`policyd-override`. The resource is a ZIP file. 
This file, say +`overrides.zip`, is attached to the charm by: + + + juju attach-resource policyd-override=overrides.zip + +The policy override is enabled in the charm using: + + juju config use-policyd-override=true + +When `use-policyd-override` is `True` the status line of the charm will be +prefixed with `PO:` indicating that policies have been overridden. If the +installation of the policy override YAML files failed for any reason then the +status line will be prefixed with `PO (broken):`. The log file for the charm +will indicate the reason. No policy override files are installed if the `PO +(broken):` is shown. The status line indicates that the overrides are broken, +not that the policy for the service has failed. The policy will be the defaults +for the charm and service. + +Policy overrides on one service may affect the functionality of another +service. Therefore, it may be necessary to provide policy overrides for +multiple service charms to achieve a consistent set of policies across the +OpenStack system. The charms for the other services that may need overrides +should be checked to ensure that they support overrides before proceeding. +""" + +POLICYD_VALID_EXTS = ['.yaml', '.yml', '.j2', '.tmpl', '.tpl'] +POLICYD_TEMPLATE_EXTS = ['.j2', '.tmpl', '.tpl'] +POLICYD_RESOURCE_NAME = "policyd-override" +POLICYD_CONFIG_NAME = "use-policyd-override" +POLICYD_SUCCESS_FILENAME = "policyd-override-success" +POLICYD_LOG_LEVEL_DEFAULT = hookenv.INFO +POLICYD_ALWAYS_BLACKLISTED_KEYS = ("admin_required", "cloud_admin") + + +class BadPolicyZipFile(Exception): + + def __init__(self, log_message): + self.log_message = log_message + + def __str__(self): + return self.log_message + + +class BadPolicyYamlFile(Exception): + + def __init__(self, log_message): + self.log_message = log_message + + def __str__(self): + return self.log_message + + +def is_policyd_override_valid_on_this_release(openstack_release): + """Check that the charm is running on at least Ubuntu Xenial, and at + least the queens release. + + :param openstack_release: the release codename that is installed. + :type openstack_release: str + :returns: True if okay + :rtype: bool + """ + # NOTE(ajkavanagh) circular import! This is because the status message + # generation code in utils has to call into this module, but this function + # needs the CompareOpenStackReleases() function. The only way to solve + # this is either to put ALL of this module into utils, or refactor one or + # other of the CompareOpenStackReleases or status message generation code + # into a 3rd module. + import charmhelpers.contrib.openstack.utils as ch_utils + return ch_utils.CompareOpenStackReleases(openstack_release) >= 'queens' + + +def maybe_do_policyd_overrides(openstack_release, + service, + blacklist_paths=None, + blacklist_keys=None, + template_function=None, + restart_handler=None, + user=None, + group=None, + config_changed=False): + """If the config option is set, get the resource file and process it to + enable the policy.d overrides for the service passed. + + The param `openstack_release` is required as the policyd overrides feature + is only supported on openstack_release "queens" or later, and on ubuntu + "xenial" or later. Prior to these versions, this feature is a NOP. + + The optional template_function is a function that accepts a string and has + an opportunity to modify the loaded file prior to it being read by + yaml.safe_load(). This allows the charm to perform "templating" using + charm derived data. 
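+
+    A minimal illustrative template_function (the substitution token and
+    config option are hypothetical) might be:
+
+        def template_function(doc):
+            return doc.replace('{{ admin_role }}',
+                               hookenv.config('admin-role'))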
+
+    The param blacklist_paths are paths (in the service's policy.d
+    directory) that should not be touched.
+
+    The param blacklist_keys are keys that must not appear in the yaml file.
+    If they do, then the whole policy.d file fails.
+
+    The yaml file extracted from the resource_file (which is a zipped file)
+    has its file path reconstructed. This, also, must not match any path in
+    the black list.
+
+    The param restart_handler is an optional Callable that is called to
+    perform the service restart if the policy.d file is changed. This should
+    normally be None as oslo.policy automatically picks up changes in the
+    policy.d directory. However, for any services where this is buggy then a
+    restart_handler can be used to force the policy.d files to be read.
+
+    If the config_changed param is True, then the handling is slightly
+    different: It will only perform the policyd overrides if the config is
+    True and the success file doesn't exist. Otherwise, it does nothing as
+    the resource file has already been processed.
+
+    :param openstack_release: The openstack release that is installed.
+    :type openstack_release: str
+    :param service: the service name to construct the policy.d directory for.
+    :type service: str
+    :param blacklist_paths: optional list of paths to leave alone
+    :type blacklist_paths: Union[None, List[str]]
+    :param blacklist_keys: optional list of keys that mustn't appear in the
+        yaml files
+    :type blacklist_keys: Union[None, List[str]]
+    :param template_function: Optional function that can modify the string
+        prior to being processed as a Yaml document.
+    :type template_function: Union[None, Callable[[str], str]]
+    :param restart_handler: The function to call if the service should be
+        restarted.
+    :type restart_handler: Union[None, Callable[[], None]]
+    :param user: The user to create/write files/directories as
+    :type user: Union[None, str]
+    :param group: the group to create/write files/directories as
+    :type group: Union[None, str]
+    :param config_changed: Set to True for config_changed hook.
+    :type config_changed: bool
+    """
+    _user = service if user is None else user
+    _group = service if group is None else group
+    if not is_policyd_override_valid_on_this_release(openstack_release):
+        return
+    hookenv.log("Running maybe_do_policyd_overrides",
+                level=POLICYD_LOG_LEVEL_DEFAULT)
+    config = hookenv.config()
+    try:
+        if not config.get(POLICYD_CONFIG_NAME, False):
+            clean_policyd_dir_for(service,
+                                  blacklist_paths,
+                                  user=_user,
+                                  group=_group)
+            if (os.path.isfile(_policy_success_file()) and
+                    restart_handler is not None and
+                    callable(restart_handler)):
+                restart_handler()
+            remove_policy_success_file()
+            return
+    except Exception as e:
+        # NOTE: log at the default policyd log level (POLICYD_CONFIG_NAME is
+        # a config key, not a log level).
+        hookenv.log("... ERROR: Exception is: {}".format(str(e)),
+                    level=POLICYD_LOG_LEVEL_DEFAULT)
+        import traceback
+        hookenv.log(traceback.format_exc(), level=POLICYD_LOG_LEVEL_DEFAULT)
+        return
+    # if the policyd overrides have been performed when doing config_changed
+    # just return
+    if config_changed and is_policy_success_file_set():
+        hookenv.log("... already setup, so skipping.",
+                    level=POLICYD_LOG_LEVEL_DEFAULT)
+        return
+    # from now on it should succeed; if it doesn't then status line will show
+    # broken.
+    resource_filename = get_policy_resource_filename()
+    restart = process_policy_resource_file(
+        resource_filename, service, blacklist_paths, blacklist_keys,
+        template_function)
+    if restart and restart_handler is not None and callable(restart_handler):
+        restart_handler()
+
+
+@charmhelpers.deprecate("Use maybe_do_policyd_overrides instead")
+def maybe_do_policyd_overrides_on_config_changed(*args, **kwargs):
+    """This function is designed to be called from the config changed hook.
+
+    DEPRECATED: please use maybe_do_policyd_overrides() with the param
+    `config_changed` as `True`.
+
+    See maybe_do_policyd_overrides() for more details on the params.
+    """
+    if 'config_changed' not in kwargs.keys():
+        kwargs['config_changed'] = True
+    return maybe_do_policyd_overrides(*args, **kwargs)
+
+
+def get_policy_resource_filename():
+    """Function to extract the policy resource filename.
+
+    :returns: The filename of the resource, if set; otherwise, if an error
+              occurs, then None is returned.
+    :rtype: Union[str, None]
+    """
+    try:
+        return hookenv.resource_get(POLICYD_RESOURCE_NAME)
+    except Exception:
+        return None
+
+
+@contextlib.contextmanager
+def open_and_filter_yaml_files(filepath, has_subdirs=False):
+    """Validate that the filepath provided is a zip file and contains at
+    least one (.yaml|.yml) file, and that the files are not duplicated when
+    the zip file is flattened. Note that the yaml files are not checked.
+    This is the first stage in validating the policy zipfile; individual
+    yaml files are not checked for validity or black listed keys.
+
+    If the has_subdirs param is True, then the files are flattened to the
+    first directory, and the files in the root are ignored.
+
+    An example of use is:
+
+        with open_and_filter_yaml_files(some_path) as (zfp, g):
+            for zipinfo in g:
+                # do something with zipinfo ...
+
+    :param filepath: a filepath object that can be opened by zipfile
+    :type filepath: Union[AnyStr, os.PathLike[AnyStr]]
+    :param has_subdirs: Keep first level of subdirectories in yaml file.
+    :type has_subdirs: bool
+    :returns: (zfp handle,
+               a generator of the (name, ext, filename, ZipInfo object)
+               tuples) as a tuple.
+    :rtype: ContextManager[(zipfile.ZipFile,
+                            Generator[(str, str, str, zipfile.ZipInfo)])]
+    :raises: zipfile.BadZipFile
+    :raises: BadPolicyZipFile if yaml files are duplicated or missing
+    :raises: IOError if the filepath is not found
+    """
+    with zipfile.ZipFile(filepath, 'r') as zfp:
+        # first pass through; check for duplicates and at least one yaml file.
+        names = collections.defaultdict(int)
+        yamlfiles = _yamlfiles(zfp, has_subdirs)
+        for name, _, _, _ in yamlfiles:
+            names[name] += 1
+        # There must be at least 1 yaml file.
+        if len(names.keys()) == 0:
+            raise BadPolicyZipFile("contains no yaml files with {} extensions."
+                                   .format(", ".join(POLICYD_VALID_EXTS)))
+        # There must be no duplicates
+        duplicates = [n for n, c in names.items() if c > 1]
+        if duplicates:
+            raise BadPolicyZipFile("{} have duplicates in the zip file."
+                                   .format(", ".join(duplicates)))
+        # Finally, let's yield the generator
+        yield (zfp, yamlfiles)
+
+
+def _yamlfiles(zipfile, has_subdirs=False):
+    """Helper to get a yaml file (according to POLICYD_VALID_EXTS extensions)
+    and the infolist item from a zipfile.
+
+    If the `has_subdirs` param is True, then only yaml files that have a
+    directory component are read, and the first part of the directory
+    component is kept, along with the filename in the name. e.g.
an entry with
+    a filename of:
+
+        compute/someotherdir/override.yaml
+
+    is returned as:
+
+        compute/override, .yaml, override.yaml, <ZipInfo object>
+
+    This is to help with the special, additional, processing that the
+    dashboard charm requires.
+
+    :param zipfile: the zipfile to read zipinfo items from
+    :type zipfile: zipfile.ZipFile
+    :param has_subdirs: Keep first level of subdirectories in yaml file.
+    :type has_subdirs: bool
+    :returns: list of (name, ext, filename, info item) for each
+              self-identified yaml file.
+    :rtype: List[(str, str, str, zipfile.ZipInfo)]
+    """
+    files = []
+    for infolist_item in zipfile.infolist():
+        try:
+            if infolist_item.is_dir():
+                continue
+        except AttributeError:
+            # fallback to "old" way to determine dir entry for pre-py36
+            if infolist_item.filename.endswith('/'):
+                continue
+        _dir, name_ext = os.path.split(infolist_item.filename)
+        name, ext = os.path.splitext(name_ext)
+        if has_subdirs and _dir != "":
+            name = os.path.join(_dir.split(os.path.sep)[0], name)
+        ext = ext.lower()
+        if ext and ext in POLICYD_VALID_EXTS:
+            files.append((name, ext, name_ext, infolist_item))
+    return files
+
+
+def read_and_validate_yaml(stream_or_doc, blacklist_keys=None):
+    """Read, validate and return the (first) yaml document from the stream.
+
+    The doc is read, and checked for a yaml file. Then the top-level keys
+    are checked against the blacklist_keys provided. If there are problems
+    then an Exception is raised. Otherwise the yaml document is returned as
+    a Python object that can be dumped back as a yaml file on the system.
+
+    The yaml file must only consist of a str:str mapping, and if not then
+    the yaml file is rejected.
+
+    :param stream_or_doc: the file object to read the yaml from
+    :type stream_or_doc: Union[AnyStr, IO[AnyStr]]
+    :param blacklist_keys: Any keys, which if in the yaml file, should cause
+        an error.
+    :type blacklist_keys: Union[None, List[str]]
+    :returns: the yaml file as a python document
+    :rtype: Dict[str, str]
+    :raises: yaml.YAMLError if there is a problem with the document
+    :raises: BadPolicyYamlFile if file doesn't look right or there are
+             blacklisted keys in the file.
+    """
+    blacklist_keys = blacklist_keys or []
+    # extend (not append): POLICYD_ALWAYS_BLACKLISTED_KEYS is a tuple of
+    # keys, and each key needs to be matched individually.
+    blacklist_keys.extend(POLICYD_ALWAYS_BLACKLISTED_KEYS)
+    doc = yaml.safe_load(stream_or_doc)
+    if not isinstance(doc, dict):
+        raise BadPolicyYamlFile("doesn't look like a policy file?")
+    keys = set(doc.keys())
+    blacklisted_keys_present = keys.intersection(blacklist_keys)
+    if blacklisted_keys_present:
+        raise BadPolicyYamlFile("blacklisted keys {} present."
+                                .format(", ".join(blacklisted_keys_present)))
+    if not all(isinstance(k, str) for k in keys):
+        raise BadPolicyYamlFile("keys in yaml aren't all strings?")
+    # check that the dictionary looks like a mapping of str to str
+    if not all(isinstance(v, str) for v in doc.values()):
+        raise BadPolicyYamlFile("values in yaml aren't all strings?")
+    return doc
+
+
+def policyd_dir_for(service):
+    """Return the policy directory for the named service.
+
+    :param service: the service name
+    :type service: str
+    :returns: the policy.d override directory.
+    :rtype: os.PathLike[str]
+    """
+    return os.path.join("/", "etc", service, "policy.d")
+
+
+def clean_policyd_dir_for(service, keep_paths=None, user=None, group=None):
+    """Clean out the policyd directory except for items that should be kept.
+
+    The keep_paths, if used, should be set to the full paths of the files
+    that should be kept in the policyd directory for the service.
Note that the + service name is passed in, and then the policyd_dir_for() function is used. + This is so that a coding error doesn't result in a sudden deletion of the + charm (say). + + :param service: the service name to use to construct the policy.d dir. + :type service: str + :param keep_paths: optional list of paths to not delete. + :type keep_paths: Union[None, List[str]] + :param user: The user to create/write files/directories as + :type user: Union[None, str] + :param group: the group to create/write files/directories as + :type group: Union[None, str] + """ + _user = service if user is None else user + _group = service if group is None else group + keep_paths = keep_paths or [] + path = policyd_dir_for(service) + hookenv.log("Cleaning path: {}".format(path), level=hookenv.DEBUG) + if not os.path.exists(path): + ch_host.mkdir(path, owner=_user, group=_group, perms=0o775) + for direntry in os.scandir(path): + # see if the path should be kept. + if direntry.path in keep_paths: + continue + # we remove any directories; it's ours and there shouldn't be any + if direntry.is_dir(): + shutil.rmtree(direntry.path) + else: + os.remove(direntry.path) + + +def maybe_create_directory_for(path, user, group): + """For the filename 'path', ensure that the directory for that path exists. + + Note that if the directory already exists then the permissions are NOT + changed. + + :param path: the filename including the path to it. + :type path: str + :param user: the user to create the directory as + :param group: the group to create the directory as + """ + _dir, _ = os.path.split(path) + if not os.path.exists(_dir): + ch_host.mkdir(_dir, owner=user, group=group, perms=0o775) + + +def path_for_policy_file(service, name): + """Return the full path for a policy.d file that will be written to the + service's policy.d directory. + + It is constructed using policyd_dir_for(), the name and the ".yaml" + extension. + + For horizon, for example, it's a bit more complicated. The name param is + actually "override_service_dir/a_name", where target_service needs to be + one the allowed horizon override services. This translation and check is + done in the _yamlfiles() function. + + :param service: the service name + :type service: str + :param name: the name for the policy override + :type name: str + :returns: the full path name for the file + :rtype: os.PathLike[str] + """ + return os.path.join(policyd_dir_for(service), name + ".yaml") + + +def _policy_success_file(): + """Return the file name for a successful drop of policy.d overrides + + :returns: the path name for the file. + :rtype: str + """ + return os.path.join(hookenv.charm_dir(), POLICYD_SUCCESS_FILENAME) + + +def remove_policy_success_file(): + """Remove the file that indicates successful policyd override.""" + try: + os.remove(_policy_success_file()) + except Exception: + pass + + +def set_policy_success_file(): + """Set the file that indicates successful policyd override.""" + open(_policy_success_file(), "w").close() + + +def is_policy_success_file_set(): + """Returns True if the policy success file has been set. + + This indicates that policies are overridden and working properly. + + :returns: True if the policy file is set + :rtype: bool + """ + return os.path.isfile(_policy_success_file()) + + +def policyd_status_message_prefix(): + """Return the prefix str for the status line. + + "PO:" indicating that the policy overrides are in place, or "PO (broken):" + if the policy is supposed to be working but there is no success file. 
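+
+    e.g. a ready unit with working overrides ends up with a status line
+    like "PO: Unit is ready".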
+
+    :returns: the prefix
+    :rtype: str
+    """
+    if is_policy_success_file_set():
+        return "PO:"
+    return "PO (broken):"
+
+
+def process_policy_resource_file(resource_file,
+                                 service,
+                                 blacklist_paths=None,
+                                 blacklist_keys=None,
+                                 template_function=None,
+                                 preserve_topdir=False,
+                                 preprocess_filename=None,
+                                 user=None,
+                                 group=None):
+    """Process the resource file (which should contain at least one yaml
+    file) and write those files to the service's policy.d directory.
+
+    The optional template_function is a function that accepts a python
+    string and has an opportunity to modify the document
+    prior to it being read by the yaml.safe_load() function and written to
+    disk. Note that this function does *not* say how the templating is done -
+    this is up to the charm to implement its chosen method.
+
+    The param blacklist_paths are paths (in the service's policy.d
+    directory) that should not be touched.
+
+    The param blacklist_keys are keys that must not appear in the yaml file.
+    If they do, then the whole policy.d file fails.
+
+    The yaml file extracted from the resource_file (which is a zipped file)
+    has its file path reconstructed. This, also, must not match any path in
+    the black list.
+
+    The yaml filename can be modified in two ways. If the `preserve_topdir`
+    param is True, then each file keeps only the first level of its
+    directory path (files in the root of the zip are ignored). This allows
+    for creating sets of files that can be grouped into a single level tree
+    structure.
+
+    Secondly, if the `preprocess_filename` param is not None and callable()
+    then the name is passed to that function for preprocessing before being
+    converted to the end location. This is to allow munging of the filename
+    prior to being tested for a blacklist path.
+
+    If any error occurs, then the policy.d directory is cleared, the error
+    is written to the log, and the status line will eventually show as
+    failed.
+
+    :param resource_file: The zipped file to open and extract yaml files
+        from.
+    :type resource_file: Union[AnyStr, os.PathLike[AnyStr]]
+    :param service: the service name to construct the policy.d directory for.
+    :type service: str
+    :param blacklist_paths: optional list of paths to leave alone
+    :type blacklist_paths: Union[None, List[str]]
+    :param blacklist_keys: optional list of keys that mustn't appear in the
+        yaml files
+    :type blacklist_keys: Union[None, List[str]]
+    :param template_function: Optional function that can modify the yaml
+        document.
+    :type template_function: Union[None, Callable[[AnyStr], AnyStr]]
+    :param preserve_topdir: Keep the toplevel subdir
+    :type preserve_topdir: bool
+    :param preprocess_filename: Optional function to use to process filenames
+        extracted from the resource file.
+    :type preprocess_filename: Union[None, Callable[[AnyStr], AnyStr]]
+    :param user: The user to create/write files/directories as
+    :type user: Union[None, str]
+    :param group: the group to create/write files/directories as
+    :type group: Union[None, str]
+    :returns: True if the processing was successful, False if not.
+ :rtype: boolean + """ + hookenv.log("Running process_policy_resource_file", level=hookenv.DEBUG) + blacklist_paths = blacklist_paths or [] + completed = False + _preprocess = None + if preprocess_filename is not None and callable(preprocess_filename): + _preprocess = preprocess_filename + _user = service if user is None else user + _group = service if group is None else group + try: + with open_and_filter_yaml_files( + resource_file, preserve_topdir) as (zfp, gen): + # first clear out the policy.d directory and clear success + remove_policy_success_file() + clean_policyd_dir_for(service, + blacklist_paths, + user=_user, + group=_group) + for name, ext, filename, zipinfo in gen: + # See if the name should be preprocessed. + if _preprocess is not None: + name = _preprocess(name) + # construct a name for the output file. + yaml_filename = path_for_policy_file(service, name) + if yaml_filename in blacklist_paths: + raise BadPolicyZipFile("policy.d name {} is blacklisted" + .format(yaml_filename)) + with zfp.open(zipinfo) as fp: + doc = fp.read() + # if template_function is not None, then offer the document + # to the template function + if ext in POLICYD_TEMPLATE_EXTS: + if (template_function is None or not + callable(template_function)): + raise BadPolicyZipFile( + "Template {} but no template_function is " + "available".format(filename)) + doc = template_function(doc) + yaml_doc = read_and_validate_yaml(doc, blacklist_keys) + # we may have to create the directory + maybe_create_directory_for(yaml_filename, _user, _group) + ch_host.write_file(yaml_filename, + yaml.dump(yaml_doc).encode('utf-8'), + _user, + _group) + # Every thing worked, so we mark up a success. + completed = True + except (zipfile.BadZipFile, BadPolicyZipFile, BadPolicyYamlFile) as e: + hookenv.log("Processing {} failed: {}".format(resource_file, str(e)), + level=POLICYD_LOG_LEVEL_DEFAULT) + except IOError as e: + # technically this shouldn't happen; it would be a programming error as + # the filename comes from Juju and thus, should exist. + hookenv.log( + "File {} failed with IOError. This really shouldn't happen" + " -- error: {}".format(resource_file, str(e)), + level=POLICYD_LOG_LEVEL_DEFAULT) + except Exception as e: + import traceback + hookenv.log("General Exception({}) during policyd processing" + .format(str(e)), + level=POLICYD_LOG_LEVEL_DEFAULT) + hookenv.log(traceback.format_exc()) + finally: + if not completed: + hookenv.log("Processing {} failed: cleaning policy.d directory" + .format(resource_file), + level=POLICYD_LOG_LEVEL_DEFAULT) + clean_policyd_dir_for(service, + blacklist_paths, + user=_user, + group=_group) + else: + # touch the success filename + hookenv.log("policy.d overrides installed.", + level=POLICYD_LOG_LEVEL_DEFAULT) + set_policy_success_file() + return completed diff --git a/ceph-proxy/charmhelpers/contrib/openstack/utils.py b/ceph-proxy/charmhelpers/contrib/openstack/utils.py new file mode 100644 index 00000000..82c28d8e --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/openstack/utils.py @@ -0,0 +1,2695 @@ +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Common python helper functions used for OpenStack charms. +from collections import OrderedDict, namedtuple +from functools import partial, wraps + +import subprocess +import json +import operator +import os +import sys +import re +import itertools +import functools + +import traceback +import uuid +import yaml + +from charmhelpers import deprecate + +from charmhelpers.contrib.network import ip + +from charmhelpers.core import decorators, unitdata + +import charmhelpers.contrib.openstack.deferred_events as deferred_events + +from charmhelpers.core.hookenv import ( + WORKLOAD_STATES, + action_fail, + action_get, + action_set, + config, + expected_peer_units, + expected_related_units, + log as juju_log, + charm_dir, + INFO, + ERROR, + metadata, + related_units, + relation_get, + relation_id, + relation_ids, + relation_set, + service_name as ch_service_name, + status_set, + hook_name, + application_version_set, + cached, + leader_set, + leader_get, + local_unit, +) + +from charmhelpers.core.strutils import ( + BasicStringComparator, + bool_from_string, +) + +from charmhelpers.contrib.storage.linux.lvm import ( + deactivate_lvm_volume_group, + is_lvm_physical_volume, + remove_lvm_physical_volume, +) + +from charmhelpers.contrib.network.ip import ( + get_ipv6_addr, + is_ipv6, + port_has_listener, +) + +from charmhelpers.core.host import ( + lsb_release, + mounts, + umount, + service_running, + service_pause, + service_resume, + service_stop, + service_start, + restart_on_change_helper, +) + +from charmhelpers.fetch import ( + apt_cache, + apt_install, + import_key as fetch_import_key, + add_source as fetch_add_source, + SourceConfigError, + GPGKeyError, + get_upstream_version, + filter_installed_packages, + filter_missing_packages, + ubuntu_apt_pkg as apt, + OPENSTACK_RELEASES, + UBUNTU_OPENSTACK_RELEASE, +) + +from charmhelpers.fetch.snap import ( + snap_install, + snap_refresh, + valid_snap_channel, +) + +from charmhelpers.contrib.storage.linux.utils import is_block_device, zap_disk +from charmhelpers.contrib.storage.linux.loopback import ensure_loopback_device +from charmhelpers.contrib.openstack.exceptions import OSContextError, ServiceActionError +from charmhelpers.contrib.openstack.policyd import ( + policyd_status_message_prefix, + POLICYD_CONFIG_NAME, +) + +from charmhelpers.contrib.openstack.ha.utils import ( + expect_ha, +) + +CLOUD_ARCHIVE_URL = "http://ubuntu-cloud.archive.canonical.com/ubuntu" +CLOUD_ARCHIVE_KEY_ID = '5EDB1B62EC4926EA' + +DISTRO_PROPOSED = ('deb http://archive.ubuntu.com/ubuntu/ %s-proposed ' + 'restricted main multiverse universe') + +OPENSTACK_CODENAMES = OrderedDict([ + # NOTE(lourot): 'yyyy.i' isn't actually mapping with any real version + # number. This just means the i-th version of the year yyyy. 
+ ('2011.2', 'diablo'), + ('2012.1', 'essex'), + ('2012.2', 'folsom'), + ('2013.1', 'grizzly'), + ('2013.2', 'havana'), + ('2014.1', 'icehouse'), + ('2014.2', 'juno'), + ('2015.1', 'kilo'), + ('2015.2', 'liberty'), + ('2016.1', 'mitaka'), + ('2016.2', 'newton'), + ('2017.1', 'ocata'), + ('2017.2', 'pike'), + ('2018.1', 'queens'), + ('2018.2', 'rocky'), + ('2019.1', 'stein'), + ('2019.2', 'train'), + ('2020.1', 'ussuri'), + ('2020.2', 'victoria'), + ('2021.1', 'wallaby'), + ('2021.2', 'xena'), + ('2022.1', 'yoga'), + ('2022.2', 'zed'), + ('2023.1', 'antelope'), + ('2023.2', 'bobcat'), + ('2024.1', 'caracal'), +]) + +# The ugly duckling - must list releases oldest to newest +SWIFT_CODENAMES = OrderedDict([ + ('diablo', + ['1.4.3']), + ('essex', + ['1.4.8']), + ('folsom', + ['1.7.4']), + ('grizzly', + ['1.7.6', '1.7.7', '1.8.0']), + ('havana', + ['1.9.0', '1.9.1', '1.10.0']), + ('icehouse', + ['1.11.0', '1.12.0', '1.13.0', '1.13.1']), + ('juno', + ['2.0.0', '2.1.0', '2.2.0']), + ('kilo', + ['2.2.1', '2.2.2']), + ('liberty', + ['2.3.0', '2.4.0', '2.5.0']), + ('mitaka', + ['2.5.0', '2.6.0', '2.7.0']), + ('newton', + ['2.8.0', '2.9.0', '2.10.0']), + ('ocata', + ['2.11.0', '2.12.0', '2.13.0']), + ('pike', + ['2.13.0', '2.15.0']), + ('queens', + ['2.16.0', '2.17.0']), + ('rocky', + ['2.18.0', '2.19.0']), + ('stein', + ['2.20.0', '2.21.0']), + ('train', + ['2.22.0', '2.23.0']), + ('ussuri', + ['2.24.0', '2.25.0']), + ('victoria', + ['2.25.0', '2.26.0']), +]) + +# >= Liberty version->codename mapping +PACKAGE_CODENAMES = { + 'nova-common': OrderedDict([ + ('12', 'liberty'), + ('13', 'mitaka'), + ('14', 'newton'), + ('15', 'ocata'), + ('16', 'pike'), + ('17', 'queens'), + ('18', 'rocky'), + ('19', 'stein'), + ('20', 'train'), + ('21', 'ussuri'), + ('22', 'victoria'), + ]), + 'neutron-common': OrderedDict([ + ('7', 'liberty'), + ('8', 'mitaka'), + ('9', 'newton'), + ('10', 'ocata'), + ('11', 'pike'), + ('12', 'queens'), + ('13', 'rocky'), + ('14', 'stein'), + ('15', 'train'), + ('16', 'ussuri'), + ('17', 'victoria'), + ]), + 'cinder-common': OrderedDict([ + ('7', 'liberty'), + ('8', 'mitaka'), + ('9', 'newton'), + ('10', 'ocata'), + ('11', 'pike'), + ('12', 'queens'), + ('13', 'rocky'), + ('14', 'stein'), + ('15', 'train'), + ('16', 'ussuri'), + ('17', 'victoria'), + ]), + 'keystone': OrderedDict([ + ('8', 'liberty'), + ('9', 'mitaka'), + ('10', 'newton'), + ('11', 'ocata'), + ('12', 'pike'), + ('13', 'queens'), + ('14', 'rocky'), + ('15', 'stein'), + ('16', 'train'), + ('17', 'ussuri'), + ('18', 'victoria'), + ]), + 'horizon-common': OrderedDict([ + ('8', 'liberty'), + ('9', 'mitaka'), + ('10', 'newton'), + ('11', 'ocata'), + ('12', 'pike'), + ('13', 'queens'), + ('14', 'rocky'), + ('15', 'stein'), + ('16', 'train'), + ('18', 'ussuri'), # Note this was actually 17.0 - 18.3 + ('19', 'victoria'), # Note this is really 18.6 + ]), + 'ceilometer-common': OrderedDict([ + ('5', 'liberty'), + ('6', 'mitaka'), + ('7', 'newton'), + ('8', 'ocata'), + ('9', 'pike'), + ('10', 'queens'), + ('11', 'rocky'), + ('12', 'stein'), + ('13', 'train'), + ('14', 'ussuri'), + ('15', 'victoria'), + ]), + 'heat-common': OrderedDict([ + ('5', 'liberty'), + ('6', 'mitaka'), + ('7', 'newton'), + ('8', 'ocata'), + ('9', 'pike'), + ('10', 'queens'), + ('11', 'rocky'), + ('12', 'stein'), + ('13', 'train'), + ('14', 'ussuri'), + ('15', 'victoria'), + ]), + 'glance-common': OrderedDict([ + ('11', 'liberty'), + ('12', 'mitaka'), + ('13', 'newton'), + ('14', 'ocata'), + ('15', 'pike'), + ('16', 'queens'), + ('17', 'rocky'), + ('18', 
'stein'), + ('19', 'train'), + ('20', 'ussuri'), + ('21', 'victoria'), + ]), + 'openstack-dashboard': OrderedDict([ + ('8', 'liberty'), + ('9', 'mitaka'), + ('10', 'newton'), + ('11', 'ocata'), + ('12', 'pike'), + ('13', 'queens'), + ('14', 'rocky'), + ('15', 'stein'), + ('16', 'train'), + ('18', 'ussuri'), + ('19', 'victoria'), + ]), +} + +DEFAULT_LOOPBACK_SIZE = '5G' + +DB_SERIES_UPGRADING_KEY = 'cluster-series-upgrading' + +DB_MAINTENANCE_KEYS = [DB_SERIES_UPGRADING_KEY] + + +class CompareOpenStackReleases(BasicStringComparator): + """Provide comparisons of OpenStack releases. + + Use in the form of + + if CompareOpenStackReleases(release) > 'mitaka': + # do something with mitaka + """ + _list = OPENSTACK_RELEASES + + +def error_out(msg): + juju_log("FATAL ERROR: %s" % msg, level='ERROR') + sys.exit(1) + + +def get_installed_semantic_versioned_packages(): + '''Get a list of installed packages which have OpenStack semantic versioning + + :returns List of installed packages + :rtype: [pkg1, pkg2, ...] + ''' + return filter_missing_packages(PACKAGE_CODENAMES.keys()) + + +def get_os_codename_install_source(src): + '''Derive OpenStack release codename from a given installation source.''' + ubuntu_rel = lsb_release()['DISTRIB_CODENAME'] + rel = '' + if src is None: + return rel + if src in OPENSTACK_RELEASES: + return src + if src in ['distro', 'distro-proposed', 'proposed']: + try: + rel = UBUNTU_OPENSTACK_RELEASE[ubuntu_rel] + except KeyError: + e = 'Could not derive openstack release for '\ + 'this Ubuntu release: %s' % ubuntu_rel + error_out(e) + return rel + + if src.startswith('cloud:'): + ca_rel = src.split(':')[1] + ca_rel = ca_rel.split('-')[1].split('/')[0] + return ca_rel + + # Best guess match based on deb string provided + if (src.startswith('deb') or + src.startswith('ppa') or + src.startswith('snap')): + for v in OPENSTACK_CODENAMES.values(): + if v in src: + return v + + +def get_os_version_install_source(src): + codename = get_os_codename_install_source(src) + return get_os_version_codename(codename) + + +def get_os_codename_version(vers): + '''Determine OpenStack codename from version number.''' + try: + return OPENSTACK_CODENAMES[vers] + except KeyError: + e = 'Could not determine OpenStack codename for version %s' % vers + error_out(e) + + +def get_os_version_codename(codename, version_map=OPENSTACK_CODENAMES, + raise_exception=False): + '''Determine OpenStack version number from codename.''' + for k, v in version_map.items(): + if v == codename: + return k + e = 'Could not derive OpenStack version for '\ + 'codename: %s' % codename + if raise_exception: + raise ValueError(str(e)) + error_out(e) + + +def get_swift_codename(version): + '''Determine OpenStack codename that corresponds to swift version.''' + codenames = [k for k, v in SWIFT_CODENAMES.items() if version in v] + + if len(codenames) > 1: + # If more than one release codename contains this version we determine + # the actual codename based on the highest available install source. 
+ for codename in reversed(codenames): + releases = UBUNTU_OPENSTACK_RELEASE + release = [k for k, v in releases.items() if codename in v] + ret = (subprocess + .check_output(['apt-cache', 'policy', 'swift']) + .decode('UTF-8')) + if codename in ret or release[0] in ret: + return codename + elif len(codenames) == 1: + return codenames[0] + + # NOTE: fallback - attempt to match with just major.minor version + match = re.match(r'^(\d+)\.(\d+)', version) + if match: + major_minor_version = match.group(0) + for codename, versions in SWIFT_CODENAMES.items(): + for release_version in versions: + if release_version.startswith(major_minor_version): + return codename + + return None + + +def get_os_codename_package(package, fatal=True): + """Derive OpenStack release codename from an installed package. + + Initially, see if the openstack-release pkg is available (by trying to + install it) and use it instead. + + If it isn't then it falls back to the existing method of checking the + version of the package passed and then resolving the version from that + using lookup tables. + + Note: if possible, charms should use get_installed_os_version() to + determine the version of the "openstack-release" pkg. + + :param package: the package to test for version information. + :type package: str + :param fatal: If True (default), then die via error_out() + :type fatal: bool + :returns: the OpenStack release codename (e.g. ussuri) + :rtype: str + """ + + codename = get_installed_os_version() + if codename: + return codename + + if snap_install_requested(): + cmd = ['snap', 'list', package] + try: + out = subprocess.check_output(cmd).decode('UTF-8') + except subprocess.CalledProcessError: + return None + lines = out.split('\n') + for line in lines: + if package in line: + # Second item in list is Version + return line.split()[1] + + cache = apt_cache() + + try: + pkg = cache[package] + except Exception: + if not fatal: + return None + # the package is unknown to the current apt cache. + e = 'Could not determine version of package with no installation '\ + 'candidate: %s' % package + error_out(e) + + if not pkg.current_ver: + if not fatal: + return None + # package is known, but no version is currently installed. 
+        e = 'Could not determine version of uninstalled package: %s' % package
+        error_out(e)
+
+    vers = apt.upstream_version(pkg.current_ver.ver_str)
+    if 'swift' in pkg.name:
+        # Fully x.y.z match for swift versions
+        match = re.match(r'^(\d+)\.(\d+)\.(\d+)', vers)
+    else:
+        # x.y match only for 20XX.X
+        # and ignore patch level for other packages
+        match = re.match(r'^(\d+)\.(\d+)', vers)
+
+    if match:
+        vers = match.group(0)
+
+    # Generate a major version number for newer semantic
+    # versions of openstack projects
+    major_vers = vers.split('.')[0]
+    # >= Liberty independent project versions
+    if (package in PACKAGE_CODENAMES and
+            major_vers in PACKAGE_CODENAMES[package]):
+        return PACKAGE_CODENAMES[package][major_vers]
+    else:
+        # < Liberty co-ordinated project versions
+        try:
+            if 'swift' in pkg.name:
+                return get_swift_codename(vers)
+            else:
+                return OPENSTACK_CODENAMES[vers]
+        except KeyError:
+            if not fatal:
+                return None
+            e = 'Could not determine OpenStack codename for version %s' % vers
+            error_out(e)


+def get_os_version_package(pkg, fatal=True):
+    '''Derive OpenStack version number from an installed package.'''
+    codename = get_os_codename_package(pkg, fatal=fatal)
+
+    if not codename:
+        return None
+
+    if 'swift' in pkg:
+        vers_map = SWIFT_CODENAMES
+        for cname, version in vers_map.items():
+            if cname == codename:
+                return version[-1]
+    else:
+        vers_map = OPENSTACK_CODENAMES
+        for version, cname in vers_map.items():
+            if cname == codename:
+                return version
+
+
+def get_installed_os_version():
+    """Determine the OpenStack release code name from openstack-release pkg.
+
+    This uses the "openstack-release" pkg (if it exists) to return the
+    OpenStack release codename (e.g. ussuri, mitaka, ocata, etc.)
+
+    Note, it caches the result so that it is only done once per hook.
+
+    :returns: the OpenStack release codename, if available
+    :rtype: Optional[str]
+    """
+    @cached
+    def _do_install():
+        apt_install(filter_installed_packages(['openstack-release']),
+                    fatal=False, quiet=True)
+
+    _do_install()
+    return openstack_release().get('OPENSTACK_CODENAME')
+
+
+def openstack_release():
+    """Return /etc/openstack-release in a dict."""
+    d = {}
+    try:
+        with open('/etc/openstack-release', 'r') as lsb:
+            for l in lsb:
+                s = l.split('=')
+                if len(s) != 2:
+                    continue
+                d[s[0].strip()] = s[1].strip()
+    except FileNotFoundError:
+        pass
+    return d
+
+
+# Module local cache variable for the os_release.
+_os_rel = None
+
+
+def reset_os_release():
+    '''Unset the cached os_release version'''
+    global _os_rel
+    _os_rel = None
+
+
+def os_release(package, base=None, reset_cache=False, source_key=None):
+    """Returns OpenStack release codename from a cached global.
+
+    If reset_cache then unset the cached os_release version and return the
+    freshly determined version.
+
+    If the codename cannot be determined from either an installed package or
+    the installation source, the earliest release supported by the charm
+    should be returned.
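+
+    For example (package name and base are illustrative):
+
+        release = os_release('keystone', base='mitaka')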
+
+    :param package: Name of package to determine release from
+    :type package: str
+    :param base: Fallback codename if attempts to determine it from the
+        package fail
+    :type base: Optional[str]
+    :param reset_cache: Reset any cached codename value
+    :type reset_cache: bool
+    :param source_key: Name of source configuration option
+        (default: 'openstack-origin')
+    :type source_key: Optional[str]
+    :returns: OpenStack release codename
+    :rtype: str
+    """
+    source_key = source_key or 'openstack-origin'
+    if not base:
+        base = UBUNTU_OPENSTACK_RELEASE[lsb_release()['DISTRIB_CODENAME']]
+    global _os_rel
+    if reset_cache:
+        reset_os_release()
+    if _os_rel:
+        return _os_rel
+    _os_rel = (
+        get_os_codename_package(package, fatal=False) or
+        get_os_codename_install_source(config(source_key)) or
+        base)
+    return _os_rel
+
+
+@deprecate("moved to charmhelpers.fetch.import_key()", "2017-07", log=juju_log)
+def import_key(keyid):
+    """Import a key, either ASCII armored, or a GPG key id.
+
+    @param keyid: the key in ASCII armor format, or a GPG key id.
+    @raises SystemExit() via sys.exit() on failure.
+    """
+    try:
+        return fetch_import_key(keyid)
+    except GPGKeyError as e:
+        error_out("Could not import key: {}".format(str(e)))
+
+
+def get_source_and_pgp_key(source_and_key):
+    """Look for a pgp key ID or ascii-armor key in the given input.
+
+    :param source_and_key: String, "source_spec|keyid" where '|keyid' is
+        optional.
+    :returns: (source_spec, key_id OR None) as a tuple. Returns None for
+        key_id if there was no '|' in the source_and_key string.
+    """
+    try:
+        # split on the first '|' only; anything after it is the key
+        source, key = source_and_key.split('|', 1)
+        return source, key or None
+    except ValueError:
+        return source_and_key, None
+
+
+@deprecate("use charmhelpers.fetch.add_source() instead.",
+           "2017-07", log=juju_log)
+def configure_installation_source(source_plus_key):
+    """Configure an installation source.
+
+    The functionality is provided by charmhelpers.fetch.add_source()
+    The difference between the two functions is that the add_source()
+    signature requires the key to be passed directly, whereas this function
+    accepts an optional key appended to the end of the source specification
+    after a '|'.
+
+    Another difference from add_source() is that this function calls
+    sys.exit(1) if the configuration fails, whereas add_source() raises
+    SourceConfigError(). A further difference is that add_source() silently
+    fails (with a juju_log command) if there is no matching source to
+    configure, whereas this function fails with a sys.exit(1)
+
+    :param source_plus_key: String of the form 'source_spec|keyid' -- see
+        above for details.
+
+    Note that the behaviour on error is to log the error to the juju log and
+    then call sys.exit(1).
+    """
+    if source_plus_key.startswith('snap'):
+        # Do nothing for snap installs
+        return
+    # extract the key if there is one, denoted by a '|' in the source spec
+    source, key = get_source_and_pgp_key(source_plus_key)
+
+    # handle the ordinary sources via add_source
+    try:
+        fetch_add_source(source, key, fail_invalid=True)
+    except SourceConfigError as se:
+        error_out(str(se))
+
+
+def config_value_changed(option):
+    """
+    Determine if config value changed since last call to this function.
+    """
+    hook_data = unitdata.HookData()
+    with hook_data():
+        db = unitdata.kv()
+        current = config(option)
+        saved = db.get(option)
+        db.set(option, current)
+        if saved is None:
+            return False
+        return current != saved
+
+
+def get_endpoint_key(service_name, relation_id, unit_name):
+    """Return the key used to refer to an ep changed notification from a unit.
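+
+    e.g. get_endpoint_key('placement', 'identity-service:4', 'keystone/0')
+    returns 'placement-identity-service_4-keystone_0'.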
+ + :param service_name: Service name eg nova, neutron, placement etc + :type service_name: str + :param relation_id: The id of the relation the unit is on. + :type relation_id: str + :param unit_name: The name of the unit publishing the notification. + :type unit_name: str + :returns: The key used to refer to an ep changed notification from a unit + :rtype: str + """ + return '{}-{}-{}'.format( + service_name, + relation_id.replace(':', '_'), + unit_name.replace('/', '_')) + + +def get_endpoint_notifications(service_names, rel_name='identity-service'): + """Return all notifications for the given services. + + :param service_names: List of service name. + :type service_name: List + :param rel_name: Name of the relation to query + :type rel_name: str + :returns: A dict containing the source of the notification and its nonce. + :rtype: Dict[str, str] + """ + notifications = {} + for rid in relation_ids(rel_name): + for unit in related_units(relid=rid): + ep_changed_json = relation_get( + rid=rid, + unit=unit, + attribute='ep_changed') + if ep_changed_json: + ep_changed = json.loads(ep_changed_json) + for service in service_names: + if ep_changed.get(service): + key = get_endpoint_key(service, rid, unit) + notifications[key] = ep_changed[service] + return notifications + + +def endpoint_changed(service_name, rel_name='identity-service'): + """Whether a new notification has been received for an endpoint. + + :param service_name: Service name eg nova, neutron, placement etc + :type service_name: str + :param rel_name: Name of the relation to query + :type rel_name: str + :returns: Whether endpoint has changed + :rtype: bool + """ + changed = False + with unitdata.HookData()() as t: + db = t[0] + notifications = get_endpoint_notifications( + [service_name], + rel_name=rel_name) + for key, nonce in notifications.items(): + if db.get(key) != nonce: + juju_log(('New endpoint change notification found: ' + '{}={}').format(key, nonce), + 'INFO') + changed = True + break + return changed + + +def save_endpoint_changed_triggers(service_names, rel_name='identity-service'): + """Save the endpoint triggers in db so it can be tracked if they changed. + + :param service_names: List of service name. + :type service_name: List + :param rel_name: Name of the relation to query + :type rel_name: str + """ + with unitdata.HookData()() as t: + db = t[0] + notifications = get_endpoint_notifications( + service_names, + rel_name=rel_name) + for key, nonce in notifications.items(): + db.set(key, nonce) + + +def save_script_rc(script_path="scripts/scriptrc", **env_vars): + """ + Write an rc file in the charm-delivered directory containing + exported environment variables provided by env_vars. Any charm scripts run + outside the juju hook environment can source this scriptrc to obtain + updated config information necessary to perform health checks or + service changes. + """ + juju_rc_path = "%s/%s" % (charm_dir(), script_path) + if not os.path.exists(os.path.dirname(juju_rc_path)): + os.mkdir(os.path.dirname(juju_rc_path)) + with open(juju_rc_path, 'wt') as rc_script: + rc_script.write("#!/bin/bash\n") + for u, p in env_vars.items(): + if u != "script_path": + rc_script.write('export %s=%s\n' % (u, p)) + + +def openstack_upgrade_available(package): + """ + Determines if an OpenStack upgrade is available from installation + source, based on version of installed package. + + :param package: str: Name of installed package. 
+ + :returns: bool: : Returns True if configured installation source offers + a newer version of package. + """ + + src = config('openstack-origin') + cur_vers = get_os_version_package(package) + if not cur_vers: + # The package has not been installed yet do not attempt upgrade + return False + try: + avail_vers = get_os_version_install_source(src) + except Exception: + avail_vers = cur_vers + apt.init() + return apt.version_compare(avail_vers, cur_vers) >= 1 + + +def ensure_block_device(block_device): + ''' + Confirm block_device, create as loopback if necessary. + + :param block_device: str: Full path of block device to ensure. + + :returns: str: Full path of ensured block device. + ''' + _none = ['None', 'none', None] + if (block_device in _none): + error_out('prepare_storage(): Missing required input: block_device=%s.' + % block_device) + + if block_device.startswith('/dev/'): + bdev = block_device + elif block_device.startswith('/'): + _bd = block_device.split('|') + if len(_bd) == 2: + bdev, size = _bd + else: + bdev = block_device + size = DEFAULT_LOOPBACK_SIZE + bdev = ensure_loopback_device(bdev, size) + else: + bdev = '/dev/%s' % block_device + + if not is_block_device(bdev): + error_out('Failed to locate valid block device at %s' % bdev) + + return bdev + + +def clean_storage(block_device): + ''' + Ensures a block device is clean. That is: + - unmounted + - any lvm volume groups are deactivated + - any lvm physical device signatures removed + - partition table wiped + + :param block_device: str: Full path to block device to clean. + ''' + for mp, d in mounts(): + if d == block_device: + juju_log('clean_storage(): %s is mounted @ %s, unmounting.' % + (d, mp), level=INFO) + umount(mp, persist=True) + + if is_lvm_physical_volume(block_device): + deactivate_lvm_volume_group(block_device) + remove_lvm_physical_volume(block_device) + else: + zap_disk(block_device) + + +is_ip = ip.is_ip +ns_query = ip.ns_query +get_host_ip = ip.get_host_ip +get_hostname = ip.get_hostname + + +def get_matchmaker_map(mm_file='/etc/oslo/matchmaker_ring.json'): + mm_map = {} + if os.path.isfile(mm_file): + with open(mm_file, 'r') as f: + mm_map = json.load(f) + return mm_map + + +def sync_db_with_multi_ipv6_addresses(database, database_user, + relation_prefix=None): + hosts = get_ipv6_addr(dynamic_only=False) + + if config('vip'): + vips = config('vip').split() + for vip in vips: + if vip and is_ipv6(vip): + hosts.append(vip) + + kwargs = {'database': database, + 'username': database_user, + 'hostname': json.dumps(hosts)} + + if relation_prefix: + for key in list(kwargs.keys()): + kwargs["%s_%s" % (relation_prefix, key)] = kwargs[key] + del kwargs[key] + + for rid in relation_ids('shared-db'): + relation_set(relation_id=rid, **kwargs) + + +def os_requires_version(ostack_release, pkg): + """ + Decorator for hook to specify minimum supported release + """ + def wrap(f): + @wraps(f) + def wrapped_f(*args): + if CompareOpenStackReleases(os_release(pkg)) < ostack_release: + raise Exception("This hook is not supported on releases" + " before %s" % ostack_release) + f(*args) + return wrapped_f + return wrap + + +def os_workload_status(configs, required_interfaces, charm_func=None): + """ + Decorator to set workload status based on complete contexts + """ + def wrap(f): + @wraps(f) + def wrapped_f(*args, **kwargs): + # Run the original function first + f(*args, **kwargs) + # Set workload status now that contexts have been + # acted on + set_os_workload_status(configs, required_interfaces, charm_func) + return 
wrapped_f + return wrap + + +def set_os_workload_status(configs, required_interfaces, charm_func=None, + services=None, ports=None): + """Set the state of the workload status for the charm. + + This calls _determine_os_workload_status() to get the new state, message + and sets the status using status_set() + + @param configs: a templating.OSConfigRenderer() object + @param required_interfaces: {generic: [specific, specific2, ...]} + @param charm_func: a callable function that returns state, message. The + signature is charm_func(configs) -> (state, message) + @param services: list of strings OR dictionary specifying services/ports + @param ports: OPTIONAL list of port numbers. + @returns state, message: the new workload status, user message + """ + state, message = _determine_os_workload_status( + configs, required_interfaces, charm_func, services, ports) + status_set(state, message) + + +def _determine_os_workload_status( + configs, required_interfaces, charm_func=None, + services=None, ports=None): + """Determine the state of the workload status for the charm. + + This function returns the new workload status for the charm based + on the state of the interfaces, the paused state and whether the + services are actually running and any specified ports are open. + + This checks: + + 1. if the unit should be paused, that it is actually paused. If so the + state is 'maintenance' + message, else 'broken'. + 2. that the interfaces/relations are complete. If they are not then + it sets the state to either 'broken' or 'waiting' and an appropriate + message. + 3. If all the relation data is set, then it checks that the actual + services really are running. If not it sets the state to 'broken'. + + If everything is okay then the state returns 'active'. + + @param configs: a templating.OSConfigRenderer() object + @param required_interfaces: {generic: [specific, specific2, ...]} + @param charm_func: a callable function that returns state, message. The + signature is charm_func(configs) -> (state, message) + @param services: list of strings OR dictionary specifying services/ports + @param ports: OPTIONAL list of port numbers. + @returns state, message: the new workload status, user message + """ + state, message = _ows_check_if_paused(services, ports) + + if state is None: + state, message = _ows_check_generic_interfaces( + configs, required_interfaces) + + if state != 'maintenance' and charm_func: + # _ows_check_charm_func() may modify the state, message + state, message = _ows_check_charm_func( + state, message, lambda: charm_func(configs)) + + if state is None: + state, message = ows_check_services_running(services, ports) + + if state is None: + state = 'active' + message = "Unit is ready" + juju_log(message, 'INFO') + + try: + if config(POLICYD_CONFIG_NAME): + message = "{} {}".format(policyd_status_message_prefix(), message) + # Get deferred restarts events that have been triggered by a policy + # written by this charm. + deferred_restarts = list(set( + [e.service + for e in deferred_events.get_deferred_restarts() + if e.policy_requestor_name == ch_service_name()])) + if deferred_restarts: + svc_msg = "Services queued for restart: {}".format( + ', '.join(sorted(deferred_restarts))) + message = "{}. {}".format(message, svc_msg) + deferred_hooks = deferred_events.get_deferred_hooks() + if deferred_hooks: + svc_msg = "Hooks skipped due to disabled auto restarts: {}".format( + ', '.join(sorted(deferred_hooks))) + message = "{}. 
{}".format(message, svc_msg)
+
+ except Exception:
+ pass
+
+ return state, message
+
+
+def _ows_check_if_paused(services=None, ports=None):
+ """Check if the unit is supposed to be paused, and if so check that the
+ services/ports (if passed) are actually stopped/not being listened to.
+
+ If the unit isn't supposed to be paused, just return None, None
+
+ If the unit is performing a series upgrade, return a message indicating
+ this.
+
+ @param services: OPTIONAL services spec or list of service names.
+ @param ports: OPTIONAL list of port numbers.
+ @returns state, message or None, None
+ """
+ if is_unit_upgrading_set():
+ state, message = check_actually_paused(services=services,
+ ports=ports)
+ if state is None:
+ # we're paused okay, so set blocked and return
+ state = "blocked"
+ message = ("Ready for do-release-upgrade and reboot. "
+ "Set complete when finished.")
+ return state, message
+
+ if is_unit_paused_set():
+ state, message = check_actually_paused(services=services,
+ ports=ports)
+ if state is None:
+ # we're paused okay, so set maintenance and return
+ state = "maintenance"
+ message = "Paused. Use 'resume' action to resume normal service."
+ return state, message
+ return None, None
+
+
+def _ows_check_generic_interfaces(configs, required_interfaces):
+ """Check the complete contexts to determine the workload status.
+
+ - Checks for missing or incomplete contexts
+ - juju log details of missing required data.
+ - determines the correct workload status
+ - creates an appropriate message for status_set(...)
+
+ if there are no problems then the function returns None, None
+
+ @param configs: a templating.OSConfigRenderer() object
+ @params required_interfaces: {generic_interface: [specific_interface], }
+ @returns state, message or None, None
+ """
+ incomplete_rel_data = incomplete_relation_data(configs,
+ required_interfaces)
+ state = None
+ message = None
+ missing_relations = set()
+ incomplete_relations = set()
+
+ for generic_interface, relations_states in incomplete_rel_data.items():
+ related_interface = None
+ missing_data = {}
+ # Related or not?
+ for interface, relation_state in relations_states.items():
+ if relation_state.get('related'):
+ related_interface = interface
+ missing_data = relation_state.get('missing_data')
+ break
+ # No relation ID for the generic_interface?
+ if not related_interface:
+ juju_log("{} relation is missing and must be related for "
+ "functionality. ".format(generic_interface), 'WARN')
+ state = 'blocked'
+ missing_relations.add(generic_interface)
+ else:
+ # Relation ID exists but no related unit
+ if not missing_data:
+ # Edge case - relation ID exists but unit is departing
+ _hook_name = hook_name()
+ if (('departed' in _hook_name or 'broken' in _hook_name) and
+ related_interface in _hook_name):
+ state = 'blocked'
+ missing_relations.add(generic_interface)
+ juju_log("{} relation's interface, {}, "
+ "relationship is departed or broken "
+ "and is required for functionality."
+ "".format(generic_interface, related_interface),
+ "WARN")
+ # Normal case relation ID exists but no related unit
+ # (joining)
+ else:
+ juju_log("{} relation's interface, {}, is related but has"
+ " no units in the relation."
+ "".format(generic_interface, related_interface),
+ "INFO")
+ # Related unit exists and data missing on the relation
+ else:
+ juju_log("{} relation's interface, {}, is related awaiting "
+ "the following data from the relationship: {}. "
+ "".format(generic_interface, related_interface,
+ ", ".join(missing_data)), "INFO")
+ if state != 'blocked':
+ state = 'waiting'
+ if generic_interface not in missing_relations:
+ incomplete_relations.add(generic_interface)
+
+ if missing_relations:
+ message = "Missing relations: {}".format(", ".join(missing_relations))
+ if incomplete_relations:
+ message += "; incomplete relations: {}" \
+ "".format(", ".join(incomplete_relations))
+ state = 'blocked'
+ elif incomplete_relations:
+ message = "Incomplete relations: {}" \
+ "".format(", ".join(incomplete_relations))
+ state = 'waiting'
+
+ return state, message
+
+
+def _ows_check_charm_func(state, message, charm_func_with_configs):
+ """Run a custom check function for the charm to see if it wants to
+ change the state. This is only run if not in 'maintenance' and
+ tests to see if the new state is more important than the previous
+ one determined by the interfaces/relations check.
+
+ @param state: the previously determined state so far.
+ @param message: the user-oriented message so far.
+ @param charm_func_with_configs: a callable function that returns state,
+ message
+ @returns state, message strings.
+ """
+ if charm_func_with_configs:
+ charm_state, charm_message = charm_func_with_configs()
+ if (charm_state != 'active' and
+ charm_state != 'unknown' and
+ charm_state is not None):
+ state = workload_state_compare(state, charm_state)
+ if message:
+ charm_message = charm_message.replace("Incomplete relations: ",
+ "")
+ message = "{}, {}".format(message, charm_message)
+ else:
+ message = charm_message
+ return state, message
+
+
+@deprecate("use ows_check_services_running() instead", "2022-05", log=juju_log)
+def _ows_check_services_running(services, ports):
+ return ows_check_services_running(services, ports)
+
+
+def ows_check_services_running(services, ports, ssl_check_info=None):
+ """Check that the services that should be running are actually running
+ and that any ports specified are being listened to.
+
+ @param services: list of strings OR dictionary specifying services/ports
+ @param ports: list of ports
+ @param ssl_check_info: SSLPortCheckInfo object. If provided, port checks
+ will be done using an SSL connection.
+ @returns state, message: strings or None, None
+ """
+ messages = []
+ state = None
+ if services is not None:
+ services = _extract_services_list_helper(services)
+ services_running, running = _check_running_services(services)
+ if not all(running):
+ messages.append(
+ "Services not running that should be: {}"
+ .format(", ".join(_filter_tuples(services_running, False))))
+ state = 'blocked'
+ # also verify that the ports that should be open are open
+ # NB, that ServiceManager objects only OPTIONALLY have ports
+ map_not_open, ports_open = (
+ _check_listening_on_services_ports(
+ services, ssl_check_info=ssl_check_info))
+ if not all(ports_open):
+ # find which service has missing ports. They are in service
+ # order which makes it a bit easier.
+ message_parts = {service: ", ".join([str(v) for v in open_ports]) + for service, open_ports in map_not_open.items()} + message = ", ".join( + ["{}: [{}]".format(s, sp) for s, sp in message_parts.items()]) + messages.append( + "Services with ports not open that should be: {}" + .format(message)) + state = 'blocked' + + if ports is not None: + # and we can also check ports which we don't know the service for + ports_open, ports_open_bools = \ + _check_listening_on_ports_list(ports, ssl_check_info) + if not all(ports_open_bools): + messages.append( + "Ports which should be open, but are not: {}" + .format(", ".join([str(p) for p, v in ports_open + if not v]))) + state = 'blocked' + + if state is not None: + message = "; ".join(messages) + return state, message + + return None, None + + +def _extract_services_list_helper(services): + """Extract a OrderedDict of {service: [ports]} of the supplied services + for use by the other functions. + + The services object can either be: + - None : no services were passed (an empty dict is returned) + - a list of strings + - A dictionary (optionally OrderedDict) {service_name: {'service': ..}} + - An array of [{'service': service_name, ...}, ...] + + @param services: see above + @returns OrderedDict(service: [ports], ...) + """ + if services is None: + return {} + if isinstance(services, dict): + services = services.values() + # either extract the list of services from the dictionary, or if + # it is a simple string, use that. i.e. works with mixed lists. + _s = OrderedDict() + for s in services: + if isinstance(s, dict) and 'service' in s: + _s[s['service']] = s.get('ports', []) + if isinstance(s, str): + _s[s] = [] + return _s + + +def _check_running_services(services): + """Check that the services dict provided is actually running and provide + a list of (service, boolean) tuples for each service. + + Returns both a zipped list of (service, boolean) and a list of booleans + in the same order as the services. + + @param services: OrderedDict of strings: [ports], one for each service to + check. + @returns [(service, boolean), ...], : results for checks + [boolean] : just the result of the service checks + """ + services_running = [service_running(s) for s in services] + return list(zip(services, services_running)), services_running + + +def _check_listening_on_services_ports(services, test=False, + ssl_check_info=None): + """Check that the unit is actually listening (has the port open) on the + ports that the service specifies are open. If test is True then the + function returns the services with ports that are open rather than + closed. + + Returns an OrderedDict of service: ports and a list of booleans + + @param services: OrderedDict(service: [port, ...], ...) + @param test: default=False, if False, test for closed, otherwise open. + @param ssl_check_info: SSLPortCheckInfo object. If provided, port checks + will be done using an SSL connection. 
+ @returns OrderedDict(service: [port-not-open, ...]...), [boolean] + """ + test = not (not (test)) # ensure test is True or False + all_ports = list(itertools.chain(*services.values())) + ports_states = [port_has_listener('0.0.0.0', p, ssl_check_info) + for p in all_ports] + map_ports = OrderedDict() + matched_ports = [p for p, opened in zip(all_ports, ports_states) + if opened == test] # essentially opened xor test + for service, ports in services.items(): + set_ports = set(ports).intersection(matched_ports) + if set_ports: + map_ports[service] = set_ports + return map_ports, ports_states + + +def _check_listening_on_ports_list(ports, ssl_check_info=None): + """Check that the ports list given are being listened to + + Returns a list of ports being listened to and a list of the + booleans. + + @param ssl_check_info: SSLPortCheckInfo object. If provided, port checks + will be done using an SSL connection. + @param ports: LIST of port numbers. + @returns [(port_num, boolean), ...], [boolean] + """ + ports_open = [port_has_listener('0.0.0.0', p, ssl_check_info) + for p in ports] + return zip(ports, ports_open), ports_open + + +def _filter_tuples(services_states, state): + """Return a simple list from a list of tuples according to the condition + + @param services_states: LIST of (string, boolean): service and running + state. + @param state: Boolean to match the tuple against. + @returns [LIST of strings] that matched the tuple RHS. + """ + return [s for s, b in services_states if b == state] + + +def workload_state_compare(current_workload_state, workload_state): + """ Return highest priority of two states""" + hierarchy = {'unknown': -1, + 'active': 0, + 'maintenance': 1, + 'waiting': 2, + 'blocked': 3, + } + + if hierarchy.get(workload_state) is None: + workload_state = 'unknown' + if hierarchy.get(current_workload_state) is None: + current_workload_state = 'unknown' + + # Set workload_state based on hierarchy of statuses + if hierarchy.get(current_workload_state) > hierarchy.get(workload_state): + return current_workload_state + else: + return workload_state + + +def incomplete_relation_data(configs, required_interfaces): + """Check complete contexts against required_interfaces + Return dictionary of incomplete relation data. + + configs is an OSConfigRenderer object with configs registered + + required_interfaces is a dictionary of required general interfaces + with dictionary values of possible specific interfaces. + Example: + required_interfaces = {'database': ['shared-db', 'pgsql-db']} + + The interface is said to be satisfied if anyone of the interfaces in the + list has a complete context. + + Return dictionary of incomplete or missing required contexts with relation + status of interfaces and any missing data points. Example: + {'message': + {'amqp': {'missing_data': ['rabbitmq_password'], 'related': True}, + 'zeromq-configuration': {'related': False}}, + 'identity': + {'identity-service': {'related': False}}, + 'database': + {'pgsql-db': {'related': False}, + 'shared-db': {'related': True}}} + """ + complete_ctxts = configs.complete_contexts() + incomplete_relations = [ + svc_type + for svc_type, interfaces in required_interfaces.items() + if not set(interfaces).intersection(complete_ctxts)] + return { + i: configs.get_incomplete_context_data(required_interfaces[i]) + for i in incomplete_relations} + + +def do_action_openstack_upgrade(package, upgrade_callback, configs): + """Perform action-managed OpenStack upgrade. 
+ + Upgrades packages to the configured openstack-origin version and sets + the corresponding action status as a result. + + For backwards compatibility a config flag (action-managed-upgrade) must + be set for this code to run, otherwise a full service level upgrade will + fire on config-changed. + + @param package: package name for determining if openstack upgrade available + @param upgrade_callback: function callback to charm's upgrade function + @param configs: templating object derived from OSConfigRenderer class + + @return: True if upgrade successful; False if upgrade failed or skipped + """ + ret = False + + if openstack_upgrade_available(package): + if config('action-managed-upgrade'): + juju_log('Upgrading OpenStack release') + + try: + upgrade_callback(configs=configs) + action_set({'outcome': 'success, upgrade completed'}) + ret = True + except Exception: + action_set({'outcome': 'upgrade failed, see traceback'}) + action_set({'traceback': traceback.format_exc()}) + action_fail('upgrade callback resulted in an ' + 'unexpected error') + else: + action_set({'outcome': 'action-managed-upgrade config is ' + 'False, skipped upgrade'}) + else: + action_set({'outcome': 'no upgrade available'}) + + return ret + + +def do_action_package_upgrade(package, upgrade_callback, configs): + """Perform package upgrade within the current OpenStack release. + + Upgrades packages only if there is not an openstack upgrade available, + and sets the corresponding action status as a result. + + @param package: package name for determining if openstack upgrade available + @param upgrade_callback: function callback to charm's upgrade function + @param configs: templating object derived from OSConfigRenderer class + + @return: True if upgrade successful; False if upgrade failed or skipped + """ + ret = False + + if not openstack_upgrade_available(package): + juju_log('Upgrading packages') + + try: + upgrade_callback(configs=configs) + action_set({'outcome': 'success, upgrade completed'}) + ret = True + except Exception: + action_set({'outcome': 'upgrade failed, see traceback'}) + action_set({'traceback': traceback.format_exc()}) + action_fail('upgrade callback resulted in an ' + 'unexpected error') + else: + action_set({'outcome': 'upgrade skipped because an openstack upgrade ' + 'is available'}) + + return ret + + +def remote_restart(rel_name, remote_service=None): + trigger = { + 'restart-trigger': str(uuid.uuid4()), + } + if remote_service: + trigger['remote-service'] = remote_service + for rid in relation_ids(rel_name): + # This subordinate can be related to two separate services using + # different subordinate relations so only issue the restart if + # the principle is connected down the relation we think it is + if related_units(relid=rid): + relation_set(relation_id=rid, + relation_settings=trigger, + ) + + +def check_actually_paused(services=None, ports=None): + """Check that services listed in the services object and ports + are actually closed (not listened to), to verify that the unit is + properly paused. 
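+
+ For example (illustrative values only), services may be a simple list
+ such as ['apache2', 'haproxy'], or a spec of the form
+ [{'service': 'apache2', 'ports': [8080]}]; both forms are normalised
+ by _extract_services_list_helper().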
+
+ @param services: See _extract_services_list_helper
+ @returns status: string for status (None if okay),
+ message: string describing the problem for status_set
+ """
+ state = None
+ message = None
+ messages = []
+ if services is not None:
+ services = _extract_services_list_helper(services)
+ services_running, services_states = _check_running_services(services)
+ if any(services_states):
+ # there shouldn't be any running so this is a problem
+ messages.append("these services running: {}"
+ .format(", ".join(
+ _filter_tuples(services_running, True))))
+ state = "blocked"
+ ports_open, ports_open_bools = (
+ _check_listening_on_services_ports(services, True))
+ if any(ports_open_bools):
+ message_parts = {service: ", ".join([str(v) for v in open_ports])
+ for service, open_ports in ports_open.items()}
+ message = ", ".join(
+ ["{}: [{}]".format(s, sp) for s, sp in message_parts.items()])
+ messages.append(
+ "these service:ports are open: {}".format(message))
+ state = 'blocked'
+ if ports is not None:
+ ports_open, bools = _check_listening_on_ports_list(ports)
+ if any(bools):
+ messages.append(
+ "these ports which should be closed, but are open: {}"
+ .format(", ".join([str(p) for p, v in ports_open if v])))
+ state = 'blocked'
+ if messages:
+ message = ("Services should be paused but {}"
+ .format(", ".join(messages)))
+ return state, message
+
+
+def set_unit_paused():
+ """Set the unit to a paused state in the local kv() store.
+ This does NOT actually pause the unit
+ """
+ with unitdata.HookData()() as t:
+ kv = t[0]
+ kv.set('unit-paused', True)
+
+
+def clear_unit_paused():
+ """Clear the unit from a paused state in the local kv() store
+ This does NOT actually restart any services - it only clears the
+ local state.
+ """
+ with unitdata.HookData()() as t:
+ kv = t[0]
+ kv.set('unit-paused', False)
+
+
+def is_unit_paused_set():
+ """Return the state of the kv().get('unit-paused').
+ This does NOT verify that the unit really is paused.
+
+ To help with units that don't have HookData() (testing)
+ if it raises an exception, return False
+ """
+ try:
+ with unitdata.HookData()() as t:
+ kv = t[0]
+ # transform something truth-y into a Boolean.
+ return not (not (kv.get('unit-paused')))
+ except Exception:
+ return False
+
+
+def is_hook_allowed(hookname, check_deferred_restarts=True):
+ """Check if hook can run.
+
+ :param hookname: Name of hook to check.
+ :type hookname: str
+ :param check_deferred_restarts: Whether to check deferred restarts.
+ :type check_deferred_restarts: bool
+ :returns: Whether the hook is permitted and the reasons if not.
+ :rtype: (bool, str)
+ """
+ permitted = True
+ reasons = []
+ if is_unit_paused_set():
+ reasons.append(
+ "Unit is paused or upgrading. Skipping {}".format(hookname))
+ permitted = False
+
+ if check_deferred_restarts:
+ if deferred_events.is_restart_permitted():
+ permitted = True
+ deferred_events.clear_deferred_hook(hookname)
+ else:
+ if not config().changed('enable-auto-restarts'):
+ deferred_events.set_deferred_hook(hookname)
+ reasons.append("auto restarts are disabled")
+ permitted = False
+ return permitted, " and ".join(reasons)
+
+
+def manage_payload_services(action, services=None, charm_func=None):
+ """Run an action against all services.
+
+ An optional charm_func() can be called. It should raise an Exception to
+ indicate that the function failed. If it was successful it should return
+ None or an optional message.
+
+ The signature for charm_func is:
+ charm_func() -> message: str
+
+ charm_func() is executed after any services are stopped, if supplied.
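+
+ A minimal sketch of a call (service names here are placeholders):
+
+ success, messages = manage_payload_services(
+ 'pause', services=['apache2', 'haproxy'])
+ if not success:
+ raise Exception(', '.join(messages))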
+ + The services object can either be: + - None : no services were passed (an empty dict is returned) + - a list of strings + - A dictionary (optionally OrderedDict) {service_name: {'service': ..}} + - An array of [{'service': service_name, ...}, ...] + + :param action: Action to run: pause, resume, start or stop. + :type action: str + :param services: See above + :type services: See above + :param charm_func: function to run for custom charm pausing. + :type charm_func: f() + :returns: Status boolean and list of messages + :rtype: (bool, []) + :raises: RuntimeError + """ + actions = { + 'pause': service_pause, + 'resume': service_resume, + 'start': service_start, + 'stop': service_stop} + action = action.lower() + if action not in actions.keys(): + raise RuntimeError( + "action: {} must be one of: {}".format(action, + ', '.join(actions.keys()))) + services = _extract_services_list_helper(services) + messages = [] + success = True + if services: + for service in services.keys(): + rc = actions[action](service) + if not rc: + success = False + messages.append("{} didn't {} cleanly.".format(service, + action)) + if charm_func: + try: + message = charm_func() + if message: + messages.append(message) + except Exception as e: + success = False + messages.append(str(e)) + return success, messages + + +def make_wait_for_ports_barrier(ports, retry_count=5): + """Make a function to wait for port shutdowns. + + Create a function which closes over the provided ports. The function will + retry probing ports until they are closed or the retry count has been reached. + + """ + @decorators.retry_on_predicate(retry_count, operator.not_, base_delay=0.1) + def retry_port_check(): + _, ports_states = _check_listening_on_ports_list(ports) + juju_log("Probe ports {}, result: {}".format(ports, ports_states), level="DEBUG") + return any(ports_states) + return retry_port_check + + +def pause_unit(assess_status_func, services=None, ports=None, + charm_func=None): + """Pause a unit by stopping the services and setting 'unit-paused' + in the local kv() store. + + Also checks that the services have stopped and ports are no longer + being listened to. + + An optional charm_func() can be called that can either raise an + Exception or return non None, None to indicate that the unit + didn't pause cleanly. + + The signature for charm_func is: + charm_func() -> message: string + + charm_func() is executed after any services are stopped, if supplied. + + The services object can either be: + - None : no services were passed (an empty dict is returned) + - a list of strings + - A dictionary (optionally OrderedDict) {service_name: {'service': ..}} + - An array of [{'service': service_name, ...}, ...] + + @param assess_status_func: (f() -> message: string | None) or None + @param services: OPTIONAL see above + @param ports: OPTIONAL list of port + @param charm_func: function to run for custom charm pausing. + @returns None + @raises Exception(message) on an error for action_fail(). + """ + _, messages = manage_payload_services( + 'pause', + services=services, + charm_func=charm_func) + set_unit_paused() + + if assess_status_func: + message = assess_status_func() + if message: + messages.append(message) + if messages and not is_unit_upgrading_set(): + raise Exception("Couldn't pause: {}".format("; ".join(messages))) + + +def resume_unit(assess_status_func, services=None, ports=None, + charm_func=None): + """Resume a unit by starting the services and clearning 'unit-paused' + in the local kv() store. 
+
+ Also checks that the services have started and ports are being listened to.
+
+ An optional charm_func() can be called that can either raise an
+ Exception or return a non-None value to indicate that the unit
+ didn't resume cleanly.
+
+ The signature for charm_func is:
+ charm_func() -> message: string
+
+ charm_func() is executed after any services are started, if supplied.
+
+ The services object can either be:
+ - None : no services were passed (an empty dict is returned)
+ - a list of strings
+ - A dictionary (optionally OrderedDict) {service_name: {'service': ..}}
+ - An array of [{'service': service_name, ...}, ...]
+
+ @param assess_status_func: (f() -> message: string | None) or None
+ @param services: OPTIONAL see above
+ @param ports: OPTIONAL list of ports
+ @param charm_func: function to run for custom charm resuming.
+ @returns None
+ @raises Exception(message) on an error for action_fail().
+ """
+ _, messages = manage_payload_services(
+ 'resume',
+ services=services,
+ charm_func=charm_func)
+ clear_unit_paused()
+ if assess_status_func:
+ message = assess_status_func()
+ if message:
+ messages.append(message)
+ if messages:
+ raise Exception("Couldn't resume: {}".format("; ".join(messages)))
+
+
+def restart_services_action(services=None, when_all_stopped_func=None,
+ deferred_only=None):
+ """Manage a service restart request via charm action.
+
+ :param services: Services to be restarted
+ :type services: List[str]
+ :param when_all_stopped_func: Function to call when all services are
+ stopped.
+ :type when_all_stopped_func: Callable[]
+ :param deferred_only: Only restart services which have a deferred restart
+ event.
+ :type deferred_only: bool
+ """
+ if services and deferred_only:
+ raise ValueError(
+ "services and deferred_only are mutually exclusive")
+ if deferred_only:
+ services = list(set(
+ [a.service for a in deferred_events.get_deferred_restarts()]))
+ _, messages = manage_payload_services(
+ 'stop',
+ services=services,
+ charm_func=when_all_stopped_func)
+ if messages:
+ raise ServiceActionError(
+ "Error processing service stop request: {}".format(
+ "; ".join(messages)))
+ _, messages = manage_payload_services(
+ 'start',
+ services=services)
+ if messages:
+ raise ServiceActionError(
+ "Error processing service start request: {}".format(
+ "; ".join(messages)))
+ deferred_events.clear_deferred_restarts(services)
+
+
+def make_assess_status_func(*args, **kwargs):
+ """Creates an assess_status_func() suitable for handing to pause_unit()
+ and resume_unit().
+
+ This uses the _determine_os_workload_status(...) function to determine
+ what the workload_status should be for the unit. If the unit is
+ not in maintenance or active states, then the message is returned to
+ the caller. This is so an action that doesn't result in either a
+ complete pause or complete resume can signal failure with an action_fail()
+ """
+ def _assess_status_func():
+ state, message = _determine_os_workload_status(*args, **kwargs)
+ status_set(state, message)
+ if state not in ['maintenance', 'active']:
+ return message
+ return None
+
+ return _assess_status_func
+
+
+def pausable_restart_on_change(restart_map, stopstart=False,
+ restart_functions=None,
+ can_restart_now_f=None,
+ post_svc_restart_f=None,
+ pre_restarts_wait_f=None):
+ """A restart_on_change decorator that checks to see if the unit is
+ paused. If it is paused then the decorated function doesn't fire.
+
+ This is provided as a helper, as the @restart_on_change(...) decorator
+ is in core.host, yet the openstack specific helpers are in this file
+ (contrib.openstack.utils). Thus, this needs to be an optional feature
+ for openstack charms (or charms that wish to use the openstack
+ pause/resume type features).
+
+ It is used as follows:
+
+ from contrib.openstack.utils import (
+ pausable_restart_on_change as restart_on_change)
+
+ @restart_on_change(restart_map, stopstart=<boolean>)
+ def some_hook(...):
+ pass
+
+ see core.host.restart_on_change() for more details.
+
+ Note restart_map can be a callable, in which case, restart_map is only
+ evaluated at runtime. This means that it is lazy and the underlying
+ function won't be called if the decorated function is never called. Note,
+ retains backwards compatibility for passing a non-callable dictionary.
+
+ :param restart_map: Optionally callable, which then returns the restart_map or
+ the restart map {conf_file: [services]}
+ :type restart_map: Union[Callable[[], Dict[str, List[str]]],
+ Dict[str, List[str]]]
+ :param stopstart: whether to stop, start or restart a service
+ :type stopstart: boolean
+ :param restart_functions: nonstandard functions to use to restart services
+ {svc: func, ...}
+ :type restart_functions: Dict[str, Callable[[str], None]]
+ :param can_restart_now_f: A function used to check if the restart is
+ permitted.
+ :type can_restart_now_f: Callable[[str, List[str]], boolean]
+ :param post_svc_restart_f: A function run after a service has
+ restarted.
+ :type post_svc_restart_f: Callable[[str], None]
+ :param pre_restarts_wait_f: A function called before any restarts.
+ :type pre_restarts_wait_f: Callable[None, None]
+ :returns: decorator to use a restart_on_change with pausability
+ :rtype: decorator
+ """
+ def wrap(f):
+ __restart_map_cache = None
+
+ @functools.wraps(f)
+ def wrapped_f(*args, **kwargs):
+ nonlocal __restart_map_cache
+ if is_unit_paused_set():
+ return f(*args, **kwargs)
+ if __restart_map_cache is None:
+ __restart_map_cache = restart_map() \
+ if callable(restart_map) else restart_map
+ # otherwise, normal restart_on_change functionality
+ return restart_on_change_helper(
+ (lambda: f(*args, **kwargs)),
+ __restart_map_cache,
+ stopstart,
+ restart_functions,
+ can_restart_now_f,
+ post_svc_restart_f,
+ pre_restarts_wait_f)
+ return wrapped_f
+ return wrap
+
+
+def ordered(orderme):
+ """Converts the provided dictionary into a collections.OrderedDict.
+
+ The items in the returned OrderedDict will be inserted based on the
+ natural sort order of the keys. Nested dictionaries will also be sorted
+ in order to ensure fully predictable ordering.
+
+ :param orderme: the dict to order
+ :return: collections.OrderedDict
+ :raises: ValueError: if `orderme` isn't a dict instance.
+ """
+ if not isinstance(orderme, dict):
+ raise ValueError('argument must be a dict type')
+
+ result = OrderedDict()
+ for k, v in sorted(orderme.items(), key=lambda x: x[0]):
+ if isinstance(v, dict):
+ result[k] = ordered(v)
+ else:
+ result[k] = v
+
+ return result
+
+
+def config_flags_parser(config_flags):
+ """Parses config flags string into dict.
+
+ This parsing method supports a few different formats for the config
+ flag values to be parsed:
+
+ 1. A string in the simple format of key=value pairs, with the possibility
+ of specifying multiple key value pairs within the same string. For
+ example, a string in the format of 'key1=value1, key2=value2' will
+ return a dict of:
+
+ {'key1': 'value1', 'key2': 'value2'}.
+
+ 2. A string in the above format, but supporting a comma-delimited list
+ of values for the same key. For example, a string in the format of
+ 'key1=value1, key2=value3,value4,value5' will return a dict of:
+
+ {'key1': 'value1', 'key2': 'value3,value4,value5'}
+
+ 3. A string containing a colon character (:) prior to an equal
+ character (=) will be treated as yaml and parsed as such. This can be
+ used to specify more complex key value pairs. For example,
+ a string in the format of 'key1: subkey1=value1, subkey2=value2' will
+ return a dict of:
+
+ {'key1': 'subkey1=value1, subkey2=value2'}
+
+ The provided config_flags string may be a list of comma-separated values
+ which themselves may be comma-separated lists of values.
+ """
+ # If we find a colon before an equals sign then treat it as yaml.
+ # Note: limit it to finding the colon first since this indicates assignment
+ # for inline yaml.
+ colon = config_flags.find(':')
+ equals = config_flags.find('=')
+ if colon > 0:
+ if colon < equals or equals < 0:
+ return ordered(yaml.safe_load(config_flags))
+
+ if config_flags.find('==') >= 0:
+ juju_log("config_flags is not in expected format (key=value)",
+ level=ERROR)
+ raise OSContextError
+
+ # strip the following from each value.
+ post_strippers = ' ,'
+ # we strip any leading/trailing '=' or ' ' from the string then
+ # split on '='.
+ split = config_flags.strip(' =').split('=')
+ limit = len(split)
+ flags = OrderedDict()
+ for i in range(0, limit - 1):
+ current = split[i]
+ next = split[i + 1]
+ vindex = next.rfind(',')
+ if (i == limit - 2) or (vindex < 0):
+ value = next
+ else:
+ value = next[:vindex]
+
+ if i == 0:
+ key = current
+ else:
+ # if this is not the first entry, expect an embedded key.
+ index = current.rfind(',')
+ if index < 0:
+ juju_log("Invalid config value(s) at index %s" % (i),
+ level=ERROR)
+ raise OSContextError
+ key = current[index + 1:]
+
+ # Add to collection.
+ flags[key.strip(post_strippers)] = value.rstrip(post_strippers)
+
+ return flags
+
+
+def os_application_version_set(package):
+ '''Set version of application for Juju 2.0 and later'''
+ application_version = get_upstream_version(package)
+ # NOTE(jamespage) if not able to figure out package version, fallback to
+ # openstack codename version detection.
+ if not application_version:
+ application_version_set(os_release(package))
+ else:
+ application_version_set(application_version)
+
+
+def os_application_status_set(check_function):
+ """Run the supplied function and set the application status accordingly.
+
+ :param check_function: Function to run to get app states and messages.
+ :type check_function: function + """ + state, message = check_function() + status_set(state, message, application=True) + + +def enable_memcache(source=None, release=None, package=None): + """Determine if memcache should be enabled on the local unit + + @param release: release of OpenStack currently deployed + @param package: package to derive OpenStack version deployed + @returns boolean Whether memcache should be enabled + """ + _release = None + if release: + _release = release + else: + _release = os_release(package) + if not _release: + _release = get_os_codename_install_source(source) + + return CompareOpenStackReleases(_release) >= 'mitaka' + + +def token_cache_pkgs(source=None, release=None): + """Determine additional packages needed for token caching + + @param source: source string for charm + @param release: release of OpenStack currently deployed + @returns List of package to enable token caching + """ + packages = [] + if enable_memcache(source=source, release=release): + packages.extend(['memcached', 'python-memcache']) + return packages + + +def update_json_file(filename, items): + """Updates the json `filename` with a given dict. + :param filename: path to json file (e.g. /etc/glance/policy.json) + :param items: dict of items to update + """ + if not items: + return + + with open(filename) as fd: + policy = json.load(fd) + + # Compare before and after and if nothing has changed don't write the file + # since that could cause unnecessary service restarts. + before = json.dumps(policy, indent=4, sort_keys=True) + policy.update(items) + after = json.dumps(policy, indent=4, sort_keys=True) + if before == after: + return + + with open(filename, "w") as fd: + fd.write(after) + + +@cached +def snap_install_requested(): + """ Determine if installing from snaps + + If openstack-origin is of the form snap:track/channel[/branch] + and channel is in SNAPS_CHANNELS return True. + """ + origin = config('openstack-origin') or "" + if not origin.startswith('snap:'): + return False + + _src = origin[5:] + if '/' in _src: + channel = _src.split('/')[1] + else: + # Handle snap:track with no channel + channel = 'stable' + return valid_snap_channel(channel) + + +def get_snaps_install_info_from_origin(snaps, src, mode='classic'): + """Generate a dictionary of snap install information from origin + + @param snaps: List of snaps + @param src: String of openstack-origin or source of the form + snap:track/channel + @param mode: String classic, devmode or jailmode + @returns: Dictionary of snaps with channels and modes + """ + + if not src.startswith('snap:'): + juju_log("Snap source is not a snap origin", 'WARN') + return {} + + _src = src[5:] + channel = '--channel={}'.format(_src) + + return {snap: {'channel': channel, 'mode': mode} + for snap in snaps} + + +def install_os_snaps(snaps, refresh=False): + """Install OpenStack snaps from channel and with mode + + @param snaps: Dictionary of snaps with channels and modes of the form: + {'snap_name': {'channel': 'snap_channel', + 'mode': 'snap_mode'}} + Where channel is a snapstore channel and mode is --classic, --devmode + or --jailmode. 
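+ For example (an illustrative snap name and channel):
+ {'openstackclients': {'channel': '--channel=latest/stable',
+ 'mode': '--classic'}}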
+ @param refresh: If True, refresh the snaps (snap_refresh) instead of
+ installing them (snap_install).
+ """
+
+ def _ensure_flag(flag):
+ if flag.startswith('--'):
+ return flag
+ return '--{}'.format(flag)
+
+ if refresh:
+ for snap in snaps.keys():
+ snap_refresh(snap,
+ _ensure_flag(snaps[snap]['channel']),
+ _ensure_flag(snaps[snap]['mode']))
+ else:
+ for snap in snaps.keys():
+ snap_install(snap,
+ _ensure_flag(snaps[snap]['channel']),
+ _ensure_flag(snaps[snap]['mode']))
+
+
+def set_unit_upgrading():
+ """Set the unit to an upgrading state in the local kv() store.
+ """
+ with unitdata.HookData()() as t:
+ kv = t[0]
+ kv.set('unit-upgrading', True)
+
+
+def clear_unit_upgrading():
+ """Clear the unit from an upgrading state in the local kv() store
+ """
+ with unitdata.HookData()() as t:
+ kv = t[0]
+ kv.set('unit-upgrading', False)
+
+
+def is_unit_upgrading_set():
+ """Return the state of the kv().get('unit-upgrading').
+
+ To help with units that don't have HookData() (testing)
+ if it raises an exception, return False
+ """
+ try:
+ with unitdata.HookData()() as t:
+ kv = t[0]
+ # transform something truth-y into a Boolean.
+ return not (not (kv.get('unit-upgrading')))
+ except Exception:
+ return False
+
+
+def series_upgrade_prepare(pause_unit_helper=None, configs=None):
+ """ Run common series upgrade prepare tasks.
+
+ :param pause_unit_helper: function: Function to pause unit
+ :param configs: OSConfigRenderer object: Configurations
+ :returns None:
+ """
+ set_unit_upgrading()
+ if pause_unit_helper and configs:
+ if not is_unit_paused_set():
+ pause_unit_helper(configs)
+
+
+def series_upgrade_complete(resume_unit_helper=None, configs=None):
+ """ Run common series upgrade complete tasks.
+
+ :param resume_unit_helper: function: Function to resume unit
+ :param configs: OSConfigRenderer object: Configurations
+ :returns None:
+ """
+ clear_unit_paused()
+ clear_unit_upgrading()
+ if configs:
+ configs.write_all()
+ if resume_unit_helper:
+ resume_unit_helper(configs)
+
+
+def is_db_initialised():
+ """Check leader storage to see if database has been initialised.
+
+ :returns: Whether DB has been initialised
+ :rtype: bool
+ """
+ db_initialised = None
+ if leader_get('db-initialised') is None:
+ juju_log(
+ 'db-initialised key missing, assuming db is not initialised',
+ 'DEBUG')
+ db_initialised = False
+ else:
+ db_initialised = bool_from_string(leader_get('db-initialised'))
+ juju_log('Database initialised: {}'.format(db_initialised), 'DEBUG')
+ return db_initialised
+
+
+def set_db_initialised():
+ """Add flag to leader storage to indicate database has been initialised.
+ """
+ juju_log('Setting db-initialised to True', 'DEBUG')
+ leader_set({'db-initialised': True})
+
+
+def is_db_maintenance_mode(relid=None):
+ """Check relation data from notifications of db in maintenance mode.
+
+ :returns: Whether db has notified it is in maintenance mode.
+ :rtype: bool + """ + juju_log('Checking for maintenance notifications', 'DEBUG') + if relid: + r_ids = [relid] + else: + r_ids = relation_ids('shared-db') + rids_units = [(r, u) for r in r_ids for u in related_units(r)] + notifications = [] + for r_id, unit in rids_units: + settings = relation_get(unit=unit, rid=r_id) + for key, value in settings.items(): + if value and key in DB_MAINTENANCE_KEYS: + juju_log( + 'Unit: {}, Key: {}, Value: {}'.format(unit, key, value), + 'DEBUG') + try: + notifications.append(bool_from_string(value)) + except ValueError: + juju_log( + 'Could not discern bool from {}'.format(value), + 'WARN') + pass + return True in notifications + + +@cached +def container_scoped_relations(): + """Get all the container scoped relations + + :returns: List of relation names + :rtype: List + """ + md = metadata() + relations = [] + for relation_type in ('provides', 'requires', 'peers'): + for relation in md.get(relation_type, []): + if md[relation_type][relation].get('scope') == 'container': + relations.append(relation) + return relations + + +def container_scoped_relation_get(attribute=None): + """Get relation data from all container scoped relations. + + :param attribute: Name of attribute to get + :type attribute: Optional[str] + :returns: Iterator with relation data + :rtype: Iterator[Optional[any]] + """ + for endpoint_name in container_scoped_relations(): + for rid in relation_ids(endpoint_name): + for unit in related_units(rid): + yield relation_get( + attribute=attribute, + unit=unit, + rid=rid) + + +def is_db_ready(use_current_context=False, rel_name=None): + """Check remote database is ready to be used. + + Database relations are expected to provide a list of 'allowed' units to + confirm that the database is ready for use by those units. + + If db relation has provided this information and local unit is a member, + returns True otherwise False. + + :param use_current_context: Whether to limit checks to current hook + context. + :type use_current_context: bool + :param rel_name: Name of relation to check + :type rel_name: string + :returns: Whether remote db is ready. + :rtype: bool + :raises: Exception + """ + key = 'allowed_units' + + rel_name = rel_name or 'shared-db' + this_unit = local_unit() + + if use_current_context: + if relation_id() in relation_ids(rel_name): + rids_units = [(None, None)] + else: + raise Exception("use_current_context=True but not in {} " + "rel hook contexts (currently in {})." + .format(rel_name, relation_id())) + else: + rids_units = [(r_id, u) + for r_id in relation_ids(rel_name) + for u in related_units(r_id)] + + for rid, unit in rids_units: + allowed_units = relation_get(rid=rid, unit=unit, attribute=key) + if allowed_units and this_unit in allowed_units.split(): + juju_log("This unit ({}) is in allowed unit list from {}".format( + this_unit, + unit), 'DEBUG') + return True + + juju_log("This unit was not found in any allowed unit list") + return False + + +def is_expected_scale(peer_relation_name='cluster'): + """Query juju goal-state to determine whether our peer- and dependency- + relations are at the expected scale. + + Useful for deferring per unit per relation housekeeping work until we are + ready to complete it successfully and without unnecessary repetiton. + + Always returns True if version of juju used does not support goal-state. 
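+
+ A sketch of typical use (hypothetical charm code):
+
+ if is_expected_scale():
+ bootstrap_cluster()
+ else:
+ status_set('waiting', 'Waiting for peers and relations')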
+ + :param peer_relation_name: Name of peer relation + :type rel_name: string + :returns: True or False + :rtype: bool + """ + def _get_relation_id(rel_type): + return next((rid for rid in relation_ids(reltype=rel_type)), None) + + Relation = namedtuple('Relation', 'rel_type rel_id') + peer_rid = _get_relation_id(peer_relation_name) + # Units with no peers should still have a peer relation. + if not peer_rid: + juju_log('Not at expected scale, no peer relation found', 'DEBUG') + return False + expected_relations = [ + Relation(rel_type='shared-db', rel_id=_get_relation_id('shared-db'))] + if expect_ha(): + expected_relations.append( + Relation( + rel_type='ha', + rel_id=_get_relation_id('ha'))) + juju_log( + 'Checking scale of {} relations'.format( + ','.join([r.rel_type for r in expected_relations])), + 'DEBUG') + try: + if (len(related_units(relid=peer_rid)) < + len(list(expected_peer_units()))): + return False + for rel in expected_relations: + if not rel.rel_id: + juju_log( + 'Expected to find {} relation, but it is missing'.format( + rel.rel_type), + 'DEBUG') + return False + # Goal state returns every unit even for container scoped + # relations but the charm only ever has a relation with + # the local unit. + if rel.rel_type in container_scoped_relations(): + expected_count = 1 + else: + expected_count = len( + list(expected_related_units(reltype=rel.rel_type))) + if len(related_units(relid=rel.rel_id)) < expected_count: + juju_log( + ('Not at expected scale, not enough units on {} ' + 'relation'.format(rel.rel_type)), + 'DEBUG') + return False + except NotImplementedError: + return True + juju_log('All checks have passed, unit is at expected scale', 'DEBUG') + return True + + +def get_peer_key(unit_name): + """Get the peer key for this unit. + + The peer key is the key a unit uses to publish its status down the peer + relation + + :param unit_name: Name of unit + :type unit_name: string + :returns: Peer key for given unit + :rtype: string + """ + return 'unit-state-{}'.format(unit_name.replace('/', '-')) + + +UNIT_READY = 'READY' +UNIT_NOTREADY = 'NOTREADY' +UNIT_UNKNOWN = 'UNKNOWN' +UNIT_STATES = [UNIT_READY, UNIT_NOTREADY, UNIT_UNKNOWN] + + +def inform_peers_unit_state(state, relation_name='cluster'): + """Inform peers of the state of this unit. + + :param state: State of unit to publish + :type state: string + :param relation_name: Name of relation to publish state on + :type relation_name: string + """ + if state not in UNIT_STATES: + raise ValueError( + "Setting invalid state {} for unit".format(state)) + this_unit = local_unit() + for r_id in relation_ids(relation_name): + juju_log('Telling peer behind relation {} that {} is {}'.format( + r_id, this_unit, state), 'DEBUG') + relation_set(relation_id=r_id, + relation_settings={ + get_peer_key(this_unit): state}) + + +def get_peers_unit_state(relation_name='cluster'): + """Get the state of all peers. + + :param relation_name: Name of relation to check peers on. + :type relation_name: string + :returns: Unit states keyed on unit name. 
+ :rtype: dict + :raises: ValueError + """ + r_ids = relation_ids(relation_name) + rids_units = [(r, u) for r in r_ids for u in related_units(r)] + unit_states = {} + for r_id, unit in rids_units: + settings = relation_get(unit=unit, rid=r_id) + unit_states[unit] = settings.get(get_peer_key(unit), UNIT_UNKNOWN) + if unit_states[unit] not in UNIT_STATES: + raise ValueError( + "Unit in unknown state {}".format(unit_states[unit])) + return unit_states + + +def are_peers_ready(relation_name='cluster'): + """Check if all peers are ready. + + :param relation_name: Name of relation to check peers on. + :type relation_name: string + :returns: Whether all units are ready. + :rtype: bool + """ + unit_states = get_peers_unit_state(relation_name).values() + juju_log('{} peers are in the following states: {}'.format( + relation_name, unit_states), 'DEBUG') + return all(state == UNIT_READY for state in unit_states) + + +def inform_peers_if_ready(check_unit_ready_func, relation_name='cluster'): + """Inform peers if this unit is ready. + + The check function should return a tuple (state, message). A state + of 'READY' indicates the unit is READY. + + :param check_unit_ready_func: Function to run to check readiness + :type check_unit_ready_func: function + :param relation_name: Name of relation to check peers on. + :type relation_name: string + """ + unit_ready, msg = check_unit_ready_func() + if unit_ready: + state = UNIT_READY + else: + state = UNIT_NOTREADY + juju_log('Telling peers this unit is: {}'.format(state), 'DEBUG') + inform_peers_unit_state(state, relation_name) + + +def check_api_unit_ready(check_db_ready=True): + """Check if this unit is ready. + + :param check_db_ready: Include checks of database readiness. + :type check_db_ready: bool + :returns: Whether unit state is ready and status message + :rtype: (bool, str) + """ + unit_state, msg = get_api_unit_status(check_db_ready=check_db_ready) + return unit_state == WORKLOAD_STATES.ACTIVE, msg + + +def get_api_unit_status(check_db_ready=True): + """Return a workload status and message for this unit. + + :param check_db_ready: Include checks of database readiness. + :type check_db_ready: bool + :returns: Workload state and message + :rtype: (bool, str) + """ + unit_state = WORKLOAD_STATES.ACTIVE + msg = 'Unit is ready' + if is_db_maintenance_mode(): + unit_state = WORKLOAD_STATES.MAINTENANCE + msg = 'Database in maintenance mode.' + elif is_unit_paused_set(): + unit_state = WORKLOAD_STATES.BLOCKED + msg = 'Unit paused.' + elif check_db_ready and not is_db_ready(): + unit_state = WORKLOAD_STATES.WAITING + msg = 'Allowed_units list provided but this unit not present' + elif not is_db_initialised(): + unit_state = WORKLOAD_STATES.WAITING + msg = 'Database not initialised' + elif not is_expected_scale(): + unit_state = WORKLOAD_STATES.WAITING + msg = 'Charm and its dependencies not yet at expected scale' + juju_log(msg, 'DEBUG') + return unit_state, msg + + +def check_api_application_ready(): + """Check if this application is ready. + + :returns: Whether application state is ready and status message + :rtype: (bool, str) + """ + app_state, msg = get_api_application_status() + return app_state == WORKLOAD_STATES.ACTIVE, msg + + +def get_api_application_status(): + """Return a workload status and message for this application. 
+
+ :returns: Workload state and message
+ :rtype: (WORKLOAD_STATES, str)
+ """
+ app_state, msg = get_api_unit_status()
+ if app_state == WORKLOAD_STATES.ACTIVE:
+ if are_peers_ready():
+ msg = 'Application Ready'
+ else:
+ app_state = WORKLOAD_STATES.WAITING
+ msg = 'Some units are not ready'
+ juju_log(msg, 'DEBUG')
+ return app_state, msg
+
+
+def sequence_status_check_functions(*functions):
+ """Sequence the functions passed so that they all get a chance to run as
+ the charm status check functions.
+
+ :param *functions: a list of functions that return (state, message)
+ :type *functions: List[Callable[[OSConfigRenderer], (str, str)]]
+ :returns: the Callable that takes configs and returns (state, message)
+ :rtype: Callable[[OSConfigRenderer], (str, str)]
+ """
+ def _inner_sequenced_functions(configs):
+ state, message = 'unknown', ''
+ for f in functions:
+ new_state, new_message = f(configs)
+ state = workload_state_compare(state, new_state)
+ if message:
+ message = "{}, {}".format(message, new_message)
+ else:
+ message = new_message
+ return state, message
+
+ return _inner_sequenced_functions
+
+
+SubordinatePackages = namedtuple('SubordinatePackages', ['install', 'purge'])
+
+
+def get_subordinate_release_packages(os_release, package_type='deb'):
+ """Iterate over subordinate relations and get package information.
+
+ :param os_release: OpenStack release to look for
+ :type os_release: str
+ :param package_type: Package type (one of 'deb' or 'snap')
+ :type package_type: str
+ :returns: Packages to install and packages to purge or None
+ :rtype: SubordinatePackages[set,set]
+ """
+ install = set()
+ purge = set()
+
+ for rdata in container_scoped_relation_get('releases-packages-map'):
+ rp_map = json.loads(rdata or '{}')
+ # The map provided by subordinate has OpenStack release name as key.
+ # Find package information from subordinate matching requested release
+ # or the most recent release prior to requested release by sorting the
+ # keys in reverse order. This follows established patterns in our
+ # charms for templates and reactive charm implementations, i.e. as long
+ # as nothing has changed the definitions for the prior OpenStack
+ # release are still valid.
+ for release in sorted(rp_map.keys(), reverse=True):
+ if (CompareOpenStackReleases(release) <= os_release and
+ package_type in rp_map[release]):
+ for name, container in (
+ ('install', install),
+ ('purge', purge)):
+ for pkg in rp_map[release][package_type].get(name, []):
+ container.add(pkg)
+ break
+ return SubordinatePackages(install, purge)
+
+
+def get_subordinate_services():
+ """Iterate over subordinate relations and get service information.
+
+ In a similar fashion to get_subordinate_release_packages(),
+ principal charms can retrieve a list of services advertised by their
+ subordinate charms. This is useful to know about subordinate services when
+ pausing, resuming or upgrading a principal unit.
+
+ :returns: Name of all services advertised by all subordinates
+ :rtype: Set[str]
+ """
+ services = set()
+ for rdata in container_scoped_relation_get('services'):
+ services |= set(json.loads(rdata or '[]'))
+ return services
+
+
+os_restart_on_change = partial(
+ pausable_restart_on_change,
+ can_restart_now_f=deferred_events.check_and_record_restart_request,
+ post_svc_restart_f=deferred_events.process_svc_restart)
+
+
+def restart_services_action_helper(all_services):
+ """Helper to run the restart-services action.
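+
+ Operators would typically invoke this via the charm action, for
+ example (the application name is a placeholder):
+
+ juju run-action --wait my-app/0 restart-services deferred-only=true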
+ + NOTE: all_services is all services that could be restarted but + depending on the action arguments it may be a subset of + these that are actually restarted. + + :param all_services: All services that could be restarted + :type all_services: List[str] + """ + deferred_only = action_get("deferred-only") + services = action_get("services") + if services: + services = services.split() + else: + services = all_services + if deferred_only: + restart_services_action(deferred_only=True) + else: + restart_services_action(services=services) + + +def show_deferred_events_action_helper(): + """Helper to run the show-deferred-restarts action.""" + restarts = [] + for event in deferred_events.get_deferred_events(): + restarts.append('{} {} {}'.format( + str(event.timestamp), + event.service.ljust(40), + event.reason)) + restarts.sort() + output = { + 'restarts': restarts, + 'hooks': deferred_events.get_deferred_hooks()} + action_set({'output': "{}".format( + yaml.dump(output, default_flow_style=False))}) diff --git a/ceph-proxy/charmhelpers/contrib/python.py b/ceph-proxy/charmhelpers/contrib/python.py new file mode 100644 index 00000000..fcded680 --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/python.py @@ -0,0 +1,19 @@ +# Copyright 2014-2019 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# deprecated aliases for backwards compatibility +from charmhelpers.fetch.python import debug # noqa +from charmhelpers.fetch.python import packages # noqa +from charmhelpers.fetch.python import rpdb # noqa +from charmhelpers.fetch.python import version # noqa diff --git a/ceph-proxy/charmhelpers/contrib/storage/__init__.py b/ceph-proxy/charmhelpers/contrib/storage/__init__.py new file mode 100644 index 00000000..d7567b86 --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/storage/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-proxy/charmhelpers/contrib/storage/linux/__init__.py b/ceph-proxy/charmhelpers/contrib/storage/linux/__init__.py new file mode 100644 index 00000000..d7567b86 --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/storage/linux/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-proxy/charmhelpers/contrib/storage/linux/ceph.py b/ceph-proxy/charmhelpers/contrib/storage/linux/ceph.py new file mode 100644 index 00000000..6ec67cba --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/storage/linux/ceph.py @@ -0,0 +1,2401 @@ +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This file is sourced from lp:openstack-charm-helpers +# +# Authors: +# James Page +# Adam Gandelman +# + +import collections +import errno +import hashlib +import math + +import os +import shutil +import json +import time + +from subprocess import ( + check_call, + check_output, + CalledProcessError, +) +from charmhelpers import deprecate +from charmhelpers.core.hookenv import ( + application_name, + config, + service_name, + local_unit, + relation_get, + relation_ids, + relation_set, + related_units, + log, + DEBUG, + INFO, + WARNING, + ERROR, +) +from charmhelpers.core.host import ( + mount, + mounts, + service_start, + service_stop, + service_running, + umount, + cmp_pkgrevno, +) +from charmhelpers.fetch import ( + apt_install, +) +from charmhelpers.core.unitdata import kv + +from charmhelpers.core.kernel import modprobe +from charmhelpers.contrib.openstack.utils import config_flags_parser + +KEYRING = '/etc/ceph/ceph.client.{}.keyring' +KEYFILE = '/etc/ceph/ceph.client.{}.key' + +CEPH_CONF = """[global] +auth supported = {auth} +keyring = {keyring} +mon host = {mon_hosts} +log to syslog = {use_syslog} +err to syslog = {use_syslog} +clog to syslog = {use_syslog} +""" + +# The number of placement groups per OSD to target for placement group +# calculations. This number is chosen as 100 due to the ceph PG Calc +# documentation recommending to choose 100 for clusters which are not +# expected to increase in the foreseeable future. Since the majority of the +# calculations are done on deployment, target the case of non-expanding +# clusters as the default. 
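+#
+# As a rough, illustrative example (hypothetical numbers): a cluster of
+# 10 OSDs targeting 100 PGs per OSD has a budget of about 1000 PG
+# placements across all pools; each pool's share of that budget is then
+# weighted by its expected data percentage and divided by its replica
+# count, as implemented in BasePool.get_pgs() below.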
+DEFAULT_PGS_PER_OSD_TARGET = 100 +DEFAULT_POOL_WEIGHT = 10.0 +LEGACY_PG_COUNT = 200 +DEFAULT_MINIMUM_PGS = 2 +AUTOSCALER_DEFAULT_PGS = 32 + + +class OsdPostUpgradeError(Exception): + """Error class for OSD post-upgrade operations.""" + pass + + +class OSDSettingConflict(Exception): + """Error class for conflicting osd setting requests.""" + pass + + +class OSDSettingNotAllowed(Exception): + """Error class for a disallowed setting.""" + pass + + +OSD_SETTING_EXCEPTIONS = (OSDSettingConflict, OSDSettingNotAllowed) + +OSD_SETTING_WHITELIST = [ + 'osd heartbeat grace', + 'osd heartbeat interval', +] + + +def _order_dict_by_key(rdict): + """Convert a dictionary into an OrderedDict sorted by key. + + :param rdict: Dictionary to be ordered. + :type rdict: dict + :returns: Ordered Dictionary. + :rtype: collections.OrderedDict + """ + return collections.OrderedDict(sorted(rdict.items(), key=lambda k: k[0])) + + +def get_osd_settings(relation_name): + """Consolidate requested osd settings from all clients. + + Consolidate requested osd settings from all clients. Check that the + requested setting is on the whitelist and it does not conflict with + any other requested settings. + + :returns: Dictionary of settings + :rtype: dict + + :raises: OSDSettingNotAllowed + :raises: OSDSettingConflict + """ + rel_ids = relation_ids(relation_name) + osd_settings = {} + for relid in rel_ids: + for unit in related_units(relid): + unit_settings = relation_get('osd-settings', unit, relid) or '{}' + unit_settings = json.loads(unit_settings) + for key, value in unit_settings.items(): + if key not in OSD_SETTING_WHITELIST: + msg = 'Illegal settings "{}"'.format(key) + raise OSDSettingNotAllowed(msg) + if key in osd_settings: + if osd_settings[key] != unit_settings[key]: + msg = 'Conflicting settings for "{}"'.format(key) + raise OSDSettingConflict(msg) + else: + osd_settings[key] = value + return _order_dict_by_key(osd_settings) + + +def send_application_name(relid=None, app_name=None): + """Send the application name down the relation. + + :param relid: Relation id to set application name in. + :type relid: str + :param app_name: Application name to send in the relation. + :type app_name: str + """ + if app_name is None: + app_name = application_name() + relation_set( + relation_id=relid, + relation_settings={'application-name': app_name}) + + +def send_osd_settings(): + """Pass on requested OSD settings to osd units.""" + try: + settings = get_osd_settings('client') + except OSD_SETTING_EXCEPTIONS as e: + # There is a problem with the settings, not passing them on. Update + # status will notify the user. + log(e, level=ERROR) + return + data = { + 'osd-settings': json.dumps(settings, sort_keys=True)} + for relid in relation_ids('osd'): + relation_set(relation_id=relid, + relation_settings=data) + + +def validator(value, valid_type, valid_range=None): + """Helper function for type validation. + + Used to validate these: + https://docs.ceph.com/docs/master/rados/operations/pools/#set-pool-values + https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#inline-compression + + Example input: + validator(value=1, + valid_type=int, + valid_range=[0, 2]) + + This says I'm testing value=1. It must be an int inclusive in [0,2] + + :param value: The value to validate. + :type value: any + :param valid_type: The type that value should be. + :type valid_type: any + :param valid_range: A range of values that value can assume. 
+    :type valid_range: Optional[Union[List,Tuple]]
+    :raises: AssertionError, ValueError
+    """
+    assert isinstance(value, valid_type), (
+        "{} is not a {}".format(value, valid_type))
+    if valid_range is not None:
+        assert isinstance(
+            valid_range, list) or isinstance(valid_range, tuple), (
+                "valid_range must be of type List or Tuple, "
+                "was given {} of type {}"
+                .format(valid_range, type(valid_range)))
+        # If we're dealing with strings
+        if isinstance(value, str):
+            assert value in valid_range, (
+                "{} is not in the list {}".format(value, valid_range))
+        # Integer, float should have a min and max
+        else:
+            if len(valid_range) != 2:
+                raise ValueError(
+                    "Invalid valid_range list of {} for {}. "
+                    "List must be [min,max]".format(valid_range, value))
+            assert value >= valid_range[0], (
+                "{} is less than minimum allowed value of {}"
+                .format(value, valid_range[0]))
+            assert value <= valid_range[1], (
+                "{} is greater than maximum allowed value of {}"
+                .format(value, valid_range[1]))
+
+
+class PoolCreationError(Exception):
+    """A custom exception to inform the caller that a pool creation failed.
+
+    Provides an error message
+    """
+
+    def __init__(self, message):
+        super(PoolCreationError, self).__init__(message)
+
+
+class BasePool(object):
+    """An object oriented approach to Ceph pool creation.
+
+    This base class is inherited by ReplicatedPool and ErasurePool. Do not
+    call create() on this base class as it will raise an exception.
+
+    Instantiate a child class and call create().
+    """
+    # Dictionary that maps pool operation properties to Tuples with valid type
+    # and valid range
+    op_validation_map = {
+        'compression-algorithm': (str, ('lz4', 'snappy', 'zlib', 'zstd')),
+        'compression-mode': (str, ('none', 'passive', 'aggressive', 'force')),
+        'compression-required-ratio': (float, None),
+        'compression-min-blob-size': (int, None),
+        'compression-min-blob-size-hdd': (int, None),
+        'compression-min-blob-size-ssd': (int, None),
+        'compression-max-blob-size': (int, None),
+        'compression-max-blob-size-hdd': (int, None),
+        'compression-max-blob-size-ssd': (int, None),
+        'rbd-mirroring-mode': (str, ('image', 'pool'))
+    }
+
+    def __init__(self, service, name=None, percent_data=None, app_name=None,
+                 op=None):
+        """Initialize BasePool object.
+
+        Pool information is either initialized from individual keyword
+        arguments or from an individual CephBrokerRq operation Dict.
+
+        :param service: The Ceph user name to run commands under.
+        :type service: str
+        :param name: Name of pool to operate on.
+        :type name: str
+        :param percent_data: The expected pool size in relation to all
+                             available resources in the Ceph cluster. Will be
+                             used to set the ``target_size_ratio`` pool
+                             property. (default: 10.0)
+        :type percent_data: Optional[float]
+        :param app_name: Ceph application name, usually one of:
+                         ('cephfs', 'rbd', 'rgw') (default: 'unknown')
+        :type app_name: Optional[str]
+        :param op: Broker request Op to compile pool data from.
+        :type op: Optional[Dict[str,any]]
+        :raises: KeyError
+        """
+        # NOTE: Do not perform initialization steps that require live data
+        # from a running cluster here. The *Pool classes may be used for
+        # validation.
+        self.service = service
+        self.op = op or {}
+
+        if op:
+            # When initializing from op the `name` attribute is required and
+            # we will fail with KeyError if it is not provided.
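+            # An op dict typically looks like this (illustrative values
+            # only; the keys are those built by the add_op_create_* helpers
+            # of CephBrokerRq further down in this module):
+            #     {'op': 'create-pool', 'name': 'mypool', 'replicas': 3,
+            #      'weight': 10.0, 'app-name': 'rbd', ...}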
+            self.name = op['name']
+            self.percent_data = op.get('weight')
+            self.app_name = op.get('app-name')
+        else:
+            self.name = name
+            self.percent_data = percent_data
+            self.app_name = app_name
+
+        # Set defaults for these if they are not provided
+        self.percent_data = self.percent_data or 10.0
+        self.app_name = self.app_name or 'unknown'
+
+    def validate(self):
+        """Check that the values of the supplied operation parameters are
+        valid.
+
+        :raises: ValueError
+        """
+        for op_key, op_value in self.op.items():
+            if op_key in self.op_validation_map and op_value is not None:
+                valid_type, valid_range = self.op_validation_map[op_key]
+                try:
+                    validator(op_value, valid_type, valid_range)
+                except (AssertionError, ValueError) as e:
+                    # Normalize on ValueError, also add information about
+                    # which variable we had an issue with.
+                    raise ValueError("'{}': {}".format(op_key, str(e)))
+
+    def _create(self):
+        """Perform the pool creation; this method MUST be overridden by a
+        child class.
+        """
+        raise NotImplementedError
+
+    def _post_create(self):
+        """Perform common post pool creation tasks.
+
+        Note that pool properties subject to change during the lifetime of a
+        pool / deployment should go into the ``update`` method.
+
+        Do not add calls for a specific pool type here, those should go into
+        one of the pool specific classes.
+        """
+        nautilus_or_later = cmp_pkgrevno('ceph-common', '14.2.0') >= 0
+        if nautilus_or_later:
+            # Ensure we set the expected pool ratio
+            update_pool(
+                client=self.service,
+                pool=self.name,
+                settings={
+                    'target_size_ratio': str(
+                        self.percent_data / 100.0),
+                })
+        try:
+            set_app_name_for_pool(client=self.service,
+                                  pool=self.name,
+                                  name=self.app_name)
+        except CalledProcessError:
+            log('Could not set app name for pool {}'
+                .format(self.name),
+                level=WARNING)
+        if 'pg_autoscaler' in enabled_manager_modules():
+            try:
+                enable_pg_autoscale(self.service, self.name)
+            except CalledProcessError as e:
+                log('Could not configure auto scaling for pool {}: {}'
+                    .format(self.name, e),
+                    level=WARNING)
+
+    def create(self):
+        """Create pool and perform any post pool creation tasks.
+
+        To allow for sharing of common code among pool specific classes the
+        processing has been broken out into the private methods ``_create``
+        and ``_post_create``.
+
+        Do not add any pool type specific handling here, that should go into
+        one of the pool specific classes.
+        """
+        if not pool_exists(self.service, self.name):
+            self.validate()
+            self._create()
+            self._post_create()
+            self.update()
+
+    def set_quota(self):
+        """Set a quota if requested.
+
+        :raises: CalledProcessError
+        """
+        max_bytes = self.op.get('max-bytes')
+        max_objects = self.op.get('max-objects')
+        if max_bytes or max_objects:
+            set_pool_quota(service=self.service, pool_name=self.name,
+                           max_bytes=max_bytes, max_objects=max_objects)
+
+    def set_compression(self):
+        """Set compression properties if requested.
+
+        :raises: CalledProcessError
+        """
+        compression_properties = {
+            key.replace('-', '_'): value
+            for key, value in self.op.items()
+            if key in (
+                'compression-algorithm',
+                'compression-mode',
+                'compression-required-ratio',
+                'compression-min-blob-size',
+                'compression-min-blob-size-hdd',
+                'compression-min-blob-size-ssd',
+                'compression-max-blob-size',
+                'compression-max-blob-size-hdd',
+                'compression-max-blob-size-ssd') and value}
+        if compression_properties:
+            update_pool(self.service, self.name, compression_properties)
+
+    def update(self):
+        """Update properties for an already existing pool.
+
+        Do not add calls for a specific pool type here, those should go into
+        one of the pool specific classes.
+        """
+        self.validate()
+        self.set_quota()
+        self.set_compression()
+
+    def add_cache_tier(self, cache_pool, mode):
+        """Adds a new cache tier to an existing pool.
+
+        :param cache_pool: The cache tier pool name to add.
+        :type cache_pool: str
+        :param mode: The caching mode to use for this pool.
+                     valid range = ["readonly", "writeback"]
+        :type mode: str
+        """
+        # Check the input types and values
+        validator(value=cache_pool, valid_type=str)
+        validator(
+            value=mode, valid_type=str,
+            valid_range=["readonly", "writeback"])
+
+        check_call([
+            'ceph', '--id', self.service,
+            'osd', 'tier', 'add', self.name, cache_pool,
+        ])
+        check_call([
+            'ceph', '--id', self.service,
+            'osd', 'tier', 'cache-mode', cache_pool, mode,
+        ])
+        check_call([
+            'ceph', '--id', self.service,
+            'osd', 'tier', 'set-overlay', self.name, cache_pool,
+        ])
+        check_call([
+            'ceph', '--id', self.service,
+            'osd', 'pool', 'set', cache_pool, 'hit_set_type', 'bloom',
+        ])
+
+    def remove_cache_tier(self, cache_pool):
+        """Removes a cache tier from Ceph.
+
+        Flushes all dirty objects from writeback pools and waits for that to
+        complete.
+
+        :param cache_pool: The cache tier pool name to remove.
+        :type cache_pool: str
+        """
+        # read-only is easy, writeback is much harder
+        mode = get_cache_mode(self.service, cache_pool)
+        if mode == 'readonly':
+            check_call([
+                'ceph', '--id', self.service,
+                'osd', 'tier', 'cache-mode', cache_pool, 'none'
+            ])
+            check_call([
+                'ceph', '--id', self.service,
+                'osd', 'tier', 'remove', self.name, cache_pool,
+            ])
+
+        elif mode == 'writeback':
+            pool_forward_cmd = ['ceph', '--id', self.service, 'osd', 'tier',
+                                'cache-mode', cache_pool, 'forward']
+            if cmp_pkgrevno('ceph-common', '10.1') >= 0:
+                # Jewel added a mandatory flag
+                pool_forward_cmd.append('--yes-i-really-mean-it')
+
+            check_call(pool_forward_cmd)
+            # Flush the cache and wait for it to return
+            check_call([
+                'rados', '--id', self.service,
+                '-p', cache_pool, 'cache-flush-evict-all'])
+            check_call([
+                'ceph', '--id', self.service,
+                'osd', 'tier', 'remove-overlay', self.name])
+            check_call([
+                'ceph', '--id', self.service,
+                'osd', 'tier', 'remove', self.name, cache_pool])
+
+    def get_pgs(self, pool_size, percent_data=DEFAULT_POOL_WEIGHT,
+                device_class=None):
+        """Return the number of placement groups to use when creating the
+        pool.
+
+        Returns the number of placement groups which should be specified when
+        creating the pool. This is based upon the calculation guidelines
+        provided by the Ceph Placement Group Calculator (located online at
+        http://ceph.com/pgcalc/).
+
+        The number of placement groups is calculated using the following:
+
+            (Target PGs per OSD) * (OSD #) * (%Data)
+            ----------------------------------------
+                         (Pool size)
+
+        Per the upstream guidelines, the OSD # should really be considered
+        based on the number of OSDs which are eligible to be selected by the
+        pool. Since the pool creation doesn't specify any of the CRUSH set
+        rules, the default rule will be dependent upon the type of pool being
+        created (replicated or erasure).
+
+        This code makes no attempt to determine the number of OSDs which can
+        be selected for the specific rule; rather, it is left to the user to
+        tune in the form of the 'expected-osd-count' config option.
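+
+        As a worked example (hypothetical numbers): with the default target
+        of 100 PGs per OSD, 12 eligible OSDs, percent_data=10 and a pool
+        size of 3, the formula yields (100 * 12 * 0.10) // 3 = 40, which
+        the code below then rounds to the nearest power of two (32).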
+
+        :param pool_size: pool_size is either the number of replicas for
+            replicated pools or the K+M sum for erasure coded pools
+        :type pool_size: int
+        :param percent_data: the percentage of data that is expected to
+            be contained in the pool for the specific OSD set. Default value
+            is to assume 10% of the data is for this pool, which is a
+            relatively low % of the data but allows for the pg_num to be
+            increased. NOTE: the default is primarily to handle the scenario
+            where related charms requiring pools have not been upgraded to
+            include an update to indicate their relative usage of the pools.
+        :type percent_data: float
+        :param device_class: class of storage to use for basis of pgs
+            calculation; ceph supports nvme, ssd and hdd by default based
+            on presence of devices of each type in the deployment.
+        :type device_class: str
+        :returns: The number of pgs to use.
+        :rtype: int
+        """
+
+        # Note: This calculation follows the approach that is provided
+        # by the Ceph PG Calculator located at http://ceph.com/pgcalc/.
+        validator(value=pool_size, valid_type=int)
+
+        # Ensure that percent data is set to something - even with a default
+        # it can be set to None, which would wreak havoc below.
+        if percent_data is None:
+            percent_data = DEFAULT_POOL_WEIGHT
+
+        # If the expected-osd-count is specified, then use the max between
+        # the expected-osd-count and the actual osd_count
+        osd_list = get_osds(self.service, device_class)
+        expected = config('expected-osd-count') or 0
+
+        if osd_list:
+            if device_class:
+                osd_count = len(osd_list)
+            else:
+                osd_count = max(expected, len(osd_list))
+
+            # Log a message to provide some insight if the calculations claim
+            # to be off because someone is setting the expected count and
+            # there are more OSDs in reality. Try to make a proper guess
+            # based upon the cluster itself.
+            if not device_class and expected and osd_count != expected:
+                log("Found more OSDs than provided expected count. "
+                    "Using the actual count instead", INFO)
+        elif expected:
+            # Use the expected-osd-count in older ceph versions to allow for
+            # more accurate pg calculations
+            osd_count = expected
+        else:
+            # NOTE(james-page): Default to 200 for older ceph versions
+            # which don't support OSD query from cli
+            return LEGACY_PG_COUNT
+
+        percent_data /= 100.0
+        target_pgs_per_osd = config(
+            'pgs-per-osd') or DEFAULT_PGS_PER_OSD_TARGET
+        num_pg = (target_pgs_per_osd * osd_count * percent_data) // pool_size
+
+        # NOTE: ensure a sane minimum number of PGs otherwise we don't get
+        # any reasonable data distribution in minimal OSD configurations
+        if num_pg < DEFAULT_MINIMUM_PGS:
+            num_pg = DEFAULT_MINIMUM_PGS
+
+        # The CRUSH algorithm has a slight optimization for placement groups
+        # with powers of 2 so find the nearest power of 2. If the nearest
+        # power of 2 is more than 25% below the original value, the next
+        # highest value is used. To do this, find the nearest power of 2
+        # such that 2^n <= num_pg, and check to see if it's within the 25%
+        # tolerance.
+        exponent = math.floor(math.log(num_pg, 2))
+        nearest = 2 ** exponent
+        if (num_pg - nearest) > (num_pg * 0.25):
+            # Choose the next highest power of 2 since the nearest is more
+            # than 25% below the original value.
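+            # For example (illustrative numbers): num_pg=48 gives nearest=32
+            # and 48 - 32 = 16 > 48 * 0.25 = 12, so 64 is used instead.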
+            return int(nearest * 2)
+        else:
+            return int(nearest)
+
+
+class Pool(BasePool):
+    """Compatibility shim for any descendants external to this library."""
+
+    @deprecate(
+        'The ``Pool`` baseclass has been replaced by ``BasePool`` class.')
+    def __init__(self, service, name):
+        super(Pool, self).__init__(service, name=name)
+
+    def create(self):
+        pass
+
+
+class ReplicatedPool(BasePool):
+    def __init__(self, service, name=None, pg_num=None, replicas=None,
+                 percent_data=None, app_name=None, op=None,
+                 profile_name='replicated_rule'):
+        """Initialize ReplicatedPool object.
+
+        Pool information is either initialized from individual keyword
+        arguments or from an individual CephBrokerRq operation Dict.
+
+        Please refer to the docstring of the ``BasePool`` class for
+        documentation of the common parameters.
+
+        :param pg_num: Express wish for number of Placement Groups (this
+            value is subject to validation against a running cluster prior
+            to use to avoid creating a pool with too many PGs)
+        :type pg_num: int
+        :param replicas: Number of copies there should be of each object
+            added to this replicated pool.
+        :type replicas: int
+        :raises: KeyError
+        :param profile_name: Crush Profile to use
+        :type profile_name: Optional[str]
+        """
+        # NOTE: Do not perform initialization steps that require live data
+        # from a running cluster here. The *Pool classes may be used for
+        # validation.
+
+        # The common parameters are handled in our parent's initializer
+        super(ReplicatedPool, self).__init__(
+            service=service, name=name, percent_data=percent_data,
+            app_name=app_name, op=op)
+
+        if op:
+            # When initializing from op `replicas` is a required attribute,
+            # and we will fail with KeyError if it is not provided.
+            self.replicas = op['replicas']
+            self.pg_num = op.get('pg_num')
+            self.profile_name = op.get('crush-profile') or profile_name
+        else:
+            self.replicas = replicas or 2
+            self.pg_num = pg_num
+            self.profile_name = profile_name or 'replicated_rule'
+
+    def _create(self):
+        # Validate if crush profile exists
+        if self.profile_name is None:
+            msg = ("Failed to discover crush profile named "
+                   "{}".format(self.profile_name))
+            log(msg, level=ERROR)
+            raise PoolCreationError(msg)
+
+        # Do extra validation on pg_num with data from live cluster
+        if self.pg_num:
+            # Since the number of placement groups was specified, ensure
+            # that there aren't too many created.
+            max_pgs = self.get_pgs(self.replicas, 100.0)
+            self.pg_num = min(self.pg_num, max_pgs)
+        else:
+            self.pg_num = self.get_pgs(self.replicas, self.percent_data)
+
+        nautilus_or_later = cmp_pkgrevno('ceph-common', '14.2.0') >= 0
+        # Create it
+        if nautilus_or_later:
+            cmd = [
+                'ceph', '--id', self.service, 'osd', 'pool', 'create',
+                '--pg-num-min={}'.format(
+                    min(AUTOSCALER_DEFAULT_PGS, self.pg_num)
+                ),
+                self.name, str(self.pg_num), self.profile_name
+            ]
+        else:
+            cmd = [
+                'ceph', '--id', self.service, 'osd', 'pool', 'create',
+                self.name, str(self.pg_num), self.profile_name
+            ]
+        check_call(cmd)
+
+    def _post_create(self):
+        # Set the pool replica size
+        update_pool(client=self.service,
+                    pool=self.name,
+                    settings={'size': str(self.replicas)})
+        # Perform other common post pool creation tasks
+        super(ReplicatedPool, self)._post_create()
+
+
+class ErasurePool(BasePool):
+    """Default jerasure erasure coded pool."""
+
+    def __init__(self, service, name=None, erasure_code_profile=None,
+                 percent_data=None, app_name=None, op=None,
+                 allow_ec_overwrites=False):
+        """Initialize ErasurePool object.
+
+        Pool information is either initialized from individual keyword
+        arguments or from an individual CephBrokerRq operation Dict.
+
+        Please refer to the docstring of the ``BasePool`` class for
+        documentation of the common parameters.
+
+        :param erasure_code_profile: EC Profile to use (default: 'default')
+        :type erasure_code_profile: Optional[str]
+        """
+        # NOTE: Do not perform initialization steps that require live data
+        # from a running cluster here. The *Pool classes may be used for
+        # validation.
+
+        # The common parameters are handled in our parent's initializer
+        super(ErasurePool, self).__init__(
+            service=service, name=name, percent_data=percent_data,
+            app_name=app_name, op=op)
+
+        if op:
+            # Note that the different default when initializing from op stems
+            # from different handling of this in the `charms.ceph` library.
+            self.erasure_code_profile = op.get('erasure-profile',
+                                               'default-canonical')
+            self.allow_ec_overwrites = op.get('allow-ec-overwrites')
+        else:
+            # We keep the class default when initialized from keyword
+            # arguments to not break the API for any other consumers.
+            self.erasure_code_profile = erasure_code_profile or 'default'
+            self.allow_ec_overwrites = allow_ec_overwrites
+
+    def _create(self):
+        # Try to find the erasure profile information in order to properly
+        # size the number of placement groups. The size of an erasure
+        # coded placement group is calculated as k+m.
+        erasure_profile = get_erasure_profile(self.service,
+                                              self.erasure_code_profile)
+
+        # Check for errors
+        if erasure_profile is None:
+            msg = ("Failed to discover erasure profile named "
+                   "{}".format(self.erasure_code_profile))
+            log(msg, level=ERROR)
+            raise PoolCreationError(msg)
+        if 'k' not in erasure_profile or 'm' not in erasure_profile:
+            # Error
+            msg = ("Unable to find k (data chunks) or m (coding chunks) "
+                   "in erasure profile {}".format(erasure_profile))
+            log(msg, level=ERROR)
+            raise PoolCreationError(msg)
+
+        k = int(erasure_profile['k'])
+        m = int(erasure_profile['m'])
+        pgs = self.get_pgs(k + m, self.percent_data)
+        nautilus_or_later = cmp_pkgrevno('ceph-common', '14.2.0') >= 0
+        # Create it
+        if nautilus_or_later:
+            cmd = [
+                'ceph', '--id', self.service, 'osd', 'pool', 'create',
+                '--pg-num-min={}'.format(
+                    min(AUTOSCALER_DEFAULT_PGS, pgs)
+                ),
+                self.name, str(pgs), str(pgs),
+                'erasure', self.erasure_code_profile
+            ]
+        else:
+            cmd = [
+                'ceph', '--id', self.service, 'osd', 'pool', 'create',
+                self.name, str(pgs), str(pgs),
+                'erasure', self.erasure_code_profile
+            ]
+        check_call(cmd)
+
+    def _post_create(self):
+        super(ErasurePool, self)._post_create()
+        if self.allow_ec_overwrites:
+            update_pool(self.service, self.name,
+                        {'allow_ec_overwrites': 'true'})
+
+
+def enabled_manager_modules():
+    """Return a list of enabled manager modules.
+
+    :rtype: List[str]
+    """
+    cmd = ['ceph', 'mgr', 'module', 'ls']
+    quincy_or_later = cmp_pkgrevno('ceph-common', '17.1.0') >= 0
+    if quincy_or_later:
+        cmd.append('--format=json')
+    try:
+        modules = check_output(cmd).decode('utf-8')
+    except CalledProcessError as e:
+        log("Failed to list ceph modules: {}".format(e), WARNING)
+        return []
+    modules = json.loads(modules)
+    return modules['enabled_modules']
+
+
+def enable_pg_autoscale(service, pool_name):
+    """Enable Ceph's PG autoscaler for the specified pool.
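+
+    This is equivalent to running ``ceph --id <service> osd pool set
+    <pool_name> pg_autoscale_mode on``; note that it assumes the
+    ``pg_autoscaler`` manager module is enabled (see
+    enabled_manager_modules() above).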
+
+    :param service: The Ceph user name to run the command under
+    :type service: str
+    :param pool_name: The name of the pool to enable autoscaling on
+    :type pool_name: str
+    :raises: CalledProcessError if the command fails
+    """
+    check_call([
+        'ceph', '--id', service,
+        'osd', 'pool', 'set', pool_name, 'pg_autoscale_mode', 'on'])
+
+
+def get_mon_map(service):
+    """Return the current monitor map.
+
+    :param service: The Ceph user name to run the command under
+    :type service: str
+    :returns: Dictionary with monitor map data
+    :rtype: Dict[str,any]
+    :raises: ValueError if the monmap fails to parse, CalledProcessError if
+             our ceph command fails.
+    """
+    try:
+        octopus_or_later = cmp_pkgrevno('ceph-common', '15.0.0') >= 0
+        mon_status_cmd = 'quorum_status' if octopus_or_later else 'mon_status'
+        mon_status = (check_output(['ceph', '--id', service, mon_status_cmd,
+                                    '--format=json'])).decode('utf-8')
+        try:
+            return json.loads(mon_status)
+        except ValueError as v:
+            log("Unable to parse mon_status json: {}. Error: {}"
+                .format(mon_status, str(v)))
+            raise
+    except CalledProcessError as e:
+        log("mon_status command failed with message: {}"
+            .format(str(e)))
+        raise
+
+
+def hash_monitor_names(service):
+    """Get a sorted list of monitor hashes in ascending order.
+
+    Uses the get_mon_map() function to get information about the monitor
+    cluster. Hash the name of each monitor.
+
+    :param service: The Ceph user name to run the command under.
+    :type service: str
+    :returns: a sorted list of monitor hashes in an ascending order.
+    :rtype: Optional[List[str]]
+    :raises: CalledProcessError, ValueError
+    """
+    try:
+        hash_list = []
+        monitor_list = get_mon_map(service=service)
+        if monitor_list['monmap']['mons']:
+            for mon in monitor_list['monmap']['mons']:
+                hash_list.append(
+                    hashlib.sha224(mon['name'].encode('utf-8')).hexdigest())
+            return sorted(hash_list)
+        else:
+            return None
+    except (ValueError, CalledProcessError):
+        raise
+
+
+def monitor_key_delete(service, key):
+    """Delete a key and value pair from the monitor cluster.
+
+    Deletes a key value pair on the monitor cluster.
+
+    :param service: The Ceph user name to run the command under
+    :type service: str
+    :param key: The key to delete.
+    :type key: str
+    :raises: CalledProcessError
+    """
+    try:
+        check_output(
+            ['ceph', '--id', service,
+             'config-key', 'del', str(key)])
+    except CalledProcessError as e:
+        log("Monitor config-key del failed with message: {}"
+            .format(e.output))
+        raise
+
+
+def monitor_key_set(service, key, value):
+    """Set a key value pair on the monitor cluster.
+
+    :param service: The Ceph user name to run the command under.
+    :type service: str
+    :param key: The key to set.
+    :type key: str
+    :param value: The value to set. This will be coerced into a string.
+    :type value: str
+    :raises: CalledProcessError
+    """
+    try:
+        check_output(
+            ['ceph', '--id', service,
+             'config-key', 'put', str(key), str(value)])
+    except CalledProcessError as e:
+        log("Monitor config-key put failed with message: {}"
+            .format(e.output))
+        raise
+
+
+def monitor_key_get(service, key):
+    """Get the value of an existing key in the monitor cluster.
+
+    :param service: The Ceph user name to run the command under
+    :type service: str
+    :param key: The key to search for.
+    :type key: str
+    :return: Returns the value of that key or None if not found.
+    :rtype: Optional[str]
+    """
+    try:
+        output = check_output(
+            ['ceph', '--id', service,
+             'config-key', 'get', str(key)]).decode('UTF-8')
+        return output
+    except CalledProcessError as e:
+        log("Monitor config-key get failed with message: {}"
+            .format(e.output))
+        return None
+
+
+def monitor_key_exists(service, key):
+    """Search for existence of key in the monitor cluster.
+
+    :param service: The Ceph user name to run the command under.
+    :type service: str
+    :param key: The key to search for.
+    :type key: str
+    :return: Returns True if the key exists, False if not.
+    :rtype: bool
+    :raises: CalledProcessError if an unknown error occurs.
+    """
+    try:
+        check_call(
+            ['ceph', '--id', service,
+             'config-key', 'exists', str(key)])
+        # If we get here the key exists; a missing key makes Ceph exit with
+        # ENOENT, which is handled as a CalledProcessError below.
+        return True
+    except CalledProcessError as e:
+        if e.returncode == errno.ENOENT:
+            return False
+        else:
+            log("Unknown error from ceph config-key exists: {} {}"
+                .format(e.returncode, e.output))
+            raise
+
+
+def get_erasure_profile(service, name):
+    """Get an existing erasure code profile if it exists.
+
+    :param service: The Ceph user name to run the command under.
+    :type service: str
+    :param name: Name of profile.
+    :type name: str
+    :returns: Dictionary with profile data.
+    :rtype: Optional[Dict[str,str]]
+    """
+    try:
+        out = check_output(['ceph', '--id', service,
+                            'osd', 'erasure-code-profile', 'get',
+                            name, '--format=json']).decode('utf-8')
+        return json.loads(out)
+    except (CalledProcessError, OSError, ValueError):
+        return None
+
+
+def pool_set(service, pool_name, key, value):
+    """Sets a value for a RADOS pool in ceph.
+
+    :param service: The Ceph user name to run the command under.
+    :type service: str
+    :param pool_name: Name of pool to set property on.
+    :type pool_name: str
+    :param key: Property key.
+    :type key: str
+    :param value: Value, will be coerced into str and shifted to lowercase.
+    :type value: str
+    :raises: CalledProcessError
+    """
+    cmd = [
+        'ceph', '--id', service,
+        'osd', 'pool', 'set', pool_name, key, str(value).lower()]
+    check_call(cmd)
+
+
+def snapshot_pool(service, pool_name, snapshot_name):
+    """Snapshots a RADOS pool in Ceph.
+
+    :param service: The Ceph user name to run the command under.
+    :type service: str
+    :param pool_name: Name of pool to snapshot.
+    :type pool_name: str
+    :param snapshot_name: Name of snapshot to create.
+    :type snapshot_name: str
+    :raises: CalledProcessError
+    """
+    cmd = [
+        'ceph', '--id', service,
+        'osd', 'pool', 'mksnap', pool_name, snapshot_name]
+    check_call(cmd)
+
+
+def remove_pool_snapshot(service, pool_name, snapshot_name):
+    """Remove a snapshot from a RADOS pool in Ceph.
+
+    :param service: The Ceph user name to run the command under.
+    :type service: str
+    :param pool_name: Name of pool to remove snapshot from.
+    :type pool_name: str
+    :param snapshot_name: Name of snapshot to remove.
+    :type snapshot_name: str
+    :raises: CalledProcessError
+    """
+    cmd = [
+        'ceph', '--id', service,
+        'osd', 'pool', 'rmsnap', pool_name, snapshot_name]
+    check_call(cmd)
+
+
+def set_pool_quota(service, pool_name, max_bytes=None, max_objects=None):
+    """Set byte and/or object quota on a RADOS pool in Ceph.
+
+    :param service: The Ceph user name to run the command under
+    :type service: str
+    :param pool_name: Name of pool
+    :type pool_name: str
+    :param max_bytes: Maximum bytes quota to apply
+    :type max_bytes: int
+    :param max_objects: Maximum objects quota to apply
+    :type max_objects: int
+    :raises: subprocess.CalledProcessError
+    """
+    cmd = [
+        'ceph', '--id', service,
+        'osd', 'pool', 'set-quota', pool_name]
+    if max_bytes:
+        cmd = cmd + ['max_bytes', str(max_bytes)]
+    if max_objects:
+        cmd = cmd + ['max_objects', str(max_objects)]
+    check_call(cmd)
+
+
+def remove_pool_quota(service, pool_name):
+    """Remove byte quota on a RADOS pool in Ceph.
+
+    :param service: The Ceph user name to run the command under.
+    :type service: str
+    :param pool_name: Name of pool to remove quota from.
+    :type pool_name: str
+    :raises: CalledProcessError
+    """
+    cmd = [
+        'ceph', '--id', service,
+        'osd', 'pool', 'set-quota', pool_name, 'max_bytes', '0']
+    check_call(cmd)
+
+
+def remove_erasure_profile(service, profile_name):
+    """Remove erasure code profile.
+
+    :param service: The Ceph user name to run the command under
+    :type service: str
+    :param profile_name: Name of profile to remove.
+    :type profile_name: str
+    :raises: CalledProcessError
+    """
+    cmd = [
+        'ceph', '--id', service,
+        'osd', 'erasure-code-profile', 'rm', profile_name]
+    check_call(cmd)
+
+
+def create_erasure_profile(service, profile_name,
+                           erasure_plugin_name='jerasure',
+                           failure_domain=None,
+                           data_chunks=2, coding_chunks=1,
+                           locality=None, durability_estimator=None,
+                           helper_chunks=None,
+                           scalar_mds=None,
+                           crush_locality=None,
+                           device_class=None,
+                           erasure_plugin_technique=None):
+    """Create a new erasure code profile if one does not already exist for it.
+
+    Profiles are considered immutable so will not be updated if the named
+    profile already exists.
+
+    Please refer to [0] for more details.
+
+    0: http://docs.ceph.com/docs/master/rados/operations/erasure-code-profile/
+
+    :param service: The Ceph user name to run the command under.
+    :type service: str
+    :param profile_name: Name of profile.
+    :type profile_name: str
+    :param erasure_plugin_name: Erasure code plugin.
+    :type erasure_plugin_name: str
+    :param failure_domain: Failure domain, one of:
+                           ('chassis', 'datacenter', 'host', 'osd', 'pdu',
+                            'pod', 'rack', 'region', 'room', 'root', 'row').
+    :type failure_domain: str
+    :param data_chunks: Number of data chunks.
+    :type data_chunks: int
+    :param coding_chunks: Number of coding chunks.
+    :type coding_chunks: int
+    :param locality: Locality.
+    :type locality: int
+    :param durability_estimator: Durability estimator.
+    :type durability_estimator: int
+    :param helper_chunks: Number of helper chunks.
+    :type helper_chunks: int
+    :param device_class: Restrict placement to devices of specific class.
+    :type device_class: str
+    :param scalar_mds: one of ['isa', 'jerasure', 'shec']
+    :type scalar_mds: str
+    :param crush_locality: LRC locality failure domain, one of:
+                           ('chassis', 'datacenter', 'host', 'osd', 'pdu',
+                            'pod', 'rack', 'region', 'room', 'root', 'row')
+                           or unset.
+    :type crush_locality: str
+    :param erasure_plugin_technique: Coding technique for EC plugin
+    :type erasure_plugin_technique: str
+    :return: None.
Can raise CalledProcessError, ValueError or AssertionError + """ + if erasure_profile_exists(service, profile_name): + log('EC profile {} exists, skipping update'.format(profile_name), + level=WARNING) + return + + plugin_techniques = { + 'jerasure': [ + 'reed_sol_van', + 'reed_sol_r6_op', + 'cauchy_orig', + 'cauchy_good', + 'liberation', + 'blaum_roth', + 'liber8tion' + ], + 'lrc': [], + 'isa': [ + 'reed_sol_van', + 'cauchy', + ], + 'shec': [ + 'single', + 'multiple' + ], + 'clay': [], + } + failure_domains = [ + 'chassis', 'datacenter', + 'host', 'osd', + 'pdu', 'pod', + 'rack', 'region', + 'room', 'root', + 'row', + ] + device_classes = [ + 'ssd', + 'hdd', + 'nvme' + ] + + validator(erasure_plugin_name, str, list(plugin_techniques.keys())) + + cmd = [ + 'ceph', '--id', service, + 'osd', 'erasure-code-profile', 'set', profile_name, + 'plugin={}'.format(erasure_plugin_name), + 'k={}'.format(str(data_chunks)), + 'm={}'.format(str(coding_chunks)), + ] + + if erasure_plugin_technique: + validator(erasure_plugin_technique, str, + plugin_techniques[erasure_plugin_name]) + cmd.append('technique={}'.format(erasure_plugin_technique)) + + luminous_or_later = cmp_pkgrevno('ceph-common', '12.0.0') >= 0 + + # Set failure domain from options if not provided in args + if not failure_domain and config('customize-failure-domain'): + # Defaults to 'host' so just need to deal with + # setting 'rack' if feature is enabled + failure_domain = 'rack' + + if failure_domain: + validator(failure_domain, str, failure_domains) + # failure_domain changed in luminous + if luminous_or_later: + cmd.append('crush-failure-domain={}'.format(failure_domain)) + else: + cmd.append('ruleset-failure-domain={}'.format(failure_domain)) + + # device class new in luminous + if luminous_or_later and device_class: + validator(device_class, str, device_classes) + cmd.append('crush-device-class={}'.format(device_class)) + else: + log('Skipping device class configuration (ceph < 12.0.0)', + level=DEBUG) + + # Add plugin specific information + if erasure_plugin_name == 'lrc': + # LRC mandatory configuration + if locality: + cmd.append('l={}'.format(str(locality))) + else: + raise ValueError("locality must be provided for lrc plugin") + # LRC optional configuration + if crush_locality: + validator(crush_locality, str, failure_domains) + cmd.append('crush-locality={}'.format(crush_locality)) + + if erasure_plugin_name == 'shec': + # SHEC optional configuration + if durability_estimator: + cmd.append('c={}'.format((durability_estimator))) + + if erasure_plugin_name == 'clay': + # CLAY optional configuration + if helper_chunks: + cmd.append('d={}'.format(str(helper_chunks))) + if scalar_mds: + cmd.append('scalar-mds={}'.format(scalar_mds)) + + check_call(cmd) + + +def rename_pool(service, old_name, new_name): + """Rename a Ceph pool from old_name to new_name. + + :param service: The Ceph user name to run the command under. + :type service: str + :param old_name: Name of pool subject to rename. + :type old_name: str + :param new_name: Name to rename pool to. + :type new_name: str + """ + validator(value=old_name, valid_type=str) + validator(value=new_name, valid_type=str) + + cmd = [ + 'ceph', '--id', service, + 'osd', 'pool', 'rename', old_name, new_name] + check_call(cmd) + + +def erasure_profile_exists(service, name): + """Check to see if an Erasure code profile already exists. + + :param service: The Ceph user name to run the command under + :type service: str + :param name: Name of profile to look for. 
+    :type name: str
+    :returns: True if it exists, False otherwise.
+    :rtype: bool
+    """
+    validator(value=name, valid_type=str)
+    try:
+        check_call(['ceph', '--id', service,
+                    'osd', 'erasure-code-profile', 'get',
+                    name])
+        return True
+    except CalledProcessError:
+        return False
+
+
+def get_cache_mode(service, pool_name):
+    """Find the current caching mode of the pool_name given.
+
+    :param service: The Ceph user name to run the command under
+    :type service: str
+    :param pool_name: Name of pool.
+    :type pool_name: str
+    :returns: Current cache mode.
+    :rtype: Optional[str]
+    """
+    validator(value=service, valid_type=str)
+    validator(value=pool_name, valid_type=str)
+    out = check_output(['ceph', '--id', service,
+                        'osd', 'dump', '--format=json']).decode('utf-8')
+    try:
+        osd_json = json.loads(out)
+        for pool in osd_json['pools']:
+            if pool['pool_name'] == pool_name:
+                return pool['cache_mode']
+        return None
+    except ValueError:
+        raise
+
+
+def pool_exists(service, name):
+    """Check to see if a RADOS pool already exists."""
+    try:
+        out = check_output(
+            ['rados', '--id', service, 'lspools']).decode('utf-8')
+    except CalledProcessError:
+        return False
+
+    return name in out.split()
+
+
+def get_osds(service, device_class=None):
+    """Return a list of all Ceph Object Storage Daemons currently in the
+    cluster (optionally filtered by storage device class).
+
+    :param service: The Ceph user name to run the command under.
+    :type service: str
+    :param device_class: Class of storage device for OSDs
+    :type device_class: str
+    :returns: List of OSD ids as reported by the cluster.
+    :rtype: List[int]
+    """
+    luminous_or_later = cmp_pkgrevno('ceph-common', '12.0.0') >= 0
+    if luminous_or_later and device_class:
+        out = check_output(['ceph', '--id', service,
+                            'osd', 'crush', 'class',
+                            'ls-osd', device_class,
+                            '--format=json']).decode('utf-8')
+    else:
+        out = check_output(['ceph', '--id', service,
+                            'osd', 'ls',
+                            '--format=json']).decode('utf-8')
+    return json.loads(out)
+
+
+def install():
+    """Basic Ceph client installation."""
+    ceph_dir = "/etc/ceph"
+    if not os.path.exists(ceph_dir):
+        os.mkdir(ceph_dir)
+
+    apt_install('ceph-common', fatal=True)
+
+
+def rbd_exists(service, pool, rbd_img):
+    """Check to see if a RADOS block device exists."""
+    try:
+        out = check_output(['rbd', 'list', '--id',
+                            service, '--pool', pool]).decode('utf-8')
+    except CalledProcessError:
+        return False
+
+    return rbd_img in out
+
+
+def create_rbd_image(service, pool, image, sizemb):
+    """Create a new RADOS block device."""
+    cmd = ['rbd', 'create', image, '--size', str(sizemb), '--id', service,
+           '--pool', pool]
+    check_call(cmd)
+
+
+def update_pool(client, pool, settings):
+    """Update pool properties.
+
+    :param client: Client/User-name to authenticate with.
+    :type client: str
+    :param pool: Name of pool to operate on
+    :type pool: str
+    :param settings: Dictionary with key/value pairs to set.
+ :type settings: Dict[str, str] + :raises: CalledProcessError + """ + cmd = ['ceph', '--id', client, 'osd', 'pool', 'set', pool] + for k, v in settings.items(): + check_call(cmd + [k, v]) + + +def set_app_name_for_pool(client, pool, name): + """Calls `osd pool application enable` for the specified pool name + + :param client: Name of the ceph client to use + :type client: str + :param pool: Pool to set app name for + :type pool: str + :param name: app name for the specified pool + :type name: str + + :raises: CalledProcessError if ceph call fails + """ + if cmp_pkgrevno('ceph-common', '12.0.0') >= 0: + cmd = ['ceph', '--id', client, 'osd', 'pool', + 'application', 'enable', pool, name] + check_call(cmd) + + +def create_pool(service, name, replicas=3, pg_num=None): + """Create a new RADOS pool.""" + if pool_exists(service, name): + log("Ceph pool {} already exists, skipping creation".format(name), + level=WARNING) + return + + if not pg_num: + # Calculate the number of placement groups based + # on upstream recommended best practices. + osds = get_osds(service) + if osds: + pg_num = (len(osds) * 100 // replicas) + else: + # NOTE(james-page): Default to 200 for older ceph versions + # which don't support OSD query from cli + pg_num = 200 + + cmd = ['ceph', '--id', service, 'osd', 'pool', 'create', name, str(pg_num)] + check_call(cmd) + + update_pool(service, name, settings={'size': str(replicas)}) + + +def delete_pool(service, name): + """Delete a RADOS pool from ceph.""" + cmd = ['ceph', '--id', service, 'osd', 'pool', 'delete', name, + '--yes-i-really-really-mean-it'] + check_call(cmd) + + +def _keyfile_path(service): + return KEYFILE.format(service) + + +def _keyring_path(service): + return KEYRING.format(service) + + +def add_key(service, key): + """Add a key to a keyring. + + Creates the keyring if it doesn't already exist. + + Logs and returns if the key is already in the keyring. + """ + keyring = _keyring_path(service) + if os.path.exists(keyring): + with open(keyring, 'r') as ring: + if key in ring.read(): + log('Ceph keyring exists at %s and has not changed.' % keyring, + level=DEBUG) + return + log('Updating existing keyring %s.' % keyring, level=DEBUG) + + cmd = ['ceph-authtool', keyring, '--create-keyring', + '--name=client.{}'.format(service), '--add-key={}'.format(key)] + check_call(cmd) + log('Created new ceph keyring at %s.' % keyring, level=DEBUG) + + +def create_keyring(service, key): + """Deprecated. Please use the more accurately named 'add_key'""" + return add_key(service, key) + + +def delete_keyring(service): + """Delete an existing Ceph keyring.""" + keyring = _keyring_path(service) + if not os.path.exists(keyring): + log('Keyring does not exist at %s' % keyring, level=WARNING) + return + + os.remove(keyring) + log('Deleted ring at %s.' % keyring, level=INFO) + + +def create_key_file(service, key): + """Create a file containing key.""" + keyfile = _keyfile_path(service) + if os.path.exists(keyfile): + log('Keyfile exists at %s.' % keyfile, level=WARNING) + return + + with open(keyfile, 'w') as fd: + fd.write(key) + + log('Created new keyfile at %s.' 
% keyfile, level=INFO) + + +def get_ceph_nodes(relation='ceph'): + """Query named relation to determine current nodes.""" + hosts = [] + for r_id in relation_ids(relation): + for unit in related_units(r_id): + hosts.append(relation_get('private-address', unit=unit, rid=r_id)) + + return hosts + + +def configure(service, key, auth, use_syslog): + """Perform basic configuration of Ceph.""" + add_key(service, key) + create_key_file(service, key) + hosts = get_ceph_nodes() + with open('/etc/ceph/ceph.conf', 'w') as ceph_conf: + ceph_conf.write(CEPH_CONF.format(auth=auth, + keyring=_keyring_path(service), + mon_hosts=",".join(map(str, hosts)), + use_syslog=use_syslog)) + modprobe('rbd') + + +def image_mapped(name): + """Determine whether a RADOS block device is mapped locally.""" + try: + out = check_output(['rbd', 'showmapped']).decode('utf-8') + except CalledProcessError: + return False + + return name in out + + +def map_block_storage(service, pool, image): + """Map a RADOS block device for local use.""" + cmd = [ + 'rbd', + 'map', + '{}/{}'.format(pool, image), + '--user', + service, + '--secret', + _keyfile_path(service), + ] + check_call(cmd) + + +def filesystem_mounted(fs): + """Determine whether a filesystem is already mounted.""" + return fs in [f for f, m in mounts()] + + +def make_filesystem(blk_device, fstype='ext4', timeout=10): + """Make a new filesystem on the specified block device.""" + count = 0 + e_noent = errno.ENOENT + while not os.path.exists(blk_device): + if count >= timeout: + log('Gave up waiting on block device %s' % blk_device, + level=ERROR) + raise IOError(e_noent, os.strerror(e_noent), blk_device) + + log('Waiting for block device %s to appear' % blk_device, + level=DEBUG) + count += 1 + time.sleep(1) + else: + log('Formatting block device %s as filesystem %s.' % + (blk_device, fstype), level=INFO) + check_call(['mkfs', '-t', fstype, blk_device]) + + +def place_data_on_block_device(blk_device, data_src_dst): + """Migrate data in data_src_dst to blk_device and then remount.""" + # mount block device into /mnt + mount(blk_device, '/mnt') + # copy data to /mnt + copy_files(data_src_dst, '/mnt') + # umount block device + umount('/mnt') + # Grab user/group ID's from original source + _dir = os.stat(data_src_dst) + uid = _dir.st_uid + gid = _dir.st_gid + # re-mount where the data should originally be + # TODO: persist is currently a NO-OP in core.host + mount(blk_device, data_src_dst, persist=True) + # ensure original ownership of new mount. + os.chown(data_src_dst, uid, gid) + + +def copy_files(src, dst, symlinks=False, ignore=None): + """Copy files from src to dst.""" + for item in os.listdir(src): + s = os.path.join(src, item) + d = os.path.join(dst, item) + if os.path.isdir(s): + shutil.copytree(s, d, symlinks, ignore) + else: + shutil.copy2(s, d) + + +def ensure_ceph_storage(service, pool, rbd_img, sizemb, mount_point, + blk_device, fstype, system_services=[], + replicas=3): + """NOTE: This function must only be called from a single service unit for + the same rbd_img otherwise data loss will occur. + + Ensures given pool and RBD image exists, is mapped to a block device, + and the device is formatted and mounted at the given mount_point. + + If formatting a device for the first time, data existing at mount_point + will be migrated to the RBD device before being re-mounted. + + All services listed in system_services will be stopped prior to data + migration and restarted when complete. + """ + # Ensure pool, RBD image, RBD mappings are in place. 
+ if not pool_exists(service, pool): + log('Creating new pool {}.'.format(pool), level=INFO) + create_pool(service, pool, replicas=replicas) + + if not rbd_exists(service, pool, rbd_img): + log('Creating RBD image ({}).'.format(rbd_img), level=INFO) + create_rbd_image(service, pool, rbd_img, sizemb) + + if not image_mapped(rbd_img): + log('Mapping RBD Image {} as a Block Device.'.format(rbd_img), + level=INFO) + map_block_storage(service, pool, rbd_img) + + # make file system + # TODO: What happens if for whatever reason this is run again and + # the data is already in the rbd device and/or is mounted?? + # When it is mounted already, it will fail to make the fs + # XXX: This is really sketchy! Need to at least add an fstab entry + # otherwise this hook will blow away existing data if its executed + # after a reboot. + if not filesystem_mounted(mount_point): + make_filesystem(blk_device, fstype) + + for svc in system_services: + if service_running(svc): + log('Stopping services {} prior to migrating data.' + .format(svc), level=DEBUG) + service_stop(svc) + + place_data_on_block_device(blk_device, mount_point) + + for svc in system_services: + log('Starting service {} after migrating data.' + .format(svc), level=DEBUG) + service_start(svc) + + +def ensure_ceph_keyring(service, user=None, group=None, + relation='ceph', key=None): + """Ensures a ceph keyring is created for a named service and optionally + ensures user and group ownership. + + @returns boolean: Flag to indicate whether a key was successfully written + to disk based on either relation data or a supplied key + """ + if not key: + for rid in relation_ids(relation): + for unit in related_units(rid): + key = relation_get('key', rid=rid, unit=unit) + if key: + break + + if not key: + return False + + add_key(service=service, key=key) + keyring = _keyring_path(service) + if user and group: + check_call(['chown', '%s.%s' % (user, group), keyring]) + + return True + + +class CephBrokerRq(object): + """Ceph broker request. + + Multiple operations can be added to a request and sent to the Ceph broker + to be executed. + + Request is json-encoded for sending over the wire. + + The API is versioned and defaults to version 1. + """ + + # The below hash is the result of running + # `hashlib.sha1('[]'.encode()).hexdigest()` + EMPTY_LIST_SHA = '97d170e1550eee4afc0af065b78cda302a97674c' + + def __init__(self, api_version=1, request_id=None, raw_request_data=None): + """Initialize CephBrokerRq object. + + Builds a new empty request or rebuilds a request from on-wire JSON + data. + + :param api_version: API version for request (default: 1). + :type api_version: Optional[int] + :param request_id: Unique identifier for request. The identifier will + be updated as ops are added or removed from the + broker request. This ensures that Ceph will + correctly process requests where operations are + added after the initial request is processed. + (default: sha1 of operations) + :type request_id: Optional[str] + :param raw_request_data: JSON-encoded string to build request from. 
+        :type raw_request_data: Optional[str]
+        :raises: KeyError
+        """
+        if raw_request_data:
+            request_data = json.loads(raw_request_data)
+            self.api_version = request_data['api-version']
+            self.set_ops(request_data['ops'])
+            self.request_id = request_data['request-id']
+        else:
+            self.api_version = api_version
+            if request_id:
+                self.request_id = request_id
+            else:
+                self.request_id = CephBrokerRq.EMPTY_LIST_SHA
+            self.ops = []
+
+    def _hash_ops(self):
+        """Return the sha1 of the requested Broker ops."""
+        return hashlib.sha1(json.dumps(self.ops, sort_keys=True).encode()).hexdigest()
+
+    def add_op(self, op):
+        """Add an op if it is not already in the list.
+
+        :param op: Operation to add.
+        :type op: dict
+        """
+        if op not in self.ops:
+            self.ops.append(op)
+            self.request_id = self._hash_ops()
+
+    def add_op_request_access_to_group(self, name, namespace=None,
+                                       permission=None, key_name=None,
+                                       object_prefix_permissions=None):
+        """
+        Adds the requested permissions to the current service's Ceph key,
+        allowing the key to access only the specified pools or
+        object prefixes. object_prefix_permissions should be a dictionary
+        keyed on the permission with the corresponding value being a list
+        of prefixes to apply that permission to.
+            {
+                'rwx': ['prefix1', 'prefix2'],
+                'class-read': ['prefix3']}
+        """
+        self.add_op({
+            'op': 'add-permissions-to-key', 'group': name,
+            'namespace': namespace,
+            'name': key_name or service_name(),
+            'group-permission': permission,
+            'object-prefix-permissions': object_prefix_permissions})
+
+    def add_op_create_pool(self, name, replica_count=3, pg_num=None,
+                           weight=None, group=None, namespace=None,
+                           app_name=None, max_bytes=None, max_objects=None):
+        """DEPRECATED: Use ``add_op_create_replicated_pool()`` or
+        ``add_op_create_erasure_pool()`` instead.
+        """
+        return self.add_op_create_replicated_pool(
+            name, replica_count=replica_count, pg_num=pg_num, weight=weight,
+            group=group, namespace=namespace, app_name=app_name,
+            max_bytes=max_bytes, max_objects=max_objects)
+
+    # Use function parameters and docstring to define types in a compatible
+    # manner.
+    #
+    # NOTE: Our caller should always use a kwarg Dict when calling us so
+    # no need to maintain fixed order/position for parameters. Please keep
+    # them sorted by name when adding new ones.
+    def _partial_build_common_op_create(self,
+                                        app_name=None,
+                                        compression_algorithm=None,
+                                        compression_mode=None,
+                                        compression_required_ratio=None,
+                                        compression_min_blob_size=None,
+                                        compression_min_blob_size_hdd=None,
+                                        compression_min_blob_size_ssd=None,
+                                        compression_max_blob_size=None,
+                                        compression_max_blob_size_hdd=None,
+                                        compression_max_blob_size_ssd=None,
+                                        group=None,
+                                        max_bytes=None,
+                                        max_objects=None,
+                                        namespace=None,
+                                        rbd_mirroring_mode='pool',
+                                        weight=None):
+        """Build common part of a create pool operation.
+
+        :param app_name: Tag pool with application name. Note that there are
+                         certain protocols emerging upstream with regard to
+                         meaningful application names to use.
+                         Examples are 'rbd' and 'rgw'.
+        :type app_name: Optional[str]
+        :param compression_algorithm: Compressor to use, one of:
+                                      ('lz4', 'snappy', 'zlib', 'zstd')
+        :type compression_algorithm: Optional[str]
+        :param compression_mode: When to compress data, one of:
+                                 ('none', 'passive', 'aggressive', 'force')
+        :type compression_mode: Optional[str]
+        :param compression_required_ratio: Minimum compression ratio for data
+                                           chunk, if the requested ratio is
+                                           not achieved the compressed version
+                                           will be thrown away and the
+                                           original stored.
+ :type compression_required_ratio: Optional[float] + :param compression_min_blob_size: Chunks smaller than this are never + compressed (unit: bytes). + :type compression_min_blob_size: Optional[int] + :param compression_min_blob_size_hdd: Chunks smaller than this are not + compressed when destined to + rotational media (unit: bytes). + :type compression_min_blob_size_hdd: Optional[int] + :param compression_min_blob_size_ssd: Chunks smaller than this are not + compressed when destined to flash + media (unit: bytes). + :type compression_min_blob_size_ssd: Optional[int] + :param compression_max_blob_size: Chunks larger than this are broken + into N * compression_max_blob_size + chunks before being compressed + (unit: bytes). + :type compression_max_blob_size: Optional[int] + :param compression_max_blob_size_hdd: Chunks larger than this are + broken into + N * compression_max_blob_size_hdd + chunks before being compressed + when destined for rotational + media (unit: bytes) + :type compression_max_blob_size_hdd: Optional[int] + :param compression_max_blob_size_ssd: Chunks larger than this are + broken into + N * compression_max_blob_size_ssd + chunks before being compressed + when destined for flash media + (unit: bytes). + :type compression_max_blob_size_ssd: Optional[int] + :param group: Group to add pool to + :type group: Optional[str] + :param max_bytes: Maximum bytes quota to apply + :type max_bytes: Optional[int] + :param max_objects: Maximum objects quota to apply + :type max_objects: Optional[int] + :param namespace: Group namespace + :type namespace: Optional[str] + :param rbd_mirroring_mode: Pool mirroring mode used when Ceph RBD + mirroring is enabled. + :type rbd_mirroring_mode: Optional[str] + :param weight: The percentage of data that is expected to be contained + in the pool from the total available space on the OSDs. + Used to calculate number of Placement Groups to create + for pool. + :type weight: Optional[float] + :returns: Dictionary with kwarg name as key. + :rtype: Dict[str,any] + :raises: AssertionError + """ + return { + 'app-name': app_name, + 'compression-algorithm': compression_algorithm, + 'compression-mode': compression_mode, + 'compression-required-ratio': compression_required_ratio, + 'compression-min-blob-size': compression_min_blob_size, + 'compression-min-blob-size-hdd': compression_min_blob_size_hdd, + 'compression-min-blob-size-ssd': compression_min_blob_size_ssd, + 'compression-max-blob-size': compression_max_blob_size, + 'compression-max-blob-size-hdd': compression_max_blob_size_hdd, + 'compression-max-blob-size-ssd': compression_max_blob_size_ssd, + 'group': group, + 'max-bytes': max_bytes, + 'max-objects': max_objects, + 'group-namespace': namespace, + 'rbd-mirroring-mode': rbd_mirroring_mode, + 'weight': weight, + } + + def add_op_create_replicated_pool(self, name, replica_count=3, pg_num=None, + crush_profile=None, **kwargs): + """Adds an operation to create a replicated pool. + + Refer to docstring for ``_partial_build_common_op_create`` for + documentation of keyword arguments. + + :param name: Name of pool to create + :type name: str + :param replica_count: Number of copies Ceph should keep of your data. + :type replica_count: int + :param pg_num: Request specific number of Placement Groups to create + for pool. + :type pg_num: int + :raises: AssertionError if provided data is of invalid type/range + :param crush_profile: Name of crush profile to use. If not set the + ceph-mon unit handling the broker request will + set its default value. 
+        :type crush_profile: Optional[str]
+        """
+        if pg_num and kwargs.get('weight'):
+            raise ValueError('pg_num and weight are mutually exclusive')
+
+        op = {
+            'op': 'create-pool',
+            'name': name,
+            'replicas': replica_count,
+            'pg_num': pg_num,
+            'crush-profile': crush_profile
+        }
+        op.update(self._partial_build_common_op_create(**kwargs))
+
+        # Initialize Pool-object to validate type and range of ops.
+        pool = ReplicatedPool('dummy-service', op=op)
+        pool.validate()
+
+        self.add_op(op)
+
+    def add_op_create_erasure_pool(self, name, erasure_profile=None,
+                                   allow_ec_overwrites=False, **kwargs):
+        """Adds an operation to create an erasure coded pool.
+
+        Refer to docstring for ``_partial_build_common_op_create`` for
+        documentation of keyword arguments.
+
+        :param name: Name of pool to create
+        :type name: str
+        :param erasure_profile: Name of erasure code profile to use. If not
+                                set the ceph-mon unit handling the broker
+                                request will set its default value.
+        :type erasure_profile: str
+        :param allow_ec_overwrites: allow EC pools to be overwritten
+        :type allow_ec_overwrites: bool
+        :raises: AssertionError if provided data is of invalid type/range
+        """
+        op = {
+            'op': 'create-pool',
+            'name': name,
+            'pool-type': 'erasure',
+            'erasure-profile': erasure_profile,
+            'allow-ec-overwrites': allow_ec_overwrites,
+        }
+        op.update(self._partial_build_common_op_create(**kwargs))
+
+        # Initialize Pool-object to validate type and range of ops.
+        pool = ErasurePool('dummy-service', op)
+        pool.validate()
+
+        self.add_op(op)
+
+    def add_op_create_erasure_profile(self, name,
+                                      erasure_type='jerasure',
+                                      erasure_technique=None,
+                                      k=None, m=None,
+                                      failure_domain=None,
+                                      lrc_locality=None,
+                                      shec_durability_estimator=None,
+                                      clay_helper_chunks=None,
+                                      device_class=None,
+                                      clay_scalar_mds=None,
+                                      lrc_crush_locality=None):
+        """Adds an operation to create an erasure coding profile.
+
+        :param name: Name of profile to create
+        :type name: str
+        :param erasure_type: Which of the erasure coding plugins should be used
+        :type erasure_type: string
+        :param erasure_technique: EC plugin technique to use
+        :type erasure_technique: string
+        :param k: Number of data chunks
+        :type k: int
+        :param m: Number of coding chunks
+        :type m: int
+        :param lrc_locality: Group the coding and data chunks into sets of size locality
+                             (lrc plugin)
+        :type lrc_locality: int
+        :param shec_durability_estimator: The number of parity chunks each of which includes
+                                          a data chunk in its calculation range (shec plugin)
+        :type shec_durability_estimator: int
+        :param clay_helper_chunks: The number of helper chunks to use for recovery operations
+                                   (clay plugin)
+        :type clay_helper_chunks: int
+        :param failure_domain: Type of failure domain from Ceph bucket types
+                               to be used
+        :type failure_domain: string
+        :param device_class: Device class to use for profile (ssd, hdd)
+        :type device_class: string
+        :param clay_scalar_mds: Plugin to use for CLAY layered construction
+                                (jerasure|isa|shec)
+        :type clay_scalar_mds: string
+        :param lrc_crush_locality: Type of crush bucket in which set of chunks
+                                   defined by lrc_locality will be stored.
+ :type lrc_crush_locality: string + """ + self.add_op({'op': 'create-erasure-profile', + 'name': name, + 'k': k, + 'm': m, + 'l': lrc_locality, + 'c': shec_durability_estimator, + 'd': clay_helper_chunks, + 'erasure-type': erasure_type, + 'erasure-technique': erasure_technique, + 'failure-domain': failure_domain, + 'device-class': device_class, + 'scalar-mds': clay_scalar_mds, + 'crush-locality': lrc_crush_locality}) + + def set_ops(self, ops): + """Set request ops to provided value. + + Useful for injecting ops that come from a previous request + to allow comparisons to ensure validity. + """ + self.ops = ops + self.request_id = self._hash_ops() + + @property + def request(self): + return json.dumps({'api-version': self.api_version, 'ops': self.ops, + 'request-id': self.request_id}) + + def _ops_equal(self, other): + keys_to_compare = [ + 'replicas', 'name', 'op', 'pg_num', 'group-permission', + 'object-prefix-permissions', + ] + keys_to_compare += list(self._partial_build_common_op_create().keys()) + if len(self.ops) == len(other.ops): + for req_no in range(0, len(self.ops)): + for key in keys_to_compare: + if self.ops[req_no].get(key) != other.ops[req_no].get(key): + return False + else: + return False + return True + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return False + if self.api_version == other.api_version and \ + self._ops_equal(other): + return True + else: + return False + + def __ne__(self, other): + return not self.__eq__(other) + + +class CephBrokerRsp(object): + """Ceph broker response. + + Response is json-decoded and contents provided as methods/properties. + + The API is versioned and defaults to version 1. + """ + + def __init__(self, encoded_rsp): + self.api_version = None + self.rsp = json.loads(encoded_rsp) + + @property + def request_id(self): + return self.rsp.get('request-id') + + @property + def exit_code(self): + return self.rsp.get('exit-code') + + @property + def exit_msg(self): + return self.rsp.get('stderr') + + +# Ceph Broker Conversation: +# If a charm needs an action to be taken by ceph it can create a CephBrokerRq +# and send that request to ceph via the ceph relation. The CephBrokerRq has a +# unique id so that the client can identity which CephBrokerRsp is associated +# with the request. Ceph will also respond to each client unit individually +# creating a response key per client unit eg glance/0 will get a CephBrokerRsp +# via key broker-rsp-glance-0 +# +# To use this the charm can just do something like: +# +# from charmhelpers.contrib.storage.linux.ceph import ( +# send_request_if_needed, +# is_request_complete, +# CephBrokerRq, +# ) +# +# @hooks.hook('ceph-relation-changed') +# def ceph_changed(): +# rq = CephBrokerRq() +# rq.add_op_create_pool(name='poolname', replica_count=3) +# +# if is_request_complete(rq): +# +# else: +# send_request_if_needed(get_ceph_request()) +# +# CephBrokerRq and CephBrokerRsp are serialized into JSON. 
Below is an example
+# of glance having sent a request to ceph which ceph has successfully processed
+# 'ceph:8': {
+#     'ceph/0': {
+#         'auth': 'cephx',
+#         'broker-rsp-glance-0': '{"request-id": "0bc7dc54", "exit-code": 0}',
+#         'broker_rsp': '{"request-id": "0da543b8", "exit-code": 0}',
+#         'ceph-public-address': '10.5.44.103',
+#         'key': 'AQCLDttVuHXINhAAvI144CB09dYchhHyTUY9BQ==',
+#         'private-address': '10.5.44.103',
+#     },
+#     'glance/0': {
+#         'broker_req': ('{"api-version": 1, "request-id": "0bc7dc54", '
+#                        '"ops": [{"replicas": 3, "name": "glance", '
+#                        '"op": "create-pool"}]}'),
+#         'private-address': '10.5.44.109',
+#     },
+# }
+
+def get_previous_request(rid):
+    """Return the last ceph broker request sent on a given relation
+
+    :param rid: Relation id to query for request
+    :type rid: str
+    :returns: CephBrokerRq object or None if relation data not found.
+    :rtype: Optional[CephBrokerRq]
+    """
+    broker_req = relation_get(attribute='broker_req', rid=rid,
+                              unit=local_unit())
+    if broker_req:
+        return CephBrokerRq(raw_request_data=broker_req)
+
+
+def get_request_states(request, relation='ceph'):
+    """Return a dict of requests per relation id with their corresponding
+       completion state.
+
+    This allows a charm, which has a request for ceph, to see whether there is
+    an equivalent request already being processed and if so what state that
+    request is in.
+
+    @param request: A CephBrokerRq object
+    """
+    requests = {}
+    for rid in relation_ids(relation):
+        complete = False
+        previous_request = get_previous_request(rid)
+        if request == previous_request:
+            sent = True
+            complete = is_request_complete_for_rid(previous_request, rid)
+        else:
+            sent = False
+            complete = False
+
+        requests[rid] = {
+            'sent': sent,
+            'complete': complete,
+        }
+
+    return requests
+
+
+def is_request_sent(request, relation='ceph'):
+    """Check to see if a functionally equivalent request has already been sent
+
+    Returns True if a similar request has been sent
+
+    @param request: A CephBrokerRq object
+    """
+    states = get_request_states(request, relation=relation)
+    for rid in states.keys():
+        if not states[rid]['sent']:
+            return False
+
+    return True
+
+
+def is_request_complete(request, relation='ceph'):
+    """Check to see if a functionally equivalent request has already been
+    completed
+
+    Returns True if a similar request has been completed
+
+    @param request: A CephBrokerRq object
+    """
+    states = get_request_states(request, relation=relation)
+    for rid in states.keys():
+        if not states[rid]['complete']:
+            return False
+
+    return True
+
+
+def is_request_complete_for_rid(request, rid):
+    """Check if a given request has been completed on the given relation
+
+    @param request: A CephBrokerRq object
+    @param rid: Relation ID
+    """
+    broker_key = get_broker_rsp_key()
+    for unit in related_units(rid):
+        rdata = relation_get(rid=rid, unit=unit)
+        if rdata.get(broker_key):
+            rsp = CephBrokerRsp(rdata.get(broker_key))
+            if rsp.request_id == request.request_id:
+                if not rsp.exit_code:
+                    return True
+        else:
+            # The remote unit sent no reply targeted at this unit so either the
+            # remote ceph cluster does not support unit targeted replies or it
+            # has not processed our request yet.
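+            # Fall back to the legacy shared 'broker_rsp' key: a reply that
+            # carries a request-id is taken to come from a cluster that also
+            # writes unit-specific keys, so only id-less legacy replies are
+            # acted upon.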
+            if rdata.get('broker_rsp'):
+                request_data = json.loads(rdata['broker_rsp'])
+                if request_data.get('request-id'):
+                    log('Ignoring legacy broker_rsp without unit key as remote '
+                        'service supports unit specific replies', level=DEBUG)
+                else:
+                    log('Using legacy broker_rsp as remote service does not '
+                        'support unit specific replies', level=DEBUG)
+                    rsp = CephBrokerRsp(rdata['broker_rsp'])
+                    if not rsp.exit_code:
+                        return True
+
+    return False
+
+
+def get_broker_rsp_key():
+    """Return broker response key for this unit
+
+    This is the key that ceph is going to use to pass request status
+    information back to this unit
+    """
+    return 'broker-rsp-' + local_unit().replace('/', '-')
+
+
+def send_request_if_needed(request, relation='ceph'):
+    """Send broker request if an equivalent request has not already been sent
+
+    @param request: A CephBrokerRq object
+    """
+    if is_request_sent(request, relation=relation):
+        log('Request already sent but not complete, not sending new request',
+            level=DEBUG)
+    else:
+        for rid in relation_ids(relation):
+            log('Sending request {}'.format(request.request_id), level=DEBUG)
+            relation_set(relation_id=rid, broker_req=request.request)
+            relation_set(relation_id=rid, relation_settings={'unit-name': local_unit()})
+
+
+def has_broker_rsp(rid=None, unit=None):
+    """Return True if the broker_rsp key is 'truthy' (i.e. set to something) in the relation data.
+
+    :param rid: The relation to check (default of None means current relation)
+    :type rid: Union[str, None]
+    :param unit: The remote unit to check (default of None means current unit)
+    :type unit: Union[str, None]
+    :returns: True if broker key exists and is set to something 'truthy'
+    :rtype: bool
+    """
+    rdata = relation_get(rid=rid, unit=unit) or {}
+    broker_rsp = rdata.get(get_broker_rsp_key())
+    return True if broker_rsp else False
+
+
+def is_broker_action_done(action, rid=None, unit=None):
+    """Check whether broker action has completed yet.
+
+    @param action: name of action to be performed
+    @returns True if action complete otherwise False
+    """
+    rdata = relation_get(rid=rid, unit=unit) or {}
+    broker_rsp = rdata.get(get_broker_rsp_key())
+    if not broker_rsp:
+        return False
+
+    rsp = CephBrokerRsp(broker_rsp)
+    unit_name = local_unit().partition('/')[2]
+    key = "unit_{}_ceph_broker_action.{}".format(unit_name, action)
+    kvstore = kv()
+    val = kvstore.get(key=key)
+    if val and val == rsp.request_id:
+        return True
+
+    return False
+
+
+def mark_broker_action_done(action, rid=None, unit=None):
+    """Mark action as having been completed.
+
+    @param action: name of action to be performed
+    @returns None
+    """
+    rdata = relation_get(rid=rid, unit=unit) or {}
+    broker_rsp = rdata.get(get_broker_rsp_key())
+    if not broker_rsp:
+        return
+
+    rsp = CephBrokerRsp(broker_rsp)
+    unit_name = local_unit().partition('/')[2]
+    key = "unit_{}_ceph_broker_action.{}".format(unit_name, action)
+    kvstore = kv()
+    kvstore.set(key=key, value=rsp.request_id)
+    kvstore.flush()
+
+
+class CephConfContext(object):
+    """Ceph config (ceph.conf) context.
+
+    Supports user-provided Ceph configuration settings. Users can provide a
+    dictionary as the value for the config-flags charm option containing
+    Ceph configuration settings keyed by their section in ceph.conf.
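+
+    An illustrative config-flags value (section and option names are
+    examples only, not defaults)::
+
+        {'global': {'debug osd': '1/5'},
+         'osd': {'osd max write size': 256}}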
+ """ + def __init__(self, permitted_sections=None): + self.permitted_sections = permitted_sections or [] + + def __call__(self): + conf = config('config-flags') + if not conf: + return {} + + conf = config_flags_parser(conf) + if not isinstance(conf, dict): + log("Provided config-flags is not a dictionary - ignoring", + level=WARNING) + return {} + + permitted = self.permitted_sections + if permitted: + diff = set(conf.keys()).difference(set(permitted)) + if diff: + log("Config-flags contains invalid keys '%s' - they will be " + "ignored" % (', '.join(diff)), level=WARNING) + + ceph_conf = {} + for key in conf: + if permitted and key not in permitted: + log("Ignoring key '%s'" % key, level=WARNING) + continue + + ceph_conf[key] = conf[key] + return ceph_conf + + +class CephOSDConfContext(CephConfContext): + """Ceph config (ceph.conf) context. + + Consolidates settings from config-flags via CephConfContext with + settings provided by the mons. The config-flag values are preserved in + conf['osd'], settings from the mons which do not clash with config-flag + settings are in conf['osd_from_client'] and finally settings which do + clash are in conf['osd_from_client_conflict']. Rather than silently drop + the conflicting settings they are provided in the context so they can be + rendered commented out to give some visibility to the admin. + """ + + def __init__(self, permitted_sections=None): + super(CephOSDConfContext, self).__init__( + permitted_sections=permitted_sections) + try: + self.settings_from_mons = get_osd_settings('mon') + except OSDSettingConflict: + log( + "OSD settings from mons are inconsistent, ignoring them", + level=WARNING) + self.settings_from_mons = {} + + def filter_osd_from_mon_settings(self): + """Filter settings from client relation against config-flags. + + :returns: A tuple ( + ,config-flag values, + ,client settings which do not conflict with config-flag values, + ,client settings which confilct with config-flag values) + :rtype: (OrderedDict, OrderedDict, OrderedDict) + """ + ceph_conf = super(CephOSDConfContext, self).__call__() + conflicting_entries = {} + clear_entries = {} + for key, value in self.settings_from_mons.items(): + if key in ceph_conf.get('osd', {}): + if ceph_conf['osd'][key] != value: + conflicting_entries[key] = value + else: + clear_entries[key] = value + clear_entries = _order_dict_by_key(clear_entries) + conflicting_entries = _order_dict_by_key(conflicting_entries) + return ceph_conf, clear_entries, conflicting_entries + + def __call__(self): + """Construct OSD config context. + + Standard context with two additional special keys. + osd_from_client_conflict: client settings which confilct with + config-flag values + osd_from_client: settings which do not conflict with config-flag + values + + :returns: OSD config context dict. + :rtype: dict + """ + conf, osd_clear, osd_conflict = self.filter_osd_from_mon_settings() + conf['osd_from_client_conflict'] = osd_conflict + conf['osd_from_client'] = osd_clear + return conf diff --git a/ceph-proxy/charmhelpers/contrib/storage/linux/loopback.py b/ceph-proxy/charmhelpers/contrib/storage/linux/loopback.py new file mode 100644 index 00000000..04daea29 --- /dev/null +++ b/ceph-proxy/charmhelpers/contrib/storage/linux/loopback.py @@ -0,0 +1,88 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import re
+from subprocess import (
+    check_call,
+    check_output,
+)
+
+
+##################################################
+# loopback device helpers.
+##################################################
+def loopback_devices():
+    '''
+    Parse through 'losetup -a' output to determine currently mapped
+    loopback devices. Output is expected to look like:
+
+        /dev/loop0: [0807]:961814 (/tmp/my.img)
+
+    or:
+
+        /dev/loop0: [0807]:961814 (/tmp/my.img (deleted))
+
+    :returns: dict: a dict mapping {loopback_dev: backing_file}
+    '''
+    loopbacks = {}
+    cmd = ['losetup', '-a']
+    output = check_output(cmd).decode('utf-8')
+    devs = [d.strip().split(' ', 2) for d in output.splitlines() if d != '']
+    for dev, _, f in devs:
+        loopbacks[dev.replace(':', '')] = re.search(r'\((.+)\)', f).groups()[0]
+    return loopbacks
+
+
+def create_loopback(file_path):
+    '''
+    Create a loopback device for a given backing file.
+
+    :returns: str: Full path to new loopback device (eg, /dev/loop0)
+    '''
+    file_path = os.path.abspath(file_path)
+    check_call(['losetup', '--find', file_path])
+    for d, f in loopback_devices().items():
+        if f == file_path:
+            return d
+
+
+def ensure_loopback_device(path, size):
+    '''
+    Ensure a loopback device exists for a given backing file path and size.
+    If a loopback device is not already mapped to the file, a new one will
+    be created.
+
+    TODO: Confirm size of found loopback device.
+
+    :returns: str: Full path to the ensured loopback device (eg, /dev/loop0)
+    '''
+    for d, f in loopback_devices().items():
+        if f == path:
+            return d
+
+    if not os.path.exists(path):
+        cmd = ['truncate', '--size', size, path]
+        check_call(cmd)
+
+    return create_loopback(path)
+
+
+def is_mapped_loopback_device(device):
+    """
+    Checks if a given device name is an existing/mapped loopback device.
+    :param device: str: Full path to the device (eg, /dev/loop1).
+    :returns: str: Path to the backing file if it is a loopback device,
+    empty string otherwise
+    """
+    return loopback_devices().get(device, "")
diff --git a/ceph-proxy/charmhelpers/contrib/storage/linux/lvm.py b/ceph-proxy/charmhelpers/contrib/storage/linux/lvm.py
new file mode 100644
index 00000000..0d294c79
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/storage/linux/lvm.py
@@ -0,0 +1,178 @@
+# Copyright 2014-2021 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import functools
+from subprocess import (
+    CalledProcessError,
+    check_call,
+    check_output,
+)
+
+
+##################################################
+# LVM helpers.
+################################################## +def deactivate_lvm_volume_group(block_device): + ''' + Deactivate any volume group associated with an LVM physical volume. + + :param block_device: str: Full path to LVM physical volume + ''' + vg = list_lvm_volume_group(block_device) + if vg: + cmd = ['vgchange', '-an', vg] + check_call(cmd) + + +def is_lvm_physical_volume(block_device): + ''' + Determine whether a block device is initialized as an LVM PV. + + :param block_device: str: Full path of block device to inspect. + + :returns: boolean: True if block device is a PV, False if not. + ''' + try: + check_output(['pvdisplay', block_device]) + return True + except CalledProcessError: + return False + + +def remove_lvm_physical_volume(block_device): + ''' + Remove LVM PV signatures from a given block device. + + :param block_device: str: Full path of block device to scrub. + ''' + check_call(['pvremove', '-ff', '--yes', block_device]) + + +def list_lvm_volume_group(block_device): + ''' + List LVM volume group associated with a given block device. + + Assumes block device is a valid LVM PV. + + :param block_device: str: Full path of block device to inspect. + + :returns: str: Name of volume group associated with block device or None + ''' + vg = None + pvd = check_output(['pvdisplay', block_device]).splitlines() + for lvm in pvd: + lvm = lvm.decode('UTF-8') + if lvm.strip().startswith('VG Name'): + vg = ' '.join(lvm.strip().split()[2:]) + return vg + + +def create_lvm_physical_volume(block_device): + ''' + Initialize a block device as an LVM physical volume. + + :param block_device: str: Full path of block device to initialize. + + ''' + check_call(['pvcreate', block_device]) + + +def create_lvm_volume_group(volume_group, block_device): + ''' + Create an LVM volume group backed by a given block device. + + Assumes block device has already been initialized as an LVM PV. + + :param volume_group: str: Name of volume group to create. + :block_device: str: Full path of PV-initialized block device. + ''' + check_call(['vgcreate', volume_group, block_device]) + + +def list_logical_volumes(select_criteria=None, path_mode=False): + ''' + List logical volumes + + :param select_criteria: str: Limit list to those volumes matching this + criteria (see 'lvs -S help' for more details) + :param path_mode: bool: return logical volume name in 'vg/lv' format, this + format is required for some commands like lvextend + :returns: [str]: List of logical volumes + ''' + lv_diplay_attr = 'lv_name' + if path_mode: + # Parsing output logic relies on the column order + lv_diplay_attr = 'vg_name,' + lv_diplay_attr + cmd = ['lvs', '--options', lv_diplay_attr, '--noheadings'] + if select_criteria: + cmd.extend(['--select', select_criteria]) + lvs = [] + for lv in check_output(cmd).decode('UTF-8').splitlines(): + if not lv: + continue + if path_mode: + lvs.append('/'.join(lv.strip().split())) + else: + lvs.append(lv.strip()) + return lvs + + +list_thin_logical_volume_pools = functools.partial( + list_logical_volumes, + select_criteria='lv_attr =~ ^t') + +list_thin_logical_volumes = functools.partial( + list_logical_volumes, + select_criteria='lv_attr =~ ^V') + + +def extend_logical_volume_by_device(lv_name, block_device): + ''' + Extends the size of logical volume lv_name by the amount of free space on + physical volume block_device. 
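+
+    Illustrative call (volume group, volume, and device names are
+    placeholders)::
+
+        extend_logical_volume_by_device('vg0/lv0', '/dev/vdb')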
+
+    :param lv_name: str: name of logical volume to be extended (vg/lv format)
+    :param block_device: str: name of block_device to be allocated to lv_name
+    '''
+    cmd = ['lvextend', lv_name, block_device]
+    check_call(cmd)
+
+
+def create_logical_volume(lv_name, volume_group, size=None):
+    '''
+    Create a new logical volume in an existing volume group
+
+    :param lv_name: str: name of logical volume to be created.
+    :param volume_group: str: Name of volume group to use for the new volume.
+    :param size: str: Size of logical volume to create (100% if not supplied)
+    :raises subprocess.CalledProcessError: in the event that the lvcreate fails.
+    '''
+    if size:
+        check_call([
+            'lvcreate',
+            '--yes',
+            '-L',
+            '{}'.format(size),
+            '-n', lv_name, volume_group
+        ])
+    # create the lv with all the space available, this is needed because the
+    # command invocation differs when no explicit size is given
+    else:
+        check_call([
+            'lvcreate',
+            '--yes',
+            '-l',
+            '100%FREE',
+            '-n', lv_name, volume_group
+        ])
diff --git a/ceph-proxy/charmhelpers/contrib/storage/linux/utils.py b/ceph-proxy/charmhelpers/contrib/storage/linux/utils.py
new file mode 100644
index 00000000..4d05b121
--- /dev/null
+++ b/ceph-proxy/charmhelpers/contrib/storage/linux/utils.py
@@ -0,0 +1,143 @@
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import re
+from stat import S_ISBLK
+
+from subprocess import (
+    CalledProcessError,
+    check_call,
+    check_output,
+    call
+)
+
+from charmhelpers.core.hookenv import (
+    log,
+    WARNING,
+    INFO
+)
+
+
+def _luks_uuid(dev):
+    """
+    Check to see if dev is a LUKS encrypted volume, returning the UUID
+    of volume if it is.
+
+    :param: dev: path to block device to check.
+    :returns: str. UUID of LUKS device or None if not a LUKS device
+    """
+    try:
+        cmd = ['cryptsetup', 'luksUUID', dev]
+        return check_output(cmd).decode('UTF-8').strip()
+    except CalledProcessError:
+        return None
+
+
+def is_luks_device(dev):
+    """
+    Determine if dev is a LUKS-formatted block device.
+
+    :param: dev: A full path to a block device to check for LUKS header
+    presence
+    :returns: boolean: indicates whether the device has a LUKS header.
+    """
+    return True if _luks_uuid(dev) else False
+
+
+def is_mapped_luks_device(dev):
+    """
+    Determine if dev is a mapped LUKS device
+    :param: dev: A full path to a block device to be checked
+    :returns: boolean: indicates whether a device is mapped
+    """
+    _, dirs, _ = next(os.walk(
+        '/sys/class/block/{}/holders/'
+        .format(os.path.basename(os.path.realpath(dev))))
+    )
+    is_held = len(dirs) > 0
+    return is_held and is_luks_device(dev)
+
+
+def is_block_device(path):
+    '''
+    Confirm device at path is a valid block device node.
+
+    :returns: boolean: True if path is a block device, False if not.
+    '''
+    if not os.path.exists(path):
+        return False
+    return S_ISBLK(os.stat(path).st_mode)
+
+
+def zap_disk(block_device):
+    '''
+    Clear a block device of its partition table. Relies on sgdisk, which is
+    installed as part of the 'gdisk' package in Ubuntu.
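+
+    A minimal illustrative call (the device path is a placeholder; this
+    destroys all data on the device)::
+
+        zap_disk('/dev/vdb')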
+
+    :param block_device: str: Full path of block device to clean.
+    '''
+    # https://github.com/ceph/ceph/commit/fdd7f8d83afa25c4e09aaedd90ab93f3b64a677b
+    # sometimes sgdisk exits non-zero; this is OK, dd will clean up
+    call(['sgdisk', '--zap-all', '--', block_device])
+    call(['sgdisk', '--clear', '--mbrtogpt', '--', block_device])
+    dev_end = check_output(['blockdev', '--getsz',
+                            block_device]).decode('UTF-8')
+    gpt_end = int(dev_end.split()[0]) - 100
+    check_call(['dd', 'if=/dev/zero', 'of=%s' % (block_device),
+                'bs=1M', 'count=1'])
+    check_call(['dd', 'if=/dev/zero', 'of=%s' % (block_device),
+                'bs=512', 'count=100', 'seek=%s' % (gpt_end)])
+
+
+def is_device_mounted(device):
+    '''Given a device path, return True if that device is mounted, and False
+    if it isn't.
+
+    :param device: str: Full path of the device to check.
+    :returns: boolean: True if the path represents a mounted device, False if
+        it doesn't.
+    '''
+    try:
+        out = check_output(['lsblk', '-P', device]).decode('UTF-8')
+    except Exception:
+        return False
+    return bool(re.search(r'MOUNTPOINT=".+"', out))
+
+
+def mkfs_xfs(device, force=False, inode_size=None):
+    """Format device with XFS filesystem.
+
+    By default this should fail if the device already has a filesystem on it.
+
+    :param device: Full path to device to format
+    :ptype device: str
+    :param force: Force operation
+    :ptype force: boolean
+    :param inode_size: XFS inode size in bytes; if set to 0 or None,
+        the value used will be the XFS system default
+    :ptype inode_size: int
+    """
+    cmd = ['mkfs.xfs']
+    if force:
+        cmd.append("-f")
+
+    if inode_size:
+        if inode_size >= 256 and inode_size <= 2048:
+            cmd += ['-i', "size={}".format(inode_size)]
+        else:
+            log("Config value xfs-inode-size={} is invalid. Using system default.".format(inode_size), level=WARNING)
+    else:
+        log("Using XFS filesystem with system default inode size.", level=INFO)
+
+    cmd += [device]
+    check_call(cmd)
diff --git a/ceph-proxy/charmhelpers/core/__init__.py b/ceph-proxy/charmhelpers/core/__init__.py
new file mode 100644
index 00000000..d7567b86
--- /dev/null
+++ b/ceph-proxy/charmhelpers/core/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/ceph-proxy/charmhelpers/core/decorators.py b/ceph-proxy/charmhelpers/core/decorators.py
new file mode 100644
index 00000000..e7e95d17
--- /dev/null
+++ b/ceph-proxy/charmhelpers/core/decorators.py
@@ -0,0 +1,93 @@
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + +# +# Copyright 2014 Canonical Ltd. +# +# Authors: +# Edward Hope-Morley +# + +import time + +from charmhelpers.core.hookenv import ( + log, + INFO, +) + + +def retry_on_exception(num_retries, base_delay=0, exc_type=Exception): + """If the decorated function raises exception exc_type, allow num_retries + retry attempts before raise the exception. + """ + def _retry_on_exception_inner_1(f): + def _retry_on_exception_inner_2(*args, **kwargs): + retries = num_retries + multiplier = 1 + while True: + try: + return f(*args, **kwargs) + except exc_type: + if not retries: + raise + + delay = base_delay * multiplier + multiplier += 1 + log("Retrying '%s' %d more times (delay=%s)" % + (f.__name__, retries, delay), level=INFO) + retries -= 1 + if delay: + time.sleep(delay) + + return _retry_on_exception_inner_2 + + return _retry_on_exception_inner_1 + + +def retry_on_predicate(num_retries, predicate_fun, base_delay=0): + """Retry based on return value + + The return value of the decorated function is passed to the given predicate_fun. If the + result of the predicate is False, retry the decorated function up to num_retries times + + An exponential backoff up to base_delay^num_retries seconds can be introduced by setting + base_delay to a nonzero value. The default is to run with a zero (i.e. no) delay + + :param num_retries: Max. number of retries to perform + :type num_retries: int + :param predicate_fun: Predicate function to determine if a retry is necessary + :type predicate_fun: callable + :param base_delay: Starting value in seconds for exponential delay, defaults to 0 (no delay) + :type base_delay: float + """ + def _retry_on_pred_inner_1(f): + def _retry_on_pred_inner_2(*args, **kwargs): + retries = num_retries + multiplier = 1 + delay = base_delay + while True: + result = f(*args, **kwargs) + if predicate_fun(result) or retries <= 0: + return result + delay *= multiplier + multiplier += 1 + log("Result {}, retrying '{}' {} more times (delay={})".format( + result, f.__name__, retries, delay), level=INFO) + retries -= 1 + if delay: + time.sleep(delay) + + return _retry_on_pred_inner_2 + + return _retry_on_pred_inner_1 diff --git a/ceph-proxy/charmhelpers/core/files.py b/ceph-proxy/charmhelpers/core/files.py new file mode 100644 index 00000000..fdd82b75 --- /dev/null +++ b/ceph-proxy/charmhelpers/core/files.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__author__ = 'Jorge Niedbalski ' + +import os +import subprocess + + +def sed(filename, before, after, flags='g'): + """ + Search and replaces the given pattern on filename. + + :param filename: relative or absolute file path. 
+ :param before: expression to be replaced (see 'man sed') + :param after: expression to replace with (see 'man sed') + :param flags: sed-compatible regex flags in example, to make + the search and replace case insensitive, specify ``flags="i"``. + The ``g`` flag is always specified regardless, so you do not + need to remember to include it when overriding this parameter. + :returns: If the sed command exit code was zero then return, + otherwise raise CalledProcessError. + """ + expression = r's/{0}/{1}/{2}'.format(before, + after, flags) + + return subprocess.check_call(["sed", "-i", "-r", "-e", + expression, + os.path.expanduser(filename)]) diff --git a/ceph-proxy/charmhelpers/core/fstab.py b/ceph-proxy/charmhelpers/core/fstab.py new file mode 100644 index 00000000..d9fa9152 --- /dev/null +++ b/ceph-proxy/charmhelpers/core/fstab.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import io +import os + +__author__ = 'Jorge Niedbalski R. ' + + +class Fstab(io.FileIO): + """This class extends file in order to implement a file reader/writer + for file `/etc/fstab` + """ + + class Entry(object): + """Entry class represents a non-comment line on the `/etc/fstab` file + """ + def __init__(self, device, mountpoint, filesystem, + options, d=0, p=0): + self.device = device + self.mountpoint = mountpoint + self.filesystem = filesystem + + if not options: + options = "defaults" + + self.options = options + self.d = int(d) + self.p = int(p) + + def __eq__(self, o): + return str(self) == str(o) + + def __str__(self): + return "{} {} {} {} {} {}".format(self.device, + self.mountpoint, + self.filesystem, + self.options, + self.d, + self.p) + + DEFAULT_PATH = os.path.join(os.path.sep, 'etc', 'fstab') + + def __init__(self, path=None): + if path: + self._path = path + else: + self._path = self.DEFAULT_PATH + super(Fstab, self).__init__(self._path, 'rb+') + + def _hydrate_entry(self, line): + # NOTE: use split with no arguments to split on any + # whitespace including tabs + return Fstab.Entry(*filter( + lambda x: x not in ('', None), + line.strip("\n").split())) + + @property + def entries(self): + self.seek(0) + for line in self.readlines(): + line = line.decode('us-ascii') + try: + if line.strip() and not line.strip().startswith("#"): + yield self._hydrate_entry(line) + except ValueError: + pass + + def get_entry_by_attr(self, attr, value): + for entry in self.entries: + e_attr = getattr(entry, attr) + if e_attr == value: + return entry + return None + + def add_entry(self, entry): + if self.get_entry_by_attr('device', entry.device): + return False + + self.write((str(entry) + '\n').encode('us-ascii')) + self.truncate() + return entry + + def remove_entry(self, entry): + self.seek(0) + + lines = [l.decode('us-ascii') for l in self.readlines()] + + found = False + for index, line in enumerate(lines): + if line.strip() and not line.strip().startswith("#"): + if self._hydrate_entry(line) == entry: + 
found = True + break + + if not found: + return False + + lines.remove(line) + + self.seek(0) + self.write(''.join(lines).encode('us-ascii')) + self.truncate() + return True + + @classmethod + def remove_by_mountpoint(cls, mountpoint, path=None): + fstab = cls(path=path) + entry = fstab.get_entry_by_attr('mountpoint', mountpoint) + if entry: + return fstab.remove_entry(entry) + return False + + @classmethod + def add(cls, device, mountpoint, filesystem, options=None, path=None): + return cls(path=path).add_entry(Fstab.Entry(device, + mountpoint, filesystem, + options=options)) diff --git a/ceph-proxy/charmhelpers/core/hookenv.py b/ceph-proxy/charmhelpers/core/hookenv.py new file mode 100644 index 00000000..370c3e8f --- /dev/null +++ b/ceph-proxy/charmhelpers/core/hookenv.py @@ -0,0 +1,1636 @@ +# Copyright 2013-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"Interactions with the Juju environment" +# +# Authors: +# Charm Helpers Developers + +import copy +from distutils.version import LooseVersion +from enum import Enum +from functools import wraps +from collections import namedtuple, UserDict +import glob +import os +import json +import yaml +import re +import subprocess +import sys +import errno +import tempfile +from subprocess import CalledProcessError + +from charmhelpers import deprecate + + +CRITICAL = "CRITICAL" +ERROR = "ERROR" +WARNING = "WARNING" +INFO = "INFO" +DEBUG = "DEBUG" +TRACE = "TRACE" +MARKER = object() +SH_MAX_ARG = 131071 + + +RANGE_WARNING = ('Passing NO_PROXY string that includes a cidr. ' + 'This may not be compatible with software you are ' + 'running in your shell.') + + +class WORKLOAD_STATES(Enum): + ACTIVE = 'active' + BLOCKED = 'blocked' + MAINTENANCE = 'maintenance' + WAITING = 'waiting' + + +cache = {} + + +def cached(func): + """Cache return values for multiple executions of func + args + + For example:: + + @cached + def unit_get(attribute): + pass + + unit_get('test') + + will cache the result of unit_get + 'test' for future calls. + """ + @wraps(func) + def wrapper(*args, **kwargs): + global cache + key = json.dumps((func, args, kwargs), sort_keys=True, default=str) + try: + return cache[key] + except KeyError: + pass # Drop out of the exception handler scope. 
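+        # Cache miss: call the wrapped function and memoize its result for
+        # subsequent identical invocations.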
+ res = func(*args, **kwargs) + cache[key] = res + return res + wrapper._wrapped = func + return wrapper + + +def flush(key): + """Flushes any entries from function cache where the + key is found in the function+args """ + flush_list = [] + for item in cache: + if key in item: + flush_list.append(item) + for item in flush_list: + del cache[item] + + +def log(message, level=None): + """Write a message to the juju log""" + command = ['juju-log'] + if level: + command += ['-l', level] + if not isinstance(message, str): + message = repr(message) + command += [message[:SH_MAX_ARG]] + # Missing juju-log should not cause failures in unit tests + # Send log output to stderr + try: + subprocess.call(command) + except OSError as e: + if e.errno == errno.ENOENT: + if level: + message = "{}: {}".format(level, message) + message = "juju-log: {}".format(message) + print(message, file=sys.stderr) + else: + raise + + +def function_log(message): + """Write a function progress message""" + command = ['function-log'] + if not isinstance(message, str): + message = repr(message) + command += [message[:SH_MAX_ARG]] + # Missing function-log should not cause failures in unit tests + # Send function_log output to stderr + try: + subprocess.call(command) + except OSError as e: + if e.errno == errno.ENOENT: + message = "function-log: {}".format(message) + print(message, file=sys.stderr) + else: + raise + + +class Serializable(UserDict): + """Wrapper, an object that can be serialized to yaml or json""" + + def __init__(self, obj): + # wrap the object + UserDict.__init__(self) + self.data = obj + + def __getattr__(self, attr): + # See if this object has attribute. + if attr in ("json", "yaml", "data"): + return self.__dict__[attr] + # Check for attribute in wrapped object. + got = getattr(self.data, attr, MARKER) + if got is not MARKER: + return got + # Proxy to the wrapped object via dict interface. + try: + return self.data[attr] + except KeyError: + raise AttributeError(attr) + + def __getstate__(self): + # Pickle as a standard dictionary. + return self.data + + def __setstate__(self, state): + # Unpickle into our wrapper. 
+ self.data = state + + def json(self): + """Serialize the object to json""" + return json.dumps(self.data) + + def yaml(self): + """Serialize the object to yaml""" + return yaml.dump(self.data) + + +def execution_environment(): + """A convenient bundling of the current execution context""" + context = {} + context['conf'] = config() + if relation_id(): + context['reltype'] = relation_type() + context['relid'] = relation_id() + context['rel'] = relation_get() + context['unit'] = local_unit() + context['rels'] = relations() + context['env'] = os.environ + return context + + +def in_relation_hook(): + """Determine whether we're running in a relation hook""" + return 'JUJU_RELATION' in os.environ + + +def relation_type(): + """The scope for the current relation hook""" + return os.environ.get('JUJU_RELATION', None) + + +@cached +def relation_id(relation_name=None, service_or_unit=None): + """The relation ID for the current or a specified relation""" + if not relation_name and not service_or_unit: + return os.environ.get('JUJU_RELATION_ID', None) + elif relation_name and service_or_unit: + service_name = service_or_unit.split('/')[0] + for relid in relation_ids(relation_name): + remote_service = remote_service_name(relid) + if remote_service == service_name: + return relid + else: + raise ValueError('Must specify neither or both of relation_name and service_or_unit') + + +def departing_unit(): + """The departing unit for the current relation hook. + + Available since juju 2.8. + + :returns: the departing unit, or None if the information isn't available. + :rtype: Optional[str] + """ + return os.environ.get('JUJU_DEPARTING_UNIT', None) + + +def local_unit(): + """Local unit ID""" + return os.environ['JUJU_UNIT_NAME'] + + +def remote_unit(): + """The remote unit for the current relation hook""" + return os.environ.get('JUJU_REMOTE_UNIT', None) + + +def application_name(): + """ + The name of the deployed application this unit belongs to. + """ + return local_unit().split('/')[0] + + +def service_name(): + """ + .. deprecated:: 0.19.1 + Alias for :func:`application_name`. + """ + return application_name() + + +def model_name(): + """ + Name of the model that this unit is deployed in. + """ + return os.environ['JUJU_MODEL_NAME'] + + +def model_uuid(): + """ + UUID of the model that this unit is deployed in. + """ + return os.environ['JUJU_MODEL_UUID'] + + +def principal_unit(): + """Returns the principal unit of this unit, otherwise None""" + # Juju 2.2 and above provides JUJU_PRINCIPAL_UNIT + principal_unit = os.environ.get('JUJU_PRINCIPAL_UNIT', None) + # If it's empty, then this unit is the principal + if principal_unit == '': + return os.environ['JUJU_UNIT_NAME'] + elif principal_unit is not None: + return principal_unit + # For Juju 2.1 and below, let's try work out the principle unit by + # the various charms' metadata.yaml. 
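+    # A related unit whose metadata.yaml does not mark it as a subordinate
+    # is assumed to be the principal.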
+ for reltype in relation_types(): + for rid in relation_ids(reltype): + for unit in related_units(rid): + md = _metadata_unit(unit) + if not md: + continue + subordinate = md.pop('subordinate', None) + if not subordinate: + return unit + return None + + +@cached +def remote_service_name(relid=None): + """The remote service name for a given relation-id (or the current relation)""" + if relid is None: + unit = remote_unit() + else: + units = related_units(relid) + unit = units[0] if units else None + return unit.split('/')[0] if unit else None + + +def hook_name(): + """The name of the currently executing hook""" + return os.environ.get('JUJU_HOOK_NAME', os.path.basename(sys.argv[0])) + + +class Config(dict): + """A dictionary representation of the charm's config.yaml, with some + extra features: + + - See which values in the dictionary have changed since the previous hook. + - For values that have changed, see what the previous value was. + - Store arbitrary data for use in a later hook. + + NOTE: Do not instantiate this object directly - instead call + ``hookenv.config()``, which will return an instance of :class:`Config`. + + Example usage:: + + >>> # inside a hook + >>> from charmhelpers.core import hookenv + >>> config = hookenv.config() + >>> config['foo'] + 'bar' + >>> # store a new key/value for later use + >>> config['mykey'] = 'myval' + + + >>> # user runs `juju set mycharm foo=baz` + >>> # now we're inside subsequent config-changed hook + >>> config = hookenv.config() + >>> config['foo'] + 'baz' + >>> # test to see if this val has changed since last hook + >>> config.changed('foo') + True + >>> # what was the previous value? + >>> config.previous('foo') + 'bar' + >>> # keys/values that we add are preserved across hooks + >>> config['mykey'] + 'myval' + + """ + CONFIG_FILE_NAME = '.juju-persistent-config' + + def __init__(self, *args, **kw): + super(Config, self).__init__(*args, **kw) + self.implicit_save = True + self._prev_dict = None + self.path = os.path.join(charm_dir(), Config.CONFIG_FILE_NAME) + if os.path.exists(self.path) and os.stat(self.path).st_size: + self.load_previous() + atexit(self._implicit_save) + + def load_previous(self, path=None): + """Load previous copy of config from disk. + + In normal usage you don't need to call this method directly - it + is called automatically at object initialization. + + :param path: + + File path from which to load the previous config. If `None`, + config is loaded from the default location. If `path` is + specified, subsequent `save()` calls will write to the same + path. + + """ + self.path = path or self.path + with open(self.path) as f: + try: + self._prev_dict = json.load(f) + except ValueError as e: + log('Found but was unable to parse previous config data, ' + 'ignoring which will report all values as changed - {}' + .format(str(e)), level=ERROR) + return + for k, v in copy.deepcopy(self._prev_dict).items(): + if k not in self: + self[k] = v + + def changed(self, key): + """Return True if the current value for this key is different from + the previous value. + + """ + if self._prev_dict is None: + return True + return self.previous(key) != self.get(key) + + def previous(self, key): + """Return previous value for this key, or None if there + is no previous value. + + """ + if self._prev_dict: + return self._prev_dict.get(key) + return None + + def save(self): + """Save this config to disk. 
+ + If the charm is using the :mod:`Services Framework ` + or :meth:'@hook ' decorator, this + is called automatically at the end of successful hook execution. + Otherwise, it should be called directly by user code. + + To disable automatic saves, set ``implicit_save=False`` on this + instance. + + """ + with open(self.path, 'w') as f: + os.fchmod(f.fileno(), 0o600) + json.dump(self, f) + + def _implicit_save(self): + if self.implicit_save: + self.save() + + +_cache_config = None + + +def config(scope=None): + """ + Get the juju charm configuration (scope==None) or individual key, + (scope=str). The returned value is a Python data structure loaded as + JSON from the Juju config command. + + :param scope: If set, return the value for the specified key. + :type scope: Optional[str] + :returns: Either the whole config as a Config, or a key from it. + :rtype: Any + """ + global _cache_config + config_cmd_line = ['config-get', '--all', '--format=json'] + try: + if _cache_config is None: + config_data = json.loads( + subprocess.check_output(config_cmd_line).decode('UTF-8')) + _cache_config = Config(config_data) + if scope is not None: + return _cache_config.get(scope) + return _cache_config + except (json.decoder.JSONDecodeError, UnicodeDecodeError) as e: + log('Unable to parse output from config-get: config_cmd_line="{}" ' + 'message="{}"' + .format(config_cmd_line, str(e)), level=ERROR) + return None + + +@cached +def relation_get(attribute=None, unit=None, rid=None, app=None): + """Get relation information""" + _args = ['relation-get', '--format=json'] + if app is not None: + if unit is not None: + raise ValueError("Cannot use both 'unit' and 'app'") + _args.append('--app') + if rid: + _args.append('-r') + _args.append(rid) + _args.append(attribute or '-') + # unit or application name + if unit or app: + _args.append(unit or app) + try: + return json.loads(subprocess.check_output(_args).decode('UTF-8')) + except ValueError: + return None + except CalledProcessError as e: + if e.returncode == 2: + return None + raise + + +@cached +def _relation_set_accepts_file(): + """Return True if the juju relation-set command accepts a file. + + Cache the result as it won't change during the execution of a hook, and + thus we can make relation_set() more efficient by only checking for the + first relation_set() call. + + :returns: True if relation_set accepts a file. + :rtype: bool + :raises: subprocess.CalledProcessError if the check fails. + """ + return "--file" in subprocess.check_output( + ["relation-set", "--help"], universal_newlines=True) + + +def relation_set(relation_id=None, relation_settings=None, app=False, **kwargs): + """Set relation information for the current unit""" + relation_settings = relation_settings if relation_settings else {} + relation_cmd_line = ['relation-set'] + if app: + relation_cmd_line.append('--app') + if relation_id is not None: + relation_cmd_line.extend(('-r', relation_id)) + settings = relation_settings.copy() + settings.update(kwargs) + for key, value in settings.items(): + # Force value to be a string: it always should, but some call + # sites pass in things like dicts or numbers. + if value is not None: + settings[key] = "{}".format(value) + if _relation_set_accepts_file(): + # --file was introduced in Juju 1.23.2. Use it by default if + # available, since otherwise we'll break if the relation data is + # too big. Ideally we should tell relation-set to read the data from + # stdin, but that feature is broken in 1.23.2: Bug #1454678. 
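+        # Write the settings out as YAML to a temporary file and pass the
+        # file name to relation-set; the file is removed once the call
+        # returns.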
+ with tempfile.NamedTemporaryFile(delete=False) as settings_file: + settings_file.write(yaml.safe_dump(settings).encode("utf-8")) + subprocess.check_call( + relation_cmd_line + ["--file", settings_file.name]) + os.remove(settings_file.name) + else: + for key, value in settings.items(): + if value is None: + relation_cmd_line.append('{}='.format(key)) + else: + relation_cmd_line.append('{}={}'.format(key, value)) + subprocess.check_call(relation_cmd_line) + # Flush cache of any relation-gets for local unit + flush(local_unit()) + + +def relation_clear(r_id=None): + ''' Clears any relation data already set on relation r_id ''' + settings = relation_get(rid=r_id, + unit=local_unit()) + for setting in settings: + if setting not in ['public-address', 'private-address']: + settings[setting] = None + relation_set(relation_id=r_id, + **settings) + + +@cached +def relation_ids(reltype=None): + """A list of relation_ids""" + reltype = reltype or relation_type() + relid_cmd_line = ['relation-ids', '--format=json'] + if reltype is not None: + relid_cmd_line.append(reltype) + return json.loads( + subprocess.check_output(relid_cmd_line).decode('UTF-8')) or [] + return [] + + +@cached +def related_units(relid=None): + """A list of related units""" + relid = relid or relation_id() + units_cmd_line = ['relation-list', '--format=json'] + if relid is not None: + units_cmd_line.extend(('-r', relid)) + return json.loads( + subprocess.check_output(units_cmd_line).decode('UTF-8')) or [] + + +def expected_peer_units(): + """Get a generator for units we expect to join peer relation based on + goal-state. + + The local unit is excluded from the result to make it easy to gauge + completion of all peers joining the relation with existing hook tools. + + Example usage: + log('peer {} of {} joined peer relation' + .format(len(related_units()), + len(list(expected_peer_units())))) + + This function will raise NotImplementedError if used with juju versions + without goal-state support. + + :returns: iterator + :rtype: types.GeneratorType + :raises: NotImplementedError + """ + if not has_juju_version("2.4.0"): + # goal-state first appeared in 2.4.0. + raise NotImplementedError("goal-state") + _goal_state = goal_state() + return (key for key in _goal_state['units'] + if '/' in key and key != local_unit()) + + +def expected_related_units(reltype=None): + """Get a generator for units we expect to join relation based on + goal-state. + + Note that you can not use this function for the peer relation, take a look + at expected_peer_units() for that. + + This function will raise KeyError if you request information for a + relation type for which juju goal-state does not have information. It will + raise NotImplementedError if used with juju versions without goal-state + support. + + Example usage: + log('participant {} of {} joined relation {}' + .format(len(related_units()), + len(list(expected_related_units())), + relation_type())) + + :param reltype: Relation type to list data for, default is to list data for + the relation type we are currently executing a hook for. + :type reltype: str + :returns: iterator + :rtype: types.GeneratorType + :raises: KeyError, NotImplementedError + """ + if not has_juju_version("2.4.4"): + # goal-state existed in 2.4.0, but did not list individual units to + # join a relation in 2.4.1 through 2.4.3. 
(LP: #1794739) + raise NotImplementedError("goal-state relation unit count") + reltype = reltype or relation_type() + _goal_state = goal_state() + return (key for key in _goal_state['relations'][reltype] if '/' in key) + + +@cached +def relation_for_unit(unit=None, rid=None): + """Get the json representation of a unit's relation""" + unit = unit or remote_unit() + relation = relation_get(unit=unit, rid=rid) + for key in relation: + if key.endswith('-list'): + relation[key] = relation[key].split() + relation['__unit__'] = unit + return relation + + +@cached +def relations_for_id(relid=None): + """Get relations of a specific relation ID""" + relation_data = [] + relid = relid or relation_ids() + for unit in related_units(relid): + unit_data = relation_for_unit(unit, relid) + unit_data['__relid__'] = relid + relation_data.append(unit_data) + return relation_data + + +@cached +def relations_of_type(reltype=None): + """Get relations of a specific type""" + relation_data = [] + reltype = reltype or relation_type() + for relid in relation_ids(reltype): + for relation in relations_for_id(relid): + relation['__relid__'] = relid + relation_data.append(relation) + return relation_data + + +@cached +def metadata(): + """Get the current charm metadata.yaml contents as a python object""" + with open(os.path.join(charm_dir(), 'metadata.yaml')) as md: + return yaml.safe_load(md) + + +def _metadata_unit(unit): + """Given the name of a unit (e.g. apache2/0), get the unit charm's + metadata.yaml. Very similar to metadata() but allows us to inspect + other units. Unit needs to be co-located, such as a subordinate or + principal/primary. + + :returns: metadata.yaml as a python object. + + """ + basedir = os.sep.join(charm_dir().split(os.sep)[:-2]) + unitdir = 'unit-{}'.format(unit.replace(os.sep, '-')) + joineddir = os.path.join(basedir, unitdir, 'charm', 'metadata.yaml') + if not os.path.exists(joineddir): + return None + with open(joineddir) as md: + return yaml.safe_load(md) + + +@cached +def relation_types(): + """Get a list of relation types supported by this charm""" + rel_types = [] + md = metadata() + for key in ('provides', 'requires', 'peers'): + section = md.get(key) + if section: + rel_types.extend(section.keys()) + return rel_types + + +@cached +def peer_relation_id(): + '''Get the peers relation id if a peers relation has been joined, else None.''' + md = metadata() + section = md.get('peers') + if section: + for key in section: + relids = relation_ids(key) + if relids: + return relids[0] + return None + + +@cached +def relation_to_interface(relation_name): + """ + Given the name of a relation, return the interface that relation uses. + + :returns: The interface name, or ``None``. + """ + return relation_to_role_and_interface(relation_name)[1] + + +@cached +def relation_to_role_and_interface(relation_name): + """ + Given the name of a relation, return the role and the name of the interface + that relation uses (where role is one of ``provides``, ``requires``, or ``peers``). + + :returns: A tuple containing ``(role, interface)``, or ``(None, None)``. 
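+
+    Illustrative example: for a charm whose metadata.yaml declares a
+    ``requires`` relation named ``database`` with ``interface: mysql``,
+    ``relation_to_role_and_interface('database')`` would return
+    ``('requires', 'mysql')``.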
+ """ + _metadata = metadata() + for role in ('provides', 'requires', 'peers'): + interface = _metadata.get(role, {}).get(relation_name, {}).get('interface') + if interface: + return role, interface + return None, None + + +@cached +def role_and_interface_to_relations(role, interface_name): + """ + Given a role and interface name, return a list of relation names for the + current charm that use that interface under that role (where role is one + of ``provides``, ``requires``, or ``peers``). + + :returns: A list of relation names. + """ + _metadata = metadata() + results = [] + for relation_name, relation in _metadata.get(role, {}).items(): + if relation['interface'] == interface_name: + results.append(relation_name) + return results + + +@cached +def interface_to_relations(interface_name): + """ + Given an interface, return a list of relation names for the current + charm that use that interface. + + :returns: A list of relation names. + """ + results = [] + for role in ('provides', 'requires', 'peers'): + results.extend(role_and_interface_to_relations(role, interface_name)) + return results + + +@cached +def charm_name(): + """Get the name of the current charm as is specified on metadata.yaml""" + return metadata().get('name') + + +@cached +def relations(): + """Get a nested dictionary of relation data for all related units""" + rels = {} + for reltype in relation_types(): + relids = {} + for relid in relation_ids(reltype): + units = {local_unit(): relation_get(unit=local_unit(), rid=relid)} + for unit in related_units(relid): + reldata = relation_get(unit=unit, rid=relid) + units[unit] = reldata + relids[relid] = units + rels[reltype] = relids + return rels + + +@cached +def is_relation_made(relation, keys='private-address'): + ''' + Determine whether a relation is established by checking for + presence of key(s). If a list of keys is provided, they + must all be present for the relation to be identified as made + ''' + if isinstance(keys, str): + keys = [keys] + for r_id in relation_ids(relation): + for unit in related_units(r_id): + context = {} + for k in keys: + context[k] = relation_get(k, rid=r_id, + unit=unit) + if None not in context.values(): + return True + return False + + +def _port_op(op_name, port, protocol="TCP"): + """Open or close a service network port""" + _args = [op_name] + icmp = protocol.upper() == "ICMP" + if icmp: + _args.append(protocol) + else: + _args.append('{}/{}'.format(port, protocol)) + try: + subprocess.check_call(_args) + except subprocess.CalledProcessError: + # Older Juju pre 2.3 doesn't support ICMP + # so treat it as a no-op if it fails. 
+ if not icmp: + raise + + +def open_port(port, protocol="TCP"): + """Open a service network port""" + _port_op('open-port', port, protocol) + + +def close_port(port, protocol="TCP"): + """Close a service network port""" + _port_op('close-port', port, protocol) + + +def open_ports(start, end, protocol="TCP"): + """Opens a range of service network ports""" + _args = ['open-port'] + _args.append('{}-{}/{}'.format(start, end, protocol)) + subprocess.check_call(_args) + + +def close_ports(start, end, protocol="TCP"): + """Close a range of service network ports""" + _args = ['close-port'] + _args.append('{}-{}/{}'.format(start, end, protocol)) + subprocess.check_call(_args) + + +def opened_ports(): + """Get the opened ports + + *Note that this will only show ports opened in a previous hook* + + :returns: Opened ports as a list of strings: ``['8080/tcp', '8081-8083/tcp']`` + """ + _args = ['opened-ports', '--format=json'] + return json.loads(subprocess.check_output(_args).decode('UTF-8')) + + +@cached +def unit_get(attribute): + """Get the unit ID for the remote unit""" + _args = ['unit-get', '--format=json', attribute] + try: + return json.loads(subprocess.check_output(_args).decode('UTF-8')) + except ValueError: + return None + + +def unit_public_ip(): + """Get this unit's public IP address""" + return unit_get('public-address') + + +def unit_private_ip(): + """Get this unit's private IP address""" + return unit_get('private-address') + + +@cached +def storage_get(attribute=None, storage_id=None): + """Get storage attributes""" + _args = ['storage-get', '--format=json'] + if storage_id: + _args.extend(('-s', storage_id)) + if attribute: + _args.append(attribute) + try: + return json.loads(subprocess.check_output(_args).decode('UTF-8')) + except ValueError: + return None + + +@cached +def storage_list(storage_name=None): + """List the storage IDs for the unit""" + _args = ['storage-list', '--format=json'] + if storage_name: + _args.append(storage_name) + try: + return json.loads(subprocess.check_output(_args).decode('UTF-8')) + except ValueError: + return None + except OSError as e: + import errno + if e.errno == errno.ENOENT: + # storage-list does not exist + return [] + raise + + +class UnregisteredHookError(Exception): + """Raised when an undefined hook is called""" + pass + + +class Hooks(object): + """A convenient handler for hook functions. + + Example:: + + hooks = Hooks() + + # register a hook, taking its name from the function name + @hooks.hook() + def install(): + pass # your code here + + # register a hook, providing a custom hook name + @hooks.hook("config-changed") + def config_changed(): + pass # your code here + + if __name__ == "__main__": + # execute a hook based on the name the program is called by + hooks.execute(sys.argv) + """ + + def __init__(self, config_save=None): + super(Hooks, self).__init__() + self._hooks = {} + + # For unknown reasons, we allow the Hooks constructor to override + # config().implicit_save. 
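+
+        # A minimal usage sketch (not part of charmhelpers): charms that
+        # manage config persistence themselves can opt out of the implicit
+        # save with:
+        #
+        #     hooks = Hooks(config_save=False)
+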
+ if config_save is not None: + config().implicit_save = config_save + + def register(self, name, function): + """Register a hook""" + self._hooks[name] = function + + def execute(self, args): + """Execute a registered hook based on args[0]""" + _run_atstart() + hook_name = os.path.basename(args[0]) + if hook_name in self._hooks: + try: + self._hooks[hook_name]() + except SystemExit as x: + if x.code is None or x.code == 0: + _run_atexit() + raise + _run_atexit() + else: + raise UnregisteredHookError(hook_name) + + def hook(self, *hook_names): + """Decorator, registering them as hooks""" + def wrapper(decorated): + for hook_name in hook_names: + self.register(hook_name, decorated) + else: + self.register(decorated.__name__, decorated) + if '_' in decorated.__name__: + self.register( + decorated.__name__.replace('_', '-'), decorated) + return decorated + return wrapper + + +class NoNetworkBinding(Exception): + pass + + +def charm_dir(): + """Return the root directory of the current charm""" + d = os.environ.get('JUJU_CHARM_DIR') + if d is not None: + return d + return os.environ.get('CHARM_DIR') + + +def cmd_exists(cmd): + """Return True if the specified cmd exists in the path""" + return any( + os.access(os.path.join(path, cmd), os.X_OK) + for path in os.environ["PATH"].split(os.pathsep) + ) + + +@cached +def action_get(key=None): + """Gets the value of an action parameter, or all key/value param pairs.""" + cmd = ['action-get'] + if key is not None: + cmd.append(key) + cmd.append('--format=json') + action_data = json.loads(subprocess.check_output(cmd).decode('UTF-8')) + return action_data + + +@cached +@deprecate("moved to action_get()", log=log) +def function_get(key=None): + """ + .. deprecated:: + Gets the value of an action parameter, or all key/value param pairs. + """ + cmd = ['function-get'] + # Fallback for older charms. + if not cmd_exists('function-get'): + cmd = ['action-get'] + + if key is not None: + cmd.append(key) + cmd.append('--format=json') + function_data = json.loads(subprocess.check_output(cmd).decode('UTF-8')) + return function_data + + +def action_set(values): + """Sets the values to be returned after the action finishes.""" + cmd = ['action-set'] + for k, v in list(values.items()): + cmd.append('{}={}'.format(k, v)) + subprocess.check_call(cmd) + + +@deprecate("moved to action_set()", log=log) +def function_set(values): + """ + .. deprecated:: + Sets the values to be returned after the function finishes. + """ + cmd = ['function-set'] + # Fallback for older charms. + if not cmd_exists('function-get'): + cmd = ['action-set'] + + for k, v in list(values.items()): + cmd.append('{}={}'.format(k, v)) + subprocess.check_call(cmd) + + +def action_fail(message): + """ + Sets the action status to failed and sets the error message. + + The results set by action_set are preserved. + """ + subprocess.check_call(['action-fail', message]) + + +@deprecate("moved to action_fail()", log=log) +def function_fail(message): + """ + .. deprecated:: + Sets the function status to failed and sets the error message. + + The results set by function_set are preserved. + """ + cmd = ['function-fail'] + # Fallback for older charms. 
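+
+    # A minimal usage sketch (not part of charmhelpers) of an action built
+    # on action_get/action_set/action_fail; the 'device' parameter and the
+    # do_format() helper are hypothetical:
+    #
+    #     device = action_get('device')
+    #     try:
+    #         action_set({'outcome': do_format(device)})
+    #     except Exception as e:
+    #         action_fail('formatting {} failed: {}'.format(device, e))
+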
+ if not cmd_exists('function-fail'): + cmd = ['action-fail'] + cmd.append(message) + + subprocess.check_call(cmd) + + +def action_name(): + """Get the name of the currently executing action.""" + return os.environ.get('JUJU_ACTION_NAME') + + +def function_name(): + """Get the name of the currently executing function.""" + return os.environ.get('JUJU_FUNCTION_NAME') or action_name() + + +def action_uuid(): + """Get the UUID of the currently executing action.""" + return os.environ.get('JUJU_ACTION_UUID') + + +def function_id(): + """Get the ID of the currently executing function.""" + return os.environ.get('JUJU_FUNCTION_ID') or action_uuid() + + +def action_tag(): + """Get the tag for the currently executing action.""" + return os.environ.get('JUJU_ACTION_TAG') + + +def function_tag(): + """Get the tag for the currently executing function.""" + return os.environ.get('JUJU_FUNCTION_TAG') or action_tag() + + +def status_set(workload_state, message, application=False): + """Set the workload state with a message + + Use status-set to set the workload state with a message which is visible + to the user via juju status. If the status-set command is not found then + assume this is juju < 1.23 and juju-log the message instead. + + workload_state -- valid juju workload state. str or WORKLOAD_STATES + message -- status update message + application -- Whether this is an application state set + """ + bad_state_msg = '{!r} is not a valid workload state' + + if isinstance(workload_state, str): + try: + # Convert string to enum. + workload_state = WORKLOAD_STATES[workload_state.upper()] + except KeyError: + raise ValueError(bad_state_msg.format(workload_state)) + + if workload_state not in WORKLOAD_STATES: + raise ValueError(bad_state_msg.format(workload_state)) + + cmd = ['status-set'] + if application: + cmd.append('--application') + cmd.extend([workload_state.value, message]) + try: + ret = subprocess.call(cmd) + if ret == 0: + return + except OSError as e: + if e.errno != errno.ENOENT: + raise + log_message = 'status-set failed: {} {}'.format(workload_state.value, + message) + log(log_message, level='INFO') + + +def status_get(): + """Retrieve the previously set juju workload state and message + + If the status-get command is not found then assume this is juju < 1.23 and + return 'unknown', "" + + """ + cmd = ['status-get', "--format=json", "--include-data"] + try: + raw_status = subprocess.check_output(cmd) + except OSError as e: + if e.errno == errno.ENOENT: + return ('unknown', "") + else: + raise + else: + status = json.loads(raw_status.decode("UTF-8")) + return (status["status"], status["message"]) + + +def translate_exc(from_exc, to_exc): + def inner_translate_exc1(f): + @wraps(f) + def inner_translate_exc2(*args, **kwargs): + try: + return f(*args, **kwargs) + except from_exc: + raise to_exc + + return inner_translate_exc2 + + return inner_translate_exc1 + + +def application_version_set(version): + """Charm authors may trigger this command from any hook to output what + version of the application is running. This could be a package version, + for instance postgres version 9.5. It could also be a build number or + version control revision identifier, for instance git sha 6fb7ba68. 
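+
+    # A minimal usage sketch (not part of charmhelpers) reporting workload
+    # state and version together; get_pkg_version() is hypothetical:
+    #
+    #     status_set(WORKLOAD_STATES.ACTIVE, 'Unit is ready')
+    #     application_version_set(get_pkg_version('ceph-common'))
+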
""" + + cmd = ['application-version-set'] + cmd.append(version) + try: + subprocess.check_call(cmd) + except OSError: + log("Application Version: {}".format(version)) + + +@translate_exc(from_exc=OSError, to_exc=NotImplementedError) +@cached +def goal_state(): + """Juju goal state values""" + cmd = ['goal-state', '--format=json'] + return json.loads(subprocess.check_output(cmd).decode('UTF-8')) + + +@translate_exc(from_exc=OSError, to_exc=NotImplementedError) +def is_leader(): + """Does the current unit hold the juju leadership + + Uses juju to determine whether the current unit is the leader of its peers + """ + cmd = ['is-leader', '--format=json'] + return json.loads(subprocess.check_output(cmd).decode('UTF-8')) + + +@translate_exc(from_exc=OSError, to_exc=NotImplementedError) +def leader_get(attribute=None): + """Juju leader get value(s)""" + cmd = ['leader-get', '--format=json'] + [attribute or '-'] + return json.loads(subprocess.check_output(cmd).decode('UTF-8')) + + +@translate_exc(from_exc=OSError, to_exc=NotImplementedError) +def leader_set(settings=None, **kwargs): + """Juju leader set value(s)""" + # Don't log secrets. + # log("Juju leader-set '%s'" % (settings), level=DEBUG) + cmd = ['leader-set'] + settings = settings or {} + settings.update(kwargs) + for k, v in settings.items(): + if v is None: + cmd.append('{}='.format(k)) + else: + cmd.append('{}={}'.format(k, v)) + subprocess.check_call(cmd) + + +@translate_exc(from_exc=OSError, to_exc=NotImplementedError) +def payload_register(ptype, klass, pid): + """ is used while a hook is running to let Juju know that a + payload has been started.""" + cmd = ['payload-register'] + for x in [ptype, klass, pid]: + cmd.append(x) + subprocess.check_call(cmd) + + +@translate_exc(from_exc=OSError, to_exc=NotImplementedError) +def payload_unregister(klass, pid): + """ is used while a hook is running to let Juju know + that a payload has been manually stopped. The and provided + must match a payload that has been previously registered with juju using + payload-register.""" + cmd = ['payload-unregister'] + for x in [klass, pid]: + cmd.append(x) + subprocess.check_call(cmd) + + +@translate_exc(from_exc=OSError, to_exc=NotImplementedError) +def payload_status_set(klass, pid, status): + """is used to update the current status of a registered payload. + The and provided must match a payload that has been previously + registered with juju using payload-register. The must be one of the + follow: starting, started, stopping, stopped""" + cmd = ['payload-status-set'] + for x in [klass, pid, status]: + cmd.append(x) + subprocess.check_call(cmd) + + +@translate_exc(from_exc=OSError, to_exc=NotImplementedError) +def resource_get(name): + """used to fetch the resource path of the given name. + + must match a name of defined resource in metadata.yaml + + returns either a path or False if resource not available + """ + if not name: + return False + + cmd = ['resource-get', name] + try: + return subprocess.check_output(cmd).decode('UTF-8') + except subprocess.CalledProcessError: + return False + + +@cached +def juju_version(): + """Full version string (eg. 
'1.23.3.1-trusty-amd64')""" + # Per https://bugs.launchpad.net/juju-core/+bug/1455368/comments/1 + jujud = glob.glob('/var/lib/juju/tools/machine-*/jujud')[0] + return subprocess.check_output([jujud, 'version'], + universal_newlines=True).strip() + + +def has_juju_version(minimum_version): + """Return True if the Juju version is at least the provided version""" + return LooseVersion(juju_version()) >= LooseVersion(minimum_version) + + +_atexit = [] +_atstart = [] + + +def atstart(callback, *args, **kwargs): + '''Schedule a callback to run before the main hook. + + Callbacks are run in the order they were added. + + This is useful for modules and classes to perform initialization + and inject behavior. In particular: + + - Run common code before all of your hooks, such as logging + the hook name or interesting relation data. + - Defer object or module initialization that requires a hook + context until we know there actually is a hook context, + making testing easier. + - Rather than requiring charm authors to include boilerplate to + invoke your helper's behavior, have it run automatically if + your object is instantiated or module imported. + + This is not at all useful after your hook framework as been launched. + ''' + global _atstart + _atstart.append((callback, args, kwargs)) + + +def atexit(callback, *args, **kwargs): + '''Schedule a callback to run on successful hook completion. + + Callbacks are run in the reverse order that they were added.''' + _atexit.append((callback, args, kwargs)) + + +def _run_atstart(): + '''Hook frameworks must invoke this before running the main hook body.''' + global _atstart + for callback, args, kwargs in _atstart: + callback(*args, **kwargs) + del _atstart[:] + + +def _run_atexit(): + '''Hook frameworks must invoke this after the main hook body has + successfully completed. Do not invoke it if the hook fails.''' + global _atexit + for callback, args, kwargs in reversed(_atexit): + callback(*args, **kwargs) + del _atexit[:] + + +@translate_exc(from_exc=OSError, to_exc=NotImplementedError) +def network_get_primary_address(binding): + ''' + Deprecated since Juju 2.3; use network_get() + + Retrieve the primary network address for a named binding + + :param binding: string. The name of a relation of extra-binding + :return: string. The primary IP address for the named binding + :raise: NotImplementedError if run on Juju < 2.0 + ''' + cmd = ['network-get', '--primary-address', binding] + try: + response = subprocess.check_output( + cmd, + stderr=subprocess.STDOUT).decode('UTF-8').strip() + except CalledProcessError as e: + if 'no network config found for binding' in e.output.decode('UTF-8'): + raise NoNetworkBinding("No network binding for {}" + .format(binding)) + else: + raise + return response + + +def network_get(endpoint, relation_id=None): + """ + Retrieve the network details for a relation endpoint + + :param endpoint: string. The name of a relation endpoint + :param relation_id: int. The ID of the relation for the current context. + :return: dict. The loaded YAML output of the network-get query. + :raise: NotImplementedError if request not supported by the Juju version. 
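+
+    # A minimal usage sketch (not part of charmhelpers); the endpoint name
+    # 'public' is hypothetical and the key layout assumes typical Juju 2.x
+    # network-get output:
+    #
+    #     info = network_get('public')
+    #     addr = info['bind-addresses'][0]['addresses'][0]['address']
+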
+ """ + if not has_juju_version('2.2'): + raise NotImplementedError(juju_version()) # earlier versions require --primary-address + if relation_id and not has_juju_version('2.3'): + raise NotImplementedError # 2.3 added the -r option + + cmd = ['network-get', endpoint, '--format', 'yaml'] + if relation_id: + cmd.append('-r') + cmd.append(relation_id) + response = subprocess.check_output( + cmd, + stderr=subprocess.STDOUT).decode('UTF-8').strip() + return yaml.safe_load(response) + + +def add_metric(*args, **kwargs): + """Add metric values. Values may be expressed with keyword arguments. For + metric names containing dashes, these may be expressed as one or more + 'key=value' positional arguments. May only be called from the collect-metrics + hook.""" + _args = ['add-metric'] + _kvpairs = [] + _kvpairs.extend(args) + _kvpairs.extend(['{}={}'.format(k, v) for k, v in kwargs.items()]) + _args.extend(sorted(_kvpairs)) + try: + subprocess.check_call(_args) + return + except EnvironmentError as e: + if e.errno != errno.ENOENT: + raise + log_message = 'add-metric failed: {}'.format(' '.join(_kvpairs)) + log(log_message, level='INFO') + + +def meter_status(): + """Get the meter status, if running in the meter-status-changed hook.""" + return os.environ.get('JUJU_METER_STATUS') + + +def meter_info(): + """Get the meter status information, if running in the meter-status-changed + hook.""" + return os.environ.get('JUJU_METER_INFO') + + +def iter_units_for_relation_name(relation_name): + """Iterate through all units in a relation + + Generator that iterates through all the units in a relation and yields + a named tuple with rid and unit field names. + + Usage: + data = [(u.rid, u.unit) + for u in iter_units_for_relation_name(relation_name)] + + :param relation_name: string relation name + :yield: Named Tuple with rid and unit field names + """ + RelatedUnit = namedtuple('RelatedUnit', 'rid, unit') + for rid in relation_ids(relation_name): + for unit in related_units(rid): + yield RelatedUnit(rid, unit) + + +def ingress_address(rid=None, unit=None): + """ + Retrieve the ingress-address from a relation when available. + Otherwise, return the private-address. + + When used on the consuming side of the relation (unit is a remote + unit), the ingress-address is the IP address that this unit needs + to use to reach the provided service on the remote unit. + + When used on the providing side of the relation (unit == local_unit()), + the ingress-address is the IP address that is advertised to remote + units on this relation. Remote units need to use this address to + reach the local provided service on this unit. + + Note that charms may document some other method to use in + preference to the ingress_address(), such as an address provided + on a different relation attribute or a service discovery mechanism. + This allows charms to redirect inbound connections to their peers + or different applications such as load balancers. + + Usage: + addresses = [ingress_address(rid=u.rid, unit=u.unit) + for u in iter_units_for_relation_name(relation_name)] + + :param rid: string relation id + :param unit: string unit name + :side effect: calls relation_get + :return: string IP address + """ + settings = relation_get(rid=rid, unit=unit) + return (settings.get('ingress-address') or + settings.get('private-address')) + + +def egress_subnets(rid=None, unit=None): + """ + Retrieve the egress-subnets from a relation. 
+ + This function is to be used on the providing side of the + relation, and provides the ranges of addresses that client + connections may come from. The result is uninteresting on + the consuming side of a relation (unit == local_unit()). + + Returns a stable list of subnets in CIDR format. + eg. ['192.168.1.0/24', '2001::F00F/128'] + + If egress-subnets is not available, falls back to using the published + ingress-address, or finally private-address. + + :param rid: string relation id + :param unit: string unit name + :side effect: calls relation_get + :return: list of subnets in CIDR format. eg. ['192.168.1.0/24', '2001::F00F/128'] + """ + def _to_range(addr): + if re.search(r'^(?:\d{1,3}\.){3}\d{1,3}$', addr) is not None: + addr += '/32' + elif ':' in addr and '/' not in addr: # IPv6 + addr += '/128' + return addr + + settings = relation_get(rid=rid, unit=unit) + if 'egress-subnets' in settings: + return [n.strip() for n in settings['egress-subnets'].split(',') if n.strip()] + if 'ingress-address' in settings: + return [_to_range(settings['ingress-address'])] + if 'private-address' in settings: + return [_to_range(settings['private-address'])] + return [] # Should never happen + + +def unit_doomed(unit=None): + """Determines if the unit is being removed from the model + + Requires Juju 2.4.1. + + :param unit: string unit name, defaults to local_unit + :side effect: calls goal_state + :side effect: calls local_unit + :side effect: calls has_juju_version + :return: True if the unit is being removed, already gone, or never existed + """ + if not has_juju_version("2.4.1"): + # We cannot risk blindly returning False for 'we don't know', + # because that could cause data loss; if call sites don't + # need an accurate answer, they likely don't need this helper + # at all. + # goal-state existed in 2.4.0, but did not handle removals + # correctly until 2.4.1. + raise NotImplementedError("is_doomed") + if unit is None: + unit = local_unit() + gs = goal_state() + units = gs.get('units', {}) + if unit not in units: + return True + # I don't think 'dead' units ever show up in the goal-state, but + # check anyway in addition to 'dying'. + return units[unit]['status'] in ('dying', 'dead') + + +def env_proxy_settings(selected_settings=None): + """Get proxy settings from process environment variables. + + Get charm proxy settings from environment variables that correspond to + juju-http-proxy, juju-https-proxy juju-no-proxy (available as of 2.4.2, see + lp:1782236) and juju-ftp-proxy in a format suitable for passing to an + application that reacts to proxy settings passed as environment variables. + Some applications support lowercase or uppercase notation (e.g. curl), some + support only lowercase (e.g. wget), there are also subjectively rare cases + of only uppercase notation support. no_proxy CIDR and wildcard support also + varies between runtimes and applications as there is no enforced standard. + + Some applications may connect to multiple destinations and expose config + options that would affect only proxy settings for a specific destination + these should be handled in charms in an application-specific manner. 
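+
+    # A minimal usage sketch (not part of charmhelpers) handing the
+    # Juju-provided proxy settings to a child process; the url variable is
+    # hypothetical:
+    #
+    #     env = dict(os.environ)
+    #     env.update(env_proxy_settings(['http', 'https']) or {})
+    #     subprocess.check_call(['curl', '-O', url], env=env)
+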
+ + :param selected_settings: format only a subset of possible settings + :type selected_settings: list + :rtype: Option(None, dict[str, str]) + """ + SUPPORTED_SETTINGS = { + 'http': 'HTTP_PROXY', + 'https': 'HTTPS_PROXY', + 'no_proxy': 'NO_PROXY', + 'ftp': 'FTP_PROXY' + } + if selected_settings is None: + selected_settings = SUPPORTED_SETTINGS + + selected_vars = [v for k, v in SUPPORTED_SETTINGS.items() + if k in selected_settings] + proxy_settings = {} + for var in selected_vars: + var_val = os.getenv(var) + if var_val: + proxy_settings[var] = var_val + proxy_settings[var.lower()] = var_val + # Now handle juju-prefixed environment variables. The legacy vs new + # environment variable usage is mutually exclusive + charm_var_val = os.getenv('JUJU_CHARM_{}'.format(var)) + if charm_var_val: + proxy_settings[var] = charm_var_val + proxy_settings[var.lower()] = charm_var_val + if 'no_proxy' in proxy_settings: + if _contains_range(proxy_settings['no_proxy']): + log(RANGE_WARNING, level=WARNING) + return proxy_settings if proxy_settings else None + + +def _contains_range(addresses): + """Check for cidr or wildcard domain in a string. + + Given a string comprising a comma separated list of ip addresses + and domain names, determine whether the string contains IP ranges + or wildcard domains. + + :param addresses: comma separated list of domains and ip addresses. + :type addresses: str + """ + return ( + # Test for cidr (e.g. 10.20.20.0/24) + "/" in addresses or + # Test for wildcard domains (*.foo.com or .foo.com) + "*" in addresses or + addresses.startswith(".") or + ",." in addresses or + " ." in addresses) + + +def is_subordinate(): + """Check whether charm is subordinate in unit metadata. + + :returns: True if unit is subordniate, False otherwise. + :rtype: bool + """ + return metadata().get('subordinate') is True diff --git a/ceph-proxy/charmhelpers/core/host.py b/ceph-proxy/charmhelpers/core/host.py new file mode 100644 index 00000000..def403c5 --- /dev/null +++ b/ceph-proxy/charmhelpers/core/host.py @@ -0,0 +1,1309 @@ +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tools for working with the host system""" +# Copyright 2012 Canonical Ltd. 
+#
+# Authors:
+#  Nick Moffitt
+#  Matthew Wedgwood
+
+import errno
+import os
+import re
+import pwd
+import glob
+import grp
+import random
+import string
+import subprocess
+import hashlib
+import functools
+import itertools
+
+from contextlib import contextmanager
+from collections import OrderedDict, defaultdict
+from .hookenv import log, INFO, DEBUG, local_unit, charm_name
+from .fstab import Fstab
+from charmhelpers.osplatform import get_platform
+
+__platform__ = get_platform()
+if __platform__ == "ubuntu":
+    from charmhelpers.core.host_factory.ubuntu import (  # NOQA:F401
+        service_available,
+        add_new_group,
+        lsb_release,
+        cmp_pkgrevno,
+        CompareHostReleases,
+        get_distrib_codename,
+        arch
+    )  # flake8: noqa -- ignore F401 for this import
+elif __platform__ == "centos":
+    from charmhelpers.core.host_factory.centos import (  # NOQA:F401
+        service_available,
+        add_new_group,
+        lsb_release,
+        cmp_pkgrevno,
+        CompareHostReleases,
+    )  # flake8: noqa -- ignore F401 for this import
+
+UPDATEDB_PATH = '/etc/updatedb.conf'
+CA_CERT_DIR = '/usr/local/share/ca-certificates'
+
+
+def service_start(service_name, **kwargs):
+    """Start a system service.
+
+    The specified service name is managed via the system level init system.
+    Some init systems (e.g. upstart) require that additional arguments be
+    provided in order to directly control service instances whereas other init
+    systems allow for addressing instances of a service directly by name (e.g.
+    systemd).
+
+    The kwargs allow for additional parameters to be passed to underlying
+    init systems for those systems which require/allow for them. For example,
+    the ceph-osd upstart script requires the id parameter to be passed along
+    in order to identify which running daemon should be started. The
+    following example starts the ceph-osd service for instance id=4:
+
+    service_start('ceph-osd', id=4)
+
+    :param service_name: the name of the service to start
+    :param **kwargs: additional parameters to pass to the init system when
+                     managing services. These will be passed as key=value
+                     parameters to the init system's commandline. kwargs
+                     are ignored for systemd enabled systems.
+    """
+    return service('start', service_name, **kwargs)
+
+
+def service_stop(service_name, **kwargs):
+    """Stop a system service.
+
+    The specified service name is managed via the system level init system.
+    Some init systems (e.g. upstart) require that additional arguments be
+    provided in order to directly control service instances whereas other init
+    systems allow for addressing instances of a service directly by name (e.g.
+    systemd).
+
+    The kwargs allow for additional parameters to be passed to underlying
+    init systems for those systems which require/allow for them. For example,
+    the ceph-osd upstart script requires the id parameter to be passed along
+    in order to identify which running daemon should be stopped. The
+    following example stops the ceph-osd service for instance id=4:
+
+    service_stop('ceph-osd', id=4)
+
+    :param service_name: the name of the service to stop
+    :param **kwargs: additional parameters to pass to the init system when
+                     managing services. These will be passed as key=value
+                     parameters to the init system's commandline. kwargs
+                     are ignored for systemd enabled systems.
+    """
+    return service('stop', service_name, **kwargs)
+
+
+def service_enable(service_name, **kwargs):
+    """Enable a system service.
+
+    The specified service name is managed via the system level init system.
+    Some init systems (e.g. upstart) require that additional arguments be
+    provided in order to directly control service instances whereas other init
+    systems allow for addressing instances of a service directly by name (e.g.
+    systemd).
+
+    The kwargs allow for additional parameters to be passed to underlying
+    init systems for those systems which require/allow for them. For example,
+    the ceph-osd upstart script requires the id parameter to be passed along
+    in order to identify which running daemon should be enabled. The
+    following example enables the ceph-osd service for instance id=4:
+
+    service_enable('ceph-osd', id=4)
+
+    :param service_name: the name of the service to enable
+    :param **kwargs: additional parameters to pass to the init system when
+                     managing services. These will be passed as key=value
+                     parameters to the init system's commandline. kwargs
+                     are ignored for init systems not allowing additional
+                     parameters via the commandline (systemd).
+    """
+    return service('enable', service_name, **kwargs)
+
+
+def service_restart(service_name, **kwargs):
+    """Restart a system service.
+
+    The specified service name is managed via the system level init system.
+    Some init systems (e.g. upstart) require that additional arguments be
+    provided in order to directly control service instances whereas other init
+    systems allow for addressing instances of a service directly by name (e.g.
+    systemd).
+
+    The kwargs allow for additional parameters to be passed to underlying
+    init systems for those systems which require/allow for them. For example,
+    the ceph-osd upstart script requires the id parameter to be passed along
+    in order to identify which running daemon should be restarted. The
+    following example restarts the ceph-osd service for instance id=4:
+
+    service_restart('ceph-osd', id=4)
+
+    :param service_name: the name of the service to restart
+    :param **kwargs: additional parameters to pass to the init system when
+                     managing services. These will be passed as key=value
+                     parameters to the init system's commandline. kwargs
+                     are ignored for init systems not allowing additional
+                     parameters via the commandline (systemd).
+    """
+    return service('restart', service_name, **kwargs)
+
+
+def service_reload(service_name, restart_on_failure=False, **kwargs):
+    """Reload a system service, optionally falling back to restart if
+    reload fails.
+
+    The specified service name is managed via the system level init system.
+    Some init systems (e.g. upstart) require that additional arguments be
+    provided in order to directly control service instances whereas other init
+    systems allow for addressing instances of a service directly by name (e.g.
+    systemd).
+
+    The kwargs allow for additional parameters to be passed to underlying
+    init systems for those systems which require/allow for them. For example,
+    the ceph-osd upstart script requires the id parameter to be passed along
+    in order to identify which running daemon should be reloaded. The
+    following example reloads the ceph-osd service for instance id=4:
+
+    service_reload('ceph-osd', id=4)
+
+    :param service_name: the name of the service to reload
+    :param restart_on_failure: boolean indicating whether to fallback to a
+                               restart if the reload fails.
+    :param **kwargs: additional parameters to pass to the init system when
+                     managing services. These will be passed as key=value
+                     parameters to the init system's commandline. kwargs
+                     are ignored for init systems not allowing additional
+                     parameters via the commandline (systemd).
+ """ + service_result = service('reload', service_name, **kwargs) + if not service_result and restart_on_failure: + service_result = service('restart', service_name, **kwargs) + return service_result + + +def service_pause(service_name, init_dir="/etc/init", initd_dir="/etc/init.d", + **kwargs): + """Pause a system service. + + Stop it, and prevent it from starting again at boot. + + :param service_name: the name of the service to pause + :param init_dir: path to the upstart init directory + :param initd_dir: path to the sysv init directory + :param **kwargs: additional parameters to pass to the init system when + managing services. These will be passed as key=value + parameters to the init system's commandline. kwargs + are ignored for init systems which do not support + key=value arguments via the commandline. + """ + stopped = True + if service_running(service_name, **kwargs): + stopped = service_stop(service_name, **kwargs) + upstart_file = os.path.join(init_dir, "{}.conf".format(service_name)) + sysv_file = os.path.join(initd_dir, service_name) + if init_is_systemd(service_name=service_name): + service('disable', service_name) + service('mask', service_name) + elif os.path.exists(upstart_file): + override_path = os.path.join( + init_dir, '{}.override'.format(service_name)) + with open(override_path, 'w') as fh: + fh.write("manual\n") + elif os.path.exists(sysv_file): + subprocess.check_call(["update-rc.d", service_name, "disable"]) + else: + raise ValueError( + "Unable to detect {0} as SystemD, Upstart {1} or" + " SysV {2}".format( + service_name, upstart_file, sysv_file)) + return stopped + + +def service_resume(service_name, init_dir="/etc/init", + initd_dir="/etc/init.d", **kwargs): + """Resume a system service. + + Re-enable starting again at boot. Start the service. + + :param service_name: the name of the service to resume + :param init_dir: the path to the init dir + :param initd dir: the path to the initd dir + :param **kwargs: additional parameters to pass to the init system when + managing services. These will be passed as key=value + parameters to the init system's commandline. kwargs + are ignored for systemd enabled systems. + """ + upstart_file = os.path.join(init_dir, "{}.conf".format(service_name)) + sysv_file = os.path.join(initd_dir, service_name) + if init_is_systemd(service_name=service_name): + if service('is-enabled', service_name): + log('service {} already enabled'.format(service_name), level=DEBUG) + else: + service('unmask', service_name) + service('enable', service_name) + elif os.path.exists(upstart_file): + override_path = os.path.join( + init_dir, '{}.override'.format(service_name)) + if os.path.exists(override_path): + os.unlink(override_path) + elif os.path.exists(sysv_file): + subprocess.check_call(["update-rc.d", service_name, "enable"]) + else: + raise ValueError( + "Unable to detect {0} as SystemD, Upstart {1} or" + " SysV {2}".format( + service_name, upstart_file, sysv_file)) + started = service_running(service_name, **kwargs) + + if not started: + started = service_start(service_name, **kwargs) + return started + + +def service(action, service_name=None, **kwargs): + """Control a system service. + + :param action: the action to take on the service + :param service_name: the name of the service to perform th action on + :param **kwargs: additional params to be passed to the service command in + the form of key=value. 
+ """ + if init_is_systemd(service_name=service_name): + cmd = ['systemctl', action] + if service_name is not None: + cmd.append(service_name) + else: + cmd = ['service', service_name, action] + for key, value in kwargs.items(): + parameter = '%s=%s' % (key, value) + cmd.append(parameter) + return subprocess.call(cmd) == 0 + + +_UPSTART_CONF = "/etc/init/{}.conf" +_INIT_D_CONF = "/etc/init.d/{}" + + +def service_running(service_name, **kwargs): + """Determine whether a system service is running. + + :param service_name: the name of the service + :param **kwargs: additional args to pass to the service command. This is + used to pass additional key=value arguments to the + service command line for managing specific instance + units (e.g. service ceph-osd status id=2). The kwargs + are ignored in systemd services. + """ + if init_is_systemd(service_name=service_name): + return service('is-active', service_name) + else: + if os.path.exists(_UPSTART_CONF.format(service_name)): + try: + cmd = ['status', service_name] + for key, value in kwargs.items(): + parameter = '%s=%s' % (key, value) + cmd.append(parameter) + output = subprocess.check_output( + cmd, stderr=subprocess.STDOUT).decode('UTF-8') + except subprocess.CalledProcessError: + return False + else: + # This works for upstart scripts where the 'service' command + # returns a consistent string to represent running + # 'start/running' + if ("start/running" in output or + "is running" in output or + "up and running" in output): + return True + elif os.path.exists(_INIT_D_CONF.format(service_name)): + # Check System V scripts init script return codes + return service('status', service_name) + return False + + +SYSTEMD_SYSTEM = '/run/systemd/system' + + +def init_is_systemd(service_name=None): + """ + Returns whether the host uses systemd for the specified service. + + @param Optional[str] service_name: specific name of service + """ + if str(service_name).startswith("snap."): + return True + if lsb_release()['DISTRIB_CODENAME'] == 'trusty': + return False + return os.path.isdir(SYSTEMD_SYSTEM) + + +def adduser(username, password=None, shell='/bin/bash', + system_user=False, primary_group=None, + secondary_groups=None, uid=None, home_dir=None): + """Add a user to the system. + + Will log but otherwise succeed if the user already exists. 
+ + :param str username: Username to create + :param str password: Password for user; if ``None``, create a system user + :param str shell: The default shell for the user + :param bool system_user: Whether to create a login or system user + :param str primary_group: Primary group for user; defaults to username + :param list secondary_groups: Optional list of additional groups + :param int uid: UID for user being created + :param str home_dir: Home directory for user + + :returns: The password database entry struct, as returned by `pwd.getpwnam` + """ + try: + user_info = pwd.getpwnam(username) + log('user {0} already exists!'.format(username)) + if uid: + user_info = pwd.getpwuid(int(uid)) + log('user with uid {0} already exists!'.format(uid)) + except KeyError: + log('creating user {0}'.format(username)) + cmd = ['useradd'] + if uid: + cmd.extend(['--uid', str(uid)]) + if home_dir: + cmd.extend(['--home', str(home_dir)]) + if system_user or password is None: + cmd.append('--system') + else: + cmd.extend([ + '--create-home', + '--shell', shell, + '--password', password, + ]) + if not primary_group: + try: + grp.getgrnam(username) + primary_group = username # avoid "group exists" error + except KeyError: + pass + if primary_group: + cmd.extend(['-g', primary_group]) + if secondary_groups: + cmd.extend(['-G', ','.join(secondary_groups)]) + cmd.append(username) + subprocess.check_call(cmd) + user_info = pwd.getpwnam(username) + return user_info + + +def user_exists(username): + """Check if a user exists""" + try: + pwd.getpwnam(username) + user_exists = True + except KeyError: + user_exists = False + return user_exists + + +def uid_exists(uid): + """Check if a uid exists""" + try: + pwd.getpwuid(uid) + uid_exists = True + except KeyError: + uid_exists = False + return uid_exists + + +def group_exists(groupname): + """Check if a group exists""" + try: + grp.getgrnam(groupname) + group_exists = True + except KeyError: + group_exists = False + return group_exists + + +def gid_exists(gid): + """Check if a gid exists""" + try: + grp.getgrgid(gid) + gid_exists = True + except KeyError: + gid_exists = False + return gid_exists + + +def add_group(group_name, system_group=False, gid=None): + """Add a group to the system + + Will log but otherwise succeed if the group already exists. + + :param str group_name: group to create + :param bool system_group: Create system group + :param int gid: GID for user being created + + :returns: The password database entry struct, as returned by `grp.getgrnam` + """ + try: + group_info = grp.getgrnam(group_name) + log('group {0} already exists!'.format(group_name)) + if gid: + group_info = grp.getgrgid(gid) + log('group with gid {0} already exists!'.format(gid)) + except KeyError: + log('creating group {0}'.format(group_name)) + add_new_group(group_name, system_group, gid) + group_info = grp.getgrnam(group_name) + return group_info + + +def add_user_to_group(username, group): + """Add a user to a group""" + cmd = ['gpasswd', '-a', username, group] + log("Adding user {} to group {}".format(username, group)) + subprocess.check_call(cmd) + + +def chage(username, lastday=None, expiredate=None, inactive=None, + mindays=None, maxdays=None, root=None, warndays=None): + """Change user password expiry information + + :param str username: User to update + :param str lastday: Set when password was changed in YYYY-MM-DD format + :param str expiredate: Set when user's account will no longer be + accessible in YYYY-MM-DD format. + -1 will remove an account expiration date. 
+ :param str inactive: Set the number of days of inactivity after a password + has expired before the account is locked. + -1 will remove an account's inactivity. + :param str mindays: Set the minimum number of days between password + changes to MIN_DAYS. + 0 indicates the password can be changed anytime. + :param str maxdays: Set the maximum number of days during which a + password is valid. + -1 as MAX_DAYS will remove checking maxdays + :param str root: Apply changes in the CHROOT_DIR directory + :param str warndays: Set the number of days of warning before a password + change is required + :raises subprocess.CalledProcessError: if call to chage fails + """ + cmd = ['chage'] + if root: + cmd.extend(['--root', root]) + if lastday: + cmd.extend(['--lastday', lastday]) + if expiredate: + cmd.extend(['--expiredate', expiredate]) + if inactive: + cmd.extend(['--inactive', inactive]) + if mindays: + cmd.extend(['--mindays', mindays]) + if maxdays: + cmd.extend(['--maxdays', maxdays]) + if warndays: + cmd.extend(['--warndays', warndays]) + cmd.append(username) + subprocess.check_call(cmd) + + +remove_password_expiry = functools.partial(chage, expiredate='-1', inactive='-1', mindays='0', maxdays='-1') + + +def rsync(from_path, to_path, flags='-r', options=None, timeout=None): + """Replicate the contents of a path""" + options = options or ['--delete', '--executability'] + cmd = ['/usr/bin/rsync', flags] + if timeout: + cmd = ['timeout', str(timeout)] + cmd + cmd.extend(options) + cmd.append(from_path) + cmd.append(to_path) + log(" ".join(cmd)) + return subprocess.check_output(cmd, stderr=subprocess.STDOUT).decode('UTF-8').strip() + + +def symlink(source, destination): + """Create a symbolic link""" + log("Symlinking {} as {}".format(source, destination)) + cmd = [ + 'ln', + '-sf', + source, + destination, + ] + subprocess.check_call(cmd) + + +def mkdir(path, owner='root', group='root', perms=0o555, force=False): + """Create a directory""" + log("Making dir {} {}:{} {:o}".format(path, owner, group, + perms)) + uid = pwd.getpwnam(owner).pw_uid + gid = grp.getgrnam(group).gr_gid + realpath = os.path.abspath(path) + path_exists = os.path.exists(realpath) + if path_exists and force: + if not os.path.isdir(realpath): + log("Removing non-directory file {} prior to mkdir()".format(path)) + os.unlink(realpath) + os.makedirs(realpath, perms) + elif not path_exists: + os.makedirs(realpath, perms) + os.chown(realpath, uid, gid) + os.chmod(realpath, perms) + + +def write_file(path, content, owner='root', group='root', perms=0o444): + """Create or overwrite a file with the contents of a byte string.""" + uid = pwd.getpwnam(owner).pw_uid + gid = grp.getgrnam(group).gr_gid + # lets see if we can grab the file and compare the context, to avoid doing + # a write. + existing_content = None + existing_uid, existing_gid, existing_perms = None, None, None + try: + with open(path, 'rb') as target: + existing_content = target.read() + stat = os.stat(path) + existing_uid, existing_gid, existing_perms = ( + stat.st_uid, stat.st_gid, stat.st_mode + ) + except Exception: + pass + if content != existing_content: + log("Writing file {} {}:{} {:o}".format(path, owner, group, perms), + level=DEBUG) + with open(path, 'wb') as target: + os.fchown(target.fileno(), uid, gid) + os.fchmod(target.fileno(), perms) + if isinstance(content, str): + content = content.encode('UTF-8') + target.write(content) + return + # the contents were the same, but we might still need to change the + # ownership or permissions. 
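+
+    # A minimal usage sketch (not part of charmhelpers) laying down a config
+    # file idempotently; the paths and rendered_config variable are
+    # hypothetical.  write_file() skips the write when neither content nor
+    # ownership/permissions changed:
+    #
+    #     mkdir('/etc/myapp', owner='root', group='root', perms=0o755)
+    #     write_file('/etc/myapp/app.conf', rendered_config,
+    #                owner='root', group='root', perms=0o640)
+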
+    if existing_uid != uid:
+        log("Changing uid on already existing content: {} -> {}"
+            .format(existing_uid, uid), level=DEBUG)
+        os.chown(path, uid, -1)
+    if existing_gid != gid:
+        log("Changing gid on already existing content: {} -> {}"
+            .format(existing_gid, gid), level=DEBUG)
+        os.chown(path, -1, gid)
+    if existing_perms != perms:
+        log("Changing permissions on existing content: {} -> {}"
+            .format(existing_perms, perms), level=DEBUG)
+        os.chmod(path, perms)
+
+
+def fstab_remove(mp):
+    """Remove the given mountpoint entry from /etc/fstab"""
+    return Fstab.remove_by_mountpoint(mp)
+
+
+def fstab_add(dev, mp, fs, options=None):
+    """Adds the given device entry to the /etc/fstab file"""
+    return Fstab.add(dev, mp, fs, options=options)
+
+
+def mount(device, mountpoint, options=None, persist=False, filesystem="ext3"):
+    """Mount a filesystem at a particular mountpoint"""
+    cmd_args = ['mount']
+    if options is not None:
+        cmd_args.extend(['-o', options])
+    cmd_args.extend([device, mountpoint])
+    try:
+        subprocess.check_output(cmd_args)
+    except subprocess.CalledProcessError as e:
+        log('Error mounting {} at {}\n{}'.format(device, mountpoint, e.output))
+        return False
+
+    if persist:
+        return fstab_add(device, mountpoint, filesystem, options=options)
+    return True
+
+
+def umount(mountpoint, persist=False):
+    """Unmount a filesystem"""
+    cmd_args = ['umount', mountpoint]
+    try:
+        subprocess.check_output(cmd_args)
+    except subprocess.CalledProcessError as e:
+        log('Error unmounting {}\n{}'.format(mountpoint, e.output))
+        return False
+
+    if persist:
+        return fstab_remove(mountpoint)
+    return True
+
+
+def mounts():
+    """Get a list of all mounted volumes as [[mountpoint,device],[...]]"""
+    with open('/proc/mounts') as f:
+        # [['/mount/point','/dev/path'],[...]]
+        system_mounts = [m[1::-1] for m in [l.strip().split()
+                                            for l in f.readlines()]]
+    return system_mounts
+
+
+def fstab_mount(mountpoint):
+    """Mount filesystem using fstab"""
+    cmd_args = ['mount', mountpoint]
+    try:
+        subprocess.check_output(cmd_args)
+    except subprocess.CalledProcessError as e:
+        log('Error mounting {}\n{}'.format(mountpoint, e.output))
+        return False
+    return True
+
+
+def file_hash(path, hash_type='md5'):
+    """Generate a hash checksum of the contents of 'path' or None if not found.
+
+    :param str hash_type: Any hash algorithm supported by :mod:`hashlib`,
+                          such as md5, sha1, sha256, sha512, etc.
+    """
+    if os.path.exists(path):
+        h = getattr(hashlib, hash_type)()
+        with open(path, 'rb') as source:
+            h.update(source.read())
+        return h.hexdigest()
+    else:
+        return None
+
+
+def path_hash(path):
+    """Generate a hash checksum of all files matching 'path'. Standard
+    wildcards like '*' and '?' are supported, see documentation for the 'glob'
+    module for more information.
+
+    :return: dict: A { filename: hash } dictionary for all matched files.
+        Empty if none found.
+    """
+    return {
+        filename: file_hash(filename)
+        for filename in glob.iglob(path)
+    }
+
+
+def check_hash(path, checksum, hash_type='md5'):
+    """Validate a file using a cryptographic checksum.
+
+    :param str checksum: Value of the checksum used to validate the file.
+    :param str hash_type: Hash algorithm used to generate `checksum`.
+        Can be any hash algorithm supported by :mod:`hashlib`,
+        such as md5, sha1, sha256, sha512, etc.
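+
+    # A minimal usage sketch (not part of charmhelpers) verifying a fetched
+    # artifact; the path and expected_sha256 value are hypothetical:
+    #
+    #     try:
+    #         check_hash('/tmp/payload.tgz', expected_sha256, 'sha256')
+    #     except ChecksumError:
+    #         log('payload failed checksum validation', level=INFO)
+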
+ :raises ChecksumError: If the file fails the checksum + + """ + actual_checksum = file_hash(path, hash_type) + if checksum != actual_checksum: + raise ChecksumError("'%s' != '%s'" % (checksum, actual_checksum)) + + +class ChecksumError(ValueError): + """A class derived from Value error to indicate the checksum failed.""" + pass + + +class restart_on_change(object): + """Decorator and context manager to handle restarts. + + Usage: + + @restart_on_change(restart_map, ...) + def function_that_might_trigger_a_restart(...) + ... + + Or: + + with restart_on_change(restart_map, ...): + do_stuff_that_might_trigger_a_restart() + ... + """ + + def __init__(self, restart_map, stopstart=False, restart_functions=None, + can_restart_now_f=None, post_svc_restart_f=None, + pre_restarts_wait_f=None): + """ + :param restart_map: {file: [service, ...]} + :type restart_map: Dict[str, List[str,]] + :param stopstart: whether to stop, start or restart a service + :type stopstart: booleean + :param restart_functions: nonstandard functions to use to restart + services {svc: func, ...} + :type restart_functions: Dict[str, Callable[[str], None]] + :param can_restart_now_f: A function used to check if the restart is + permitted. + :type can_restart_now_f: Callable[[str, List[str]], boolean] + :param post_svc_restart_f: A function run after a service has + restarted. + :type post_svc_restart_f: Callable[[str], None] + :param pre_restarts_wait_f: A function called before any restarts. + :type pre_restarts_wait_f: Callable[None, None] + """ + self.restart_map = restart_map + self.stopstart = stopstart + self.restart_functions = restart_functions + self.can_restart_now_f = can_restart_now_f + self.post_svc_restart_f = post_svc_restart_f + self.pre_restarts_wait_f = pre_restarts_wait_f + + def __call__(self, f): + """Work like a decorator. + + Returns a wrapped function that performs the restart if triggered. + + :param f: The function that is being wrapped. + :type f: Callable[[Any], Any] + :returns: the wrapped function + :rtype: Callable[[Any], Any] + """ + @functools.wraps(f) + def wrapped_f(*args, **kwargs): + return restart_on_change_helper( + (lambda: f(*args, **kwargs)), + self.restart_map, + stopstart=self.stopstart, + restart_functions=self.restart_functions, + can_restart_now_f=self.can_restart_now_f, + post_svc_restart_f=self.post_svc_restart_f, + pre_restarts_wait_f=self.pre_restarts_wait_f) + return wrapped_f + + def __enter__(self): + """Enter the runtime context related to this object. """ + self.checksums = _pre_restart_on_change_helper(self.restart_map) + + def __exit__(self, exc_type, exc_val, exc_tb): + """Exit the runtime context related to this object. + + The parameters describe the exception that caused the context to be + exited. If the context was exited without an exception, all three + arguments will be None. + """ + if exc_type is None: + _post_restart_on_change_helper( + self.checksums, + self.restart_map, + stopstart=self.stopstart, + restart_functions=self.restart_functions, + can_restart_now_f=self.can_restart_now_f, + post_svc_restart_f=self.post_svc_restart_f, + pre_restarts_wait_f=self.pre_restarts_wait_f) + # All is good, so return False; any exceptions will propagate. + return False + + +def restart_on_change_helper(lambda_f, restart_map, stopstart=False, + restart_functions=None, + can_restart_now_f=None, + post_svc_restart_f=None, + pre_restarts_wait_f=None): + """Helper function to perform the restart_on_change function. 
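+
+    # A minimal usage sketch (not part of charmhelpers): the decorator form
+    # hashes the mapped files around the wrapped call and restarts the listed
+    # services when any of them change; the path, service name and
+    # render_config() helper are hypothetical:
+    #
+    #     @restart_on_change({'/etc/myapp/app.conf': ['myapp']})
+    #     def config_changed():
+    #         render_config()
+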
+
+    This is provided for decorators to restart services if files described
+    in the restart_map have changed after an invocation of lambda_f().
+
+    This function allows for a number of helper functions to be passed.
+
+    `restart_functions` is a map with a service as the key and the
+    corresponding value being the function to call to restart the service. For
+    example if `restart_functions={'some-service': my_restart_func}` then
+    `my_restart_func` should be a function which takes one argument which is
+    the service name to be restarted.
+
+    `can_restart_now_f` is a function which checks that a restart is permitted.
+    It should return a bool which indicates if a restart is allowed and should
+    take a service name (str) and a list of changed files (List[str]) as
+    arguments.
+
+    `post_svc_restart_f` is a function which runs after a service has been
+    restarted. It takes the service name that was restarted as an argument.
+
+    `pre_restarts_wait_f` is a function which is called before any restarts
+    occur. The use case for this is an application which wants to try and
+    stagger restarts between units.
+
+    :param lambda_f: function to call.
+    :type lambda_f: Callable[[], ANY]
+    :param restart_map: {file: [service, ...]}
+    :type restart_map: Dict[str, List[str,]]
+    :param stopstart: whether to stop, start or restart a service
+    :type stopstart: boolean
+    :param restart_functions: nonstandard functions to use to restart services
+                              {svc: func, ...}
+    :type restart_functions: Dict[str, Callable[[str], None]]
+    :param can_restart_now_f: A function used to check if the restart is
+                              permitted.
+    :type can_restart_now_f: Callable[[str, List[str]], boolean]
+    :param post_svc_restart_f: A function run after a service has
+                               restarted.
+    :type post_svc_restart_f: Callable[[str], None]
+    :param pre_restarts_wait_f: A function called before any restarts.
+    :type pre_restarts_wait_f: Callable[None, None]
+    :returns: result of lambda_f()
+    :rtype: ANY
+    """
+    checksums = _pre_restart_on_change_helper(restart_map)
+    r = lambda_f()
+    _post_restart_on_change_helper(checksums,
+                                   restart_map,
+                                   stopstart,
+                                   restart_functions,
+                                   can_restart_now_f,
+                                   post_svc_restart_f,
+                                   pre_restarts_wait_f)
+    return r
+
+
+def _pre_restart_on_change_helper(restart_map):
+    """Take a snapshot of file hashes.
+
+    :param restart_map: {file: [service, ...]}
+    :type restart_map: Dict[str, List[str,]]
+    :returns: Dictionary of file paths and the files checksum.
+    :rtype: Dict[str, str]
+    """
+    return {path: path_hash(path) for path in restart_map}
+
+
+def _post_restart_on_change_helper(checksums,
+                                   restart_map,
+                                   stopstart=False,
+                                   restart_functions=None,
+                                   can_restart_now_f=None,
+                                   post_svc_restart_f=None,
+                                   pre_restarts_wait_f=None):
+    """Check whether files have changed.
+
+    :param checksums: Dictionary of file paths and the files checksum.
+    :type checksums: Dict[str, str]
+    :param restart_map: {file: [service, ...]}
+    :type restart_map: Dict[str, List[str,]]
+    :param stopstart: whether to stop, start or restart a service
+    :type stopstart: boolean
+    :param restart_functions: nonstandard functions to use to restart services
+                              {svc: func, ...}
+    :type restart_functions: Dict[str, Callable[[str], None]]
+    :param can_restart_now_f: A function used to check if the restart is
+                              permitted.
+    :type can_restart_now_f: Callable[[str, List[str]], boolean]
+    :param post_svc_restart_f: A function run after a service has
+                               restarted.
+ :type post_svc_restart_f: Callable[[str], None]
+ :param pre_restarts_wait_f: A function called before any restarts.
+ :type pre_restarts_wait_f: Callable[None, None]
+ """
+ if restart_functions is None:
+ restart_functions = {}
+ changed_files = defaultdict(list)
+ restarts = []
+ # create a list of lists of the services to restart
+ for path, services in restart_map.items():
+ if path_hash(path) != checksums[path]:
+ restarts.append(services)
+ for svc in services:
+ changed_files[svc].append(path)
+ # create a flat list of ordered services without duplicates from lists
+ services_list = list(OrderedDict.fromkeys(itertools.chain(*restarts)))
+ if services_list:
+ if pre_restarts_wait_f:
+ pre_restarts_wait_f()
+ actions = ('stop', 'start') if stopstart else ('restart',)
+ for service_name in services_list:
+ if can_restart_now_f:
+ if not can_restart_now_f(service_name,
+ changed_files[service_name]):
+ continue
+ if service_name in restart_functions:
+ restart_functions[service_name](service_name)
+ else:
+ for action in actions:
+ service(action, service_name)
+ if post_svc_restart_f:
+ post_svc_restart_f(service_name)
+
+
+def pwgen(length=None):
+ """Generate a random password."""
+ if length is None:
+ # It is OK to use a weak PRNG when only picking a random length
+ length = random.choice(range(35, 45))
+ alphanumeric_chars = [
+ l for l in (string.ascii_letters + string.digits)
+ if l not in 'l0QD1vAEIOUaeiou']
+ # Use a crypto-friendly PRNG (e.g. /dev/urandom) for making the
+ # actual password
+ random_generator = random.SystemRandom()
+ random_chars = [
+ random_generator.choice(alphanumeric_chars) for _ in range(length)]
+ return ''.join(random_chars)
+
+
+def is_phy_iface(interface):
+ """Returns True if interface is not virtual, otherwise False."""
+ if interface:
+ sys_net = '/sys/class/net'
+ if os.path.isdir(sys_net):
+ for iface in glob.glob(os.path.join(sys_net, '*')):
+ if '/virtual/' in os.path.realpath(iface):
+ continue
+
+ if interface == os.path.basename(iface):
+ return True
+
+ return False
+
+
+def get_bond_master(interface):
+ """Returns the bond master if interface is a bond slave, otherwise None.
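+
+ Illustrative use (the interface name is only an example)::
+
+ master = get_bond_master('eth0') # e.g. 'bond0', or None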
+ + NOTE: the provided interface is expected to be physical + """ + if interface: + iface_path = '/sys/class/net/%s' % (interface) + if os.path.exists(iface_path): + if '/virtual/' in os.path.realpath(iface_path): + return None + + master = os.path.join(iface_path, 'master') + if os.path.exists(master): + master = os.path.realpath(master) + # make sure it is a bond master + if os.path.exists(os.path.join(master, 'bonding')): + return os.path.basename(master) + + return None + + +def list_nics(nic_type=None): + """Return a list of nics of given type(s)""" + if isinstance(nic_type, str): + int_types = [nic_type] + else: + int_types = nic_type + + interfaces = [] + if nic_type: + for int_type in int_types: + cmd = ['ip', 'addr', 'show', 'label', int_type + '*'] + ip_output = subprocess.check_output( + cmd).decode('UTF-8', errors='replace') + ip_output = ip_output.split('\n') + ip_output = (line for line in ip_output if line) + for line in ip_output: + if line.split()[1].startswith(int_type): + matched = re.search('.*: (' + int_type + + r'[0-9]+\.[0-9]+)@.*', line) + if matched: + iface = matched.groups()[0] + else: + iface = line.split()[1].replace(":", "") + + if iface not in interfaces: + interfaces.append(iface) + else: + cmd = ['ip', 'a'] + ip_output = subprocess.check_output( + cmd).decode('UTF-8', errors='replace').split('\n') + ip_output = (line.strip() for line in ip_output if line) + + key = re.compile(r'^[0-9]+:\s+(.+):') + for line in ip_output: + matched = re.search(key, line) + if matched: + iface = matched.group(1) + iface = iface.partition("@")[0] + if iface not in interfaces: + interfaces.append(iface) + + return interfaces + + +def set_nic_mtu(nic, mtu): + """Set the Maximum Transmission Unit (MTU) on a network interface.""" + cmd = ['ip', 'link', 'set', nic, 'mtu', mtu] + subprocess.check_call(cmd) + + +def get_nic_mtu(nic): + """Return the Maximum Transmission Unit (MTU) for a network interface.""" + cmd = ['ip', 'addr', 'show', nic] + ip_output = subprocess.check_output( + cmd).decode('UTF-8', errors='replace').split('\n') + mtu = "" + for line in ip_output: + words = line.split() + if 'mtu' in words: + mtu = words[words.index("mtu") + 1] + return mtu + + +def get_nic_hwaddr(nic): + """Return the Media Access Control (MAC) for a network interface.""" + cmd = ['ip', '-o', '-0', 'addr', 'show', nic] + ip_output = subprocess.check_output(cmd).decode('UTF-8', errors='replace') + hwaddr = "" + words = ip_output.split() + if 'link/ether' in words: + hwaddr = words[words.index('link/ether') + 1] + return hwaddr + + +@contextmanager +def chdir(directory): + """Change the current working directory to a different directory for a code + block and return the previous directory after the block exits. Useful to + run commands from a specified directory. + + :param str directory: The directory path to change to for this context. + """ + cur = os.getcwd() + try: + yield os.chdir(directory) + finally: + os.chdir(cur) + + +def chownr(path, owner, group, follow_links=True, chowntopdir=False): + """Recursively change user and group ownership of files and directories + in given path. Doesn't chown path itself by default, only its children. + + :param str path: The string path to start changing ownership. + :param str owner: The owner string to use when looking up the uid. + :param str group: The group string to use when looking up the gid. 
+ :param bool follow_links: Also follow and chown links if True
+ :param bool chowntopdir: Also chown path itself if True
+ """
+ uid = pwd.getpwnam(owner).pw_uid
+ gid = grp.getgrnam(group).gr_gid
+ if follow_links:
+ chown = os.chown
+ else:
+ chown = os.lchown
+
+ if chowntopdir:
+ broken_symlink = os.path.lexists(path) and not os.path.exists(path)
+ if not broken_symlink:
+ chown(path, uid, gid)
+ for root, dirs, files in os.walk(path, followlinks=follow_links):
+ for name in dirs + files:
+ full = os.path.join(root, name)
+ try:
+ chown(full, uid, gid)
+ except (IOError, OSError) as e:
+ # Intended to ignore "file not found".
+ if e.errno == errno.ENOENT:
+ pass
+
+
+def lchownr(path, owner, group):
+ """Recursively change user and group ownership of files and directories
+ in a given path, not following symbolic links. See the documentation for
+ 'os.lchown' for more information.
+
+ :param str path: The string path to start changing ownership.
+ :param str owner: The owner string to use when looking up the uid.
+ :param str group: The group string to use when looking up the gid.
+ """
+ chownr(path, owner, group, follow_links=False)
+
+
+def owner(path):
+ """Returns a tuple containing the username & groupname owning the path.
+
+ :param str path: the string path to retrieve the ownership
+ :return tuple(str, str): A (username, groupname) tuple containing the
+ name of the user and group owning the path.
+ :raises OSError: if the specified path does not exist
+ """
+ stat = os.stat(path)
+ username = pwd.getpwuid(stat.st_uid)[0]
+ groupname = grp.getgrgid(stat.st_gid)[0]
+ return username, groupname
+
+
+def get_total_ram():
+ """The total amount of system RAM in bytes.
+
+ This is what is reported by the OS, and may be overcommitted when
+ there are multiple containers hosted on the same machine.
+ """
+ with open('/proc/meminfo', 'r') as f:
+ for line in f.readlines():
+ if line:
+ key, value, unit = line.split()
+ if key == 'MemTotal:':
+ assert unit == 'kB', 'Unknown unit'
+ return int(value) * 1024 # Classic, not KiB.
+ raise NotImplementedError()
+
+
+UPSTART_CONTAINER_TYPE = '/run/container_type'
+
+
+def is_container():
+ """Determine whether unit is running in a container
+
+ @return: boolean indicating if unit is in a container
+ """
+ if init_is_systemd():
+ # Detect using systemd-detect-virt
+ return subprocess.call(['systemd-detect-virt',
+ '--container']) == 0
+ else:
+ # Detect using upstart container file marker
+ return os.path.exists(UPSTART_CONTAINER_TYPE)
+
+
+def add_to_updatedb_prunepath(path, updatedb_path=UPDATEDB_PATH):
+ """Adds the specified path to mlocate's updatedb.conf PRUNEPATHS list.
+
+ This method has no effect if the path specified by updatedb_path does not
+ exist or is not a file.
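+
+ Illustrative call (the pruned path is hypothetical)::
+
+ add_to_updatedb_prunepath('/srv/ceph')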
+
+ @param path: string the path to add to the updatedb.conf PRUNEPATHS value
+ @param updatedb_path: the path to the updatedb.conf file
+ """
+ if not os.path.exists(updatedb_path) or os.path.isdir(updatedb_path):
+ # If the updatedb.conf file doesn't exist then don't attempt to update
+ # the file as the package providing mlocate may not be installed on
+ # the local system
+ return
+
+ with open(updatedb_path, 'r+') as f_id:
+ updatedb_text = f_id.read()
+ output = updatedb(updatedb_text, path)
+ f_id.seek(0)
+ f_id.write(output)
+ f_id.truncate()
+
+
+def updatedb(updatedb_text, new_path):
+ """Return updatedb_text with new_path added to the PRUNEPATHS entry."""
+ lines = [line for line in updatedb_text.split("\n")]
+ for i, line in enumerate(lines):
+ if line.startswith("PRUNEPATHS="):
+ paths_line = line.split("=")[1].replace('"', '')
+ paths = paths_line.split(" ")
+ if new_path not in paths:
+ paths.append(new_path)
+ lines[i] = 'PRUNEPATHS="{}"'.format(' '.join(paths))
+ output = "\n".join(lines)
+ return output
+
+
+def modulo_distribution(modulo=3, wait=30, non_zero_wait=False):
+ """ Modulo distribution
+
+ This helper uses the unit number, a modulo value and a constant wait time
+ to produce a calculated wait time distribution. This is useful in large
+ scale deployments to distribute load during an expensive operation such as
+ service restarts.
+
+ If you have 1000 nodes that need to restart 100 at a time, 1 minute apart:
+
+ time.sleep(modulo_distribution(modulo=100, wait=60))
+ restart()
+
+ If you need restarts to happen serially, set modulo to the exact number of
+ nodes and set a high constant wait time:
+
+ time.sleep(modulo_distribution(modulo=10, wait=120))
+ restart()
+
+ @param modulo: int The modulo number creates the group distribution
+ @param wait: int The constant time wait value
+ @param non_zero_wait: boolean Override unit % modulo == 0,
+ return modulo * wait. Used to avoid collisions with
+ leader nodes which are often given priority.
+ @return: int Calculated time to wait for unit operation
+ """
+ unit_number = int(local_unit().split('/')[1])
+ calculated_wait_time = (unit_number % modulo) * wait
+ if non_zero_wait and calculated_wait_time == 0:
+ return modulo * wait
+ else:
+ return calculated_wait_time
+
+
+def ca_cert_absolute_path(basename_without_extension):
+ """Returns absolute path to CA certificate.
+
+ :param basename_without_extension: Filename without extension
+ :type basename_without_extension: str
+ :returns: Absolute full path
+ :rtype: str
+ """
+ return '{}/{}.crt'.format(CA_CERT_DIR, basename_without_extension)
+
+
+def install_ca_cert(ca_cert, name=None):
+ """
+ Install the given cert as a trusted CA.
+
+ The ``name`` is the stem of the filename where the cert is written, and if
+ not provided, it will default to ``juju-{charm_name}``.
+
+ If the cert is empty or None, or is unchanged, nothing is done.
+ """
+ if not ca_cert:
+ return
+ if not isinstance(ca_cert, bytes):
+ ca_cert = ca_cert.encode('utf8')
+ if not name:
+ name = 'juju-{}'.format(charm_name())
+ cert_file = ca_cert_absolute_path(name)
+ new_hash = hashlib.md5(ca_cert).hexdigest()
+ if file_hash(cert_file) == new_hash:
+ return
+ log("Installing new CA cert at: {}".format(cert_file), level=INFO)
+ write_file(cert_file, ca_cert)
+ subprocess.check_call(['update-ca-certificates', '--fresh'])
+
+
+def get_system_env(key, default=None):
+ """Get data from system environment as represented in ``/etc/environment``.
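+
+ Illustrative lookup (the key is only an example)::
+
+ proxy = get_system_env('http_proxy', default='')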
+
+ :param key: Key to look up
+ :type key: str
+ :param default: Value to return if key is not found
+ :type default: any
+ :returns: Value for key if found or contents of default parameter
+ :rtype: any
+ :raises: subprocess.CalledProcessError
+ """
+ env_file = '/etc/environment'
+ # use the shell and env(1) to parse the global environments file. This is
+ # done to get the correct result even if the user has shell variable
+ # substitutions or other shell logic in that file.
+ output = subprocess.check_output(
+ ['env', '-i', '/bin/bash', '-c',
+ 'set -a && source {} && env'.format(env_file)],
+ universal_newlines=True)
+ for k, v in (line.split('=', 1)
+ for line in output.splitlines() if '=' in line):
+ if k == key:
+ return v
+ else:
+ # for/else: reached only when the key was not found
+ return default
diff --git a/ceph-proxy/charmhelpers/core/host_factory/__init__.py b/ceph-proxy/charmhelpers/core/host_factory/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/ceph-proxy/charmhelpers/core/host_factory/centos.py b/ceph-proxy/charmhelpers/core/host_factory/centos.py
new file mode 100644
index 00000000..7781a396
--- /dev/null
+++ b/ceph-proxy/charmhelpers/core/host_factory/centos.py
@@ -0,0 +1,72 @@
+import subprocess
+import yum
+import os
+
+from charmhelpers.core.strutils import BasicStringComparator
+
+
+class CompareHostReleases(BasicStringComparator):
+ """Provide comparisons of Host releases.
+
+ Use in the form of
+
+ if CompareHostReleases(release) > 'trusty':
+ # do something with mitaka
+ """
+
+ def __init__(self, item):
+ raise NotImplementedError(
+ "CompareHostReleases() is not implemented for CentOS")
+
+
+def service_available(service_name):
+ """Determine whether a system service is available."""
+ if os.path.isdir('/run/systemd/system'):
+ cmd = ['systemctl', 'is-enabled', service_name]
+ else:
+ cmd = ['service', service_name, 'is-enabled']
+ return subprocess.call(cmd) == 0
+
+
+def add_new_group(group_name, system_group=False, gid=None):
+ cmd = ['groupadd']
+ if gid:
+ cmd.extend(['--gid', str(gid)])
+ if system_group:
+ cmd.append('-r')
+ cmd.append(group_name)
+ subprocess.check_call(cmd)
+
+
+def lsb_release():
+ """Return /etc/os-release in a dict."""
+ d = {}
+ with open('/etc/os-release', 'r') as lsb:
+ for l in lsb:
+ s = l.split('=')
+ if len(s) != 2:
+ continue
+ d[s[0].strip()] = s[1].strip()
+ return d
+
+
+def cmp_pkgrevno(package, revno, pkgcache=None):
+ """Compare supplied revno with the revno of the installed package.
+
+ * 1 => Installed revno is greater than supplied arg
+ * 0 => Installed revno is the same as supplied arg
+ * -1 => Installed revno is less than supplied arg
+
+ This function uses yum's YumBase to build a package cache if the
+ pkgcache argument is None.
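+
+ Illustrative comparison (the package name and version are examples
+ only)::
+
+ if cmp_pkgrevno('ceph', '12.2.0') >= 0:
+ pass # installed ceph is at least 12.2.0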
+ """ + if not pkgcache: + y = yum.YumBase() + packages = y.doPackageLists() + pkgcache = {i.Name: i.version for i in packages['installed']} + pkg = pkgcache[package] + if pkg > revno: + return 1 + if pkg < revno: + return -1 + return 0 diff --git a/ceph-proxy/charmhelpers/core/host_factory/ubuntu.py b/ceph-proxy/charmhelpers/core/host_factory/ubuntu.py new file mode 100644 index 00000000..732d76c3 --- /dev/null +++ b/ceph-proxy/charmhelpers/core/host_factory/ubuntu.py @@ -0,0 +1,125 @@ +import subprocess + +from charmhelpers.core.hookenv import cached +from charmhelpers.core.strutils import BasicStringComparator + + +UBUNTU_RELEASES = ( + 'lucid', + 'maverick', + 'natty', + 'oneiric', + 'precise', + 'quantal', + 'raring', + 'saucy', + 'trusty', + 'utopic', + 'vivid', + 'wily', + 'xenial', + 'yakkety', + 'zesty', + 'artful', + 'bionic', + 'cosmic', + 'disco', + 'eoan', + 'focal', + 'groovy', + 'hirsute', + 'impish', + 'jammy', + 'kinetic', + 'lunar', + 'mantic', +) + + +class CompareHostReleases(BasicStringComparator): + """Provide comparisons of Ubuntu releases. + + Use in the form of + + if CompareHostReleases(release) > 'trusty': + # do something with mitaka + """ + _list = UBUNTU_RELEASES + + +def service_available(service_name): + """Determine whether a system service is available""" + try: + subprocess.check_output( + ['service', service_name, 'status'], + stderr=subprocess.STDOUT).decode('UTF-8') + except subprocess.CalledProcessError as e: + return b'unrecognized service' not in e.output + else: + return True + + +def add_new_group(group_name, system_group=False, gid=None): + cmd = ['addgroup'] + if gid: + cmd.extend(['--gid', str(gid)]) + if system_group: + cmd.append('--system') + else: + cmd.extend([ + '--group', + ]) + cmd.append(group_name) + subprocess.check_call(cmd) + + +def lsb_release(): + """Return /etc/lsb-release in a dict""" + d = {} + with open('/etc/lsb-release', 'r') as lsb: + for l in lsb: + k, v = l.split('=') + d[k.strip()] = v.strip() + return d + + +def get_distrib_codename(): + """Return the codename of the distribution + :returns: The codename + :rtype: str + """ + return lsb_release()['DISTRIB_CODENAME'].lower() + + +def cmp_pkgrevno(package, revno, pkgcache=None): + """Compare supplied revno with the revno of the installed package. + + * 1 => Installed revno is greater than supplied arg + * 0 => Installed revno is the same as supplied arg + * -1 => Installed revno is less than supplied arg + + This function imports apt_cache function from charmhelpers.fetch if + the pkgcache argument is None. Be sure to add charmhelpers.fetch if + you call this function, or pass an apt_pkg.Cache() instance. + """ + from charmhelpers.fetch import apt_pkg, get_installed_version + if not pkgcache: + current_ver = get_installed_version(package) + else: + pkg = pkgcache[package] + current_ver = pkg.current_ver + + return apt_pkg.version_compare(current_ver.ver_str, revno) + + +@cached +def arch(): + """Return the package architecture as a string. + + :returns: the architecture + :rtype: str + :raises: subprocess.CalledProcessError if dpkg command fails + """ + return subprocess.check_output( + ['dpkg', '--print-architecture'] + ).rstrip().decode('UTF-8') diff --git a/ceph-proxy/charmhelpers/core/hugepage.py b/ceph-proxy/charmhelpers/core/hugepage.py new file mode 100644 index 00000000..54b5b5e2 --- /dev/null +++ b/ceph-proxy/charmhelpers/core/hugepage.py @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- + +# Copyright 2014-2015 Canonical Limited. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import yaml +from charmhelpers.core import fstab +from charmhelpers.core import sysctl +from charmhelpers.core.host import ( + add_group, + add_user_to_group, + fstab_mount, + mkdir, +) +from charmhelpers.core.strutils import bytes_from_string +from subprocess import check_output + + +def hugepage_support(user, group='hugetlb', nr_hugepages=256, + max_map_count=65536, mnt_point='/run/hugepages/kvm', + pagesize='2MB', mount=True, set_shmmax=False): + """Enable hugepages on system. + + Args: + user (str) -- Username to allow access to hugepages to + group (str) -- Group name to own hugepages + nr_hugepages (int) -- Number of pages to reserve + max_map_count (int) -- Number of Virtual Memory Areas a process can own + mnt_point (str) -- Directory to mount hugepages on + pagesize (str) -- Size of hugepages + mount (bool) -- Whether to Mount hugepages + """ + group_info = add_group(group) + gid = group_info.gr_gid + add_user_to_group(user, group) + if max_map_count < 2 * nr_hugepages: + max_map_count = 2 * nr_hugepages + sysctl_settings = { + 'vm.nr_hugepages': nr_hugepages, + 'vm.max_map_count': max_map_count, + 'vm.hugetlb_shm_group': gid, + } + if set_shmmax: + shmmax_current = int(check_output(['sysctl', '-n', 'kernel.shmmax'])) + shmmax_minsize = bytes_from_string(pagesize) * nr_hugepages + if shmmax_minsize > shmmax_current: + sysctl_settings['kernel.shmmax'] = shmmax_minsize + sysctl.create(yaml.dump(sysctl_settings), '/etc/sysctl.d/10-hugepage.conf') + mkdir(mnt_point, owner='root', group='root', perms=0o755, force=False) + lfstab = fstab.Fstab() + fstab_entry = lfstab.get_entry_by_attr('mountpoint', mnt_point) + if fstab_entry: + lfstab.remove_entry(fstab_entry) + entry = lfstab.Entry('nodev', mnt_point, 'hugetlbfs', + 'mode=1770,gid={},pagesize={}'.format(gid, pagesize), 0, 0) + lfstab.add_entry(entry) + if mount: + fstab_mount(mnt_point) diff --git a/ceph-proxy/charmhelpers/core/kernel.py b/ceph-proxy/charmhelpers/core/kernel.py new file mode 100644 index 00000000..e01f4f8b --- /dev/null +++ b/ceph-proxy/charmhelpers/core/kernel.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import re
+import subprocess
+
+from charmhelpers.osplatform import get_platform
+from charmhelpers.core.hookenv import (
+ log,
+ INFO
+)
+
+__platform__ = get_platform()
+if __platform__ == "ubuntu":
+ from charmhelpers.core.kernel_factory.ubuntu import ( # NOQA:F401
+ persistent_modprobe,
+ update_initramfs,
+ ) # flake8: noqa -- ignore F401 for this import
+elif __platform__ == "centos":
+ from charmhelpers.core.kernel_factory.centos import ( # NOQA:F401
+ persistent_modprobe,
+ update_initramfs,
+ ) # flake8: noqa -- ignore F401 for this import
+
+__author__ = "Jorge Niedbalski "
+
+
+def modprobe(module, persist=True):
+ """Load a kernel module and configure for auto-load on reboot."""
+ cmd = ['modprobe', module]
+
+ log('Loading kernel module %s' % module, level=INFO)
+
+ subprocess.check_call(cmd)
+ if persist:
+ persistent_modprobe(module)
+
+
+def rmmod(module, force=False):
+ """Remove a module from the Linux kernel"""
+ cmd = ['rmmod']
+ if force:
+ cmd.append('-f')
+ cmd.append(module)
+ log('Removing kernel module %s' % module, level=INFO)
+ return subprocess.check_call(cmd)
+
+
+def lsmod():
+ """Shows what kernel modules are currently loaded"""
+ return subprocess.check_output(['lsmod'],
+ universal_newlines=True)
+
+
+def is_module_loaded(module):
+ """Checks if a kernel module is already loaded"""
+ matches = re.findall('^%s[ ]+' % module, lsmod(), re.M)
+ return len(matches) > 0
diff --git a/ceph-proxy/charmhelpers/core/kernel_factory/__init__.py b/ceph-proxy/charmhelpers/core/kernel_factory/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/ceph-proxy/charmhelpers/core/kernel_factory/centos.py b/ceph-proxy/charmhelpers/core/kernel_factory/centos.py
new file mode 100644
index 00000000..1c402c11
--- /dev/null
+++ b/ceph-proxy/charmhelpers/core/kernel_factory/centos.py
@@ -0,0 +1,17 @@
+import subprocess
+import os
+
+
+def persistent_modprobe(module):
+ """Configure a kernel module for automatic loading on reboot."""
+ if not os.path.exists('/etc/rc.modules'):
+ open('/etc/rc.modules', 'a').close()
+ # rc.modules is executed at boot, so it must be executable (0o111)
+ os.chmod('/etc/rc.modules', 0o111)
+ with open('/etc/rc.modules', 'r+') as modules:
+ if module not in modules.read():
+ modules.write('modprobe %s\n' % module)
+
+
+def update_initramfs(version='all'):
+ """Updates an initramfs image."""
+ return subprocess.check_call(["dracut", "-f", version])
diff --git a/ceph-proxy/charmhelpers/core/kernel_factory/ubuntu.py b/ceph-proxy/charmhelpers/core/kernel_factory/ubuntu.py
new file mode 100644
index 00000000..3de372fd
--- /dev/null
+++ b/ceph-proxy/charmhelpers/core/kernel_factory/ubuntu.py
@@ -0,0 +1,13 @@
+import subprocess
+
+
+def persistent_modprobe(module):
+ """Configure a kernel module for automatic loading on reboot."""
+ with open('/etc/modules', 'r+') as modules:
+ if module not in modules.read():
+ modules.write(module + "\n")
+
+
+def update_initramfs(version='all'):
+ """Updates an initramfs image."""
+ return subprocess.check_call(["update-initramfs", "-k", version, "-u"])
diff --git a/ceph-proxy/charmhelpers/core/services/__init__.py b/ceph-proxy/charmhelpers/core/services/__init__.py
new file mode 100644
index 00000000..61fd074e
--- /dev/null
+++ b/ceph-proxy/charmhelpers/core/services/__init__.py
@@ -0,0 +1,16 @@
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .base import * # NOQA
+from .helpers import * # NOQA
diff --git a/ceph-proxy/charmhelpers/core/services/base.py b/ceph-proxy/charmhelpers/core/services/base.py
new file mode 100644
index 00000000..8d217b59
--- /dev/null
+++ b/ceph-proxy/charmhelpers/core/services/base.py
@@ -0,0 +1,363 @@
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import json
+import inspect
+from collections import OrderedDict
+from collections.abc import Iterable
+
+from charmhelpers.core import host
+from charmhelpers.core import hookenv
+
+
+__all__ = ['ServiceManager', 'ManagerCallback',
+ 'PortManagerCallback', 'open_ports', 'close_ports', 'manage_ports',
+ 'service_restart', 'service_stop']
+
+
+class ServiceManager(object):
+ def __init__(self, services=None):
+ """
+ Register a list of services, given their definitions.
+
+ Service definitions are dicts in the following formats (all keys except
+ 'service' are optional)::
+
+ {
+ "service": <service name>,
+ "required_data": <list of required data contexts>,
+ "provided_data": <list of provided data contexts>,
+ "data_ready": <one or more callbacks>,
+ "data_lost": <one or more callbacks>,
+ "start": <one or more callbacks>,
+ "stop": <one or more callbacks>,
+ "ports": <list of ports to manage>,
+ }
+
+ The 'required_data' list should contain dicts of required data (or
+ dependency managers that act like dicts and know how to collect the data).
+ Only when all items in the 'required_data' list are populated are the
+ 'data_ready' and 'start' callbacks executed. See `is_ready()` for more
+ information.
+
+ The 'provided_data' list should contain relation data providers, most likely
+ a subclass of :class:`charmhelpers.core.services.helpers.RelationContext`,
+ that will indicate a set of data to set on a given relation.
+
+ The 'data_ready' value should be either a single callback, or a list of
+ callbacks, to be called when all items in 'required_data' pass `is_ready()`.
+ Each callback will be called with the service name as the only parameter.
+ After all of the 'data_ready' callbacks are called, the 'start' callbacks
+ are fired.
+
+ The 'data_lost' value should be either a single callback, or a list of
+ callbacks, to be called when a 'required_data' item no longer passes
+ `is_ready()`. Each callback will be called with the service name as the
+ only parameter. After all of the 'data_lost' callbacks are called,
+ the 'stop' callbacks are fired.
+
+ The 'start' value should be either a single callback, or a list of
+ callbacks, to be called when starting the service, after the 'data_ready'
+ callbacks are complete. Each callback will be called with the service
+ name as the only parameter. This defaults to
+ `[host.service_start, services.open_ports]`.
+
+ The 'stop' value should be either a single callback, or a list of
+ callbacks, to be called when stopping the service. If the service is
+ being stopped because it no longer has all of its 'required_data', this
+ will be called after all of the 'data_lost' callbacks are complete.
+ Each callback will be called with the service name as the only parameter.
+ This defaults to `[services.close_ports, host.service_stop]`.
+
+ The 'ports' value should be a list of ports to manage. The default
+ 'start' handler will open the ports after the service is started,
+ and the default 'stop' handler will close the ports prior to stopping
+ the service.
+
+
+ Examples:
+
+ The following registers an Upstart service called bingod that depends on
+ a mongodb relation and which runs a custom `db_migrate` function prior to
+ restarting the service, and a Runit service called spadesd::
+
+ manager = services.ServiceManager([
+ {
+ 'service': 'bingod',
+ 'ports': [80, 443],
+ 'required_data': [MongoRelation(), config(), {'my': 'data'}],
+ 'data_ready': [
+ services.template(source='bingod.conf'),
+ services.template(source='bingod.ini',
+ target='/etc/bingod.ini',
+ owner='bingo', perms=0o400),
+ ],
+ },
+ {
+ 'service': 'spadesd',
+ 'data_ready': services.template(source='spadesd_run.j2',
+ target='/etc/sv/spadesd/run',
+ perms=0o555),
+ 'start': runit_start,
+ 'stop': runit_stop,
+ },
+ ])
+ manager.manage()
+ """
+ self._ready_file = os.path.join(hookenv.charm_dir(), 'READY-SERVICES.json')
+ self._ready = None
+ self.services = OrderedDict()
+ for service in services or []:
+ service_name = service['service']
+ self.services[service_name] = service
+
+ def manage(self):
+ """
+ Handle the current hook by doing The Right Thing with the registered services.
+ """
+ hookenv._run_atstart()
+ try:
+ hook_name = hookenv.hook_name()
+ if hook_name == 'stop':
+ self.stop_services()
+ else:
+ self.reconfigure_services()
+ self.provide_data()
+ except SystemExit as x:
+ if x.code is None or x.code == 0:
+ hookenv._run_atexit()
+ hookenv._run_atexit()
+
+ def provide_data(self):
+ """
+ Set the relation data for each provider in the ``provided_data`` list.
+
+ A provider must have a `name` attribute, which indicates which relation
+ to set data on, and a `provide_data()` method, which returns a dict of
+ data to set.
+
+ The `provide_data()` method can optionally accept two parameters:
+
+ * ``remote_service`` The name of the remote service that the data will
+ be provided to. The `provide_data()` method will be called once
+ for each connected service (not unit). This allows the method to
+ tailor its data to the given service.
+ * ``service_ready`` Whether or not the service definition had all of
+ its requirements met, and thus the ``data_ready`` callbacks run.
+
+ Note that the ``provided_data`` methods are now called **after** the
+ ``data_ready`` callbacks are run. This gives the ``data_ready`` callbacks
+ a chance to generate any data necessary for providing to the remote
+ services.
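+
+ A minimal provider sketch (the class, relation name and values are
+ illustrative only)::
+
+ class WebsiteProvider(object):
+ name = 'website'
+
+ def provide_data(self):
+ return {'port': 80}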
+ """ + for service_name, service in self.services.items(): + service_ready = self.is_ready(service_name) + for provider in service.get('provided_data', []): + for relid in hookenv.relation_ids(provider.name): + units = hookenv.related_units(relid) + if not units: + continue + remote_service = units[0].split('/')[0] + argspec = inspect.getfullargspec(provider.provide_data) + if len(argspec.args) > 1: + data = provider.provide_data(remote_service, service_ready) + else: + data = provider.provide_data() + if data: + hookenv.relation_set(relid, data) + + def reconfigure_services(self, *service_names): + """ + Update all files for one or more registered services, and, + if ready, optionally restart them. + + If no service names are given, reconfigures all registered services. + """ + for service_name in service_names or self.services.keys(): + if self.is_ready(service_name): + self.fire_event('data_ready', service_name) + self.fire_event('start', service_name, default=[ + service_restart, + manage_ports]) + self.save_ready(service_name) + else: + if self.was_ready(service_name): + self.fire_event('data_lost', service_name) + self.fire_event('stop', service_name, default=[ + manage_ports, + service_stop]) + self.save_lost(service_name) + + def stop_services(self, *service_names): + """ + Stop one or more registered services, by name. + + If no service names are given, stops all registered services. + """ + for service_name in service_names or self.services.keys(): + self.fire_event('stop', service_name, default=[ + manage_ports, + service_stop]) + + def get_service(self, service_name): + """ + Given the name of a registered service, return its service definition. + """ + service = self.services.get(service_name) + if not service: + raise KeyError('Service not registered: %s' % service_name) + return service + + def fire_event(self, event_name, service_name, default=None): + """ + Fire a data_ready, data_lost, start, or stop event on a given service. + """ + service = self.get_service(service_name) + callbacks = service.get(event_name, default) + if not callbacks: + return + if not isinstance(callbacks, Iterable): + callbacks = [callbacks] + for callback in callbacks: + if isinstance(callback, ManagerCallback): + callback(self, service_name, event_name) + else: + callback(service_name) + + def is_ready(self, service_name): + """ + Determine if a registered service is ready, by checking its 'required_data'. + + A 'required_data' item can be any mapping type, and is considered ready + if `bool(item)` evaluates as True. + """ + service = self.get_service(service_name) + reqs = service.get('required_data', []) + return all(bool(req) for req in reqs) + + def _load_ready_file(self): + if self._ready is not None: + return + if os.path.exists(self._ready_file): + with open(self._ready_file) as fp: + self._ready = set(json.load(fp)) + else: + self._ready = set() + + def _save_ready_file(self): + if self._ready is None: + return + with open(self._ready_file, 'w') as fp: + json.dump(list(self._ready), fp) + + def save_ready(self, service_name): + """ + Save an indicator that the given service is now data_ready. + """ + self._load_ready_file() + self._ready.add(service_name) + self._save_ready_file() + + def save_lost(self, service_name): + """ + Save an indicator that the given service is no longer data_ready. + """ + self._load_ready_file() + self._ready.discard(service_name) + self._save_ready_file() + + def was_ready(self, service_name): + """ + Determine if the given service was previously data_ready. 
+ """ + self._load_ready_file() + return service_name in self._ready + + +class ManagerCallback(object): + """ + Special case of a callback that takes the `ServiceManager` instance + in addition to the service name. + + Subclasses should implement `__call__` which should accept three parameters: + + * `manager` The `ServiceManager` instance + * `service_name` The name of the service it's being triggered for + * `event_name` The name of the event that this callback is handling + """ + def __call__(self, manager, service_name, event_name): + raise NotImplementedError() + + +class PortManagerCallback(ManagerCallback): + """ + Callback class that will open or close ports, for use as either + a start or stop action. + """ + def __call__(self, manager, service_name, event_name): + service = manager.get_service(service_name) + # turn this generator into a list, + # as we'll be going over it multiple times + new_ports = list(service.get('ports', [])) + port_file = os.path.join(hookenv.charm_dir(), '.{}.ports'.format(service_name)) + if os.path.exists(port_file): + with open(port_file) as fp: + old_ports = fp.read().split(',') + for old_port in old_ports: + if bool(old_port) and not self.ports_contains(old_port, new_ports): + hookenv.close_port(old_port) + with open(port_file, 'w') as fp: + fp.write(','.join(str(port) for port in new_ports)) + for port in new_ports: + # A port is either a number or 'ICMP' + protocol = 'TCP' + if str(port).upper() == 'ICMP': + protocol = 'ICMP' + if event_name == 'start': + hookenv.open_port(port, protocol) + elif event_name == 'stop': + hookenv.close_port(port, protocol) + + def ports_contains(self, port, ports): + if not bool(port): + return False + if str(port).upper() != 'ICMP': + port = int(port) + return port in ports + + +def service_stop(service_name): + """ + Wrapper around host.service_stop to prevent spurious "unknown service" + messages in the logs. + """ + if host.service_running(service_name): + host.service_stop(service_name) + + +def service_restart(service_name): + """ + Wrapper around host.service_restart to prevent spurious "unknown service" + messages in the logs. + """ + if host.service_available(service_name): + if host.service_running(service_name): + host.service_restart(service_name) + else: + host.service_start(service_name) + + +# Convenience aliases +open_ports = close_ports = manage_ports = PortManagerCallback() diff --git a/ceph-proxy/charmhelpers/core/services/helpers.py b/ceph-proxy/charmhelpers/core/services/helpers.py new file mode 100644 index 00000000..5bf62dd5 --- /dev/null +++ b/ceph-proxy/charmhelpers/core/services/helpers.py @@ -0,0 +1,290 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import os
+import yaml
+
+from charmhelpers.core import hookenv
+from charmhelpers.core import host
+from charmhelpers.core import templating
+
+from charmhelpers.core.services.base import ManagerCallback
+
+
+__all__ = ['RelationContext', 'TemplateCallback',
+ 'render_template', 'template']
+
+
+class RelationContext(dict):
+ """
+ Base class for a context generator that gets relation data from juju.
+
+ Subclasses must provide the attributes `name`, which is the name of the
+ interface of interest, `interface`, which is the type of the interface of
+ interest, and `required_keys`, which is the set of keys required for the
+ relation to be considered complete. The data for all interfaces matching
+ the `name` attribute that are complete will be used to populate the
+ dictionary values (see `get_data`, below).
+
+ The generated context will be namespaced under the relation :attr:`name`,
+ to prevent potential naming conflicts.
+
+ :param str name: Override the relation :attr:`name`, since it can vary from charm to charm
+ :param list additional_required_keys: Extend the list of :attr:`required_keys`
+ """
+ name = None
+ interface = None
+
+ def __init__(self, name=None, additional_required_keys=None):
+ if not hasattr(self, 'required_keys'):
+ self.required_keys = []
+
+ if name is not None:
+ self.name = name
+ if additional_required_keys:
+ self.required_keys.extend(additional_required_keys)
+ self.get_data()
+
+ def __bool__(self):
+ """
+ Returns True if all of the required_keys are available.
+ """
+ return self.is_ready()
+
+ __nonzero__ = __bool__
+
+ def __repr__(self):
+ return super(RelationContext, self).__repr__()
+
+ def is_ready(self):
+ """
+ Returns True if all of the `required_keys` are available from any units.
+ """
+ ready = len(self.get(self.name, [])) > 0
+ if not ready:
+ hookenv.log('Incomplete relation: {}'.format(self.__class__.__name__), hookenv.DEBUG)
+ return ready
+
+ def _is_ready(self, unit_data):
+ """
+ Helper method that tests a set of relation data and returns True if
+ all of the `required_keys` are present.
+ """
+ return set(unit_data.keys()).issuperset(set(self.required_keys))
+
+ def get_data(self):
+ """
+ Retrieve the relation data for each unit involved in a relation and,
+ if complete, store it in a list under `self[self.name]`. This
+ is automatically called when the RelationContext is instantiated.
+
+ The units are sorted lexicographically first by the service ID, then by
+ the unit ID. Thus, if an interface has two other services, 'db:1'
+ and 'db:2', with 'db:1' having two units, 'wordpress/0' and 'wordpress/1',
+ and 'db:2' having one unit, 'mediawiki/0', all of which have a complete
+ set of data, the relation data for the units will be stored in the
+ order: 'wordpress/0', 'wordpress/1', 'mediawiki/0'.
+
+ If you only care about a single unit on the relation, you can just
+ access it as `{{ interface[0]['key'] }}`. However, if you can at all
+ support multiple units on a relation, you should iterate over the list,
+ like::
+
+ {% for unit in interface -%}
+ {{ unit['key'] }}{% if not loop.last %},{% endif %}
+ {%- endfor %}
+
+ Note that since all sets of relation data from all related services and
+ units are in a single list, if you need to know which service or unit a
+ set of data came from, you'll need to extend this class to preserve
+ that information.
+ """ + if not hookenv.relation_ids(self.name): + return + + ns = self.setdefault(self.name, []) + for rid in sorted(hookenv.relation_ids(self.name)): + for unit in sorted(hookenv.related_units(rid)): + reldata = hookenv.relation_get(rid=rid, unit=unit) + if self._is_ready(reldata): + ns.append(reldata) + + def provide_data(self): + """ + Return data to be relation_set for this interface. + """ + return {} + + +class MysqlRelation(RelationContext): + """ + Relation context for the `mysql` interface. + + :param str name: Override the relation :attr:`name`, since it can vary from charm to charm + :param list additional_required_keys: Extend the list of :attr:`required_keys` + """ + name = 'db' + interface = 'mysql' + + def __init__(self, *args, **kwargs): + self.required_keys = ['host', 'user', 'password', 'database'] + RelationContext.__init__(self, *args, **kwargs) + + +class HttpRelation(RelationContext): + """ + Relation context for the `http` interface. + + :param str name: Override the relation :attr:`name`, since it can vary from charm to charm + :param list additional_required_keys: Extend the list of :attr:`required_keys` + """ + name = 'website' + interface = 'http' + + def __init__(self, *args, **kwargs): + self.required_keys = ['host', 'port'] + RelationContext.__init__(self, *args, **kwargs) + + def provide_data(self): + return { + 'host': hookenv.unit_get('private-address'), + 'port': 80, + } + + +class RequiredConfig(dict): + """ + Data context that loads config options with one or more mandatory options. + + Once the required options have been changed from their default values, all + config options will be available, namespaced under `config` to prevent + potential naming conflicts (for example, between a config option and a + relation property). + + :param list *args: List of options that must be changed from their default values. + """ + + def __init__(self, *args): + self.required_options = args + self['config'] = hookenv.config() + with open(os.path.join(hookenv.charm_dir(), 'config.yaml')) as fp: + self.config = yaml.safe_load(fp).get('options', {}) + + def __bool__(self): + for option in self.required_options: + if option not in self['config']: + return False + current_value = self['config'][option] + default_value = self.config[option].get('default') + if current_value == default_value: + return False + if current_value in (None, '') and default_value in (None, ''): + return False + return True + + def __nonzero__(self): + return self.__bool__() + + +class StoredContext(dict): + """ + A data context that always returns the data that it was first created with. + + This is useful to do a one-time generation of things like passwords, that + will thereafter use the same value that was originally generated, instead + of generating a new value each time it is run. + """ + def __init__(self, file_name, config_data): + """ + If the file exists, populate `self` with the data from the file. + Otherwise, populate with the given data and persist it to the file. 
+ """ + if os.path.exists(file_name): + self.update(self.read_context(file_name)) + else: + self.store_context(file_name, config_data) + self.update(config_data) + + def store_context(self, file_name, config_data): + if not os.path.isabs(file_name): + file_name = os.path.join(hookenv.charm_dir(), file_name) + with open(file_name, 'w') as file_stream: + os.fchmod(file_stream.fileno(), 0o600) + yaml.dump(config_data, file_stream) + + def read_context(self, file_name): + if not os.path.isabs(file_name): + file_name = os.path.join(hookenv.charm_dir(), file_name) + with open(file_name, 'r') as file_stream: + data = yaml.safe_load(file_stream) + if not data: + raise OSError("%s is empty" % file_name) + return data + + +class TemplateCallback(ManagerCallback): + """ + Callback class that will render a Jinja2 template, for use as a ready + action. + + :param str source: The template source file, relative to + `$CHARM_DIR/templates` + + :param str target: The target to write the rendered template to (or None) + :param str owner: The owner of the rendered file + :param str group: The group of the rendered file + :param int perms: The permissions of the rendered file + :param partial on_change_action: functools partial to be executed when + rendered file changes + :param jinja2 loader template_loader: A jinja2 template loader + + :return str: The rendered template + """ + def __init__(self, source, target, + owner='root', group='root', perms=0o444, + on_change_action=None, template_loader=None): + self.source = source + self.target = target + self.owner = owner + self.group = group + self.perms = perms + self.on_change_action = on_change_action + self.template_loader = template_loader + + def __call__(self, manager, service_name, event_name): + pre_checksum = '' + if self.on_change_action and os.path.isfile(self.target): + pre_checksum = host.file_hash(self.target) + service = manager.get_service(service_name) + context = {'ctx': {}} + for ctx in service.get('required_data', []): + context.update(ctx) + context['ctx'].update(ctx) + + result = templating.render(self.source, self.target, context, + self.owner, self.group, self.perms, + template_loader=self.template_loader) + if self.on_change_action: + if pre_checksum == host.file_hash(self.target): + hookenv.log( + 'No change detected: {}'.format(self.target), + hookenv.DEBUG) + else: + self.on_change_action() + + return result + + +# Convenience aliases for templates +render_template = template = TemplateCallback diff --git a/ceph-proxy/charmhelpers/core/strutils.py b/ceph-proxy/charmhelpers/core/strutils.py new file mode 100644 index 00000000..31366871 --- /dev/null +++ b/ceph-proxy/charmhelpers/core/strutils.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import re
+
+TRUTHY_STRINGS = {'y', 'yes', 'true', 't', 'on'}
+FALSEY_STRINGS = {'n', 'no', 'false', 'f', 'off'}
+
+
+def bool_from_string(value, truthy_strings=TRUTHY_STRINGS,
+ falsey_strings=FALSEY_STRINGS, assume_false=False):
+ """Interpret string value as boolean.
+
+ Returns True if value translates to True otherwise False.
+ """
+ if isinstance(value, str):
+ value = str(value)
+ else:
+ msg = "Unable to interpret non-string value '%s' as boolean" % (value)
+ raise ValueError(msg)
+
+ value = value.strip().lower()
+
+ if value in truthy_strings:
+ return True
+ elif value in falsey_strings or assume_false:
+ return False
+
+ msg = "Unable to interpret string value '%s' as boolean" % (value)
+ raise ValueError(msg)
+
+
+def bytes_from_string(value):
+ """Interpret human readable string value as bytes.
+
+ Returns int
+ """
+ BYTE_POWER = {
+ 'K': 1,
+ 'KB': 1,
+ 'M': 2,
+ 'MB': 2,
+ 'G': 3,
+ 'GB': 3,
+ 'T': 4,
+ 'TB': 4,
+ 'P': 5,
+ 'PB': 5,
+ }
+ if isinstance(value, str):
+ value = str(value)
+ else:
+ msg = "Unable to interpret non-string value '%s' as bytes" % (value)
+ raise ValueError(msg)
+ matches = re.match("([0-9]+)([a-zA-Z]+)", value)
+ if matches:
+ size = int(matches.group(1)) * (1024 ** BYTE_POWER[matches.group(2)])
+ else:
+ # Assume that value passed in is bytes
+ try:
+ size = int(value)
+ except ValueError:
+ msg = "Unable to interpret string value '%s' as bytes" % (value)
+ raise ValueError(msg)
+ return size
+
+
+class BasicStringComparator(object):
+ """Provides a class that will compare strings from an iterator type object.
+ Used to provide > and < comparisons on strings that may not necessarily be
+ alphanumerically ordered. e.g. OpenStack or Ubuntu releases AFTER the
+ z-wrap.
+ """
+
+ _list = None
+
+ def __init__(self, item):
+ if self._list is None:
+ raise Exception("Must define the _list in the class definition!")
+ try:
+ self.index = self._list.index(item)
+ except Exception:
+ raise KeyError("Item '{}' is not in list '{}'"
+ .format(item, self._list))
+
+ def __eq__(self, other):
+ assert isinstance(other, str) or isinstance(other, self.__class__)
+ return self.index == self._list.index(other)
+
+ def __ne__(self, other):
+ return not self.__eq__(other)
+
+ def __lt__(self, other):
+ assert isinstance(other, str) or isinstance(other, self.__class__)
+ return self.index < self._list.index(other)
+
+ def __ge__(self, other):
+ return not self.__lt__(other)
+
+ def __gt__(self, other):
+ assert isinstance(other, str) or isinstance(other, self.__class__)
+ return self.index > self._list.index(other)
+
+ def __le__(self, other):
+ return not self.__gt__(other)
+
+ def __str__(self):
+ """Always give back the item at the index so it can be used in
+ comparisons like:
+
+ s_mitaka = CompareOpenStack('mitaka')
+ s_newton = CompareOpenStack('newton')
+
+ assert s_newton > s_mitaka
+
+ @returns: <string>
+ """
+ return self._list[self.index]
diff --git a/ceph-proxy/charmhelpers/core/sysctl.py b/ceph-proxy/charmhelpers/core/sysctl.py
new file mode 100644
index 00000000..386428d6
--- /dev/null
+++ b/ceph-proxy/charmhelpers/core/sysctl.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import yaml + +from subprocess import check_call, CalledProcessError + +from charmhelpers.core.hookenv import ( + log, + DEBUG, + ERROR, + WARNING, +) + +from charmhelpers.core.host import is_container + +__author__ = 'Jorge Niedbalski R. ' + + +def create(sysctl_dict, sysctl_file, ignore=False): + """Creates a sysctl.conf file from a YAML associative array + + :param sysctl_dict: a dict or YAML-formatted string of sysctl + options eg "{ 'kernel.max_pid': 1337 }" + :type sysctl_dict: str + :param sysctl_file: path to the sysctl file to be saved + :type sysctl_file: str or unicode + :param ignore: If True, ignore "unknown variable" errors. + :type ignore: bool + :returns: None + """ + if type(sysctl_dict) is not dict: + try: + sysctl_dict_parsed = yaml.safe_load(sysctl_dict) + except yaml.YAMLError: + log("Error parsing YAML sysctl_dict: {}".format(sysctl_dict), + level=ERROR) + return + else: + sysctl_dict_parsed = sysctl_dict + + with open(sysctl_file, "w") as fd: + for key, value in sysctl_dict_parsed.items(): + fd.write("{}={}\n".format(key, value)) + + log("Updating sysctl_file: {} values: {}".format(sysctl_file, + sysctl_dict_parsed), + level=DEBUG) + + call = ["sysctl", "-p", sysctl_file] + if ignore: + call.append("-e") + + try: + check_call(call) + except CalledProcessError as e: + if is_container(): + log("Error setting some sysctl keys in this container: {}".format(e.output), + level=WARNING) + else: + raise e diff --git a/ceph-proxy/charmhelpers/core/templating.py b/ceph-proxy/charmhelpers/core/templating.py new file mode 100644 index 00000000..cb0213dc --- /dev/null +++ b/ceph-proxy/charmhelpers/core/templating.py @@ -0,0 +1,88 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from charmhelpers.core import host +from charmhelpers.core import hookenv + + +def render(source, target, context, owner='root', group='root', + perms=0o444, templates_dir=None, encoding='UTF-8', + template_loader=None, config_template=None): + """ + Render a template. + + The `source` path, if not absolute, is relative to the `templates_dir`. + + The `target` path should be absolute. It can also be `None`, in which + case no file will be written. + + The context should be a dict containing the values to be replaced in the + template. + + config_template may be provided to render from a provided template instead + of loading from a file. + + The `owner`, `group`, and `perms` options will be passed to `write_file`. + + If omitted, `templates_dir` defaults to the `templates` folder in the charm. 
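+
+ A minimal illustrative call (the template and target paths are made
+ up)::
+
+ render('demo.conf.j2', '/etc/demo/demo.conf',
+ {'bind_port': 8080}, perms=0o640)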
+ + The rendered template will be written to the file as well as being returned + as a string. + + Note: Using this requires python3-jinja2; if it is not installed, calling + this will attempt to use charmhelpers.fetch.apt_install to install it. + """ + try: + from jinja2 import FileSystemLoader, Environment, exceptions + except ImportError: + try: + from charmhelpers.fetch import apt_install + except ImportError: + hookenv.log('Could not import jinja2, and could not import ' + 'charmhelpers.fetch to install it', + level=hookenv.ERROR) + raise + apt_install('python3-jinja2', fatal=True) + from jinja2 import FileSystemLoader, Environment, exceptions + + if template_loader: + template_env = Environment(loader=template_loader) + else: + if templates_dir is None: + templates_dir = os.path.join(hookenv.charm_dir(), 'templates') + template_env = Environment(loader=FileSystemLoader(templates_dir)) + + # load from a string if provided explicitly + if config_template is not None: + template = template_env.from_string(config_template) + else: + try: + source = source + template = template_env.get_template(source) + except exceptions.TemplateNotFound as e: + hookenv.log('Could not load template %s from %s.' % + (source, templates_dir), + level=hookenv.ERROR) + raise e + content = template.render(context) + if target is not None: + target_dir = os.path.dirname(target) + if not os.path.exists(target_dir): + # This is a terrible default directory permission, as the file + # or its siblings will often contain secrets. + host.mkdir(os.path.dirname(target), owner, group, perms=0o755) + host.write_file(target, content.encode(encoding), owner, group, perms) + return content diff --git a/ceph-proxy/charmhelpers/core/unitdata.py b/ceph-proxy/charmhelpers/core/unitdata.py new file mode 100644 index 00000000..65153f1f --- /dev/null +++ b/ceph-proxy/charmhelpers/core/unitdata.py @@ -0,0 +1,563 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Authors: +# Kapil Thangavelu +# +""" +Intro +----- + +A simple way to store state in units. This provides a key value +storage with support for versioned, transactional operation, +and can calculate deltas from previous values to simplify unit logic +when processing changes. + + +Hook Integration +---------------- + +There are several extant frameworks for hook execution, including + + - charmhelpers.core.hookenv.Hooks + - charmhelpers.core.services.ServiceManager + +The storage classes are framework agnostic, one simple integration is +via the HookData contextmanager. It will record the current hook +execution environment (including relation data, config data, etc.), +setup a transaction and allow easy access to the changes from +previously seen values. One consequence of the integration is the +reservation of particular keys ('rels', 'unit', 'env', 'config', +'charm_revisions') for their respective values. 
+ +Here's a fully worked integration example using hookenv.Hooks:: + + from charmhelpers.core import hookenv, unitdata + + hook_data = unitdata.HookData() + db = unitdata.kv() + hooks = hookenv.Hooks() + + @hooks.hook + def config_changed(): + # Print all changes to configuration from previously seen + # values. + for changed, (prev, cur) in hook_data.conf.items(): + print('config changed', changed, + 'previous value', prev, + 'current value', cur) + + # Get some unit specific bookkeeping + if not db.get('pkg_key'): + key = urllib.urlopen('https://example.com/pkg_key').read() + db.set('pkg_key', key) + + # Directly access all charm config as a mapping. + conf = db.getrange('config', True) + + # Directly access all relation data as a mapping + rels = db.getrange('rels', True) + + if __name__ == '__main__': + with hook_data(): + hooks.execute() + + +A more basic integration is via the hook_scope context manager, which simply +manages transaction scope (and records the hook name and timestamp):: + + >>> from unitdata import kv + >>> db = kv() + >>> with db.hook_scope('install'): + ... # do work, in transactional scope. + ... db.set('x', 1) + >>> db.get('x') + 1 + + +Usage +----- + +Values are automatically json de/serialized to preserve basic typing +and complex data structures (dicts, lists, ints, booleans, etc.). + +Individual values can be manipulated via get/set:: + + >>> kv.set('y', True) + >>> kv.get('y') + True + + # We can set complex values (dicts, lists) as a single key. + >>> kv.set('config', {'a': 1, 'b': True}) + + # Also supports returning dictionaries as a record which + # provides attribute access. + >>> config = kv.get('config', record=True) + >>> config.b + True + + +Groups of keys can be manipulated with update/getrange:: + + >>> kv.update({'z': 1, 'y': 2}, prefix="gui.") + >>> kv.getrange('gui.', strip=True) + {'z': 1, 'y': 2} + +When updating values, it is very helpful to understand which values +have actually changed and how they have changed. The storage +provides a delta method for this:: + + >>> data = {'debug': True, 'option': 2} + >>> delta = kv.delta(data, 'config.') + >>> delta.debug.previous + None + >>> delta.debug.current + True + >>> delta + {'debug': (None, True), 'option': (None, 2)} + +Note that the delta method does not persist the actual change; it needs to +be explicitly saved via the 'update' method:: + + >>> kv.update(data, 'config.') + +Values modified in the context of a hook scope retain historical values +associated with the hook name. + + >>> with db.hook_scope('config-changed'): + ... db.set('x', 42) + >>> db.gethistory('x') + [(1, u'x', 1, u'install', u'2015-01-21T16:49:30.038372'), + (2, u'x', 42, u'config-changed', u'2015-01-21T16:49:30.038786')] + +""" + +import collections +import contextlib +import datetime +import itertools +import json +import logging +import os +import pprint +import sqlite3 +import sys + +__author__ = 'Kapil Thangavelu ' + + +class Storage(object): + """Simple key value database for local unit state within charms. + + Modifications are not persisted unless :meth:`flush` is called. + + To support dicts, lists, integers, floats, and booleans, values + are automatically json encoded/decoded. + + Note: to facilitate unit testing, ':memory:' can be passed as the + path parameter, which causes sqlite3 to only build the db in memory. + This should only be used for testing purposes.
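As that note suggests, an in-memory Storage is convenient in unit tests, and nothing reaches disk until flush() commits; a small sketch::

    from charmhelpers.core import unitdata

    db = unitdata.Storage(':memory:')    # sqlite db exists only in memory
    db.update({'a': 1, 'b': 2}, prefix='demo.')
    assert db.getrange('demo.', strip=True) == {'a': 1, 'b': 2}
    db.flush()                           # commit; flush(False) rolls back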
+ """ + def __init__(self, path=None, keep_revisions=False): + self.db_path = path + self.keep_revisions = keep_revisions + if path is None: + if 'UNIT_STATE_DB' in os.environ: + self.db_path = os.environ['UNIT_STATE_DB'] + else: + self.db_path = os.path.join( + os.environ.get('CHARM_DIR', ''), '.unit-state.db') + if self.db_path != ':memory:': + with open(self.db_path, 'a') as f: + os.fchmod(f.fileno(), 0o600) + self.conn = sqlite3.connect('%s' % self.db_path) + self.cursor = self.conn.cursor() + self.revision = None + self._closed = False + self._init() + + def close(self): + if self._closed: + return + self.flush(False) + self.cursor.close() + self.conn.close() + self._closed = True + + def get(self, key, default=None, record=False): + self.cursor.execute('select data from kv where key=?', [key]) + result = self.cursor.fetchone() + if not result: + return default + if record: + return Record(json.loads(result[0])) + return json.loads(result[0]) + + def getrange(self, key_prefix, strip=False): + """ + Get a range of keys starting with a common prefix as a mapping of + keys to values. + + :param str key_prefix: Common prefix among all keys + :param bool strip: Optionally strip the common prefix from the key + names in the returned dict + :return dict: A (possibly empty) dict of key-value mappings + """ + self.cursor.execute("select key, data from kv where key like ?", + ['%s%%' % key_prefix]) + result = self.cursor.fetchall() + + if not result: + return {} + if not strip: + key_prefix = '' + return dict([ + (k[len(key_prefix):], json.loads(v)) for k, v in result]) + + def update(self, mapping, prefix=""): + """ + Set the values of multiple keys at once. + + :param dict mapping: Mapping of keys to values + :param str prefix: Optional prefix to apply to all keys in `mapping` + before setting + """ + for k, v in mapping.items(): + self.set("%s%s" % (prefix, k), v) + + def unset(self, key): + """ + Remove a key from the database entirely. + """ + self.cursor.execute('delete from kv where key=?', [key]) + if self.keep_revisions and self.revision and self.cursor.rowcount: + self.cursor.execute( + 'insert into kv_revisions values (?, ?, ?)', + [key, self.revision, json.dumps('DELETED')]) + + def unsetrange(self, keys=None, prefix=""): + """ + Remove a range of keys starting with a common prefix, from the database + entirely. + + :param list keys: List of keys to remove. + :param str prefix: Optional prefix to apply to all keys in ``keys`` + before removing. + """ + if keys is not None: + keys = ['%s%s' % (prefix, key) for key in keys] + self.cursor.execute('delete from kv where key in (%s)' % ','.join(['?'] * len(keys)), keys) + if self.keep_revisions and self.revision and self.cursor.rowcount: + self.cursor.execute( + 'insert into kv_revisions values %s' % ','.join(['(?, ?, ?)'] * len(keys)), + list(itertools.chain.from_iterable((key, self.revision, json.dumps('DELETED')) for key in keys))) + else: + self.cursor.execute('delete from kv where key like ?', + ['%s%%' % prefix]) + if self.keep_revisions and self.revision and self.cursor.rowcount: + self.cursor.execute( + 'insert into kv_revisions values (?, ?, ?)', + ['%s%%' % prefix, self.revision, json.dumps('DELETED')]) + + def set(self, key, value): + """ + Set a value in the database. 
+ + :param str key: Key to set the value for + :param value: Any JSON-serializable value to be set + """ + serialized = json.dumps(value) + + self.cursor.execute('select data from kv where key=?', [key]) + exists = self.cursor.fetchone() + + # Skip mutations to the same value + if exists: + if exists[0] == serialized: + return value + + if not exists: + self.cursor.execute( + 'insert into kv (key, data) values (?, ?)', + (key, serialized)) + else: + self.cursor.execute(''' + update kv + set data = ? + where key = ?''', [serialized, key]) + + # Save + if (not self.keep_revisions) or (not self.revision): + return value + + self.cursor.execute( + 'select 1 from kv_revisions where key=? and revision=?', + [key, self.revision]) + exists = self.cursor.fetchone() + + if not exists: + self.cursor.execute( + '''insert into kv_revisions ( + revision, key, data) values (?, ?, ?)''', + (self.revision, key, serialized)) + else: + self.cursor.execute( + ''' + update kv_revisions + set data = ? + where key = ? + and revision = ?''', + [serialized, key, self.revision]) + + return value + + def delta(self, mapping, prefix): + """ + return a delta containing values that have changed. + """ + previous = self.getrange(prefix, strip=True) + if not previous: + pk = set() + else: + pk = set(previous.keys()) + ck = set(mapping.keys()) + delta = DeltaSet() + + # added + for k in ck.difference(pk): + delta[k] = Delta(None, mapping[k]) + + # removed + for k in pk.difference(ck): + delta[k] = Delta(previous[k], None) + + # changed + for k in pk.intersection(ck): + c = mapping[k] + p = previous[k] + if c != p: + delta[k] = Delta(p, c) + + return delta + + @contextlib.contextmanager + def hook_scope(self, name=""): + """Scope all future interactions to the current hook execution + revision.""" + assert not self.revision + self.cursor.execute( + 'insert into hooks (hook, date) values (?, ?)', + (name or sys.argv[0], + datetime.datetime.utcnow().isoformat())) + self.revision = self.cursor.lastrowid + try: + yield self.revision + self.revision = None + except Exception: + self.flush(False) + self.revision = None + raise + else: + self.flush() + + def flush(self, save=True): + if save: + self.conn.commit() + elif self._closed: + return + else: + self.conn.rollback() + + def _init(self): + self.cursor.execute(''' + create table if not exists kv ( + key text, + data text, + primary key (key) + )''') + self.cursor.execute(''' + create table if not exists kv_revisions ( + key text, + revision integer, + data text, + primary key (key, revision) + )''') + self.cursor.execute(''' + create table if not exists hooks ( + version integer primary key autoincrement, + hook text, + date text + )''') + self.conn.commit() + + def gethistory(self, key, deserialize=False): + self.cursor.execute( + ''' + select kv.revision, kv.key, kv.data, h.hook, h.date + from kv_revisions kv, + hooks h + where kv.key=? + and kv.revision = h.version + ''', [key]) + if deserialize is False: + return self.cursor.fetchall() + return map(_parse_history, self.cursor.fetchall()) + + def debug(self, fh=sys.stderr): + self.cursor.execute('select * from kv') + pprint.pprint(self.cursor.fetchall(), stream=fh) + self.cursor.execute('select * from kv_revisions') + pprint.pprint(self.cursor.fetchall(), stream=fh) + + +def _parse_history(d): + return (d[0], d[1], json.loads(d[2]), d[3], + datetime.datetime.strptime(d[-1], "%Y-%m-%dT%H:%M:%S.%f")) + + +class HookData(object): + """Simple integration for existing hook exec frameworks. 
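Because hook_scope() brackets its body in a transaction, an exception inside the scope triggers flush(False), rolling pending writes back before the error is re-raised; a sketch of that behaviour::

    db = unitdata.Storage(':memory:')
    with db.hook_scope('install'):
        db.set('x', 1)                   # committed when the scope exits

    try:
        with db.hook_scope('config-changed'):
            db.set('x', 42)
            raise RuntimeError('hook failed')
    except RuntimeError:
        pass
    assert db.get('x') == 1              # the failed scope was rolled back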
+ + Records all unit information, and stores deltas for processing + by the hook. + + Sample:: + + from charmhelpers.core import hookenv, unitdata + + changes = unitdata.HookData() + db = unitdata.kv() + hooks = hookenv.Hooks() + + @hooks.hook + def config_changed(): + # View all changes to configuration + for changed, (prev, cur) in changes.conf.items(): + print('config changed', changed, + 'previous value', prev, + 'current value', cur) + + # Get some unit specific bookkeeping + if not db.get('pkg_key'): + key = urllib.urlopen('https://example.com/pkg_key').read() + db.set('pkg_key', key) + + if __name__ == '__main__': + with changes(): + hooks.execute() + + """ + def __init__(self): + self.kv = kv() + self.conf = None + self.rels = None + + @contextlib.contextmanager + def __call__(self): + from charmhelpers.core import hookenv + hook_name = hookenv.hook_name() + + with self.kv.hook_scope(hook_name): + self._record_charm_version(hookenv.charm_dir()) + delta_config, delta_relation = self._record_hook(hookenv) + yield self.kv, delta_config, delta_relation + + def _record_charm_version(self, charm_dir): + # Record revisions... charm revisions are meaningless + # to charm authors as they don't control the revision, + # so logic dependent on revision is not particularly + # useful; however, it is useful for debugging analysis. + charm_rev = open( + os.path.join(charm_dir, 'revision')).read().strip() + charm_rev = charm_rev or '0' + revs = self.kv.get('charm_revisions', []) + if charm_rev not in revs: + revs.append(charm_rev.strip() or '0') + self.kv.set('charm_revisions', revs) + + def _record_hook(self, hookenv): + data = hookenv.execution_environment() + self.conf = conf_delta = self.kv.delta(data['conf'], 'config') + self.rels = rels_delta = self.kv.delta(data['rels'], 'rels') + self.kv.set('env', dict(data['env'])) + self.kv.set('unit', data['unit']) + self.kv.set('relid', data.get('relid')) + return conf_delta, rels_delta + + +class Record(dict): + + __slots__ = () + + def __getattr__(self, k): + if k in self: + return self[k] + raise AttributeError(k) + + +class DeltaSet(Record): + + __slots__ = () + + +Delta = collections.namedtuple('Delta', ['previous', 'current']) + + +_KV = None + + +def kv(): + global _KV + + # If we are running unit tests, it is useful to use a memory-backed KV store to + # avoid concurrency issues when running multiple tests. This is not a + # problem when juju is running normally.
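Since HookData.__call__ yields the kv store together with the config and relation deltas, the context manager can also be unpacked directly (sketch; assumes a normal hook execution environment)::

    from charmhelpers.core import unitdata

    hook_data = unitdata.HookData()
    with hook_data() as (db, conf_delta, rels_delta):
        for key, (previous, current) in conf_delta.items():
            print('config changed:', key, previous, '->', current)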
+ + env_var = os.environ.get("CHARM_HELPERS_TESTMODE", "auto").lower() + if env_var not in ["auto", "no", "yes"]: + logging.warning("Unknown value for CHARM_HELPERS_TESTMODE '%s'" + ", assuming 'no'", env_var) + env_var = "no" + + if env_var == "no": + in_memory_db = False + elif env_var == "yes": + in_memory_db = True + elif env_var == "auto": + # If UNIT_STATE_DB is set, respect this request + if "UNIT_STATE_DB" in os.environ: + in_memory_db = False + # Autodetect normal juju execution by looking for juju variables + elif "JUJU_CHARM_DIR" in os.environ or "JUJU_UNIT_NAME" in os.environ: + in_memory_db = False + else: + # We are probably running in unit test mode + logging.warning("Auto-detected unit test environment for KV store.") + in_memory_db = True + else: + # Help the linter realise that in_memory_db is always set + raise Exception("Cannot reach this line") + + if _KV is None: + if in_memory_db: + _KV = Storage(":memory:") + else: + _KV = Storage() + else: + if in_memory_db and _KV.db_path != ":memory:": + logging.warning("Running with in_memory_db and KV is not set to :memory:") + return _KV diff --git a/ceph-proxy/charmhelpers/fetch/__init__.py b/ceph-proxy/charmhelpers/fetch/__init__.py new file mode 100644 index 00000000..1283f25b --- /dev/null +++ b/ceph-proxy/charmhelpers/fetch/__init__.py @@ -0,0 +1,208 @@ +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import importlib +from charmhelpers.osplatform import get_platform +from yaml import safe_load +from charmhelpers.core.hookenv import ( + config, + log, +) + +from urllib.parse import urlparse, urlunparse + + +# The order of this list is very important. Handlers should be listed in from +# least- to most-specific URL matching. +FETCH_HANDLERS = ( + 'charmhelpers.fetch.archiveurl.ArchiveUrlFetchHandler', + 'charmhelpers.fetch.bzrurl.BzrUrlFetchHandler', + 'charmhelpers.fetch.giturl.GitUrlFetchHandler', +) + + +class SourceConfigError(Exception): + pass + + +class UnhandledSource(Exception): + pass + + +class AptLockError(Exception): + pass + + +class GPGKeyError(Exception): + """Exception occurs when a GPG key cannot be fetched or used. The message + indicates what the problem is. + """ + pass + + +class BaseFetchHandler(object): + + """Base class for FetchHandler implementations in fetch plugins""" + + def can_handle(self, source): + """Returns True if the source can be handled. Otherwise returns + a string explaining why it cannot""" + return "Wrong source type" + + def install(self, source): + """Try to download and unpack the source. 
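Note that kv() caches a module-level singleton, so the test-mode switch must be set before the first call; a sketch for a unit-test setup::

    import os
    os.environ['CHARM_HELPERS_TESTMODE'] = 'yes'   # force ':memory:'

    from charmhelpers.core import unitdata
    db = unitdata.kv()                             # the first call wins
    assert db.db_path == ':memory:'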
Return the path to the + unpacked files or raise UnhandledSource.""" + raise UnhandledSource("Wrong source type {}".format(source)) + + def parse_url(self, url): + return urlparse(url) + + def base_url(self, url): + """Return url without querystring or fragment""" + parts = list(self.parse_url(url)) + parts[4:] = ['' for i in parts[4:]] + return urlunparse(parts) + + +__platform__ = get_platform() +module = "charmhelpers.fetch.%s" % __platform__ +fetch = importlib.import_module(module) + +filter_installed_packages = fetch.filter_installed_packages +filter_missing_packages = fetch.filter_missing_packages +install = fetch.apt_install +upgrade = fetch.apt_upgrade +update = _fetch_update = fetch.apt_update +purge = fetch.apt_purge +add_source = fetch.add_source + +if __platform__ == "ubuntu": + apt_cache = fetch.apt_cache + apt_install = fetch.apt_install + apt_update = fetch.apt_update + apt_upgrade = fetch.apt_upgrade + apt_purge = fetch.apt_purge + apt_autoremove = fetch.apt_autoremove + apt_mark = fetch.apt_mark + apt_hold = fetch.apt_hold + apt_unhold = fetch.apt_unhold + import_key = fetch.import_key + get_upstream_version = fetch.get_upstream_version + apt_pkg = fetch.ubuntu_apt_pkg + get_apt_dpkg_env = fetch.get_apt_dpkg_env + get_installed_version = fetch.get_installed_version + OPENSTACK_RELEASES = fetch.OPENSTACK_RELEASES + UBUNTU_OPENSTACK_RELEASE = fetch.UBUNTU_OPENSTACK_RELEASE +elif __platform__ == "centos": + yum_search = fetch.yum_search + + +def configure_sources(update=False, + sources_var='install_sources', + keys_var='install_keys'): + """Configure multiple sources from charm configuration. + + The lists are encoded as yaml fragments in the configuration. + The fragment needs to be included as a string. Sources and their + corresponding keys are of the types supported by add_source(). + + Example config: + install_sources: | + - "ppa:foo" + - "http://example.com/repo precise main" + install_keys: | + - null + - "a1b2c3d4" + + Note that 'null' (a.k.a. None) should not be quoted. + """ + sources = safe_load((config(sources_var) or '').strip()) or [] + keys = safe_load((config(keys_var) or '').strip()) or None + + if isinstance(sources, str): + sources = [sources] + + if keys is None: + for source in sources: + add_source(source, None) + else: + if isinstance(keys, str): + keys = [keys] + + if len(sources) != len(keys): + raise SourceConfigError( + 'Install sources and keys lists are different lengths') + for source, key in zip(sources, keys): + add_source(source, key) + if update: + _fetch_update(fatal=True) + + +def install_remote(source, *args, **kwargs): + """Install a file tree from a remote source. + + The specified source should be a url of the form: + scheme://[host]/path[#[option=value][&...]] + + Schemes supported are based on this modules submodules. + Options supported are submodule-specific. + Additional arguments are passed through to the submodule. + + For example:: + + dest = install_remote('http://example.com/archive.tgz', + checksum='deadbeef', + hash_type='sha1') + + This will download `archive.tgz`, validate it using SHA1 and, if + the file is ok, extract it and return the directory in which it + was extracted. If the checksum fails, it will raise + :class:`charmhelpers.core.host.ChecksumError`. + """ + # We ONLY check for True here because can_handle may return a string + # explaining why it can't handle a given source. 
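In a charm that carries the install_sources/install_keys options shown above, wiring them up is a single call (sketch)::

    from charmhelpers.fetch import configure_sources

    # Adds each source/key pair via add_source(), then runs
    # apt_update(fatal=True) because update=True.
    configure_sources(update=True)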
+ handlers = [h for h in plugins() if h.can_handle(source) is True] + for handler in handlers: + try: + return handler.install(source, *args, **kwargs) + except UnhandledSource as e: + log('Install source attempt unsuccessful: {}'.format(e), + level='WARNING') + raise UnhandledSource("No handler found for source {}".format(source)) + + +def install_from_config(config_var_name): + """Install a file from config.""" + charm_config = config() + source = charm_config[config_var_name] + return install_remote(source) + + +def plugins(fetch_handlers=None): + if not fetch_handlers: + fetch_handlers = FETCH_HANDLERS + plugin_list = [] + for handler_name in fetch_handlers: + package, classname = handler_name.rsplit('.', 1) + try: + handler_class = getattr( + importlib.import_module(package), + classname) + plugin_list.append(handler_class()) + except NotImplementedError: + # Skip missing plugins so that they can be omitted from + # installation if desired + log("FetchHandler {} not found, skipping plugin".format( + handler_name)) + return plugin_list diff --git a/ceph-proxy/charmhelpers/fetch/archiveurl.py b/ceph-proxy/charmhelpers/fetch/archiveurl.py new file mode 100644 index 00000000..0e35c901 --- /dev/null +++ b/ceph-proxy/charmhelpers/fetch/archiveurl.py @@ -0,0 +1,173 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import contextlib +import os +import hashlib +import re + +from charmhelpers.fetch import ( + BaseFetchHandler, + UnhandledSource +) +from charmhelpers.payload.archive import ( + get_archive_handler, + extract, +) +from charmhelpers.core.hookenv import ( + env_proxy_settings, +) +from charmhelpers.core.host import mkdir, check_hash + +from urllib.request import ( + build_opener, install_opener, urlopen, urlretrieve, + HTTPPasswordMgrWithDefaultRealm, HTTPBasicAuthHandler, + ProxyHandler +) +from urllib.parse import urlparse, urlunparse, parse_qs +from urllib.error import URLError + + +def splituser(host): + _userprog = re.compile('^(.*)@(.*)$') + match = _userprog.match(host) + if match: + return match.group(1, 2) + return None, host + + +def splitpasswd(user): + _passwdprog = re.compile('^([^:]*):(.*)$', re.S) + match = _passwdprog.match(user) + if match: + return match.group(1, 2) + return user, None + + +@contextlib.contextmanager +def proxy_env(): + """ + Creates a context which temporarily modifies the proxy settings in os.environ. + """ + restore = {**os.environ} # Copy the current os.environ + juju_proxies = env_proxy_settings() or {} + os.environ.update(**juju_proxies) # Insert or Update the os.environ + yield os.environ + for key in juju_proxies: + del os.environ[key] # remove any keys which were added or updated + os.environ.update(**restore) # restore any original values + + +class ArchiveUrlFetchHandler(BaseFetchHandler): + """ + Handler to download archive files from arbitrary URLs. + + Can fetch from http, https, ftp, and file URLs. + + Can install either tarballs (.tar, .tgz, .tbz2, etc) or zip files. 
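The two URL helpers just defined split credentials out of a netloc, for example::

    auth, host = splituser('jorge:s3cret@example.com')
    # auth == 'jorge:s3cret', host == 'example.com'
    user, password = splitpasswd(auth)
    # user == 'jorge', password == 's3cret'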
+ + Installs the contents of the archive in $CHARM_DIR/fetched/. + """ + def can_handle(self, source): + url_parts = self.parse_url(source) + if url_parts.scheme not in ('http', 'https', 'ftp', 'file'): + # XXX: Why is this returning a boolean and a string? It's + # doomed to fail since "bool(can_handle('foo://'))" will be True. + return "Wrong source type" + if get_archive_handler(self.base_url(source)): + return True + return False + + def download(self, source, dest): + """ + Download an archive file. + + :param str source: URL pointing to an archive file. + :param str dest: Local path location to download archive file to. + """ + # propagate all exceptions + # URLError, OSError, etc + proto, netloc, path, params, query, fragment = urlparse(source) + handlers = [] + if proto in ('http', 'https'): + auth, barehost = splituser(netloc) + if auth is not None: + source = urlunparse((proto, barehost, path, params, query, fragment)) + username, password = splitpasswd(auth) + passman = HTTPPasswordMgrWithDefaultRealm() + # Realm is set to None in add_password to force the username and password + # to be used whatever the realm + passman.add_password(None, source, username, password) + handlers.append(HTTPBasicAuthHandler(passman)) + + with proxy_env(): + handlers.append(ProxyHandler()) + opener = build_opener(*handlers) + install_opener(opener) + response = urlopen(source) + try: + with open(dest, 'wb') as dest_file: + dest_file.write(response.read()) + except Exception as e: + if os.path.isfile(dest): + os.unlink(dest) + raise e + + # Mandatory file validation via Sha1 or MD5 hashing. + def download_and_validate(self, url, hashsum, validate="sha1"): + tempfile, headers = urlretrieve(url) + check_hash(tempfile, hashsum, validate) + return tempfile + + def install(self, source, dest=None, checksum=None, hash_type='sha1'): + """ + Download and install an archive file, with optional checksum validation. + + The checksum can also be given on the `source` URL's fragment. + For example:: + + handler.install('http://example.com/file.tgz#sha1=deadbeef') + + :param str source: URL pointing to an archive file. + :param str dest: Local destination path to install to. If not given, + installs to `$CHARM_DIR/archives/archive_file_name`. + :param str checksum: If given, validate the archive file after download. + :param str hash_type: Algorithm used to generate `checksum`. + Can be any hash alrgorithm supported by :mod:`hashlib`, + such as md5, sha1, sha256, sha512, etc. + + """ + url_parts = self.parse_url(source) + dest_dir = os.path.join(os.environ.get('CHARM_DIR'), 'fetched') + if not os.path.exists(dest_dir): + mkdir(dest_dir, perms=0o755) + dld_file = os.path.join(dest_dir, os.path.basename(url_parts.path)) + try: + self.download(source, dld_file) + except URLError as e: + raise UnhandledSource(e.reason) + except OSError as e: + raise UnhandledSource(e.strerror) + options = parse_qs(url_parts.fragment) + for key, value in options.items(): + algorithms = hashlib.algorithms_available + if key in algorithms: + if len(value) != 1: + raise TypeError( + "Expected 1 hash value, not %d" % len(value)) + expected = value[0] + check_hash(dld_file, expected, key) + if checksum: + check_hash(dld_file, checksum, hash_type) + return extract(dld_file, dest) diff --git a/ceph-proxy/charmhelpers/fetch/bzrurl.py b/ceph-proxy/charmhelpers/fetch/bzrurl.py new file mode 100644 index 00000000..c4ab3ff1 --- /dev/null +++ b/ceph-proxy/charmhelpers/fetch/bzrurl.py @@ -0,0 +1,76 @@ +# Copyright 2014-2015 Canonical Limited. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from subprocess import STDOUT, check_output +from charmhelpers.fetch import ( + BaseFetchHandler, + UnhandledSource, + filter_installed_packages, + install, +) +from charmhelpers.core.host import mkdir + + +if filter_installed_packages(['bzr']) != []: + install(['bzr']) + if filter_installed_packages(['bzr']) != []: + raise NotImplementedError('Unable to install bzr') + + +class BzrUrlFetchHandler(BaseFetchHandler): + """Handler for bazaar branches via generic and lp URLs.""" + + def can_handle(self, source): + url_parts = self.parse_url(source) + if url_parts.scheme not in ('bzr+ssh', 'lp', ''): + return False + elif not url_parts.scheme: + return os.path.exists(os.path.join(source, '.bzr')) + else: + return True + + def branch(self, source, dest, revno=None): + if not self.can_handle(source): + raise UnhandledSource("Cannot handle {}".format(source)) + cmd_opts = [] + if revno: + cmd_opts += ['-r', str(revno)] + if os.path.exists(dest): + cmd = ['bzr', 'pull'] + cmd += cmd_opts + cmd += ['--overwrite', '-d', dest, source] + else: + cmd = ['bzr', 'branch'] + cmd += cmd_opts + cmd += [source, dest] + check_output(cmd, stderr=STDOUT) + + def install(self, source, dest=None, revno=None): + url_parts = self.parse_url(source) + branch_name = url_parts.path.strip("/").split("/")[-1] + if dest: + dest_dir = os.path.join(dest, branch_name) + else: + dest_dir = os.path.join(os.environ.get('CHARM_DIR'), "fetched", + branch_name) + + if dest and not os.path.exists(dest): + mkdir(dest, perms=0o755) + + try: + self.branch(source, dest_dir, revno) + except OSError as e: + raise UnhandledSource(e.strerror) + return dest_dir diff --git a/ceph-proxy/charmhelpers/fetch/centos.py b/ceph-proxy/charmhelpers/fetch/centos.py new file mode 100644 index 00000000..f8492018 --- /dev/null +++ b/ceph-proxy/charmhelpers/fetch/centos.py @@ -0,0 +1,170 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import subprocess +import os +import time +import yum + +from tempfile import NamedTemporaryFile +from charmhelpers.core.hookenv import log + +YUM_NO_LOCK = 1 # The return code for "couldn't acquire lock" in YUM. +YUM_NO_LOCK_RETRY_DELAY = 10 # Wait 10 seconds between apt lock checks. +YUM_NO_LOCK_RETRY_COUNT = 30 # Retry to acquire the lock X times. 
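With the bzr handler in place, fetching a branch is a check-then-install sequence (sketch; the lp: URL and revno are only examples)::

    from charmhelpers.fetch.bzrurl import BzrUrlFetchHandler

    handler = BzrUrlFetchHandler()
    if handler.can_handle('lp:charm-helpers') is True:
        # Lands in $CHARM_DIR/fetched/charm-helpers unless dest is given.
        path = handler.install('lp:charm-helpers', revno=42)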
+ + +def filter_installed_packages(packages): + """Return a list of packages that require installation.""" + yb = yum.YumBase() + package_list = yb.doPackageLists() + temp_cache = {p.base_package_name: 1 for p in package_list['installed']} + + _pkgs = [p for p in packages if not temp_cache.get(p, False)] + return _pkgs + + +def install(packages, options=None, fatal=False): + """Install one or more packages.""" + cmd = ['yum', '--assumeyes'] + if options is not None: + cmd.extend(options) + cmd.append('install') + if isinstance(packages, str): + cmd.append(packages) + else: + cmd.extend(packages) + log("Installing {} with options: {}".format(packages, + options)) + _run_yum_command(cmd, fatal) + + +def upgrade(options=None, fatal=False, dist=False): + """Upgrade all packages.""" + cmd = ['yum', '--assumeyes'] + if options is not None: + cmd.extend(options) + cmd.append('upgrade') + log("Upgrading with options: {}".format(options)) + _run_yum_command(cmd, fatal) + + +def update(fatal=False): + """Update local yum cache.""" + cmd = ['yum', '--assumeyes', 'update'] + log("Update with fatal: {}".format(fatal)) + _run_yum_command(cmd, fatal) + + +def purge(packages, fatal=False): + """Purge one or more packages.""" + cmd = ['yum', '--assumeyes', 'remove'] + if isinstance(packages, str): + cmd.append(packages) + else: + cmd.extend(packages) + log("Purging {}".format(packages)) + _run_yum_command(cmd, fatal) + + +def yum_search(packages): + """Search for a package.""" + output = {} + cmd = ['yum', 'search'] + if isinstance(packages, str): + cmd.append(packages) + else: + cmd.extend(packages) + log("Searching for {}".format(packages)) + result = subprocess.check_output(cmd) + for package in list(packages): + output[package] = package in result + return output + + +def add_source(source, key=None): + """Add a package source to this system. + + @param source: a URL with a rpm package + + @param key: A key to be added to the system's keyring and used + to verify the signatures on packages. Ideally, this should be an + ASCII format GPG public key including the block headers. A GPG key + id may also be used, but be aware that only insecure protocols are + available to retrieve the actual public key from a public keyserver + placing your Juju environment at risk. + """ + if source is None: + log('Source is not present. Skipping') + return + + if source.startswith('http'): + directory = '/etc/yum.repos.d/' + for filename in os.listdir(directory): + with open(directory + filename, 'r') as rpm_file: + if source in rpm_file.read(): + break + else: + log("Add source: {!r}".format(source)) + # write in the charms.repo + with open(directory + 'Charms.repo', 'a') as rpm_file: + rpm_file.write('[%s]\n' % source[7:].replace('/', '_')) + rpm_file.write('name=%s\n' % source[7:]) + rpm_file.write('baseurl=%s\n\n' % source) + else: + log("Unknown source: {!r}".format(source)) + + if key: + if '-----BEGIN PGP PUBLIC KEY BLOCK-----' in key: + with NamedTemporaryFile('w+') as key_file: + key_file.write(key) + key_file.flush() + key_file.seek(0) + subprocess.check_call(['rpm', '--import', key_file.name]) + else: + subprocess.check_call(['rpm', '--import', key]) + + +def _run_yum_command(cmd, fatal=False): + """Run an YUM command. + + Checks the output and retry if the fatal flag is set to True. + + :param: cmd: str: The yum command to run. + :param: fatal: bool: Whether the command's output should be checked and + retried. 
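On CentOS the yum-based helpers mirror their apt counterparts, so the usual install-what-is-missing pattern looks like this (sketch; the package names are examples)::

    from charmhelpers.fetch.centos import filter_installed_packages, install

    missing = filter_installed_packages(['ceph-common', 'python3-lxml'])
    if missing:
        install(missing, fatal=True)   # retries while the yum lock is held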
+ """ + env = os.environ.copy() + + if fatal: + retry_count = 0 + result = None + + # If the command is considered "fatal", we need to retry if the yum + # lock was not acquired. + + while result is None or result == YUM_NO_LOCK: + try: + result = subprocess.check_call(cmd, env=env) + except subprocess.CalledProcessError as e: + retry_count = retry_count + 1 + if retry_count > YUM_NO_LOCK_RETRY_COUNT: + raise + result = e.returncode + log("Couldn't acquire YUM lock. Will retry in {} seconds." + "".format(YUM_NO_LOCK_RETRY_DELAY)) + time.sleep(YUM_NO_LOCK_RETRY_DELAY) + + else: + subprocess.call(cmd, env=env) diff --git a/ceph-proxy/charmhelpers/fetch/giturl.py b/ceph-proxy/charmhelpers/fetch/giturl.py new file mode 100644 index 00000000..070ca9bb --- /dev/null +++ b/ceph-proxy/charmhelpers/fetch/giturl.py @@ -0,0 +1,69 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from subprocess import check_output, CalledProcessError, STDOUT +from charmhelpers.fetch import ( + BaseFetchHandler, + UnhandledSource, + filter_installed_packages, + install, +) + +if filter_installed_packages(['git']) != []: + install(['git']) + if filter_installed_packages(['git']) != []: + raise NotImplementedError('Unable to install git') + + +class GitUrlFetchHandler(BaseFetchHandler): + """Handler for git branches via generic and github URLs.""" + + def can_handle(self, source): + url_parts = self.parse_url(source) + # TODO (mattyw) no support for ssh git@ yet + if url_parts.scheme not in ('http', 'https', 'git', ''): + return False + elif not url_parts.scheme: + return os.path.exists(os.path.join(source, '.git')) + else: + return True + + def clone(self, source, dest, branch="master", depth=None): + if not self.can_handle(source): + raise UnhandledSource("Cannot handle {}".format(source)) + + if os.path.exists(dest): + cmd = ['git', '-C', dest, 'pull', source, branch] + else: + cmd = ['git', 'clone', source, dest, '--branch', branch] + if depth: + cmd.extend(['--depth', depth]) + check_output(cmd, stderr=STDOUT) + + def install(self, source, branch="master", dest=None, depth=None): + url_parts = self.parse_url(source) + branch_name = url_parts.path.strip("/").split("/")[-1] + if dest: + dest_dir = os.path.join(dest, branch_name) + else: + dest_dir = os.path.join(os.environ.get('CHARM_DIR'), "fetched", + branch_name) + try: + self.clone(source, dest_dir, branch, depth) + except CalledProcessError as e: + raise UnhandledSource(e) + except OSError as e: + raise UnhandledSource(e.strerror) + return dest_dir diff --git a/ceph-proxy/charmhelpers/fetch/python/__init__.py b/ceph-proxy/charmhelpers/fetch/python/__init__.py new file mode 100644 index 00000000..bff99dc9 --- /dev/null +++ b/ceph-proxy/charmhelpers/fetch/python/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2014-2019 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-proxy/charmhelpers/fetch/python/debug.py b/ceph-proxy/charmhelpers/fetch/python/debug.py new file mode 100644 index 00000000..dd5cca80 --- /dev/null +++ b/ceph-proxy/charmhelpers/fetch/python/debug.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python +# coding: utf-8 + +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import atexit +import sys + +from charmhelpers.fetch.python.rpdb import Rpdb +from charmhelpers.core.hookenv import ( + open_port, + close_port, + ERROR, + log +) + +__author__ = "Jorge Niedbalski " + +DEFAULT_ADDR = "0.0.0.0" +DEFAULT_PORT = 4444 + + +def _error(message): + log(message, level=ERROR) + + +def set_trace(addr=DEFAULT_ADDR, port=DEFAULT_PORT): + """ + Set a trace point using the remote debugger + """ + atexit.register(close_port, port) + try: + log("Starting a remote python debugger session on %s:%s" % (addr, + port)) + open_port(port) + debugger = Rpdb(addr=addr, port=port) + debugger.set_trace(sys._getframe().f_back) + except Exception: + _error("Cannot start a remote debug session on %s:%s" % (addr, + port)) diff --git a/ceph-proxy/charmhelpers/fetch/python/packages.py b/ceph-proxy/charmhelpers/fetch/python/packages.py new file mode 100644 index 00000000..93f1fa3f --- /dev/null +++ b/ceph-proxy/charmhelpers/fetch/python/packages.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python +# coding: utf-8 + +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import subprocess +import sys + +from charmhelpers.fetch import apt_install, apt_update +from charmhelpers.core.hookenv import charm_dir, log + +__author__ = "Jorge Niedbalski " + + +def pip_execute(*args, **kwargs): + """Overridden pip_execute() to stop sys.path being changed. + + The act of importing main from the pip module seems to cause add wheels + from the /usr/share/python-wheels which are installed by various tools. + This function ensures that sys.path remains the same after the call is + executed. 
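set_trace() opens the port and then blocks in Rpdb until a client connects, so usage from charm code is a single call followed by attaching from outside (sketch)::

    from charmhelpers.fetch.python.debug import set_trace

    set_trace(port=4444)   # then attach with e.g.: telnet <unit-ip> 4444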
+ """ + try: + _path = sys.path + try: + from pip import main as _pip_execute + except ImportError: + apt_update() + apt_install('python3-pip') + from pip import main as _pip_execute + _pip_execute(*args, **kwargs) + finally: + sys.path = _path + + +def parse_options(given, available): + """Given a set of options, check if available""" + for key, value in sorted(given.items()): + if not value: + continue + if key in available: + yield "--{0}={1}".format(key, value) + + +def pip_install_requirements(requirements, constraints=None, **options): + """Install a requirements file. + + :param constraints: Path to pip constraints file. + http://pip.readthedocs.org/en/stable/user_guide/#constraints-files + """ + command = ["install"] + + available_options = ('proxy', 'src', 'log', ) + for option in parse_options(options, available_options): + command.append(option) + + command.append("-r {0}".format(requirements)) + if constraints: + command.append("-c {0}".format(constraints)) + log("Installing from file: {} with constraints {} " + "and options: {}".format(requirements, constraints, command)) + else: + log("Installing from file: {} with options: {}".format(requirements, + command)) + pip_execute(command) + + +def pip_install(package, fatal=False, upgrade=False, venv=None, + constraints=None, **options): + """Install a python package""" + if venv: + venv_python = os.path.join(venv, 'bin/pip') + command = [venv_python, "install"] + else: + command = ["install"] + + available_options = ('proxy', 'src', 'log', 'index-url', ) + for option in parse_options(options, available_options): + command.append(option) + + if upgrade: + command.append('--upgrade') + + if constraints: + command.extend(['-c', constraints]) + + if isinstance(package, list): + command.extend(package) + else: + command.append(package) + + log("Installing {} package with options: {}".format(package, + command)) + if venv: + subprocess.check_call(command) + else: + pip_execute(command) + + +def pip_uninstall(package, **options): + """Uninstall a python package""" + command = ["uninstall", "-q", "-y"] + + available_options = ('proxy', 'log', ) + for option in parse_options(options, available_options): + command.append(option) + + if isinstance(package, list): + command.extend(package) + else: + command.append(package) + + log("Uninstalling {} package with options: {}".format(package, + command)) + pip_execute(command) + + +def pip_list(): + """Returns the list of current python installed packages + """ + return pip_execute(["list"]) + + +def pip_create_virtualenv(path=None): + """Create an isolated Python environment.""" + apt_install(['python3-virtualenv', 'virtualenv']) + extra_flags = ['--python=python3'] + + if path: + venv_path = path + else: + venv_path = os.path.join(charm_dir(), 'venv') + + if not os.path.exists(venv_path): + subprocess.check_call(['virtualenv', venv_path] + extra_flags) diff --git a/ceph-proxy/charmhelpers/fetch/python/rpdb.py b/ceph-proxy/charmhelpers/fetch/python/rpdb.py new file mode 100644 index 00000000..9b31610c --- /dev/null +++ b/ceph-proxy/charmhelpers/fetch/python/rpdb.py @@ -0,0 +1,56 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Remote Python Debugger (pdb wrapper).""" + +import pdb +import socket +import sys + +__author__ = "Bertrand Janin " +__version__ = "0.1.3" + + +class Rpdb(pdb.Pdb): + + def __init__(self, addr="127.0.0.1", port=4444): + """Initialize the socket and initialize pdb.""" + + # Backup stdin and stdout before replacing them by the socket handle + self.old_stdout = sys.stdout + self.old_stdin = sys.stdin + + # Open a 'reusable' socket to let the webapp reload on the same port + self.skt = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.skt.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, True) + self.skt.bind((addr, port)) + self.skt.listen(1) + (clientsocket, address) = self.skt.accept() + handle = clientsocket.makefile('rw') + pdb.Pdb.__init__(self, completekey='tab', stdin=handle, stdout=handle) + sys.stdout = sys.stdin = handle + + def shutdown(self): + """Revert stdin and stdout, close the socket.""" + sys.stdout = self.old_stdout + sys.stdin = self.old_stdin + self.skt.close() + self.set_continue() + + def do_continue(self, arg): + """Stop all operation on ``continue``.""" + self.shutdown() + return 1 + + do_EOF = do_quit = do_exit = do_c = do_cont = do_continue diff --git a/ceph-proxy/charmhelpers/fetch/python/version.py b/ceph-proxy/charmhelpers/fetch/python/version.py new file mode 100644 index 00000000..3eb42103 --- /dev/null +++ b/ceph-proxy/charmhelpers/fetch/python/version.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python +# coding: utf-8 + +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys + +__author__ = "Jorge Niedbalski " + + +def current_version(): + """Current system python version""" + return sys.version_info + + +def current_version_string(): + """Current system python version as string major.minor.micro""" + return "{0}.{1}.{2}".format(sys.version_info.major, + sys.version_info.minor, + sys.version_info.micro) diff --git a/ceph-proxy/charmhelpers/fetch/snap.py b/ceph-proxy/charmhelpers/fetch/snap.py new file mode 100644 index 00000000..7ab7ce3e --- /dev/null +++ b/ceph-proxy/charmhelpers/fetch/snap.py @@ -0,0 +1,150 @@ +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +""" +Charm helpers snap for classic charms. + +If writing reactive charms, use the snap layer: +https://lists.ubuntu.com/archives/snapcraft/2016-September/001114.html +""" +import subprocess +import os +from time import sleep +from charmhelpers.core.hookenv import log + +__author__ = 'Joseph Borg ' + +# The return code for "couldn't acquire lock" in Snap +# (hopefully this will be improved). +SNAP_NO_LOCK = 1 +SNAP_NO_LOCK_RETRY_DELAY = 10 # Wait X seconds between Snap lock checks. +SNAP_NO_LOCK_RETRY_COUNT = 30 # Retry to acquire the lock X times. +SNAP_CHANNELS = [ + 'edge', + 'beta', + 'candidate', + 'stable', +] + + +class CouldNotAcquireLockException(Exception): + pass + + +class InvalidSnapChannel(Exception): + pass + + +def _snap_exec(commands): + """ + Execute snap commands. + + :param commands: List commands + :return: Integer exit code + """ + assert isinstance(commands, list) + + retry_count = 0 + return_code = None + + while return_code is None or return_code == SNAP_NO_LOCK: + try: + return_code = subprocess.check_call(['snap'] + commands, + env=os.environ) + except subprocess.CalledProcessError as e: + retry_count += + 1 + if retry_count > SNAP_NO_LOCK_RETRY_COUNT: + raise CouldNotAcquireLockException( + 'Could not acquire lock after {} attempts' + .format(SNAP_NO_LOCK_RETRY_COUNT)) + return_code = e.returncode + log('Snap failed to acquire lock, trying again in {} seconds.' + .format(SNAP_NO_LOCK_RETRY_DELAY), level='WARN') + sleep(SNAP_NO_LOCK_RETRY_DELAY) + + return return_code + + +def snap_install(packages, *flags): + """ + Install a snap package. + + :param packages: String or List String package name + :param flags: List String flags to pass to install command + :return: Integer return code from snap + """ + if type(packages) is not list: + packages = [packages] + + flags = list(flags) + + message = 'Installing snap(s) "%s"' % ', '.join(packages) + if flags: + message += ' with option(s) "%s"' % ', '.join(flags) + + log(message, level='INFO') + return _snap_exec(['install'] + flags + packages) + + +def snap_remove(packages, *flags): + """ + Remove a snap package. + + :param packages: String or List String package name + :param flags: List String flags to pass to remove command + :return: Integer return code from snap + """ + if type(packages) is not list: + packages = [packages] + + flags = list(flags) + + message = 'Removing snap(s) "%s"' % ', '.join(packages) + if flags: + message += ' with options "%s"' % ', '.join(flags) + + log(message, level='INFO') + return _snap_exec(['remove'] + flags + packages) + + +def snap_refresh(packages, *flags): + """ + Refresh / Update snap package. 
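snap_install() accepts a single name or a list, plus raw snap flags, and retries while the snap state lock is held; for example (sketch; the snap names and channels are examples)::

    from charmhelpers.fetch.snap import snap_install

    snap_install('lxd', '--channel=5.21/stable')
    snap_install('juju-crashdump', '--classic')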
+ + :param packages: String or List String package name + :param flags: List String flags to pass to refresh command + :return: Integer return code from snap + """ + if type(packages) is not list: + packages = [packages] + + flags = list(flags) + + message = 'Refreshing snap(s) "%s"' % ', '.join(packages) + if flags: + message += ' with options "%s"' % ', '.join(flags) + + log(message, level='INFO') + return _snap_exec(['refresh'] + flags + packages) + + +def valid_snap_channel(channel): + """ Validate snap channel exists + + :raises InvalidSnapChannel: When channel does not exist + :return: Boolean + """ + if channel.lower() in SNAP_CHANNELS: + return True + else: + raise InvalidSnapChannel("Invalid Snap Channel: {}".format(channel)) diff --git a/ceph-proxy/charmhelpers/fetch/ubuntu.py b/ceph-proxy/charmhelpers/fetch/ubuntu.py new file mode 100644 index 00000000..d0089eb7 --- /dev/null +++ b/ceph-proxy/charmhelpers/fetch/ubuntu.py @@ -0,0 +1,1061 @@ +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import OrderedDict +import platform +import re +import subprocess +import sys +import time + +from charmhelpers import deprecate +from charmhelpers.core.host import get_distrib_codename, get_system_env + +from charmhelpers.core.hookenv import ( + log, + DEBUG, + WARNING, + env_proxy_settings, +) +from charmhelpers.fetch import SourceConfigError, GPGKeyError +from charmhelpers.fetch import ubuntu_apt_pkg + +PROPOSED_POCKET = ( + "# Proposed\n" + "deb http://archive.ubuntu.com/ubuntu {}-proposed main universe " + "multiverse restricted\n") +PROPOSED_PORTS_POCKET = ( + "# Proposed\n" + "deb http://ports.ubuntu.com/ubuntu-ports {}-proposed main universe " + "multiverse restricted\n") +# Only supports 64bit and ppc64 at the moment. 
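valid_snap_channel() raises rather than returning False, and only accepts the four bare risk levels (not track/risk strings such as '5.21/stable'), so callers guard it (sketch)::

    from charmhelpers.fetch.snap import InvalidSnapChannel, valid_snap_channel

    try:
        valid_snap_channel('candidate')     # returns True
        valid_snap_channel('5.21/stable')   # raises InvalidSnapChannel
    except InvalidSnapChannel as e:
        log(str(e), level='WARN')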
+ARCH_TO_PROPOSED_POCKET = { + 'x86_64': PROPOSED_POCKET, + 'ppc64le': PROPOSED_PORTS_POCKET, + 'aarch64': PROPOSED_PORTS_POCKET, + 's390x': PROPOSED_PORTS_POCKET, +} +CLOUD_ARCHIVE_URL = "http://ubuntu-cloud.archive.canonical.com/ubuntu" +CLOUD_ARCHIVE_KEY_ID = '5EDB1B62EC4926EA' +CLOUD_ARCHIVE = """# Ubuntu Cloud Archive +deb http://ubuntu-cloud.archive.canonical.com/ubuntu {} main +""" +CLOUD_ARCHIVE_POCKETS = { + # Folsom + 'folsom': 'precise-updates/folsom', + 'folsom/updates': 'precise-updates/folsom', + 'precise-folsom': 'precise-updates/folsom', + 'precise-folsom/updates': 'precise-updates/folsom', + 'precise-updates/folsom': 'precise-updates/folsom', + 'folsom/proposed': 'precise-proposed/folsom', + 'precise-folsom/proposed': 'precise-proposed/folsom', + 'precise-proposed/folsom': 'precise-proposed/folsom', + # Grizzly + 'grizzly': 'precise-updates/grizzly', + 'grizzly/updates': 'precise-updates/grizzly', + 'precise-grizzly': 'precise-updates/grizzly', + 'precise-grizzly/updates': 'precise-updates/grizzly', + 'precise-updates/grizzly': 'precise-updates/grizzly', + 'grizzly/proposed': 'precise-proposed/grizzly', + 'precise-grizzly/proposed': 'precise-proposed/grizzly', + 'precise-proposed/grizzly': 'precise-proposed/grizzly', + # Havana + 'havana': 'precise-updates/havana', + 'havana/updates': 'precise-updates/havana', + 'precise-havana': 'precise-updates/havana', + 'precise-havana/updates': 'precise-updates/havana', + 'precise-updates/havana': 'precise-updates/havana', + 'havana/proposed': 'precise-proposed/havana', + 'precise-havana/proposed': 'precise-proposed/havana', + 'precise-proposed/havana': 'precise-proposed/havana', + # Icehouse + 'icehouse': 'precise-updates/icehouse', + 'icehouse/updates': 'precise-updates/icehouse', + 'precise-icehouse': 'precise-updates/icehouse', + 'precise-icehouse/updates': 'precise-updates/icehouse', + 'precise-updates/icehouse': 'precise-updates/icehouse', + 'icehouse/proposed': 'precise-proposed/icehouse', + 'precise-icehouse/proposed': 'precise-proposed/icehouse', + 'precise-proposed/icehouse': 'precise-proposed/icehouse', + # Juno + 'juno': 'trusty-updates/juno', + 'juno/updates': 'trusty-updates/juno', + 'trusty-juno': 'trusty-updates/juno', + 'trusty-juno/updates': 'trusty-updates/juno', + 'trusty-updates/juno': 'trusty-updates/juno', + 'juno/proposed': 'trusty-proposed/juno', + 'trusty-juno/proposed': 'trusty-proposed/juno', + 'trusty-proposed/juno': 'trusty-proposed/juno', + # Kilo + 'kilo': 'trusty-updates/kilo', + 'kilo/updates': 'trusty-updates/kilo', + 'trusty-kilo': 'trusty-updates/kilo', + 'trusty-kilo/updates': 'trusty-updates/kilo', + 'trusty-updates/kilo': 'trusty-updates/kilo', + 'kilo/proposed': 'trusty-proposed/kilo', + 'trusty-kilo/proposed': 'trusty-proposed/kilo', + 'trusty-proposed/kilo': 'trusty-proposed/kilo', + # Liberty + 'liberty': 'trusty-updates/liberty', + 'liberty/updates': 'trusty-updates/liberty', + 'trusty-liberty': 'trusty-updates/liberty', + 'trusty-liberty/updates': 'trusty-updates/liberty', + 'trusty-updates/liberty': 'trusty-updates/liberty', + 'liberty/proposed': 'trusty-proposed/liberty', + 'trusty-liberty/proposed': 'trusty-proposed/liberty', + 'trusty-proposed/liberty': 'trusty-proposed/liberty', + # Mitaka + 'mitaka': 'trusty-updates/mitaka', + 'mitaka/updates': 'trusty-updates/mitaka', + 'trusty-mitaka': 'trusty-updates/mitaka', + 'trusty-mitaka/updates': 'trusty-updates/mitaka', + 'trusty-updates/mitaka': 'trusty-updates/mitaka', + 'mitaka/proposed': 'trusty-proposed/mitaka', + 
'trusty-mitaka/proposed': 'trusty-proposed/mitaka', + 'trusty-proposed/mitaka': 'trusty-proposed/mitaka', + # Newton + 'newton': 'xenial-updates/newton', + 'newton/updates': 'xenial-updates/newton', + 'xenial-newton': 'xenial-updates/newton', + 'xenial-newton/updates': 'xenial-updates/newton', + 'xenial-updates/newton': 'xenial-updates/newton', + 'newton/proposed': 'xenial-proposed/newton', + 'xenial-newton/proposed': 'xenial-proposed/newton', + 'xenial-proposed/newton': 'xenial-proposed/newton', + # Ocata + 'ocata': 'xenial-updates/ocata', + 'ocata/updates': 'xenial-updates/ocata', + 'xenial-ocata': 'xenial-updates/ocata', + 'xenial-ocata/updates': 'xenial-updates/ocata', + 'xenial-updates/ocata': 'xenial-updates/ocata', + 'ocata/proposed': 'xenial-proposed/ocata', + 'xenial-ocata/proposed': 'xenial-proposed/ocata', + 'xenial-proposed/ocata': 'xenial-proposed/ocata', + # Pike + 'pike': 'xenial-updates/pike', + 'xenial-pike': 'xenial-updates/pike', + 'xenial-pike/updates': 'xenial-updates/pike', + 'xenial-updates/pike': 'xenial-updates/pike', + 'pike/proposed': 'xenial-proposed/pike', + 'xenial-pike/proposed': 'xenial-proposed/pike', + 'xenial-proposed/pike': 'xenial-proposed/pike', + # Queens + 'queens': 'xenial-updates/queens', + 'xenial-queens': 'xenial-updates/queens', + 'xenial-queens/updates': 'xenial-updates/queens', + 'xenial-updates/queens': 'xenial-updates/queens', + 'queens/proposed': 'xenial-proposed/queens', + 'xenial-queens/proposed': 'xenial-proposed/queens', + 'xenial-proposed/queens': 'xenial-proposed/queens', + # Rocky + 'rocky': 'bionic-updates/rocky', + 'bionic-rocky': 'bionic-updates/rocky', + 'bionic-rocky/updates': 'bionic-updates/rocky', + 'bionic-updates/rocky': 'bionic-updates/rocky', + 'rocky/proposed': 'bionic-proposed/rocky', + 'bionic-rocky/proposed': 'bionic-proposed/rocky', + 'bionic-proposed/rocky': 'bionic-proposed/rocky', + # Stein + 'stein': 'bionic-updates/stein', + 'bionic-stein': 'bionic-updates/stein', + 'bionic-stein/updates': 'bionic-updates/stein', + 'bionic-updates/stein': 'bionic-updates/stein', + 'stein/proposed': 'bionic-proposed/stein', + 'bionic-stein/proposed': 'bionic-proposed/stein', + 'bionic-proposed/stein': 'bionic-proposed/stein', + # Train + 'train': 'bionic-updates/train', + 'bionic-train': 'bionic-updates/train', + 'bionic-train/updates': 'bionic-updates/train', + 'bionic-updates/train': 'bionic-updates/train', + 'train/proposed': 'bionic-proposed/train', + 'bionic-train/proposed': 'bionic-proposed/train', + 'bionic-proposed/train': 'bionic-proposed/train', + # Ussuri + 'ussuri': 'bionic-updates/ussuri', + 'bionic-ussuri': 'bionic-updates/ussuri', + 'bionic-ussuri/updates': 'bionic-updates/ussuri', + 'bionic-updates/ussuri': 'bionic-updates/ussuri', + 'ussuri/proposed': 'bionic-proposed/ussuri', + 'bionic-ussuri/proposed': 'bionic-proposed/ussuri', + 'bionic-proposed/ussuri': 'bionic-proposed/ussuri', + # Victoria + 'victoria': 'focal-updates/victoria', + 'focal-victoria': 'focal-updates/victoria', + 'focal-victoria/updates': 'focal-updates/victoria', + 'focal-updates/victoria': 'focal-updates/victoria', + 'victoria/proposed': 'focal-proposed/victoria', + 'focal-victoria/proposed': 'focal-proposed/victoria', + 'focal-proposed/victoria': 'focal-proposed/victoria', + # Wallaby + 'wallaby': 'focal-updates/wallaby', + 'focal-wallaby': 'focal-updates/wallaby', + 'focal-wallaby/updates': 'focal-updates/wallaby', + 'focal-updates/wallaby': 'focal-updates/wallaby', + 'wallaby/proposed': 'focal-proposed/wallaby', + 'focal-wallaby/proposed': 
+    'focal-proposed/wallaby': 'focal-proposed/wallaby',
+    # Xena
+    'xena': 'focal-updates/xena',
+    'focal-xena': 'focal-updates/xena',
+    'focal-xena/updates': 'focal-updates/xena',
+    'focal-updates/xena': 'focal-updates/xena',
+    'xena/proposed': 'focal-proposed/xena',
+    'focal-xena/proposed': 'focal-proposed/xena',
+    'focal-proposed/xena': 'focal-proposed/xena',
+    # Yoga
+    'yoga': 'focal-updates/yoga',
+    'focal-yoga': 'focal-updates/yoga',
+    'focal-yoga/updates': 'focal-updates/yoga',
+    'focal-updates/yoga': 'focal-updates/yoga',
+    'yoga/proposed': 'focal-proposed/yoga',
+    'focal-yoga/proposed': 'focal-proposed/yoga',
+    'focal-proposed/yoga': 'focal-proposed/yoga',
+    # Zed
+    'zed': 'jammy-updates/zed',
+    'jammy-zed': 'jammy-updates/zed',
+    'jammy-zed/updates': 'jammy-updates/zed',
+    'jammy-updates/zed': 'jammy-updates/zed',
+    'zed/proposed': 'jammy-proposed/zed',
+    'jammy-zed/proposed': 'jammy-proposed/zed',
+    'jammy-proposed/zed': 'jammy-proposed/zed',
+    # antelope
+    'antelope': 'jammy-updates/antelope',
+    'jammy-antelope': 'jammy-updates/antelope',
+    'jammy-antelope/updates': 'jammy-updates/antelope',
+    'jammy-updates/antelope': 'jammy-updates/antelope',
+    'antelope/proposed': 'jammy-proposed/antelope',
+    'jammy-antelope/proposed': 'jammy-proposed/antelope',
+    'jammy-proposed/antelope': 'jammy-proposed/antelope',
+    # bobcat
+    'bobcat': 'jammy-updates/bobcat',
+    'jammy-bobcat': 'jammy-updates/bobcat',
+    'jammy-bobcat/updates': 'jammy-updates/bobcat',
+    'jammy-updates/bobcat': 'jammy-updates/bobcat',
+    'bobcat/proposed': 'jammy-proposed/bobcat',
+    'jammy-bobcat/proposed': 'jammy-proposed/bobcat',
+    'jammy-proposed/bobcat': 'jammy-proposed/bobcat',
+    # caracal
+    'caracal': 'jammy-updates/caracal',
+    'jammy-caracal': 'jammy-updates/caracal',
+    'jammy-caracal/updates': 'jammy-updates/caracal',
+    'jammy-updates/caracal': 'jammy-updates/caracal',
+    'caracal/proposed': 'jammy-proposed/caracal',
+    'jammy-caracal/proposed': 'jammy-proposed/caracal',
+    'jammy-proposed/caracal': 'jammy-proposed/caracal',
+
+    # OVN
+    'focal-ovn-22.03': 'focal-updates/ovn-22.03',
+    'focal-ovn-22.03/proposed': 'focal-proposed/ovn-22.03',
+}
+
+
+OPENSTACK_RELEASES = (
+    'diablo',
+    'essex',
+    'folsom',
+    'grizzly',
+    'havana',
+    'icehouse',
+    'juno',
+    'kilo',
+    'liberty',
+    'mitaka',
+    'newton',
+    'ocata',
+    'pike',
+    'queens',
+    'rocky',
+    'stein',
+    'train',
+    'ussuri',
+    'victoria',
+    'wallaby',
+    'xena',
+    'yoga',
+    'zed',
+    'antelope',
+    'bobcat',
+    'caracal',
+)
+
+
+UBUNTU_OPENSTACK_RELEASE = OrderedDict([
+    ('oneiric', 'diablo'),
+    ('precise', 'essex'),
+    ('quantal', 'folsom'),
+    ('raring', 'grizzly'),
+    ('saucy', 'havana'),
+    ('trusty', 'icehouse'),
+    ('utopic', 'juno'),
+    ('vivid', 'kilo'),
+    ('wily', 'liberty'),
+    ('xenial', 'mitaka'),
+    ('yakkety', 'newton'),
+    ('zesty', 'ocata'),
+    ('artful', 'pike'),
+    ('bionic', 'queens'),
+    ('cosmic', 'rocky'),
+    ('disco', 'stein'),
+    ('eoan', 'train'),
+    ('focal', 'ussuri'),
+    ('groovy', 'victoria'),
+    ('hirsute', 'wallaby'),
+    ('impish', 'xena'),
+    ('jammy', 'yoga'),
+    ('kinetic', 'zed'),
+    ('lunar', 'antelope'),
+    ('mantic', 'bobcat'),
+    ('noble', 'caracal'),
+])
+
+
+APT_NO_LOCK = 100  # The return code for "couldn't acquire lock" in APT.
+CMD_RETRY_DELAY = 10  # Wait 10 seconds between command retries.
+CMD_RETRY_COUNT = 10  # Retry a failing fatal command X times.
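[Editor's note: the three lookup tables above drive all source resolution in this module. A minimal sketch of how a charm's `source` value lands on a concrete pocket; the spec values are illustrative, and the names are the module-level constants defined above. This snippet is a reading aid, not part of the patch.]

```python
# Sketch only: resolving user-facing source specs with the tables above.
spec = 'cloud:jammy-antelope'                  # e.g. a charm 'source' value
pocket = spec[len('cloud:'):]                  # -> 'jammy-antelope'
assert CLOUD_ARCHIVE_POCKETS[pocket] == 'jammy-updates/antelope'

# A bare release name resolves the same way, and the series->release table
# says which OpenStack release ships as a series' distro default.
assert CLOUD_ARCHIVE_POCKETS['antelope'] == 'jammy-updates/antelope'
assert UBUNTU_OPENSTACK_RELEASE['jammy'] == 'yoga'
```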
+
+
+def filter_installed_packages(packages):
+    """Return a list of packages that require installation."""
+    cache = apt_cache()
+    _pkgs = []
+    for package in packages:
+        try:
+            p = cache[package]
+            if not p.current_ver:
+                _pkgs.append(package)
+        except KeyError:
+            log('Package {} has no installation candidate.'.format(package),
+                level='WARNING')
+            _pkgs.append(package)
+    return _pkgs
+
+
+def filter_missing_packages(packages):
+    """Return a list of packages that are installed.
+
+    :param packages: list of packages to evaluate.
+    :returns: Packages that are installed.
+    :rtype: list
+    """
+    return list(
+        set(packages) -
+        set(filter_installed_packages(packages))
+    )
+
+
+def apt_cache(*_, **__):
+    """Shim returning an object simulating the apt_pkg Cache.
+
+    :param _: Accept arguments for compatibility, not used.
+    :type _: any
+    :param __: Accept keyword arguments for compatibility, not used.
+    :type __: any
+    :returns: Object used to interrogate the system apt and dpkg databases.
+    :rtype: ubuntu_apt_pkg.Cache
+    """
+    if 'apt_pkg' in sys.modules:
+        # NOTE(fnordahl): When our consumers use the upstream ``apt_pkg``
+        # module in conjunction with the apt_cache helper function, they may
+        # expect us to call ``apt_pkg.init()`` for them.
+        #
+        # Detect this situation, log a warning and make the call to
+        # ``apt_pkg.init()`` to keep the consumer's Python interpreter from
+        # crashing with a segmentation fault.
+        @deprecate(
+            'Support for use of upstream ``apt_pkg`` module in conjunction '
+            'with charm-helpers is deprecated since 2019-06-25',
+            date=None, log=lambda x: log(x, level=WARNING))
+        def one_shot_log():
+            pass
+
+        one_shot_log()
+        sys.modules['apt_pkg'].init()
+    return ubuntu_apt_pkg.Cache()
+
+
+def apt_install(packages, options=None, fatal=False, quiet=False):
+    """Install one or more packages.
+
+    :param packages: Package(s) to install
+    :type packages: Union[str, List[str]]
+    :param options: Options to pass on to apt-get
+    :type options: Union[None, List[str]]
+    :param fatal: Whether the command's output should be checked and
+                  retried.
+    :type fatal: bool
+    :param quiet: if True, suppress log messages to stdout/stderr
+                  (defaults to False)
+    :type quiet: bool
+    :raises: subprocess.CalledProcessError
+    """
+    if not packages:
+        log("Nothing to install", level=DEBUG)
+        return
+    if options is None:
+        options = ['--option=Dpkg::Options::=--force-confold']
+
+    cmd = ['apt-get', '--assume-yes']
+    cmd.extend(options)
+    cmd.append('install')
+    if isinstance(packages, str):
+        cmd.append(packages)
+    else:
+        cmd.extend(packages)
+    if not quiet:
+        log("Installing {} with options: {}"
+            .format(packages, options))
+    _run_apt_command(cmd, fatal, quiet=quiet)
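[Editor's note: taken together, `filter_installed_packages` and `apt_install` form the idempotent install pattern these charms rely on in hooks. A hedged sketch; the package names are examples only.]

```python
# Install only what is missing; safe to re-run on every hook invocation.
missing = filter_installed_packages(['ceph', 'gdisk', 'xfsprogs'])
if missing:
    apt_install(missing, fatal=True)  # fatal=True retries e.g. on apt lock
```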
+
+
+def apt_upgrade(options=None, fatal=False, dist=False):
+    """Upgrade all packages.
+
+    :param options: Options to pass on to apt-get
+    :type options: Union[None, List[str]]
+    :param fatal: Whether the command's output should be checked and
+                  retried.
+    :type fatal: bool
+    :param dist: Whether ``dist-upgrade`` should be used over ``upgrade``
+    :type dist: bool
+    :raises: subprocess.CalledProcessError
+    """
+    if options is None:
+        options = ['--option=Dpkg::Options::=--force-confold']
+
+    cmd = ['apt-get', '--assume-yes']
+    cmd.extend(options)
+    if dist:
+        cmd.append('dist-upgrade')
+    else:
+        cmd.append('upgrade')
+    log("Upgrading with options: {}".format(options))
+    _run_apt_command(cmd, fatal)
+
+
+def apt_update(fatal=False):
+    """Update local apt cache."""
+    cmd = ['apt-get', 'update']
+    _run_apt_command(cmd, fatal)
+
+
+def apt_purge(packages, fatal=False):
+    """Purge one or more packages.
+
+    :param packages: Package(s) to purge
+    :type packages: Union[str, List[str]]
+    :param fatal: Whether the command's output should be checked and
+                  retried.
+    :type fatal: bool
+    :raises: subprocess.CalledProcessError
+    """
+    cmd = ['apt-get', '--assume-yes', 'purge']
+    if isinstance(packages, str):
+        cmd.append(packages)
+    else:
+        cmd.extend(packages)
+    log("Purging {}".format(packages))
+    _run_apt_command(cmd, fatal)
+
+
+def apt_autoremove(purge=True, fatal=False):
+    """Remove packages that were automatically installed and are no longer
+    required.
+
+    :param purge: Whether the ``--purge`` option should be passed on or not.
+    :type purge: bool
+    :param fatal: Whether the command's output should be checked and
+                  retried.
+    :type fatal: bool
+    :raises: subprocess.CalledProcessError
+    """
+    cmd = ['apt-get', '--assume-yes', 'autoremove']
+    if purge:
+        cmd.append('--purge')
+    _run_apt_command(cmd, fatal)
+
+
+def apt_mark(packages, mark, fatal=False):
+    """Flag one or more packages using apt-mark."""
+    log("Marking {} as {}".format(packages, mark))
+    cmd = ['apt-mark', mark]
+    if isinstance(packages, str):
+        cmd.append(packages)
+    else:
+        cmd.extend(packages)
+
+    if fatal:
+        subprocess.check_call(cmd, universal_newlines=True)
+    else:
+        subprocess.call(cmd, universal_newlines=True)
+
+
+def apt_hold(packages, fatal=False):
+    return apt_mark(packages, 'hold', fatal=fatal)
+
+
+def apt_unhold(packages, fatal=False):
+    return apt_mark(packages, 'unhold', fatal=fatal)
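[Editor's note: `apt_hold`/`apt_unhold` are thin wrappers over `apt-mark`; their typical use is to pin one package while upgrading everything else. A hedged sketch, with an illustrative package name.]

```python
# Keep 'ceph' at its current version while the rest of the system upgrades.
apt_hold('ceph', fatal=True)
try:
    apt_update(fatal=True)
    apt_upgrade(fatal=True, dist=True)
finally:
    apt_unhold('ceph', fatal=True)
```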
+ log("PGP key found (looks like ASCII Armor format)", level=DEBUG) + if ('-----BEGIN PGP PUBLIC KEY BLOCK-----' in key and + '-----END PGP PUBLIC KEY BLOCK-----' in key): + log("Writing provided PGP key in the binary format", level=DEBUG) + key_bytes = key.encode('utf-8') + key_name = _get_keyid_by_gpg_key(key_bytes) + key_gpg = _dearmor_gpg_key(key_bytes) + _write_apt_gpg_keyfile(key_name=key_name, key_material=key_gpg) + else: + raise GPGKeyError("ASCII armor markers missing from GPG key") + else: + log("PGP key found (looks like Radix64 format)", level=WARNING) + log("SECURELY importing PGP key from keyserver; " + "full key not provided.", level=WARNING) + # as of bionic add-apt-repository uses curl with an HTTPS keyserver URL + # to retrieve GPG keys. `apt-key adv` command is deprecated as is + # apt-key in general as noted in its manpage. See lp:1433761 for more + # history. Instead, /etc/apt/trusted.gpg.d is used directly to drop + # gpg + key_asc = _get_key_by_keyid(key) + # write the key in GPG format so that apt-key list shows it + key_gpg = _dearmor_gpg_key(key_asc) + _write_apt_gpg_keyfile(key_name=key, key_material=key_gpg) + + +def _get_keyid_by_gpg_key(key_material): + """Get a GPG key fingerprint by GPG key material. + Gets a GPG key fingerprint (40-digit, 160-bit) by the ASCII armor-encoded + or binary GPG key material. Can be used, for example, to generate file + names for keys passed via charm options. + + :param key_material: ASCII armor-encoded or binary GPG key material + :type key_material: bytes + :raises: GPGKeyError if invalid key material has been provided + :returns: A GPG key fingerprint + :rtype: str + """ + # Use the same gpg command for both Xenial and Bionic + cmd = 'gpg --with-colons --with-fingerprint' + ps = subprocess.Popen(cmd.split(), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + stdin=subprocess.PIPE) + out, err = ps.communicate(input=key_material) + out = out.decode('utf-8') + err = err.decode('utf-8') + if 'gpg: no valid OpenPGP data found.' in err: + raise GPGKeyError('Invalid GPG key material provided') + # from gnupg2 docs: fpr :: Fingerprint (fingerprint is in field 10) + return re.search(r"^fpr:{9}([0-9A-F]{40}):$", out, re.MULTILINE).group(1) + + +def _get_key_by_keyid(keyid): + """Get a key via HTTPS from the Ubuntu keyserver. + Different key ID formats are supported by SKS keyservers (the longer ones + are more secure, see "dead beef attack" and https://evil32.com/). Since + HTTPS is used, if SSLBump-like HTTPS proxies are in place, they will + impersonate keyserver.ubuntu.com and generate a certificate with + keyserver.ubuntu.com in the CN field or in SubjAltName fields of a + certificate. If such proxy behavior is expected it is necessary to add the + CA certificate chain containing the intermediate CA of the SSLBump proxy to + every machine that this code runs on via ca-certs cloud-init directive (via + cloudinit-userdata model-config) or via other means (such as through a + custom charm option). Also note that DNS resolution for the hostname in a + URL is done at a proxy server - not at the client side. 
+
+    8-digit (32 bit) key ID
+    https://keyserver.ubuntu.com/pks/lookup?search=0x4652B4E6
+    16-digit (64 bit) key ID
+    https://keyserver.ubuntu.com/pks/lookup?search=0x6E85A86E4652B4E6
+    40-digit key ID:
+    https://keyserver.ubuntu.com/pks/lookup?search=0x35F77D63B5CEC106C577ED856E85A86E4652B4E6
+
+    :param keyid: An 8, 16 or 40 hex digit keyid to find a key for
+    :type keyid: (bytes, str)
+    :returns: A key material for the specified GPG key id
+    :rtype: (str, bytes)
+    :raises: subprocess.CalledProcessError
+    """
+    # options=mr - machine-readable output (disables html wrappers)
+    keyserver_url = ('https://keyserver.ubuntu.com'
+                     '/pks/lookup?op=get&options=mr&exact=on&search=0x{}')
+    curl_cmd = ['curl', keyserver_url.format(keyid)]
+    # use proxy server settings in order to retrieve the key
+    return subprocess.check_output(
+        curl_cmd, env=env_proxy_settings(['https', 'no_proxy']))
+
+
+def _dearmor_gpg_key(key_asc):
+    """Convert a GPG key in the ASCII armor format to the binary format.
+
+    :param key_asc: A GPG key in ASCII armor format.
+    :type key_asc: (str, bytes)
+    :returns: A GPG key in binary format
+    :rtype: (str, bytes)
+    :raises: GPGKeyError
+    """
+    ps = subprocess.Popen(['gpg', '--dearmor'],
+                          stdout=subprocess.PIPE,
+                          stderr=subprocess.PIPE,
+                          stdin=subprocess.PIPE)
+    out, err = ps.communicate(input=key_asc)
+    # no need to decode output as it is binary (invalid utf-8), only error
+    err = err.decode('utf-8')
+    if 'gpg: no valid OpenPGP data found.' in err:
+        raise GPGKeyError('Invalid GPG key material. Check your network setup'
+                          ' (MTU, routing, DNS) and/or proxy server settings'
+                          ' as well as destination keyserver status.')
+    else:
+        return out
+
+
+def _write_apt_gpg_keyfile(key_name, key_material):
+    """Write GPG key material into a file at a provided path.
+
+    :param key_name: A key name to use for a key file (could be a
+                     fingerprint)
+    :type key_name: str
+    :param key_material: A GPG key material (binary)
+    :type key_material: (str, bytes)
+    """
+    with open('/etc/apt/trusted.gpg.d/{}.gpg'.format(key_name),
+              'wb') as keyf:
+        keyf.write(key_material)
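[Editor's note: the three helpers above compose into the armored-key path taken by `import_key`. A minimal sketch; the armored text below is a fake placeholder standing in for real charm-config input, so the gpg calls would only succeed with a genuine key.]

```python
# Hypothetical armored key from charm config; a real one has a Radix64 body.
armored = (
    '-----BEGIN PGP PUBLIC KEY BLOCK-----\n'
    '...\n'
    '-----END PGP PUBLIC KEY BLOCK-----'
)
key_bytes = armored.encode('utf-8')
fingerprint = _get_keyid_by_gpg_key(key_bytes)   # 40-hex-digit file name
binary_key = _dearmor_gpg_key(key_bytes)         # gpg --dearmor output
# Ends up in /etc/apt/trusted.gpg.d/<fingerprint>.gpg
_write_apt_gpg_keyfile(key_name=fingerprint, key_material=binary_key)
```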
+
+
+def add_source(source, key=None, fail_invalid=False):
+    """Add a package source to this system.
+
+    @param source: a URL or sources.list entry, as supported by
+    add-apt-repository(1). Examples::
+
+        ppa:charmers/example
+        deb https://stub:key@private.example.com/ubuntu trusty main
+
+    In addition:
+        'proposed:' may be used to enable the standard 'proposed'
+        pocket for the release.
+        'cloud:' may be used to activate official cloud archive pockets,
+        such as 'cloud:icehouse'
+        'distro' may be used as a noop
+
+    The full list of source specifications supported by the function is:
+
+    'distro': A NOP; i.e. it has no effect.
+    'proposed': the proposed deb spec [2] is written to
+      /etc/apt/sources.list.d/proposed.list
+    'distro-proposed': adds <version>-proposed to the debs [2]
+    'ppa:<ppa-name>': add-apt-repository --yes <ppa-name>
+    'deb <deb-spec>': add-apt-repository --yes deb <deb-spec>
+    'http://....': add-apt-repository --yes http://...
+    'cloud-archive:<spec>': add-apt-repository --yes cloud-archive:<spec>
+    'cloud:<series>-<release>[/staging]': specify a Cloud Archive pocket with
+      optional staging version. If staging is used then the staging PPA [2]
+      will be used. If staging is NOT used then the cloud archive [3] will be
+      added, and the 'ubuntu-cloud-keyring' package will be added for the
+      current distro.
+    '<openstack-version>': translate to cloud:<series>-<release> based on
+      the current distro version (i.e. for 'ussuri' this will either be
+      'bionic-ussuri' or 'distro').
+    '<openstack-version>/proposed': as above, but for proposed.
+
+    Otherwise the source is not recognised and this is logged to the juju
+    log. However, no error is raised, unless fail_invalid is True.
+
+    [1] deb http://ubuntu-cloud.archive.canonical.com/ubuntu {} main
+        where {} is replaced with the derived pocket name.
+    [2] deb http://archive.ubuntu.com/ubuntu {}-proposed \
+        main universe multiverse restricted
+        where {} is replaced with the lsb_release codename (e.g. xenial)
+    [3] deb http://ubuntu-cloud.archive.canonical.com/ubuntu <pocket>
+        to /etc/apt/sources.list.d/cloud-archive-list
+
+    @param key: A key to be added to the system's APT keyring and used
+    to verify the signatures on packages. Ideally, this should be an
+    ASCII format GPG public key including the block headers. A GPG key
+    id may also be used, but be aware that only insecure protocols are
+    available to retrieve the actual public key from a public keyserver
+    placing your Juju environment at risk. ppa and cloud archive keys
+    are securely added automatically, so should not be provided.
+
+    @param fail_invalid: (boolean) if True, then the function raises a
+    SourceConfigError if there is no matching installation source.
+
+    @raises SourceConfigError() if for cloud:<pocket>, the <pocket> is not a
+    valid pocket in CLOUD_ARCHIVE_POCKETS
+    """
+    # build a regex of the OpenStack versions from OPENSTACK_RELEASES; can't
+    # use the list in contrib.openstack.utils as it might not be included in
+    # classic charms and would break everything. Having OpenStack specific
+    # code in this file is a bit of an antipattern, anyway.
+    os_versions_regex = "({})".format("|".join(OPENSTACK_RELEASES))
+
+    _mapping = OrderedDict([
+        (r"^distro$", lambda: None),  # This is a NOP
+        (r"^(?:proposed|distro-proposed)$", _add_proposed),
+        (r"^cloud-archive:(.*)$", _add_apt_repository),
+        (r"^((?:deb |http:|https:|ppa:).*)$", _add_apt_repository),
+        (r"^cloud:(.*)-(.*)\/staging$", _add_cloud_staging),
+        (r"^cloud:(.*)-(ovn-.*)$", _add_cloud_distro_check),
+        (r"^cloud:(.*)-(.*)$", _add_cloud_distro_check),
+        (r"^cloud:(.*)$", _add_cloud_pocket),
+        (r"^snap:.*-(.*)-(.*)$", _add_cloud_distro_check),
+        (r"^{}\/proposed$".format(os_versions_regex),
+         _add_bare_openstack_proposed),
+        (r"^{}$".format(os_versions_regex), _add_bare_openstack),
+    ])
+    if source is None:
+        source = ''
+    for r, fn in _mapping.items():
+        m = re.match(r, source)
+        if m:
+            if key:
+                # Import key before adding the source which depends on it,
+                # as refreshing packages could fail otherwise.
+                try:
+                    import_key(key)
+                except GPGKeyError as e:
+                    raise SourceConfigError(str(e))
+            # call the associated function with the captured groups
+            # raises SourceConfigError on error.
+            fn(*m.groups())
+            break
+    else:
+        # nothing matched.  log an error and maybe sys.exit
+        err = "Unknown source: {!r}".format(source)
+        log(err)
+        if fail_invalid:
+            raise SourceConfigError(err)
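[Editor's note: a quick sketch of how `add_source` dispatches a few representative specs through `_mapping` above. Values are illustrative; the calls have side effects on a real system.]

```python
# 'distro' is a NOP; nothing is written.
add_source('distro')

# A Cloud Archive pocket: checks the running series, installs
# ubuntu-cloud-keyring and writes cloud-archive.list.
add_source('cloud:jammy-antelope')

# A bare release name resolves against the running series; on jammy this
# effectively becomes cloud:jammy-antelope.
add_source('antelope')

# Unrecognised specs only raise when asked to:
add_source('no-such-spec', fail_invalid=True)  # -> SourceConfigError
```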
+ """ + release = get_distrib_codename() + arch = platform.machine() + if arch not in ARCH_TO_PROPOSED_POCKET.keys(): + raise SourceConfigError("Arch {} not supported for (distro-)proposed" + .format(arch)) + with open('/etc/apt/sources.list.d/proposed.list', 'w') as apt: + apt.write(ARCH_TO_PROPOSED_POCKET[arch].format(release)) + + +def _add_apt_repository(spec): + """Add the spec using add_apt_repository + + :param spec: the parameter to pass to add_apt_repository + :type spec: str + """ + if '{series}' in spec: + series = get_distrib_codename() + spec = spec.replace('{series}', series) + _run_with_retries(['add-apt-repository', '--yes', spec], + cmd_env=env_proxy_settings(['https', 'http', 'no_proxy']) + ) + + +def __write_sources_list_d_actual_pocket(file, actual_pocket): + with open('/etc/apt/sources.list.d/{}'.format(file), 'w') as apt: + apt.write(CLOUD_ARCHIVE.format(actual_pocket)) + + +def _add_cloud_pocket(pocket): + """Add a cloud pocket as /etc/apt/sources.d/cloud-archive.list + + Note that this overwrites the existing file if there is one. + + This function also converts the simple pocket in to the actual pocket using + the CLOUD_ARCHIVE_POCKETS mapping. + + :param pocket: string representing the pocket to add a deb spec for. + :raises: SourceConfigError if the cloud pocket doesn't exist or the + requested release doesn't match the current distro version. + """ + apt_install(filter_installed_packages(['ubuntu-cloud-keyring']), + fatal=True) + if pocket not in CLOUD_ARCHIVE_POCKETS: + raise SourceConfigError( + 'Unsupported cloud: source option %s' % + pocket) + actual_pocket = CLOUD_ARCHIVE_POCKETS[pocket] + __write_sources_list_d_actual_pocket( + 'cloud-archive{}.list'.format('' if 'ovn' not in pocket else '-ovn'), + actual_pocket) + + +def _add_cloud_staging(cloud_archive_release, openstack_release): + """Add the cloud staging repository which is in + ppa:ubuntu-cloud-archive/-staging + + This function checks that the cloud_archive_release matches the current + codename for the distro that charm is being installed on. + + :param cloud_archive_release: string, codename for the release. + :param openstack_release: String, codename for the openstack release. + :raises: SourceConfigError if the cloud_archive_release doesn't match the + current version of the os. + """ + _verify_is_ubuntu_rel(cloud_archive_release, openstack_release) + ppa = 'ppa:ubuntu-cloud-archive/{}-staging'.format(openstack_release) + cmd = 'add-apt-repository -y {}'.format(ppa) + _run_with_retries(cmd.split(' ')) + + +def _add_cloud_distro_check(cloud_archive_release, openstack_release): + """Add the cloud pocket, but also check the cloud_archive_release against + the current distro, and use the openstack_release as the full lookup. + + This just calls _add_cloud_pocket() with the openstack_release as pocket + to get the correct cloud-archive.list for dpkg to work with. + + :param cloud_archive_release:String, codename for the distro release. + :param openstack_release: String, spec for the release to look up in the + CLOUD_ARCHIVE_POCKETS + :raises: SourceConfigError if this is the wrong distro, or the pocket spec + doesn't exist. + """ + _verify_is_ubuntu_rel(cloud_archive_release, openstack_release) + _add_cloud_pocket("{}-{}".format(cloud_archive_release, openstack_release)) + + +def _verify_is_ubuntu_rel(release, os_release): + """Verify that the release is in the same as the current ubuntu release. + + :param release: String, lowercase for the release. 
+
+
+def _verify_is_ubuntu_rel(release, os_release):
+    """Verify that the release is the same as the current ubuntu release.
+
+    :param release: String, lowercase for the release.
+    :param os_release: String, the os_release being asked for
+    :raises: SourceConfigError if the release is not the same as the ubuntu
+        release.
+    """
+    ubuntu_rel = get_distrib_codename()
+    if release != ubuntu_rel:
+        raise SourceConfigError(
+            'Invalid Cloud Archive release specified: {}-{} on this Ubuntu '
+            'version ({})'.format(release, os_release, ubuntu_rel))
+
+
+def _add_bare_openstack(openstack_release):
+    """Add cloud or distro based on the release given.
+
+    The spec given is, say, 'ussuri', but this could apply
+    cloud:bionic-ussuri or 'distro' depending on whether the ubuntu release
+    is bionic or focal.
+
+    :param openstack_release: the OpenStack codename to determine the release
+        for.
+    :type openstack_release: str
+    :raises: SourceConfigError
+    """
+    # TODO(ajkavanagh) - surely this means we should be removing cloud
+    # archives if they exist?
+    __add_bare_helper(openstack_release, "{}-{}", lambda: None)
+
+
+def _add_bare_openstack_proposed(openstack_release):
+    """Add cloud or distro but with proposed.
+
+    The spec given is, say, 'ussuri' but this could apply
+    cloud:bionic-ussuri/proposed or 'distro/proposed' depending on whether
+    the ubuntu release is bionic or focal.
+
+    :param openstack_release: the OpenStack codename to determine the release
+        for.
+    :type openstack_release: str
+    :raises: SourceConfigError
+    """
+    __add_bare_helper(openstack_release, "{}-{}/proposed", _add_proposed)
+
+
+def __add_bare_helper(openstack_release, pocket_format, final_function):
+    """Helper for _add_bare_openstack[_proposed]
+
+    The bulk of the work between the two functions is exactly the same except
+    for the pocket format and the function that is run if it's the distro
+    version.
+
+    :param openstack_release: the OpenStack codename. e.g. ussuri
+    :type openstack_release: str
+    :param pocket_format: the pocket formatter string to construct a pocket
+        str from the openstack_release and the current ubuntu version.
+    :type pocket_format: str
+    :param final_function: the function to call if it is the distro version.
+    :type final_function: Callable
+    :raises SourceConfigError on error
+    """
+    ubuntu_version = get_distrib_codename()
+    possible_pocket = pocket_format.format(ubuntu_version, openstack_release)
+    if possible_pocket in CLOUD_ARCHIVE_POCKETS:
+        _add_cloud_pocket(possible_pocket)
+        return
+    # Otherwise it's almost certainly the distro version; verify that it
+    # exists.
+    try:
+        assert UBUNTU_OPENSTACK_RELEASE[ubuntu_version] == openstack_release
+    except KeyError:
+        raise SourceConfigError(
+            "Invalid ubuntu version {} isn't known to this library"
+            .format(ubuntu_version))
+    except AssertionError:
+        raise SourceConfigError(
+            'Invalid OpenStack release specified: {} for Ubuntu version {}'
+            .format(openstack_release, ubuntu_version))
+    final_function()
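[Editor's note: the decision `__add_bare_helper` makes is easiest to see with concrete inputs; the checks below restate its logic against the tables defined earlier in this file and are runnable as-is.]

```python
# On a bionic unit, a bare 'ussuri' source maps to a Cloud Archive pocket:
assert 'bionic-ussuri' in CLOUD_ARCHIVE_POCKETS

# On a focal unit there is no 'focal-ussuri' pocket; ussuri IS focal's
# distro version, so final_function() runs instead (a NOP for
# _add_bare_openstack, _add_proposed for the /proposed variant).
assert 'focal-ussuri' not in CLOUD_ARCHIVE_POCKETS
assert UBUNTU_OPENSTACK_RELEASE['focal'] == 'ussuri'
```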
+
+
+def _run_with_retries(cmd, max_retries=CMD_RETRY_COUNT, retry_exitcodes=(1,),
+                      retry_message="", cmd_env=None, quiet=False):
+    """Run a command and retry until success or max_retries is reached.
+
+    :param cmd: The apt command to run.
+    :type cmd: List[str]
+    :param max_retries: The number of retries to attempt on a fatal
+        command. Defaults to CMD_RETRY_COUNT.
+    :type max_retries: int
+    :param retry_exitcodes: Optional additional exit codes to retry.
+        Defaults to retry on exit code 1.
+    :type retry_exitcodes: tuple
+    :param retry_message: Optional log prefix emitted during retries.
+    :type retry_message: str
+    :param cmd_env: Environment variables to add to the command run.
+    :type cmd_env: Union[None, Dict[str, str]]
+    :param quiet: if True, silence the output of the command from stdout and
+        stderr
+    :type quiet: bool
+    """
+    env = get_apt_dpkg_env()
+    if cmd_env:
+        env.update(cmd_env)
+
+    kwargs = {}
+    if quiet:
+        kwargs['stdout'] = subprocess.DEVNULL
+        kwargs['stderr'] = subprocess.DEVNULL
+
+    if not retry_message:
+        retry_message = "Failed executing '{}'".format(" ".join(cmd))
+    retry_message += ". Will retry in {} seconds".format(CMD_RETRY_DELAY)
+
+    retry_count = 0
+    result = None
+
+    retry_results = (None,) + retry_exitcodes
+    while result in retry_results:
+        try:
+            result = subprocess.check_call(cmd, env=env, **kwargs)
+        except subprocess.CalledProcessError as e:
+            result = e.returncode
+            if result not in retry_results:
+                # a non-retriable exitcode was produced
+                raise
+            retry_count += 1
+            if retry_count > max_retries:
+                # a retriable exitcode was produced more than
+                # {max_retries} times
+                raise
+            log(retry_message)
+            time.sleep(CMD_RETRY_DELAY)
+
+
+def _run_apt_command(cmd, fatal=False, quiet=False):
+    """Run an apt command with optional retries.
+
+    :param cmd: The apt command to run.
+    :type cmd: List[str]
+    :param fatal: Whether the command's output should be checked and
+        retried.
+    :type fatal: bool
+    :param quiet: if True, silence the output of the command from stdout and
+        stderr
+    :type quiet: bool
+    """
+    if fatal:
+        _run_with_retries(
+            cmd, retry_exitcodes=(1, APT_NO_LOCK,),
+            retry_message="Couldn't acquire DPKG lock",
+            quiet=quiet)
+    else:
+        kwargs = {}
+        if quiet:
+            kwargs['stdout'] = subprocess.DEVNULL
+            kwargs['stderr'] = subprocess.DEVNULL
+        subprocess.call(cmd, env=get_apt_dpkg_env(), **kwargs)
+
+
+def get_upstream_version(package):
+    """Determine upstream version based on installed package
+
+    @returns None (if not installed) or the upstream version
+    """
+    cache = apt_cache()
+    try:
+        pkg = cache[package]
+    except Exception:
+        # the package is unknown to the current apt cache.
+        return None
+
+    if not pkg.current_ver:
+        # package is known, but no version is currently installed.
+        return None
+
+    return ubuntu_apt_pkg.upstream_version(pkg.current_ver.ver_str)
+
+
+def get_installed_version(package):
+    """Determine installed version of a package
+
+    @returns None (if not installed) or the installed version as
+    Version object
+    """
+    cache = apt_cache()
+    dpkg_result = cache.dpkg_list([package]).get(package, {})
+    current_ver = None
+    installed_version = dpkg_result.get('version')
+
+    if installed_version:
+        current_ver = ubuntu_apt_pkg.Version({'ver_str': installed_version})
+    return current_ver
+
+
+def get_apt_dpkg_env():
+    """Get environment suitable for execution of APT and DPKG tools.
+
+    We keep this in a helper function instead of in a global constant to
+    avoid execution on import of the library.
+
+    :returns: Environment suitable for execution of APT and DPKG tools.
+    :rtype: Dict[str, str]
+    """
+    # The fallback is used in the event of ``/etc/environment`` not
+    # containing a valid PATH variable.
+    return {'DEBIAN_FRONTEND': 'noninteractive',
+            'PATH': get_system_env('PATH', '/usr/sbin:/usr/bin:/sbin:/bin')}
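[Editor's note: before moving on to the `ubuntu_apt_pkg` shim these helpers depend on, a short hedged example of the two version lookups. The package name and version strings are illustrative.]

```python
# get_upstream_version strips the epoch and Debian revision, e.g. an
# installed '2:17.2.6-0ubuntu1' yields '17.2.6'; None if not installed.
print(get_upstream_version('ceph'))

# get_installed_version returns a Version container (or None) whose
# ver_str keeps the full '2:17.2.6-0ubuntu1' form.
ver = get_installed_version('ceph')
if ver:
    print(ver.ver_str)
```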
diff --git a/ceph-proxy/charmhelpers/fetch/ubuntu_apt_pkg.py b/ceph-proxy/charmhelpers/fetch/ubuntu_apt_pkg.py
new file mode 100644
index 00000000..f4dde4a9
--- /dev/null
+++ b/ceph-proxy/charmhelpers/fetch/ubuntu_apt_pkg.py
@@ -0,0 +1,327 @@
+# Copyright 2019-2021 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Provide a subset of the ``python-apt`` module API.
+
+Data collection is done through subprocess calls to ``apt-cache`` and
+``dpkg-query`` commands.
+
+The main purpose for this module is to avoid dependency on the
+``python-apt`` python module.
+
+The indicated python module is a wrapper around the ``apt`` C++ library
+which is tightly connected to the version of the distribution it was
+shipped on. It is not developed in a backward/forward compatible manner.
+
+This in turn makes it incredibly hard to distribute as a wheel for a piece
+of python software that supports a span of distro releases [0][1].
+
+Upstream feedback like [2] does not give confidence that this will ever
+change, so with this module we get rid of the dependency.
+
+0: https://github.com/juju-solutions/layer-basic/pull/135
+1: https://bugs.launchpad.net/charm-octavia/+bug/1824112
+2: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=845330#10
+"""
+
+import locale
+import os
+import subprocess
+import sys
+
+from charmhelpers import deprecate
+from charmhelpers.core.hookenv import log
+
+
+class _container(dict):
+    """Simple container for attributes."""
+    __getattr__ = dict.__getitem__
+    __setattr__ = dict.__setitem__
+
+
+class Package(_container):
+    """Simple container for package attributes."""
+
+
+class Version(_container):
+    """Simple container for version attributes."""
+
+
+class Cache(object):
+    """Simulation of ``apt_pkg`` Cache object."""
+    def __init__(self, progress=None):
+        pass
+
+    def __contains__(self, package):
+        try:
+            pkg = self.__getitem__(package)
+            return pkg is not None
+        except KeyError:
+            return False
+
+    def __getitem__(self, package):
+        """Get information about a package from apt and dpkg databases.
+
+        :param package: Name of package
+        :type package: str
+        :returns: Package object
+        :rtype: object
+        :raises: KeyError, subprocess.CalledProcessError
+        """
+        apt_result = self._apt_cache_show([package])[package]
+        apt_result['name'] = apt_result.pop('package')
+        pkg = Package(apt_result)
+        dpkg_result = self.dpkg_list([package]).get(package, {})
+        current_ver = None
+        installed_version = dpkg_result.get('version')
+        if installed_version:
+            current_ver = Version({'ver_str': installed_version})
+        pkg.current_ver = current_ver
+        pkg.architecture = dpkg_result.get('architecture')
+        return pkg
+
+    @deprecate("use dpkg_list() instead.", "2022-05", log=log)
+    def _dpkg_list(self, packages):
+        return self.dpkg_list(packages)
+
+    def dpkg_list(self, packages):
+        """Get data from system dpkg database for package.
+
+        Note that this method is also useful for querying package names
+        containing wildcards, for example
+
+            apt_cache().dpkg_list(['nvidia-vgpu-ubuntu-*'])
+
+        may return
+
+            {
+                'nvidia-vgpu-ubuntu-470': {
+                    'name': 'nvidia-vgpu-ubuntu-470',
+                    'version': '470.68',
+                    'architecture': 'amd64',
+                    'description': 'NVIDIA vGPU driver - version 470.68'
+                }
+            }
+
+        :param packages: Packages to get data from
+        :type packages: List[str]
+        :returns: Structured data about installed packages, keys like
+            ``dpkg-query --list``
+        :rtype: dict
+        :raises: subprocess.CalledProcessError
+        """
+        pkgs = {}
+        cmd = [
+            'dpkg-query', '--show',
+            '--showformat',
+            r'${db:Status-Abbrev}\t${Package}\t${Version}\t${Architecture}\t${binary:Summary}\n'
+        ]
+        cmd.extend(packages)
+        try:
+            output = subprocess.check_output(cmd,
+                                             stderr=subprocess.STDOUT,
+                                             universal_newlines=True)
+        except subprocess.CalledProcessError as cp:
+            # ``dpkg-query`` may return error and at the same time have
+            # produced useful output, for example when asked for multiple
+            # packages where some are not installed
+            if cp.returncode != 1:
+                raise
+            output = cp.output
+        for line in output.splitlines():
+            # only process lines for successfully installed packages
+            if not (line.startswith('ii ') or line.startswith('hi ')):
+                continue
+            status, name, version, arch, desc = line.split('\t', 4)
+            pkgs[name] = {
+                'name': name,
+                'version': version,
+                'architecture': arch,
+                'description': desc,
+            }
+        return pkgs
+
+    def _apt_cache_show(self, packages):
+        """Get data from system apt cache for package.
+
+        :param packages: Packages to get data from
+        :type packages: List[str]
+        :returns: Structured data about package, keys like
+            ``apt-cache show``
+        :rtype: dict
+        :raises: subprocess.CalledProcessError
+        """
+        pkgs = {}
+        cmd = ['apt-cache', 'show', '--no-all-versions']
+        cmd.extend(packages)
+        if locale.getlocale() == (None, None):
+            # subprocess calls out to locale.getpreferredencoding(False) to
+            # determine encoding. Workaround for Trusty where the
+            # environment appears to not be set up correctly.
+            locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
+        try:
+            output = subprocess.check_output(cmd,
+                                             stderr=subprocess.STDOUT,
+                                             universal_newlines=True)
+            previous = None
+            pkg = {}
+            for line in output.splitlines():
+                if not line:
+                    if 'package' in pkg:
+                        pkgs.update({pkg['package']: pkg})
+                    pkg = {}
+                    continue
+                if line.startswith(' '):
+                    if previous and previous in pkg:
+                        pkg[previous] += os.linesep + line.lstrip()
+                    continue
+                if ':' in line:
+                    kv = line.split(':', 1)
+                    key = kv[0].lower()
+                    if key == 'n':
+                        continue
+                    previous = key
+                    pkg.update({key: kv[1].lstrip()})
+        except subprocess.CalledProcessError as cp:
+            # ``apt-cache`` returns 100 if none of the packages asked for
+            # exist in the apt cache.
+            if cp.returncode != 100:
+                raise
+        return pkgs
+
+
+class Config(_container):
+    def __init__(self):
+        super(Config, self).__init__(self._populate())
+
+    def _populate(self):
+        cfgs = {}
+        cmd = ['apt-config', 'dump']
+        output = subprocess.check_output(cmd,
+                                         stderr=subprocess.STDOUT,
+                                         universal_newlines=True)
+        for line in output.splitlines():
+            if not line.startswith("CommandLine"):
+                k, v = line.split(" ", 1)
+                cfgs[k] = v.strip(";").strip("\"")
+
+        return cfgs
+
+
+# Backwards compatibility with old apt_pkg module
+sys.modules[__name__].config = Config()
+
+
+def init():
+    """Compatibility shim that does nothing."""
+    pass
+
+
+def upstream_version(version):
+    """Extract upstream version from a version string.
+
+    Upstream reference: https://salsa.debian.org/apt-team/apt/blob/master/
+                        apt-pkg/deb/debversion.cc#L259
+
+    :param version: Version string
+    :type version: str
+    :returns: Upstream version
+    :rtype: str
+    """
+    if version:
+        version = version.split(':')[-1]
+        version = version.split('-')[0]
+    return version
+
+
+def version_compare(a, b):
+    """Compare the given versions.
+
+    Call out to ``dpkg`` to make sure the code doing the comparison is
+    compatible with what the ``apt`` library would do. Mimic the return
+    values.
+
+    Upstream reference:
+    https://apt-team.pages.debian.net/python-apt/library/apt_pkg.html
+    ?highlight=version_compare#apt_pkg.version_compare
+
+    :param a: version string
+    :type a: str
+    :param b: version string
+    :type b: str
+    :returns: >0 if ``a`` is greater than ``b``, 0 if a equals b,
+              <0 if ``a`` is smaller than ``b``
+    :rtype: int
+    :raises: subprocess.CalledProcessError, RuntimeError
+    """
+    for op in ('gt', 1), ('eq', 0), ('lt', -1):
+        try:
+            subprocess.check_call(['dpkg', '--compare-versions',
+                                   a, op[0], b],
+                                  stderr=subprocess.STDOUT,
+                                  universal_newlines=True)
+            return op[1]
+        except subprocess.CalledProcessError as cp:
+            if cp.returncode == 1:
+                continue
+            raise
+    else:
+        raise RuntimeError('Unable to compare "{}" and "{}", according to '
+                           'our logic they are neither greater, equal nor '
+                           'less than each other.'.format(a, b))
+
+
+class PkgVersion():
+    """Allow package versions to be compared.
+
+    For example::
+
+        >>> import charmhelpers.fetch as fetch
+        >>> (fetch.apt_pkg.PkgVersion('2:20.4.0') <
+        ...  fetch.apt_pkg.PkgVersion('2:20.5.0'))
+        True
+        >>> pkgs = [fetch.apt_pkg.PkgVersion('2:20.4.0'),
+        ...         fetch.apt_pkg.PkgVersion('2:21.4.0'),
+        ...         fetch.apt_pkg.PkgVersion('2:17.4.0')]
+        >>> pkgs.sort()
+        >>> pkgs
+        [2:17.4.0, 2:20.4.0, 2:21.4.0]
+    """
+
+    def __init__(self, version):
+        self.version = version
+
+    def __lt__(self, other):
+        return version_compare(self.version, other.version) == -1
+
+    def __le__(self, other):
+        return self.__lt__(other) or self.__eq__(other)
+
+    def __gt__(self, other):
+        return version_compare(self.version, other.version) == 1
+
+    def __ge__(self, other):
+        return self.__gt__(other) or self.__eq__(other)
+
+    def __eq__(self, other):
+        return version_compare(self.version, other.version) == 0
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def __repr__(self):
+        return self.version
+
+    def __hash__(self):
+        return hash(repr(self))
diff --git a/ceph-proxy/charmhelpers/osplatform.py b/ceph-proxy/charmhelpers/osplatform.py
new file mode 100644
index 00000000..5d121866
--- /dev/null
+++ b/ceph-proxy/charmhelpers/osplatform.py
@@ -0,0 +1,61 @@
+import platform
+import os
+
+
+def get_platform():
+    """Return the current OS platform.
+
+    For example: if current os platform is Ubuntu then a string "ubuntu"
+    will be returned (which is the name of the module).
+    This string is used to decide which platform module should be imported.
+    """
+    current_platform = _get_current_platform()
+
+    if "Ubuntu" in current_platform:
+        return "ubuntu"
+    elif "CentOS" in current_platform:
+        return "centos"
+    elif "debian" in current_platform or "Debian" in current_platform:
+        # Stock Python does not detect Ubuntu and instead returns debian.
+        # Or at least it does in some build environments like Travis CI
+        return "ubuntu"
+    elif "elementary" in current_platform:
+        # ElementaryOS fails to run tests locally without this.
+ return "ubuntu" + elif "Pop!_OS" in current_platform: + # Pop!_OS also fails to run tests locally without this. + return "ubuntu" + else: + raise RuntimeError("This module is not supported on {}." + .format(current_platform)) + + +def _get_current_platform(): + """Return the current platform information for the OS. + + Attempts to lookup linux distribution information from the platform + module for releases of python < 3.7. For newer versions of python, + the platform is determined from the /etc/os-release file. + """ + # linux_distribution is deprecated and will be removed in Python 3.7 + # Warnings *not* disabled, as we certainly need to fix this. + if hasattr(platform, 'linux_distribution'): + tuple_platform = platform.linux_distribution() + current_platform = tuple_platform[0] + else: + current_platform = _get_platform_from_fs() + + return current_platform + + +def _get_platform_from_fs(): + """Get Platform from /etc/os-release.""" + with open(os.path.join(os.sep, 'etc', 'os-release')) as fin: + content = dict( + line.split('=', 1) + for line in fin.read().splitlines() + if '=' in line + ) + for k, v in content.items(): + content[k] = v.strip('"') + return content["NAME"] diff --git a/ceph-proxy/charmhelpers/payload/__init__.py b/ceph-proxy/charmhelpers/payload/__init__.py new file mode 100644 index 00000000..ee55cb3d --- /dev/null +++ b/ceph-proxy/charmhelpers/payload/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"Tools for working with files injected into a charm just before deployment." diff --git a/ceph-proxy/charmhelpers/payload/execd.py b/ceph-proxy/charmhelpers/payload/execd.py new file mode 100644 index 00000000..1502aa0b --- /dev/null +++ b/ceph-proxy/charmhelpers/payload/execd.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python + +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import os
+import sys
+import subprocess
+from charmhelpers.core import hookenv
+
+
+def default_execd_dir():
+    return os.path.join(os.environ['CHARM_DIR'], 'exec.d')
+
+
+def execd_module_paths(execd_dir=None):
+    """Generate a list of full paths to modules within execd_dir."""
+    if not execd_dir:
+        execd_dir = default_execd_dir()
+
+    if not os.path.exists(execd_dir):
+        return
+
+    for subpath in os.listdir(execd_dir):
+        module = os.path.join(execd_dir, subpath)
+        if os.path.isdir(module):
+            yield module
+
+
+def execd_submodule_paths(command, execd_dir=None):
+    """Generate a list of full paths to the specified command within exec_dir.
+    """
+    for module_path in execd_module_paths(execd_dir):
+        path = os.path.join(module_path, command)
+        if os.access(path, os.X_OK) and os.path.isfile(path):
+            yield path
+
+
+def execd_run(command, execd_dir=None, die_on_error=True, stderr=subprocess.STDOUT):
+    """Run command for each module within execd_dir which defines it."""
+    for submodule_path in execd_submodule_paths(command, execd_dir):
+        try:
+            subprocess.check_output(submodule_path, stderr=stderr,
+                                    universal_newlines=True)
+        except subprocess.CalledProcessError as e:
+            hookenv.log("Error ({}) running {}. Output: {}".format(
+                e.returncode, e.cmd, e.output))
+            if die_on_error:
+                sys.exit(e.returncode)
+
+
+def execd_preinstall(execd_dir=None):
+    """Run charm-pre-install for each module within execd_dir."""
+    execd_run('charm-pre-install', execd_dir=execd_dir)
diff --git a/ceph-proxy/config.yaml b/ceph-proxy/config.yaml
new file mode 100644
index 00000000..ff510ed9
--- /dev/null
+++ b/ceph-proxy/config.yaml
@@ -0,0 +1,76 @@
+options:
+  loglevel:
+    type: int
+    default: 1
+    description: Mon and OSD debug level. Max is 20.
+  use-syslog:
+    type: boolean
+    default: False
+    description: |
+      Setting this to True will allow supporting services to log to syslog.
+  source:
+    type: string
+    default: caracal
+    description: |
+      Repository from which to install. May be one of the following:
+      distro, ppa:somecustom/ppa, a deb url sources entry,
+      or a supported Ubuntu Cloud Archive e.g.
+      .
+      cloud:<series>-<release>
+      cloud:<series>-<release>/updates
+      cloud:<series>-<release>/staging
+      cloud:<series>-<release>/proposed
+      .
+      See https://wiki.ubuntu.com/OpenStack/CloudArchive for info on which
+      cloud archives are available and supported.
+      .
+      NOTE: updating this setting to a source that is known to provide
+      a later version of OpenStack will trigger a software upgrade unless
+      action-managed-upgrade is set to True.
+  key:
+    type: string
+    default: ""
+    description: |
+      Key ID to import to the apt keyring to support use with arbitrary
+      source configuration from outside of Launchpad archives or PPAs.
+  fsid:
+    type: string
+    default: ""
+    description: |
+      fsid of the ceph cluster. To generate a suitable value use `uuid`
+      .
+      This configuration element is mandatory and the service will fail on
+      install if it is not provided.
+  monitor-hosts:
+    type: string
+    default: ""
+    description: |
+      Space-delimited list of existing monitor hosts, in the format
+      {IP / Hostname}:{port} {IP / Hostname}:{port}
+  admin-key:
+    type: string
+    default: ""
+    description: Admin cephx key for existing Ceph cluster
+  auth-supported:
+    type: string
+    default: cephx
+    description: |
+      Which authentication flavour to use.
+      .
+      Valid options are "cephx" and "none". If "none" is specified, keys will
+      still be created and deployed so that it can be enabled later.
+  user-keys:
+    type: string
+    default: ""
+    description: |
+      A space-separated list of <username>:<cephx-key> pairs used to
+      lookup authentication keys for a specific user instead of trying to
+      create a user and a key via ceph-mon.
+  admin-user:
+    type: string
+    default: "client.admin"
+    description: |
+      A configurable admin user name. Used for scenarios where pools are
+      pre-created and the user given to charm-ceph-proxy simply needs to
+      check the existence of a given pool and error out if one does not
+      exist. Can be used in conjunction with user-keys.
diff --git a/ceph-proxy/copyright b/ceph-proxy/copyright
new file mode 100644
index 00000000..bdfae0e0
--- /dev/null
+++ b/ceph-proxy/copyright
@@ -0,0 +1,15 @@
+Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0
+Comment: The licensing of this charm is aligned to upstream ceph
+ as the ceph upstart integration is distributed as part of the charm.
+
+Files: *
+Copyright: 2012, Canonical Ltd.
+License: LGPL-2.1
+
+Files: files/upstart/*
+Copyright: 2004-2010 by Sage Weil
+License: LGPL-2.1
+
+License: LGPL-2.1
+ On Debian GNU/Linux system you can find the complete text of the
+ LGPL-2.1 license in '/usr/share/common-licenses/LGPL-2.1'
diff --git a/ceph-proxy/files/nagios/check_ceph_status.py b/ceph-proxy/files/nagios/check_ceph_status.py
new file mode 100755
index 00000000..c70e6459
--- /dev/null
+++ b/ceph-proxy/files/nagios/check_ceph_status.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+
+# Copyright (C) 2014 Canonical
+# All Rights Reserved
+# Author: Jacek Nykis
+
+import re
+import argparse
+import subprocess
+import nagios_plugin
+
+
+def check_ceph_status(args):
+    if args.status_file:
+        nagios_plugin.check_file_freshness(args.status_file, 3600)
+        with open(args.status_file, "r") as f:
+            lines = f.readlines()
+        status_data = dict(
+            line.strip().split(' ', 1) for line in lines if len(line) > 1
+        )
+    else:
+        lines = subprocess.check_output(
+            ["ceph", "status"], universal_newlines=True).split('\n')
+        status_data = dict(
+            line.strip().split(' ', 1) for line in lines if len(line) > 1
+        )
+
+    if ('health' not in status_data or
+            'monmap' not in status_data or
+            'osdmap' not in status_data):
+        raise nagios_plugin.UnknownError('UNKNOWN: status data is incomplete')
+
+    if status_data['health'] != 'HEALTH_OK':
+        msg = 'CRITICAL: ceph health status: "{}"'.format(
+            status_data['health'])
+        raise nagios_plugin.CriticalError(msg)
+    osds = re.search(r"^.*: (\d+) osds: (\d+) up, (\d+) in",
+                     status_data['osdmap'])
+    if int(osds.group(1)) > int(osds.group(2)):  # not all OSDs are "up"
+        msg = 'CRITICAL: Some OSDs are not up. Total: {}, up: {}'.format(
+            osds.group(1), osds.group(2))
+        raise nagios_plugin.CriticalError(msg)
+    print("All OK")
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Check ceph status')
+    parser.add_argument('-f', '--file', dest='status_file',
+                        default=False,
+                        help='Optional file with "ceph status" output')
+    args = parser.parse_args()
+    nagios_plugin.try_check(check_ceph_status, args)
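[Editor's note: the osdmap regex is the crux of the OSD check above; a small runnable illustration of what it extracts from a typical pre-luminous `ceph status` line. The sample line is made up.]

```python
import re

sample = 'osdmap e42: 3 osds: 2 up, 2 in'
m = re.search(r"^.*: (\d+) osds: (\d+) up, (\d+) in", sample)
print(m.groups())                          # ('3', '2', '2')
print(int(m.group(1)) > int(m.group(2)))   # True -> raise CriticalError
```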
diff --git a/ceph-proxy/files/nagios/collect_ceph_status.sh b/ceph-proxy/files/nagios/collect_ceph_status.sh
new file mode 100755
index 00000000..dbdd3acf
--- /dev/null
+++ b/ceph-proxy/files/nagios/collect_ceph_status.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Copyright (C) 2014 Canonical
+# All Rights Reserved
+# Author: Jacek Nykis
+
+LOCK=/var/lock/ceph-status.lock
+lockfile-create -r2 --lock-name $LOCK > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+    exit 1
+fi
+trap "rm -f $LOCK > /dev/null 2>&1" exit
+
+DATA_DIR="/var/lib/nagios"
+if [ ! -d $DATA_DIR ]; then
+    mkdir -p $DATA_DIR
+fi
+
+ceph status >${DATA_DIR}/cat-ceph-status.txt
diff --git a/ceph-proxy/files/upstart/ceph-create-keys.conf b/ceph-proxy/files/upstart/ceph-create-keys.conf
new file mode 100644
index 00000000..6fb45818
--- /dev/null
+++ b/ceph-proxy/files/upstart/ceph-create-keys.conf
@@ -0,0 +1,8 @@
+description "Create Ceph client.admin key when possible"
+
+start on started ceph-mon
+stop on runlevel [!2345]
+
+task
+
+exec /usr/sbin/ceph-create-keys --cluster="${cluster:-ceph}" -i "${id:-$(hostname)}"
diff --git a/ceph-proxy/files/upstart/ceph-hotplug.conf b/ceph-proxy/files/upstart/ceph-hotplug.conf
new file mode 100644
index 00000000..d82e7c84
--- /dev/null
+++ b/ceph-proxy/files/upstart/ceph-hotplug.conf
@@ -0,0 +1,11 @@
+description "Ceph hotplug"
+
+start on block-device-added \
+    DEVTYPE=partition \
+    ID_PART_ENTRY_TYPE=4fbd7e29-9d25-41b8-afd0-062c0ceff05d
+stop on runlevel [!2345]
+
+task
+instance $DEVNAME
+
+exec /usr/sbin/ceph-disk activate --mount -- "$DEVNAME"
diff --git a/ceph-proxy/files/upstart/ceph-mon-all-starter.conf b/ceph-proxy/files/upstart/ceph-mon-all-starter.conf
new file mode 100644
index 00000000..f7188cb7
--- /dev/null
+++ b/ceph-proxy/files/upstart/ceph-mon-all-starter.conf
@@ -0,0 +1,20 @@
+description "Ceph MON (start all instances)"
+
+start on starting ceph-mon-all
+stop on runlevel [!2345]
+
+task
+
+script
+  set -e
+  # TODO what's the valid charset for cluster names and mon ids?
+  find /var/lib/ceph/mon/ -mindepth 1 -maxdepth 1 -regextype posix-egrep -regex '.*/[a-z0-9]+-[a-z0-9._-]+' -printf '%P\n' \
+  | while read f; do
+      if [ -e "/var/lib/ceph/mon/$f/done" ]; then
+          cluster="${f%%-*}"
+          id="${f#*-}"
+
+          initctl emit ceph-mon cluster="$cluster" id="$id"
+      fi
+  done
+end script
diff --git a/ceph-proxy/files/upstart/ceph-mon-all.conf b/ceph-proxy/files/upstart/ceph-mon-all.conf
new file mode 100644
index 00000000..006f2f20
--- /dev/null
+++ b/ceph-proxy/files/upstart/ceph-mon-all.conf
@@ -0,0 +1,4 @@
+description "Ceph monitor (all instances)"
+
+start on (local-filesystems and net-device-up IFACE!=lo)
+stop on runlevel [!2345]
diff --git a/ceph-proxy/files/upstart/ceph-mon.conf b/ceph-proxy/files/upstart/ceph-mon.conf
new file mode 100644
index 00000000..74a4b643
--- /dev/null
+++ b/ceph-proxy/files/upstart/ceph-mon.conf
@@ -0,0 +1,25 @@
+description "Ceph MON"
+
+start on ceph-mon
+stop on runlevel [!2345] or stopping ceph-mon-all
+
+respawn
+respawn limit 5 30
+
+pre-start script
+    set -e
+    test -x /usr/bin/ceph-mon || { stop; exit 0; }
+    test -d "/var/lib/ceph/mon/${cluster:-ceph}-$id" || { stop; exit 0; }
+
+    install -d -m0755 /var/run/ceph
+end script
+
+instance ${cluster:-ceph}/$id
+export cluster
+export id
+
+# this breaks oneiric
+#usage "cluster = name of cluster (defaults to 'ceph'); id = monitor instance id"
+
+exec /usr/bin/ceph-mon --cluster="${cluster:-ceph}" -i "$id" -f
+
diff --git a/ceph-proxy/hardening.yaml b/ceph-proxy/hardening.yaml
new file mode 100644
index 00000000..314bb385
--- /dev/null
+++ b/ceph-proxy/hardening.yaml
@@ -0,0 +1,5 @@
+# Overrides file for contrib.hardening. See README.hardening in
+# contrib.hardening for info on how to use this file.
+ssh:
+  server:
+    use_pam: 'yes'  # juju requires this
diff --git a/ceph-proxy/hooks/__init__.py b/ceph-proxy/hooks/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/ceph-proxy/hooks/ceph.py b/ceph-proxy/hooks/ceph.py
new file mode 100644
index 00000000..04485faf
--- /dev/null
+++ b/ceph-proxy/hooks/ceph.py
@@ -0,0 +1,645 @@
+#
+# Copyright 2012 Canonical Ltd.
+#
+# Authors:
+#  James Page
+#  Paul Collins
+#
+import json
+import subprocess
+import time
+import os
+import re
+import sys
+import collections
+
+from charmhelpers.contrib.storage.linux.utils import (
+    is_block_device,
+    zap_disk,
+    is_device_mounted,
+)
+from charmhelpers.core.host import (
+    mkdir,
+    chownr,
+    service_restart,
+    lsb_release,
+    cmp_pkgrevno,
+    CompareHostReleases,
+)
+from charmhelpers.core.hookenv import (
+    log,
+    DEBUG,
+    ERROR,
+    cached,
+    status_set,
+    WARNING,
+    config,
+)
+from charmhelpers.fetch import (
+    apt_cache
+)
+from utils import (
+    get_unit_hostname,
+)
+
+LEADER = 'leader'
+PEON = 'peon'
+QUORUM = [LEADER, PEON]
+
+PACKAGES = ['ceph', 'gdisk', 'btrfs-tools', 'xfsprogs']
+PACKAGES_FOCAL = ['ceph', 'gdisk', 'btrfs-progs', 'xfsprogs']
+
+
+def ceph_user():
+    if get_version() > 1:
+        return 'ceph'
+    else:
+        return "root"
+
+
+def get_local_mon_ids():
+    """List the monitor identifiers present on this machine.
+
+    This will list the /var/lib/ceph/mon/* directories and try
+    to split the ID off of the directory name and return it in
+    a list.
+
+    :return: list. A list of monitor identifiers
+    :raise: OSError if something goes wrong with listing the directory.
+    """
+    mon_ids = []
+    mon_path = os.path.join(os.sep, 'var', 'lib', 'ceph', 'mon')
+    if os.path.exists(mon_path):
+        try:
+            dirs = os.listdir(mon_path)
+            for mon_dir in dirs:
+                # Basically this takes everything after ceph- as the
+                # monitor ID
+                match = re.search('ceph-(?P<mon_id>.*)', mon_dir)
+                if match:
+                    mon_ids.append(match.group('mon_id'))
+        except OSError:
+            raise
+    return mon_ids
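[Editor's note: the named capture group restored above (`(?P<mon_id>.*)`, which the extraction had mangled) is what `match.group('mon_id')` relies on. A quick runnable illustration with a made-up directory name.]

```python
import re

mon_dir = 'ceph-juju-machine-0'  # example directory under /var/lib/ceph/mon
match = re.search('ceph-(?P<mon_id>.*)', mon_dir)
print(match.group('mon_id'))     # -> 'juju-machine-0'
```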
+
+
+def get_version():
+    """Derive Ceph release from an installed package."""
+    import apt_pkg as apt
+
+    cache = apt_cache()
+    package = "ceph"
+    try:
+        pkg = cache[package]
+    except Exception:
+        # the package is unknown to the current apt cache.
+        e = 'Could not determine version of package with no installation ' \
+            'candidate: %s' % package
+        error_out(e)
+
+    if not pkg.current_ver:
+        # package is known, but no version is currently installed.
+        e = 'Could not determine version of uninstalled package: %s' % package
+        error_out(e)
+
+    vers = apt.upstream_version(pkg.current_ver.ver_str)
+
+    # x.y match only for 20XX.X
+    # and ignore patch level for other packages
+    match = re.match(r'^(\d+)\.(\d+)', vers)
+
+    if match:
+        vers = match.group(0)
+    return float(vers)
+
+
+def error_out(msg):
+    log("FATAL ERROR: %s" % msg,
+        level=ERROR)
+    sys.exit(1)
+
+
+def is_quorum():
+    asok = "/var/run/ceph/ceph-mon.{}.asok".format(get_unit_hostname())
+    cmd = [
+        "sudo",
+        "-u",
+        ceph_user(),
+        "ceph",
+        "--admin-daemon",
+        asok,
+        "mon_status"
+    ]
+    if os.path.exists(asok):
+        try:
+            result = json.loads(subprocess.check_output(cmd).decode('utf-8'))
+        except subprocess.CalledProcessError:
+            return False
+        except ValueError:
+            # Non JSON response from mon_status
+            return False
+        if result['state'] in QUORUM:
+            return True
+        else:
+            return False
+    else:
+        return False
+
+
+def is_leader():
+    asok = "/var/run/ceph/ceph-mon.{}.asok".format(get_unit_hostname())
+    cmd = [
+        "sudo",
+        "-u",
+        ceph_user(),
+        "ceph",
+        "--admin-daemon",
+        asok,
+        "mon_status"
+    ]
+    if os.path.exists(asok):
+        try:
+            result = json.loads(subprocess.check_output(cmd).decode('utf-8'))
+        except subprocess.CalledProcessError:
+            return False
+        except ValueError:
+            # Non JSON response from mon_status
+            return False
+        if result['state'] == LEADER:
+            return True
+        else:
+            return False
+    else:
+        return False
+
+
+def wait_for_quorum():
+    while not is_quorum():
+        log("Waiting for quorum to be reached")
+        time.sleep(3)
+
+
+def add_bootstrap_hint(peer):
+    asok = "/var/run/ceph/ceph-mon.{}.asok".format(get_unit_hostname())
+    cmd = [
+        "sudo",
+        "-u",
+        ceph_user(),
+        "ceph",
+        "--admin-daemon",
+        asok,
+        "add_bootstrap_peer_hint",
+        peer
+    ]
+    if os.path.exists(asok):
+        # Ignore any errors for this call
+        subprocess.call(cmd)
+
+
+DISK_FORMATS = [
+    'xfs',
+    'ext4',
+    'btrfs'
+]
+
+
+def is_osd_disk(dev):
+    try:
+        info = (subprocess
+                .check_output(['sgdisk', '-i', '1', dev])
+                .decode('utf-8'))
+        info = info.split("\n")  # IGNORE:E1103
+        for line in info:
+            if line.startswith(
+                'Partition GUID code: 4FBD7E29-9D25-41B8-AFD0-062C0CEFF05D'
+            ):
+                return True
+    except subprocess.CalledProcessError:
+        pass
+    return False
+
+
+def start_osds(devices):
+    # Scan for ceph block devices
+    rescan_osd_devices()
+    if cmp_pkgrevno('ceph', "0.56.6") >= 0:
+        # Use ceph-disk activate for directory based OSDs
+        for dev_or_path in devices:
+            if os.path.exists(dev_or_path) and os.path.isdir(dev_or_path):
+                subprocess.check_call(['ceph-disk', 'activate', dev_or_path])
+
+
+def rescan_osd_devices():
+    cmd = [
+        'udevadm', 'trigger',
+        '--subsystem-match=block', '--action=add'
+    ]
+
+    subprocess.call(cmd)
+
+
+_bootstrap_keyring = "/var/lib/ceph/bootstrap-osd/ceph.keyring"
+
+
+def is_bootstrapped():
+    return os.path.exists(_bootstrap_keyring)
+
+
+def wait_for_bootstrap():
+    while not is_bootstrapped():
+        time.sleep(3)
+
+
+def import_osd_bootstrap_key(key):
+    if not os.path.exists(_bootstrap_keyring):
+        cmd = [
+            "sudo",
+            "-u",
+            ceph_user(),
+            'ceph-authtool',
+            _bootstrap_keyring,
+            '--create-keyring',
+            '--name=client.bootstrap-osd',
+            '--add-key={}'.format(key)
+        ]
+        subprocess.check_call(cmd)
+
+
+def generate_monitor_secret():
+    cmd = [
+        'ceph-authtool',
+        '/dev/stdout',
+        '--name=mon.',
+        '--gen-key'
+    ]
+    res = subprocess.check_output(cmd).decode('utf-8')
+
+    return "{}==".format(res.split('=')[1].strip())
command osd create ...',
+        'allow command osd crush set ...',
+        r'allow command auth add * osd allow\ * mon allow\ rwx',
+        'allow command mon getmap'
+    ]
+}
+
+_osd_bootstrap_caps_profile = {
+    'mon': [
+        'allow profile bootstrap-osd'
+    ]
+}
+
+
+def parse_key(raw_key):
+    # get-or-create appears to have different output depending
+    # on whether it's 'get' or 'create'
+    # 'create' just returns the key, 'get' is more verbose and
+    # needs parsing
+    key = None
+    if len(raw_key.splitlines()) == 1:
+        key = raw_key
+    else:
+        for element in raw_key.splitlines():
+            if 'key' in element:
+                key = element.split(' = ')[1].strip()  # IGNORE:E1103
+    return key
+
+
+def get_osd_bootstrap_key():
+    try:
+        # Attempt to get/create a key using the OSD bootstrap profile first
+        key = get_named_key('bootstrap-osd',
+                            _osd_bootstrap_caps_profile)
+    except Exception:
+        # If that fails try with the older style permissions
+        key = get_named_key('bootstrap-osd',
+                            _osd_bootstrap_caps)
+    return key
+
+
+_radosgw_keyring = "/etc/ceph/keyring.rados.gateway"
+
+
+def import_radosgw_key(key):
+    if not os.path.exists(_radosgw_keyring):
+        cmd = [
+            "sudo",
+            "-u",
+            ceph_user(),
+            'ceph-authtool',
+            _radosgw_keyring,
+            '--create-keyring',
+            '--name=client.radosgw.gateway',
+            '--add-key={}'.format(key)
+        ]
+        subprocess.check_call(cmd)
+
+
+# OSD caps taken from ceph-create-keys
+_radosgw_caps = {
+    'mon': ['allow rw'],
+    'osd': ['allow rwx']
+}
+_upgrade_caps = {
+    'mon': ['allow rwx']
+}
+
+
+def get_radosgw_key(name='radosgw.gateway'):
+    return get_named_key(name, _radosgw_caps)
+
+
+def get_mds_key(name):
+    return get_named_entity_key(entity='mds',
+                                name=name,
+                                caps=mds_caps)
+
+
+_default_caps = collections.OrderedDict([
+    ('mon', ['allow r',
+             'allow command "osd blacklist"']),
+    ('osd', ['allow rwx']),
+])
+
+admin_caps = {
+    'mds': ['allow'],
+    'mon': ['allow *'],
+    'osd': ['allow *']
+}
+
+mds_caps = collections.OrderedDict([
+    ('osd', ['allow *']),
+    ('mds', ['allow']),
+    ('mon', ['allow rwx']),
+])
+
+osd_upgrade_caps = {
+    'mon': ['allow command "config-key"',
+            'allow command "osd tree"',
+            'allow command "config-key list"',
+            'allow command "config-key put"',
+            'allow command "config-key get"',
+            'allow command "config-key exists"',
+            ]
+}
+
+
+def get_upgrade_key():
+    return get_named_key('upgrade-osd', _upgrade_caps)
+
+
+def _config_user_key(name):
+    user_keys_list = config('user-keys')
+    if user_keys_list:
+        for ukpair in user_keys_list.split(' '):
+            uk = ukpair.split(':')
+            if len(uk) == 2:
+                user_type, k = uk
+                t, u = user_type.split('.')
+                if u == name:
+                    return k
+
+
+def get_named_entity_key(name, caps=None, pool_list=None,
+                         entity='client'):
+    """Retrieve a specific named cephx key.
+
+    :param name: String Name of key to get. EXACT MATCH
+    :param caps: dict of cephx capabilities
+    :param pool_list: The list of pools to give access to
+    :param entity: String Name of type to get.
+    :returns: Returns a cephx key
+    """
+    key_name = '{}.{}'.format(entity, name)
+    try:
+        # Does the key already exist?
+        output = str(subprocess.check_output(
+            [
+                'sudo',
+                '-u', ceph_user(),
+                'ceph',
+                '--name', config('admin-user'),
+                '--keyring',
+                '/var/lib/ceph/mon/ceph-{}/keyring'.format(
+                    get_unit_hostname()
+                ),
+                'auth',
+                'get',
+                key_name,
+            ]).decode('UTF-8')).strip()
+        # NOTE(jamespage):
+        # Apply any changes to key capabilities, dealing with
+        # upgrades which require new caps for operation.
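+        # (Illustration: for an existing 'client.radosgw.gateway' key this
+        # re-issues 'ceph auth caps' with the requested capability set, so
+        # a charm upgrade can grant newly-required permissions in place.)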
+ upgrade_key_caps(key_name, + caps or _default_caps, + pool_list) + return parse_key(output) + except subprocess.CalledProcessError: + # Couldn't get the key, time to create it! + log("Creating new key for {}".format(key_name), level=DEBUG) + + caps = caps or _default_caps + cmd = [ + "sudo", + "-u", + ceph_user(), + 'ceph', + '--name', config('admin-user'), + '--keyring', + '/var/lib/ceph/mon/ceph-{}/keyring'.format( + get_unit_hostname() + ), + 'auth', 'get-or-create', key_name, + ] + # Add capabilities + for subsystem, subcaps in caps.items(): + if subsystem == 'osd': + if pool_list: + # This will output a string similar to: + # "pool=rgw pool=rbd pool=something" + pools = " ".join(['pool={0}'.format(i) for i in pool_list]) + subcaps[0] = subcaps[0] + " " + pools + cmd.extend([subsystem, '; '.join(subcaps)]) + + log("Calling check_output: {}".format(cmd), level=DEBUG) + return parse_key(str(subprocess + .check_output(cmd) + .decode('UTF-8')) + .strip()) # IGNORE:E1103 + + +def get_named_key(name, caps=None, pool_list=None): + """Retrieve a specific named cephx key. + + :param name: String Name of key to get. + :param caps: dict of cephx capabilities + :param pool_list: The list of pools to give access to + :returns: Returns a cephx key + """ + return get_named_entity_key(name, caps, pool_list, entity='client') + + +def upgrade_key_caps(key, caps, pool_list=None): + """ Upgrade key to have capabilities caps """ + if not is_leader(): + # Not the MON leader OR not clustered + return + cmd = [ + "sudo", "-u", ceph_user(), 'ceph', 'auth', 'caps', key + ] + for subsystem, subcaps in caps.items(): + if subsystem == 'osd': + if pool_list: + # This will output a string similar to: + # "pool=rgw pool=rbd pool=something" + pools = " ".join(['pool={0}'.format(i) for i in pool_list]) + subcaps[0] = subcaps[0] + " " + pools + cmd.extend([subsystem, '; '.join(subcaps)]) + subprocess.check_call(cmd) + + +@cached +def systemd(): + return CompareHostReleases(lsb_release()['DISTRIB_CODENAME']) >= 'vivid' + + +def bootstrap_monitor_cluster(secret): + hostname = get_unit_hostname() + path = '/var/lib/ceph/mon/ceph-{}'.format(hostname) + done = '{}/done'.format(path) + if systemd(): + init_marker = '{}/systemd'.format(path) + else: + init_marker = '{}/upstart'.format(path) + + keyring = '/var/lib/ceph/tmp/{}.mon.keyring'.format(hostname) + + if os.path.exists(done): + log('bootstrap_monitor_cluster: mon already initialized.') + else: + # Ceph >= 0.61.3 needs this for ceph-mon fs creation + mkdir('/var/run/ceph', owner=ceph_user(), + group=ceph_user(), perms=0o755) + mkdir(path, owner=ceph_user(), group=ceph_user()) + # end changes for Ceph >= 0.61.3 + try: + subprocess.check_call(['ceph-authtool', keyring, + '--create-keyring', '--name=mon.', + '--add-key={}'.format(secret), + '--cap', 'mon', 'allow *']) + + subprocess.check_call(['ceph-mon', '--mkfs', + '-i', hostname, + '--keyring', keyring]) + chownr(path, ceph_user(), ceph_user()) + with open(done, 'w'): + pass + with open(init_marker, 'w'): + pass + + if systemd(): + subprocess.check_call(['systemctl', 'enable', 'ceph-mon']) + service_restart('ceph-mon') + else: + service_restart('ceph-mon-all') + except Exception: + raise + finally: + os.unlink(keyring) + + +def update_monfs(): + hostname = get_unit_hostname() + monfs = '/var/lib/ceph/mon/ceph-{}'.format(hostname) + if systemd(): + init_marker = '{}/systemd'.format(monfs) + else: + init_marker = '{}/upstart'.format(monfs) + if os.path.exists(monfs) and not os.path.exists(init_marker): + # Mark mon as 
managed by upstart so that
+        # it gets started correctly on reboots
+        with open(init_marker, 'w'):
+            pass
+
+
+def osdize(dev, osd_format, osd_journal, reformat_osd=False,
+           ignore_errors=False):
+    if dev.startswith('/dev'):
+        osdize_dev(dev, osd_format, osd_journal, reformat_osd, ignore_errors)
+    else:
+        osdize_dir(dev)
+
+
+def osdize_dev(dev, osd_format, osd_journal, reformat_osd=False,
+               ignore_errors=False):
+    if not os.path.exists(dev):
+        log('Path {} does not exist - bailing'.format(dev))
+        return
+
+    if not is_block_device(dev):
+        log('Path {} is not a block device - bailing'.format(dev))
+        return
+
+    if is_osd_disk(dev) and not reformat_osd:
+        log('Looks like {} is already an OSD, skipping.'.format(dev))
+        return
+
+    if is_device_mounted(dev):
+        log('Looks like {} is in use, skipping.'.format(dev))
+        return
+
+    status_set('maintenance', 'Initializing device {}'.format(dev))
+    cmd = ['ceph-disk', 'prepare']
+    # Later versions of ceph support more options
+    if cmp_pkgrevno('ceph', '0.48.3') >= 0:
+        if osd_format:
+            cmd.append('--fs-type')
+            cmd.append(osd_format)
+        if reformat_osd:
+            cmd.append('--zap-disk')
+        cmd.append(dev)
+        if osd_journal and os.path.exists(osd_journal):
+            cmd.append(osd_journal)
+    else:
+        # Just provide the device - no other options
+        # for older versions of ceph
+        cmd.append(dev)
+        if reformat_osd:
+            zap_disk(dev)
+
+    try:
+        subprocess.check_call(cmd)
+    except subprocess.CalledProcessError as e:
+        if ignore_errors:
+            log('Unable to initialize device: {}'.format(dev), WARNING)
+        else:
+            log('Unable to initialize device: {}'.format(dev), ERROR)
+            raise e
+
+
+def osdize_dir(path):
+    if os.path.exists(os.path.join(path, 'upstart')):
+        log('Path {} is already configured as an OSD - bailing'.format(path))
+        return
+
+    if cmp_pkgrevno('ceph', "0.56.6") < 0:
+        msg = 'Unable to use directories for OSDs with ceph < 0.56.6'
+        log(msg, level=ERROR)
+        # A bare 'raise' here would have no active exception to re-raise,
+        # so raise an explicit error instead.
+        raise RuntimeError(msg)
+
+    mkdir(path, owner=ceph_user(), group=ceph_user(), perms=0o755)
+    chownr('/var/lib/ceph', ceph_user(), ceph_user())
+    cmd = [
+        'sudo', '-u', ceph_user(),
+        'ceph-disk',
+        'prepare',
+        '--data-dir',
+        path
+    ]
+    subprocess.check_call(cmd)
+
+
+def filesystem_mounted(fs):
+    return subprocess.call(['grep', '-wqs', fs, '/proc/mounts']) == 0
diff --git a/ceph-proxy/hooks/ceph_hooks.py b/ceph-proxy/hooks/ceph_hooks.py
new file mode 100755
index 00000000..682c7b32
--- /dev/null
+++ b/ceph-proxy/hooks/ceph_hooks.py
@@ -0,0 +1,358 @@
+#!/usr/bin/env python3
+
+#
+# Copyright 2012 Canonical Ltd.
+# +# Authors: +# Paul Collins +# James Page +# + +import glob +import os +import shutil +import sys + + +_path = os.path.dirname(os.path.realpath(__file__)) +_root = os.path.abspath(os.path.join(_path, '..')) +_lib = os.path.abspath(os.path.join(_path, '../lib')) + + +def _add_path(path): + if path not in sys.path: + sys.path.insert(1, path) + + +_add_path(_root) +_add_path(_lib) + +import ceph +from charmhelpers.core.hookenv import ( + log, + DEBUG, + INFO, + config, + is_leader, + relation_ids, + related_units, + relation_get, + relation_set, + remote_unit, + Hooks, UnregisteredHookError, + service_name, + status_set,) +from charmhelpers.core.host import ( + cmp_pkgrevno, + CompareHostReleases, + lsb_release, + mkdir, +) +from charmhelpers.fetch import ( + apt_install, + apt_update, + filter_installed_packages, + add_source +) +from charmhelpers.payload.execd import execd_preinstall +from charmhelpers.contrib.openstack.alternatives import install_alternative +from charmhelpers.contrib.openstack.utils import ( + clear_unit_paused, + clear_unit_upgrading, + is_unit_upgrading_set, + set_unit_paused, + set_unit_upgrading, +) + +from charmhelpers.core.templating import render + +from charms_ceph.broker import ( + process_requests +) + +from utils import get_unit_hostname + +hooks = Hooks() + + +def install_upstart_scripts(): + # Only install upstart configurations for older versions + if cmp_pkgrevno('ceph', "0.55.1") < 0: + for x in glob.glob('files/upstart/*.conf'): + shutil.copy(x, '/etc/init/') + + +@hooks.hook('install.real') +def install(): + execd_preinstall() + package_install() + install_upstart_scripts() + + +def package_install(): + add_source(config('source'), config('key')) + apt_update(fatal=True) + _release = lsb_release()['DISTRIB_CODENAME'].lower() + if CompareHostReleases(_release) >= "focal": + _packages = ceph.PACKAGES_FOCAL + else: + _packages = ceph.PACKAGES + apt_install(packages=_packages, fatal=True) + + +def emit_cephconf(): + cephcontext = { + 'auth_supported': config('auth-supported'), + 'mon_hosts': config('monitor-hosts'), + 'fsid': config('fsid'), + 'use_syslog': str(config('use-syslog')).lower(), + 'loglevel': config('loglevel'), + } + + # Install ceph.conf as an alternative to support + # co-existence with other charms that write this file + charm_ceph_conf = "/var/lib/charm/{}/ceph.conf".format(service_name()) + mkdir(os.path.dirname(charm_ceph_conf), owner=ceph.ceph_user(), + group=ceph.ceph_user()) + render('ceph.conf', charm_ceph_conf, cephcontext, perms=0o644) + install_alternative('ceph.conf', '/etc/ceph/ceph.conf', + charm_ceph_conf, 100) + + keyring_template = 'ceph.keyring' + keyring = 'ceph.{}.keyring'.format(config('admin-user')) + keyring_path = '/etc/ceph/' + keyring + ctx = { + 'admin_key': config('admin-key'), + 'admin_user': config('admin-user'), + } + user = ceph.ceph_user() + render(keyring_template, keyring_path, ctx, owner=user, perms=0o600) + + keyring = 'keyring' + keyring_path = ( + '/var/lib/ceph/mon/ceph-' + + get_unit_hostname() + + '/' + + keyring) + render('mon.keyring', keyring_path, ctx, owner=user, perms=0o600) + + notify_radosgws() + notify_client() + notify_cephfs_mds() + + +@hooks.hook('config-changed') +def config_changed(): + c = config() + if c.previous('source') != config('source') or \ + c.previous('key') != config('key'): + package_install() + emit_cephconf() + + +def notify_radosgws(): + for relid in relation_ids('radosgw'): + for unit in related_units(relid): + radosgw_relation(relid=relid, unit=unit) + + +def 
notify_client():
+    for relid in relation_ids('client'):
+        for unit in related_units(relid):
+            client_relation_joined(relid=relid, unit=unit)
+
+
+def notify_cephfs_mds():
+    for relid in relation_ids('mds'):
+        for unit in related_units(relid):
+            mds_relation_joined(relid=relid, unit=unit)
+
+
+@hooks.hook('radosgw-relation-changed')
+@hooks.hook('radosgw-relation-joined')
+def radosgw_relation(relid=None, unit=None):
+    # Install radosgw for admin tools
+    apt_install(packages=filter_installed_packages(['radosgw']))
+    if not unit:
+        unit = remote_unit()
+
+    # NOTE: radosgw needs usable OSD storage, so defer key
+    # provision until OSD units are detected.
+    if ready():
+        log('mon cluster in quorum and osds related '
+            '- providing radosgw with keys')
+        ceph_addrs = config('monitor-hosts')
+        data = {
+            'fsid': config('fsid'),
+            'auth': config('auth-supported'),
+            'ceph-public-address': ceph_addrs,
+        }
+        key_name = relation_get('key_name', unit=unit, rid=relid)
+        if key_name:
+            # New style, per unit keys
+            data['{}_key'.format(key_name)] = (
+                ceph.get_radosgw_key(name=key_name)
+            )
+        else:
+            # Old style global radosgw key
+            data['radosgw_key'] = ceph.get_radosgw_key()
+
+        settings = relation_get(rid=relid, unit=unit) or {}
+        # Process broker request(s).
+        if 'broker_req' in settings:
+            rsp = process_requests(settings['broker_req'])
+            unit_id = unit.replace('/', '-')
+            unit_response_key = 'broker-rsp-' + unit_id
+            data[unit_response_key] = rsp
+
+        log('relation_set (%s): %s' % (relid, str(data)), level=DEBUG)
+        relation_set(relation_id=relid, relation_settings=data)
+    else:
+        log('FSID or admin key not provided, please configure them')
+
+
+@hooks.hook('mds-relation-joined')
+@hooks.hook('mds-relation-changed')
+def mds_relation_joined(relid=None, unit=None):
+    if not ready():
+        log('MDS: FSID or admin key not provided, please configure them',
+            level=INFO)
+        return
+
+    log('ceph-proxy config ok - providing mds client with keys')
+    if not unit:
+        unit = remote_unit()
+
+    mds_name = relation_get(attribute='mds-name',
+                            rid=relid, unit=unit)
+    ceph_addrs = config('monitor-hosts')
+    data = {
+        'fsid': config('fsid'),
+        'auth': config('auth-supported'),
+        'ceph-public-address': ceph_addrs,
+    }
+    if mds_name:
+        data['{}_mds_key'.format(mds_name)] = (
+            ceph.get_mds_key(name=mds_name)
+        )
+
+    settings = relation_get(rid=relid, unit=unit) or {}
+    if 'broker_req' in settings:
+        rsp = process_requests(settings['broker_req'])
+        unit_id = unit.replace('/', '-')
+        unit_response_key = 'broker-rsp-' + unit_id
+        data[unit_response_key] = rsp
+    log('MDS: relation_set (%s): %s' % (relid, str(data)), level=DEBUG)
+    relation_set(relation_id=relid, relation_settings=data)
+
+
+@hooks.hook('client-relation-joined')
+def client_relation_joined(relid=None, unit=None):
+    if ready():
+        service_name = None
+        if relid is None:
+            units = [remote_unit()]
+            service_name = units[0].split('/')[0]
+        else:
+            units = related_units(relid)
+            if len(units) > 0:
+                service_name = units[0].split('/')[0]
+                if unit is None:
+                    unit = units[0]
+        if service_name is not None:
+            ceph_addrs = config('monitor-hosts')
+            data = {'key': ceph.get_named_key(service_name),
+                    'auth': config('auth-supported'),
+                    'ceph-public-address': ceph_addrs}
+
+            settings = relation_get(rid=relid, unit=unit) or {}
+            data_update = {}
+            if 'broker_req' in settings:
+                rsp = process_requests(settings['broker_req'])
+                unit_id = unit.replace('/', '-')
+                unit_response_key = 'broker-rsp-' + unit_id
+                data_update[unit_response_key] = rsp
+            data.update(data_update)
+
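+            # At this point 'data' carries the named client key plus any
+            # broker response, e.g. (illustrative values only):
+            #   {'key': 'AQ...', 'auth': 'cephx',
+            #    'ceph-public-address': '10.0.0.1',
+            #    'broker-rsp-glance-0': '...'}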
+            log('relation_set (%s): %s' % (relid, str(data)), level=DEBUG)
+            relation_set(relation_id=relid,
+                         relation_settings=data)
+    else:
+        log('FSID or admin key not provided, please configure them')
+
+
+@hooks.hook('client-relation-changed')
+def client_relation_changed():
+    """Process broker requests from ceph client relations."""
+    if ready():
+        settings = relation_get() or {}
+        if 'broker_req' in settings:
+            # the request is processed only by the leader as reported by juju
+            if not is_leader():
+                log("Not leader - ignoring broker request", level=DEBUG)
+            else:
+                rsp = process_requests(settings['broker_req'])
+                unit_id = remote_unit().replace('/', '-')
+                unit_response_key = 'broker-rsp-' + unit_id
+                # broker_rsp is being left for backward compatibility,
+                # unit_response_key supersedes it
+                data = {
+                    'broker_rsp': rsp,
+                    unit_response_key: rsp,
+                }
+                log('relation_set: %s' % str(data), level=DEBUG)
+                relation_set(relation_settings=data)
+    else:
+        log('FSID or admin key not provided, please configure them')
+
+
+def ready():
+    return config('fsid') and config('admin-key')
+
+
+def assess_status():
+    '''Assess status of current unit'''
+    if is_unit_upgrading_set():
+        status_set("blocked",
+                   "Ready for do-release-upgrade and reboot. "
+                   "Set complete when finished.")
+        return
+
+    if ready():
+        status_set('active', 'Ready to proxy settings')
+    else:
+        status_set('blocked', 'Ensure FSID and admin-key are set')
+
+
+@hooks.hook('update-status')
+def update_status():
+    log('Updating status.')
+
+
+@hooks.hook('pre-series-upgrade')
+def pre_series_upgrade():
+    log("Running prepare series upgrade hook", "INFO")
+    # NOTE: The Ceph packages handle the series upgrade gracefully.
+    # In order to indicate the step of the series upgrade process for
+    # administrators and automated scripts, the charm sets the paused and
+    # upgrading states.
+    set_unit_paused()
+    set_unit_upgrading()
+
+
+@hooks.hook('post-series-upgrade')
+def post_series_upgrade():
+    log("Running complete series upgrade hook", "INFO")
+    # In order to indicate the step of the series upgrade process for
+    # administrators and automated scripts, the charm clears the paused and
+    # upgrading states.
+    clear_unit_paused()
+    clear_unit_upgrading()
+
+
+if __name__ == '__main__':
+    try:
+        hooks.execute(sys.argv)
+    except UnregisteredHookError as e:
+        log('Unknown hook {} - skipping.'.format(e))
+    assess_status()
diff --git a/ceph-proxy/hooks/client-relation-changed b/ceph-proxy/hooks/client-relation-changed
new file mode 120000
index 00000000..52d96630
--- /dev/null
+++ b/ceph-proxy/hooks/client-relation-changed
@@ -0,0 +1 @@
+ceph_hooks.py
\ No newline at end of file
diff --git a/ceph-proxy/hooks/client-relation-joined b/ceph-proxy/hooks/client-relation-joined
new file mode 120000
index 00000000..52d96630
--- /dev/null
+++ b/ceph-proxy/hooks/client-relation-joined
@@ -0,0 +1 @@
+ceph_hooks.py
\ No newline at end of file
diff --git a/ceph-proxy/hooks/config-changed b/ceph-proxy/hooks/config-changed
new file mode 120000
index 00000000..52d96630
--- /dev/null
+++ b/ceph-proxy/hooks/config-changed
@@ -0,0 +1 @@
+ceph_hooks.py
\ No newline at end of file
diff --git a/ceph-proxy/hooks/install b/ceph-proxy/hooks/install
new file mode 100755
index 00000000..869ee204
--- /dev/null
+++ b/ceph-proxy/hooks/install
@@ -0,0 +1,21 @@
+#!/bin/bash -e
+# Wrapper to deal with newer Ubuntu versions that don't have py2 installed
+# by default.
+
+declare -a DEPS=('apt' 'netaddr' 'netifaces' 'pip' 'yaml')
+
+check_and_install() {
+    pkg="${1}-${2}"
+    if ! dpkg -s ${pkg} > /dev/null 2>&1; then
+        apt-get -y install ${pkg}
+    fi
+}
+
+PYTHON="python3"
+
+for dep in ${DEPS[@]}; do
+    check_and_install ${PYTHON} ${dep}
+done
+
+./hooks/install_deps
+exec ./hooks/install.real
diff --git a/ceph-proxy/hooks/install.real b/ceph-proxy/hooks/install.real
new file mode 120000
index 00000000..52d96630
--- /dev/null
+++ b/ceph-proxy/hooks/install.real
@@ -0,0 +1 @@
+ceph_hooks.py
\ No newline at end of file
diff --git a/ceph-proxy/hooks/install_deps b/ceph-proxy/hooks/install_deps
new file mode 100755
index 00000000..c480f29e
--- /dev/null
+++ b/ceph-proxy/hooks/install_deps
@@ -0,0 +1,18 @@
+#!/bin/bash -e
+# Wrapper to ensure that python dependencies are installed before we get into
+# the python part of the hook execution
+
+declare -a DEPS=('dnspython' 'pyudev')
+
+check_and_install() {
+    pkg="${1}-${2}"
+    if ! dpkg -s ${pkg} > /dev/null 2>&1; then
+        apt-get -y install ${pkg}
+    fi
+}
+
+PYTHON="python3"
+
+for dep in ${DEPS[@]}; do
+    check_and_install ${PYTHON} ${dep}
+done
diff --git a/ceph-proxy/hooks/mds-relation-changed b/ceph-proxy/hooks/mds-relation-changed
new file mode 120000
index 00000000..52d96630
--- /dev/null
+++ b/ceph-proxy/hooks/mds-relation-changed
@@ -0,0 +1 @@
+ceph_hooks.py
\ No newline at end of file
diff --git a/ceph-proxy/hooks/mds-relation-joined b/ceph-proxy/hooks/mds-relation-joined
new file mode 120000
index 00000000..52d96630
--- /dev/null
+++ b/ceph-proxy/hooks/mds-relation-joined
@@ -0,0 +1 @@
+ceph_hooks.py
\ No newline at end of file
diff --git a/ceph-proxy/hooks/post-series-upgrade b/ceph-proxy/hooks/post-series-upgrade
new file mode 120000
index 00000000..52d96630
--- /dev/null
+++ b/ceph-proxy/hooks/post-series-upgrade
@@ -0,0 +1 @@
+ceph_hooks.py
\ No newline at end of file
diff --git a/ceph-proxy/hooks/pre-series-upgrade b/ceph-proxy/hooks/pre-series-upgrade
new file mode 120000
index 00000000..52d96630
--- /dev/null
+++ b/ceph-proxy/hooks/pre-series-upgrade
@@ -0,0 +1 @@
+ceph_hooks.py
\ No newline at end of file
diff --git a/ceph-proxy/hooks/radosgw-relation-changed b/ceph-proxy/hooks/radosgw-relation-changed
new file mode 120000
index 00000000..52d96630
--- /dev/null
+++ b/ceph-proxy/hooks/radosgw-relation-changed
@@ -0,0 +1 @@
+ceph_hooks.py
\ No newline at end of file
diff --git a/ceph-proxy/hooks/radosgw-relation-joined b/ceph-proxy/hooks/radosgw-relation-joined
new file mode 120000
index 00000000..52d96630
--- /dev/null
+++ b/ceph-proxy/hooks/radosgw-relation-joined
@@ -0,0 +1 @@
+ceph_hooks.py
\ No newline at end of file
diff --git a/ceph-proxy/hooks/update-status b/ceph-proxy/hooks/update-status
new file mode 120000
index 00000000..52d96630
--- /dev/null
+++ b/ceph-proxy/hooks/update-status
@@ -0,0 +1 @@
+ceph_hooks.py
\ No newline at end of file
diff --git a/ceph-proxy/hooks/upgrade-charm b/ceph-proxy/hooks/upgrade-charm
new file mode 100755
index 00000000..c32fb38c
--- /dev/null
+++ b/ceph-proxy/hooks/upgrade-charm
@@ -0,0 +1,6 @@
+#!/bin/bash -e
+# Wrapper to ensure that old python bytecode isn't hanging around
+# after we upgrade the charm with newer libraries
+rm -rf **/*.pyc
+
+./hooks/install_deps
diff --git a/ceph-proxy/hooks/utils.py b/ceph-proxy/hooks/utils.py
new file mode 100644
index 00000000..d1cf5009
--- /dev/null
+++ b/ceph-proxy/hooks/utils.py
@@ -0,0 +1,149 @@
+
+#
+# Copyright 2012 Canonical Ltd.
+# +# Authors: +# James Page +# Paul Collins +# + +import socket +import re +from charmhelpers.core.hookenv import ( + unit_get, + cached, + config, + status_set, + network_get_primary_address, + log, + DEBUG, +) +from charmhelpers.fetch import ( + apt_install, + filter_installed_packages, +) + +from charmhelpers.core.host import ( + lsb_release, + CompareHostReleases, +) + +from charmhelpers.contrib.network.ip import ( + get_address_in_network, + get_ipv6_addr, +) + +try: + import dns.resolver +except ImportError: + apt_install(filter_installed_packages(['python-dnspython']), + fatal=True) + import dns.resolver + + +def enable_pocket(pocket): + apt_sources = "/etc/apt/sources.list" + with open(apt_sources, "rt") as sources: + lines = sources.readlines() + with open(apt_sources, "wt") as sources: + for line in lines: + if pocket in line: + sources.write(re.sub('^# deb', 'deb', line)) + else: + sources.write(line) + + +@cached +def get_unit_hostname(): + return socket.gethostname() + + +@cached +def get_host_ip(hostname=None): + if config('prefer-ipv6'): + return get_ipv6_addr()[0] + + hostname = hostname or unit_get('private-address') + try: + # Test to see if already an IPv4 address + socket.inet_aton(hostname) + return hostname + except socket.error: + # This may throw an NXDOMAIN exception; in which case + # things are badly broken so just let it kill the hook + answers = dns.resolver.query(hostname, 'A') + if answers: + return answers[0].address + + +@cached +def get_public_addr(): + if config('ceph-public-network'): + return get_network_addrs('ceph-public-network')[0] + + try: + return network_get_primary_address('public') + except NotImplementedError: + log("network-get not supported", DEBUG) + + return get_host_ip() + + +@cached +def get_cluster_addr(): + if config('ceph-cluster-network'): + return get_network_addrs('ceph-cluster-network')[0] + + try: + return network_get_primary_address('cluster') + except NotImplementedError: + log("network-get not supported", DEBUG) + + return get_host_ip() + + +def get_networks(config_opt='ceph-public-network'): + """Get all configured networks from provided config option. + + If public network(s) are provided, go through them and return those for + which we have an address configured. + """ + networks = config(config_opt) + if networks: + networks = networks.split() + return [n for n in networks if get_address_in_network(n)] + + return [] + + +def get_network_addrs(config_opt): + """Get all configured public networks addresses. + + If public network(s) are provided, go through them and return the + addresses we have configured on any of those networks. 
+    """
+    addrs = []
+    networks = config(config_opt)
+    if networks:
+        networks = networks.split()
+        addrs = [get_address_in_network(n) for n in networks]
+        addrs = [a for a in addrs if a]
+
+    if not addrs:
+        if networks:
+            msg = ("Could not find an address on any of '%s' - resolve this "
+                   "error to retry" % (networks))
+            status_set('blocked', msg)
+            raise Exception(msg)
+        else:
+            return [get_host_ip()]
+
+    return addrs
+
+
+def assert_charm_supports_ipv6():
+    """Check whether the charm is able to support IPv6."""
+    _release = lsb_release()['DISTRIB_CODENAME'].lower()
+    if CompareHostReleases(_release) < "trusty":
+        raise Exception("IPv6 is not supported in the charms for Ubuntu "
+                        "versions less than Trusty 14.04")
diff --git a/ceph-proxy/icon.svg b/ceph-proxy/icon.svg
new file mode 100644
index 00000000..e9383990
--- /dev/null
+++ b/ceph-proxy/icon.svg
@@ -0,0 +1,311 @@
+[311 lines of SVG icon markup omitted; only the "image/svg+xml" metadata
+survived extraction]
diff --git a/ceph-proxy/lib/charms_ceph/__init__.py b/ceph-proxy/lib/charms_ceph/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/ceph-proxy/lib/charms_ceph/broker.py b/ceph-proxy/lib/charms_ceph/broker.py
new file mode 100644
index 00000000..7ca96922
--- /dev/null
+++ b/ceph-proxy/lib/charms_ceph/broker.py
@@ -0,0 +1,980 @@
+# Copyright 2016 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import collections
+import json
+import os
+
+from subprocess import check_call, check_output, CalledProcessError
+from tempfile import NamedTemporaryFile
+
+from charms_ceph.utils import (
+    get_cephfs,
+    get_osd_weight
+)
+from charms_ceph.crush_utils import Crushmap
+
+from charmhelpers.core.hookenv import (
+    log,
+    DEBUG,
+    INFO,
+    ERROR,
+)
+from charmhelpers.contrib.storage.linux.ceph import (
+    create_erasure_profile,
+    delete_pool,
+    erasure_profile_exists,
+    get_osds,
+    monitor_key_get,
+    monitor_key_set,
+    pool_exists,
+    pool_set,
+    remove_pool_snapshot,
+    rename_pool,
+    snapshot_pool,
+    validator,
+    ErasurePool,
+    BasePool,
+    ReplicatedPool,
+)
+
+# This comes from http://docs.ceph.com/docs/master/rados/operations/pools/
+# This should do a decent job of preventing people from passing in bad values.
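+# (Illustration, not part of the upstream comment: a broker op such as
+# {'op': 'set-pool-value', 'name': 'rbd', 'key': 'size', 'value': 3} is
+# checked against the [type, valid-range] spec below before being applied.)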
+# It will give a useful error message + +POOL_KEYS = { + # "Ceph Key Name": [Python type, [Valid Range]] + "size": [int], + "min_size": [int], + "crash_replay_interval": [int], + "pgp_num": [int], # = or < pg_num + "crush_ruleset": [int], + "hashpspool": [bool], + "nodelete": [bool], + "nopgchange": [bool], + "nosizechange": [bool], + "write_fadvise_dontneed": [bool], + "noscrub": [bool], + "nodeep-scrub": [bool], + "hit_set_type": [str, ["bloom", "explicit_hash", + "explicit_object"]], + "hit_set_count": [int, [1, 1]], + "hit_set_period": [int], + "hit_set_fpp": [float, [0.0, 1.0]], + "cache_target_dirty_ratio": [float], + "cache_target_dirty_high_ratio": [float], + "cache_target_full_ratio": [float], + "target_max_bytes": [int], + "target_max_objects": [int], + "cache_min_flush_age": [int], + "cache_min_evict_age": [int], + "fast_read": [bool], + "allow_ec_overwrites": [bool], + "compression_mode": [str, ["none", "passive", "aggressive", "force"]], + "compression_algorithm": [str, ["lz4", "snappy", "zlib", "zstd"]], + "compression_required_ratio": [float, [0.0, 1.0]], + "crush_rule": [str], +} + +CEPH_BUCKET_TYPES = [ + 'osd', + 'host', + 'chassis', + 'rack', + 'row', + 'pdu', + 'pod', + 'room', + 'datacenter', + 'region', + 'root' +] + + +def decode_req_encode_rsp(f): + """Decorator to decode incoming requests and encode responses.""" + + def decode_inner(req): + if isinstance(req, bytes): + req = req.decode('utf-8') + return json.dumps(f(json.loads(req))) + + return decode_inner + + +@decode_req_encode_rsp +def process_requests(reqs): + """Process Ceph broker request(s). + + This is a versioned api. API version must be supplied by the client making + the request. + + :param reqs: dict of request parameters. + :returns: dict. exit-code and reason if not 0 + """ + request_id = reqs.get('request-id') + try: + version = reqs.get('api-version') + if version == 1: + log('Processing request {}'.format(request_id), level=DEBUG) + resp = process_requests_v1(reqs['ops']) + if request_id: + resp['request-id'] = request_id + + return resp + + except Exception as exc: + log(str(exc), level=ERROR) + msg = ("Unexpected error occurred while processing requests: %s" % + reqs) + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + msg = ("Missing or invalid api version ({})".format(version)) + resp = {'exit-code': 1, 'stderr': msg} + if request_id: + resp['request-id'] = request_id + + return resp + + +def handle_create_erasure_profile(request, service): + """Create an erasure profile. + + :param request: dict of request operations and params + :param service: The ceph client to run the command under. + :returns: dict. exit-code and reason if not 0 + """ + # "isa" | "lrc" | "shec" | "clay" or it defaults to "jerasure" + erasure_type = request.get('erasure-type') + # dependent on erasure coding type + erasure_technique = request.get('erasure-technique') + # "host" | "rack" | ... 
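+    # Illustrative request this handler expects (hypothetical values):
+    #   {'op': 'create-erasure-profile', 'name': 'my-profile',
+    #    'erasure-type': 'jerasure', 'k': 3, 'm': 2, 'failure-domain': 'host'}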
+    failure_domain = request.get('failure-domain')
+    name = request.get('name')
+    # Binary Distribution Matrix (BDM) parameters
+    bdm_k = request.get('k')
+    bdm_m = request.get('m')
+    # LRC parameters
+    bdm_l = request.get('l')
+    crush_locality = request.get('crush-locality')
+    # SHEC parameters
+    bdm_c = request.get('c')
+    # CLAY parameters
+    bdm_d = request.get('d')
+    scalar_mds = request.get('scalar-mds')
+    # Device Class
+    device_class = request.get('device-class')
+
+    if failure_domain and failure_domain not in CEPH_BUCKET_TYPES:
+        msg = "failure-domain must be one of {}".format(CEPH_BUCKET_TYPES)
+        log(msg, level=ERROR)
+        return {'exit-code': 1, 'stderr': msg}
+
+    create_erasure_profile(service=service,
+                           erasure_plugin_name=erasure_type,
+                           profile_name=name,
+                           failure_domain=failure_domain,
+                           data_chunks=bdm_k,
+                           coding_chunks=bdm_m,
+                           locality=bdm_l,
+                           durability_estimator=bdm_d,
+                           helper_chunks=bdm_c,
+                           scalar_mds=scalar_mds,
+                           crush_locality=crush_locality,
+                           device_class=device_class,
+                           erasure_plugin_technique=erasure_technique)
+
+    return {'exit-code': 0}
+
+
+def handle_add_permissions_to_key(request, service):
+    """Groups are defined by the key cephx.groups.(namespace-)?-(name). This
+    key will contain a dict serialized to JSON with data about the group,
+    including pools and members.
+
+    A group can optionally have a namespace defined that will be used to
+    further restrict pool access.
+    """
+    resp = {'exit-code': 0}
+
+    service_name = request.get('name')
+    group_name = request.get('group')
+    group_namespace = request.get('group-namespace')
+    if group_namespace:
+        group_name = "{}-{}".format(group_namespace, group_name)
+    group = get_group(group_name=group_name)
+    service_obj = get_service_groups(service=service_name,
+                                     namespace=group_namespace)
+    if request.get('object-prefix-permissions'):
+        service_obj['object_prefix_perms'] = request.get(
+            'object-prefix-permissions')
+    log("Service object: {}".format(service_obj), level=DEBUG)
+    permission = request.get('group-permission') or "rwx"
+    if service_name not in group['services']:
+        group['services'].append(service_name)
+        save_group(group=group, group_name=group_name)
+    if permission not in service_obj['group_names']:
+        service_obj['group_names'][permission] = []
+    if group_name not in service_obj['group_names'][permission]:
+        service_obj['group_names'][permission].append(group_name)
+        save_service(service=service_obj, service_name=service_name)
+    service_obj['groups'] = _build_service_groups(service_obj,
+                                                  group_namespace)
+    update_service_permissions(service_name, service_obj, group_namespace)
+
+    return resp
+
+
+def handle_set_key_permissions(request, service):
+    """Ensure the key has the requested permissions."""
+    permissions = request.get('permissions')
+    client = request.get('client')
+    call = ['ceph', '--id', service, 'auth', 'caps',
+            'client.{}'.format(client)] + permissions
+    try:
+        check_call(call)
+    except CalledProcessError as e:
+        log("Error updating key capabilities: {}".format(e), level=ERROR)
+
+
+def update_service_permissions(service, service_obj=None, namespace=None):
+    """Update the key permissions for the named client in Ceph"""
+    if not service_obj:
+        service_obj = get_service_groups(service=service, namespace=namespace)
+    permissions = pool_permission_list_for_service(service_obj)
+    call = ['ceph', 'auth', 'caps', 'client.{}'.format(service)] + permissions
+    try:
+        check_call(call)
+    except CalledProcessError as e:
+        log("Error updating key capabilities: {}".format(e))
+
+
+def add_pool_to_group(pool, group, namespace=None):
+    """Add a named pool to a named group"""
+    group_name = group
+    if namespace:
+        group_name = "{}-{}".format(namespace, group_name)
+    group = get_group(group_name=group_name)
+    if pool not in group['pools']:
+        group["pools"].append(pool)
+        save_group(group, group_name=group_name)
+        for service in group['services']:
+            update_service_permissions(service, namespace=namespace)
+
+
+def pool_permission_list_for_service(service):
+    """Build the permission string for Ceph for a given service"""
+    permissions = []
+    permission_types = collections.OrderedDict()
+    for permission, group in sorted(service["group_names"].items()):
+        if permission not in permission_types:
+            permission_types[permission] = []
+        for item in group:
+            permission_types[permission].append(item)
+    for permission, groups in permission_types.items():
+        permission = "allow {}".format(permission)
+        for group in groups:
+            for pool in service['groups'][group].get('pools', []):
+                permissions.append("{} pool={}".format(permission, pool))
+    for permission, prefixes in sorted(
+            service.get("object_prefix_perms", {}).items()):
+        for prefix in prefixes:
+            permissions.append("allow {} object_prefix {}".format(permission,
+                                                                  prefix))
+    return ['mon', ('allow r, allow command "osd blacklist"'
+                    ', allow command "osd blocklist"'),
+            'osd', ', '.join(permissions)]
+
+
+def get_service_groups(service, namespace=None):
+    """Services are objects stored with some metadata; for a service named
+    "nova" they look like:
+    {
+        group_names: {'rwx': ['images']},
+        groups: {}
+    }
+    After populating the group, it looks like:
+    {
+        group_names: {'rwx': ['images']},
+        groups: {
+            'images': {
+                pools: ['glance'],
+                services: ['nova']
+            }
+        }
+    }
+    """
+    service_json = monitor_key_get(service='admin',
+                                   key="cephx.services.{}".format(service))
+    try:
+        service = json.loads(service_json)
+    except (TypeError, ValueError):
+        service = None
+    if service:
+        service['groups'] = _build_service_groups(service, namespace)
+    else:
+        service = {'group_names': {}, 'groups': {}}
+    return service
+
+
+def _build_service_groups(service, namespace=None):
+    """Rebuild the 'groups' dict for a service group
+
+    :returns: dict: dictionary keyed by group name of the following
+              format:
+
+              {
+                  'images': {
+                      pools: ['glance'],
+                      services: ['nova', 'glance']
+                  },
+                  'vms': {
+                      pools: ['nova'],
+                      services: ['nova']
+                  }
+              }
+    """
+    all_groups = {}
+    for groups in service['group_names'].values():
+        for group in groups:
+            name = group
+            if namespace:
+                name = "{}-{}".format(namespace, name)
+            all_groups[group] = get_group(group_name=name)
+    return all_groups
+
+
+def get_group(group_name):
+    """A group is a structure to hold data about a named group, structured as:
+    {
+        pools: ['glance'],
+        services: ['nova']
+    }
+    """
+    group_key = get_group_key(group_name=group_name)
+    group_json = monitor_key_get(service='admin', key=group_key)
+    try:
+        group = json.loads(group_json)
+    except (TypeError, ValueError):
+        group = None
+    if not group:
+        group = {
+            'pools': [],
+            'services': []
+        }
+    return group
+
+
+def save_service(service_name, service):
+    """Persist a service in the monitor cluster"""
+    service['groups'] = {}
+    return monitor_key_set(service='admin',
+                           key="cephx.services.{}".format(service_name),
+                           value=json.dumps(service, sort_keys=True))
+
+
+def save_group(group, group_name):
+    """Persist a group in the monitor cluster"""
+    group_key = get_group_key(group_name=group_name)
+    return monitor_key_set(service='admin',
+                           key=group_key,
value=json.dumps(group, sort_keys=True)) + + +def get_group_key(group_name): + """Build group key""" + return 'cephx.groups.{}'.format(group_name) + + +def handle_erasure_pool(request, service): + """Create a new erasure coded pool. + + :param request: dict of request operations and params. + :param service: The ceph client to run the command under. + :returns: dict. exit-code and reason if not 0. + """ + pool_name = request.get('name') + erasure_profile = request.get('erasure-profile') + group_name = request.get('group') + + if erasure_profile is None: + erasure_profile = "default-canonical" + + if group_name: + group_namespace = request.get('group-namespace') + # Add the pool to the group named "group_name" + add_pool_to_group(pool=pool_name, + group=group_name, + namespace=group_namespace) + + # TODO: Default to 3/2 erasure coding. I believe this requires min 5 osds + if not erasure_profile_exists(service=service, name=erasure_profile): + # TODO: Fail and tell them to create the profile or default + msg = ("erasure-profile {} does not exist. Please create it with: " + "create-erasure-profile".format(erasure_profile)) + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + try: + pool = ErasurePool(service=service, + op=request) + except KeyError: + msg = "Missing parameter." + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + # Ok make the erasure pool + if not pool_exists(service=service, name=pool_name): + log("Creating pool '{}' (erasure_profile={})" + .format(pool.name, erasure_profile), level=INFO) + pool.create() + + # Set/update properties that are allowed to change after pool creation. + pool.update() + + +def handle_replicated_pool(request, service): + """Create a new replicated pool. + + :param request: dict of request operations and params. + :param service: The ceph client to run the command under. + :returns: dict. exit-code and reason if not 0. + """ + pool_name = request.get('name') + group_name = request.get('group') + + # Optional params + # NOTE: Check this against the handling in the Pool classes, reconcile and + # remove. + pg_num = request.get('pg_num') + replicas = request.get('replicas') + if pg_num: + # Cap pg_num to max allowed just in case. + osds = get_osds(service) + if osds: + pg_num = min(pg_num, (len(osds) * 100 // replicas)) + request.update({'pg_num': pg_num}) + + if group_name: + group_namespace = request.get('group-namespace') + # Add the pool to the group named "group_name" + add_pool_to_group(pool=pool_name, + group=group_name, + namespace=group_namespace) + + try: + pool = ReplicatedPool(service=service, + op=request) + except KeyError: + msg = "Missing parameter." + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + if not pool_exists(service=service, name=pool_name): + log("Creating pool '{}' (replicas={})".format(pool.name, replicas), + level=INFO) + pool.create() + else: + log("Pool '{}' already exists - skipping create".format(pool.name), + level=DEBUG) + + # Set/update properties that are allowed to change after pool creation. + pool.update() + + +def handle_create_cache_tier(request, service): + """Create a cache tier on a cold pool. Modes supported are + "writeback" and "readonly". + + :param request: dict of request operations and params + :param service: The ceph client to run the command under. + :returns: dict. 
exit-code and reason if not 0 + """ + # mode = "writeback" | "readonly" + storage_pool = request.get('cold-pool') + cache_pool = request.get('hot-pool') + cache_mode = request.get('mode') + + if cache_mode is None: + cache_mode = "writeback" + + # cache and storage pool must exist first + if not pool_exists(service=service, name=storage_pool) or not pool_exists( + service=service, name=cache_pool): + msg = ("cold-pool: {} and hot-pool: {} must exist. Please create " + "them first".format(storage_pool, cache_pool)) + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + p = BasePool(service=service, name=storage_pool) + p.add_cache_tier(cache_pool=cache_pool, mode=cache_mode) + + +def handle_remove_cache_tier(request, service): + """Remove a cache tier from the cold pool. + + :param request: dict of request operations and params + :param service: The ceph client to run the command under. + :returns: dict. exit-code and reason if not 0 + """ + storage_pool = request.get('cold-pool') + cache_pool = request.get('hot-pool') + # cache and storage pool must exist first + if not pool_exists(service=service, name=storage_pool) or not pool_exists( + service=service, name=cache_pool): + msg = ("cold-pool: {} or hot-pool: {} doesn't exist. Not " + "deleting cache tier".format(storage_pool, cache_pool)) + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + pool = BasePool(name=storage_pool, service=service) + pool.remove_cache_tier(cache_pool=cache_pool) + + +def handle_set_pool_value(request, service, coerce=False): + """Sets an arbitrary pool value. + + :param request: dict of request operations and params + :param service: The ceph client to run the command under. + :param coerce: Try to parse/coerce the value into the correct type. + Used by the action code that only gets Str from Juju + :returns: dict. exit-code and reason if not 0 + """ + # Set arbitrary pool values + params = {'pool': request.get('name'), + 'key': request.get('key'), + 'value': request.get('value')} + if params['key'] not in POOL_KEYS: + msg = "Invalid key '{}'".format(params['key']) + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + # Get the validation method + validator_params = POOL_KEYS[params['key']] + # BUG: #1838650 - the function needs to try to coerce the value param to + # the type required for the validator to pass. Note, if this blows, then + # the param isn't parsable to the correct type. + if coerce: + try: + params['value'] = validator_params[0](params['value']) + except ValueError: + raise RuntimeError("Value {} isn't of type {}" + .format(params['value'], validator_params[0])) + # end of BUG: #1838650 + if len(validator_params) == 1: + # Validate that what the user passed is actually legal per Ceph's rules + validator(params['value'], validator_params[0]) + else: + # Validate that what the user passed is actually legal per Ceph's rules + validator(params['value'], validator_params[0], validator_params[1]) + + # Set the value + pool_set(service=service, pool_name=params['pool'], key=params['key'], + value=params['value']) + + +def handle_rgw_regionmap_update(request, service): + """Change the radosgw region map. + + :param request: dict of request operations and params + :param service: The ceph client to run the command under. + :returns: dict. 
exit-code and reason if not 0
+    """
+    name = request.get('client-name')
+    if not name:
+        msg = "Missing client-name param"
+        log(msg, level=ERROR)
+        return {'exit-code': 1, 'stderr': msg}
+    try:
+        check_output(['radosgw-admin',
+                      '--id', service,
+                      'regionmap', 'update', '--name', name])
+    except CalledProcessError as err:
+        log(err.output, level=ERROR)
+        return {'exit-code': 1, 'stderr': err.output}
+
+
+def handle_rgw_regionmap_default(request, service):
+    """Create a radosgw region map.
+
+    :param request: dict of request operations and params
+    :param service: The ceph client to run the command under.
+    :returns: dict. exit-code and reason if not 0
+    """
+    region = request.get('rgw-region')
+    name = request.get('client-name')
+    if not region or not name:
+        msg = "Missing rgw-region or client-name params"
+        log(msg, level=ERROR)
+        return {'exit-code': 1, 'stderr': msg}
+    try:
+        check_output(
+            [
+                'radosgw-admin',
+                '--id', service,
+                'regionmap',
+                'default',
+                '--rgw-region', region,
+                '--name', name])
+    except CalledProcessError as err:
+        log(err.output, level=ERROR)
+        return {'exit-code': 1, 'stderr': err.output}
+
+
+def handle_rgw_zone_set(request, service):
+    """Create a radosgw zone.
+
+    :param request: dict of request operations and params
+    :param service: The ceph client to run the command under.
+    :returns: dict. exit-code and reason if not 0
+    """
+    json_file = request.get('zone-json')
+    name = request.get('client-name')
+    region_name = request.get('region-name')
+    zone_name = request.get('zone-name')
+    if not json_file or not name or not region_name or not zone_name:
+        msg = ("Missing zone-json, client-name, region-name or "
+               "zone-name params")
+        log(msg, level=ERROR)
+        return {'exit-code': 1, 'stderr': msg}
+    infile = NamedTemporaryFile(delete=False)
+    with open(infile.name, 'w') as infile_handle:
+        infile_handle.write(json_file)
+    try:
+        check_output(
+            [
+                'radosgw-admin',
+                '--id', service,
+                'zone',
+                'set',
+                '--rgw-zone', zone_name,
+                '--infile', infile.name,
+                '--name', name,
+            ]
+        )
+    except CalledProcessError as err:
+        log(err.output, level=ERROR)
+        return {'exit-code': 1, 'stderr': err.output}
+    os.unlink(infile.name)
+
+
+def handle_put_osd_in_bucket(request, service):
+    """Move an osd into a specified crush bucket.
+
+    :param request: dict of request operations and params
+    :param service: The ceph client to run the command under.
+    :returns: dict. exit-code and reason if not 0
+    """
+    osd_id = request.get('osd')
+    target_bucket = request.get('bucket')
+    if not osd_id or not target_bucket:
+        msg = "Missing OSD ID or Bucket"
+        log(msg, level=ERROR)
+        return {'exit-code': 1, 'stderr': msg}
+    crushmap = Crushmap()
+    try:
+        crushmap.ensure_bucket_is_present(target_bucket)
+        check_output(
+            [
+                'ceph',
+                '--id', service,
+                'osd',
+                'crush',
+                'set',
+                str(osd_id),
+                str(get_osd_weight(osd_id)),
+                "root={}".format(target_bucket)
+            ]
+        )
+
+    except Exception as exc:
+        msg = "Failed to move OSD " \
+              "{} into Bucket {} :: {}".format(osd_id, target_bucket, exc)
+        log(msg, level=ERROR)
+        return {'exit-code': 1, 'stderr': msg}
+
+
+def handle_rgw_create_user(request, service):
+    """Create a new rados gateway user.
+
+    :param request: dict of request operations and params
+    :param service: The ceph client to run the command under.
+    :returns: dict. exit-code and reason if not 0
+    """
+    user_id = request.get('rgw-uid')
+    display_name = request.get('display-name')
+    name = request.get('client-name')
+    if not name or not display_name or not user_id:
+        msg = "Missing client-name, display-name or rgw-uid"
+        log(msg, level=ERROR)
+        return {'exit-code': 1, 'stderr': msg}
+    try:
+        create_output = check_output(
+            [
+                'radosgw-admin',
+                '--id', service,
+                'user',
+                'create',
+                '--uid', user_id,
+                '--display-name', display_name,
+                '--name', name,
+                '--system'
+            ]
+        )
+        try:
+            user_json = json.loads(str(create_output.decode('UTF-8')))
+            return {'exit-code': 0, 'user': user_json}
+        except ValueError as err:
+            # Use str(err): the response dict is JSON-serialized later and
+            # a raw exception object would not serialize.
+            log(str(err), level=ERROR)
+            return {'exit-code': 1, 'stderr': str(err)}
+
+    except CalledProcessError as err:
+        log(err.output, level=ERROR)
+        return {'exit-code': 1, 'stderr': err.output}
+
+
+def handle_create_cephfs(request, service):
+    """Create a new cephfs.
+
+    :param request: The broker request
+    :param service: The ceph client to run the command under.
+    :returns: dict. exit-code and reason if not 0
+    """
+    cephfs_name = request.get('mds_name')
+    data_pool = request.get('data_pool')
+    extra_pools = request.get('extra_pools', None) or []
+    metadata_pool = request.get('metadata_pool')
+    # Check if the user params were provided
+    if not cephfs_name or not data_pool or not metadata_pool:
+        msg = "Missing mds_name, data_pool or metadata_pool params"
+        log(msg, level=ERROR)
+        return {'exit-code': 1, 'stderr': msg}
+
+    # Sanity check that the required pools exist
+    for pool_name in [data_pool, metadata_pool] + extra_pools:
+        if not pool_exists(service=service, name=pool_name):
+            msg = "CephFS pool {} does not exist. Cannot create CephFS".format(
+                pool_name)
+            log(msg, level=ERROR)
+            return {'exit-code': 1, 'stderr': msg}
+
+    if cephfs_name in get_cephfs(service=service):
+        # 'ceph fs new' has already been called
+        log("CephFS already created")
+        return
+
+    # Finally create CephFS
+    try:
+        check_output(["ceph",
+                      '--id', service,
+                      "fs", "new", cephfs_name,
+                      metadata_pool,
+                      data_pool])
+    except CalledProcessError as err:
+        if err.returncode == 22:
+            log("CephFS already created")
+            return
+        else:
+            log(err.output, level=ERROR)
+            return {'exit-code': 1, 'stderr': err.output}
+    for pool_name in extra_pools:
+        cmd = ["ceph", '--id', service, "fs", "add_data_pool", cephfs_name,
+               pool_name]
+        try:
+            check_output(cmd)
+        except CalledProcessError as err:
+            log(err.output, level=ERROR)
+            return {'exit-code': 1, 'stderr': err.output}
+
+
+def handle_rgw_region_set(request, service):
+    # radosgw-admin region set --infile us.json --name client.radosgw.us-east-1
+    """Set the rados gateway region.
+
+    :param request: dict. The broker request.
+    :param service: The ceph client to run the command under.
+    :returns: dict. exit-code and reason if not 0
+    """
+    json_file = request.get('region-json')
+    name = request.get('client-name')
+    region_name = request.get('region-name')
+    zone_name = request.get('zone-name')
+    if not json_file or not name or not region_name or not zone_name:
+        msg = ("Missing region-json, client-name, region-name or "
+               "zone-name params")
+        log(msg, level=ERROR)
+        return {'exit-code': 1, 'stderr': msg}
+    infile = NamedTemporaryFile(delete=False)
+    with open(infile.name, 'w') as infile_handle:
+        infile_handle.write(json_file)
+    try:
+        check_output(
+            [
+                'radosgw-admin',
+                '--id', service,
+                'region',
+                'set',
+                '--rgw-zone', zone_name,
+                '--infile', infile.name,
+                '--name', name,
+            ]
+        )
+    except CalledProcessError as err:
+        log(err.output, level=ERROR)
+        return {'exit-code': 1, 'stderr': err.output}
+    os.unlink(infile.name)
+
+
+def handle_create_cephfs_client(request, service):
+    """Creates a new CephFS client for a filesystem.
+
+    :param request: The broker request
+    :param service: The ceph client to run the command under.
+    :returns: dict. exit-code and reason if not 0.
+    """
+    fs_name = request.get('fs_name')
+    client_id = request.get('client_id')
+    # TODO: fs allows setting write permissions for a list of paths.
+    path = request.get('path')
+    perms = request.get('perms')
+    # Need all parameters
+    if not fs_name or not client_id or not path or not perms:
+        msg = "Missing fs_name, client_id, path or perms params"
+        log(msg, level=ERROR)
+        return {'exit-code': 1, 'stderr': msg}
+
+    # Skip creation if the request has already been called
+    # This makes it a bit more compatible with older Ceph versions
+    # that throw when trying to authorize a user with the same
+    # capabilities that it currently has.
+    try:
+        cmd = ["ceph", "--id", service, "auth", "ls", "-f", "json"]
+        auth_ls = json.loads(check_output(cmd, encoding="utf-8"))
+    except CalledProcessError as err:
+        log(err.output, level=ERROR)
+        return {'exit-code': 1, 'stderr': err.output}
+    except ValueError as err:
+        log(str(err), level=ERROR)
+        return {'exit-code': 1, 'stderr': str(err)}
+
+    client = "client.{}".format(client_id)
+    for elem in auth_ls["auth_dump"]:
+        if client == elem["entity"]:
+            log("Client {} has already been created".format(client))
+            return {'exit-code': 0, 'key': elem["key"]}
+
+    # Try to authorize the client
+    # `ceph fs authorize` already returns the correct error
+    # message if the filesystem doesn't exist.
+    try:
+        cmd = [
+            "ceph",
+            "--id", service,
+            "fs", "authorize",
+            fs_name,
+            client,
+            path,
+            perms,
+            "-f", "json"
+        ]
+        fs_auth = json.loads(check_output(cmd, encoding="utf-8"))
+    except CalledProcessError as err:
+        log(err.output, level=ERROR)
+        return {'exit-code': 1, 'stderr': err.output}
+    except ValueError as err:
+        log(str(err), level=ERROR)
+        return {'exit-code': 1, 'stderr': str(err)}
+
+    return {'exit-code': 0, 'key': fs_auth[0]["key"]}
+
+
+def process_requests_v1(reqs):
+    """Process v1 requests.
+
+    Takes a list of requests (dicts) and processes each one. If an error is
+    found, processing stops and the client is notified in the response.
+
+    Returns a response dict containing the exit code (non-zero if any
+    operation failed along with an explanation).
+    """
+    ret = None
+    log("Processing {} ceph broker requests".format(len(reqs)), level=INFO)
+    for req in reqs:
+        op = req.get('op')
+        log("Processing op='{}'".format(op), level=DEBUG)
+        # Use admin client since we do not have other client key locations
+        # setup to use them for these operations.
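+        # Each op is a dict naming the operation plus its parameters, e.g.
+        # (illustrative only): {'op': 'create-pool', 'name': 'glance',
+        # 'replicas': 3}; the chain below dispatches it to a handler.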
+ svc = 'admin' + if op == "create-pool": + pool_type = req.get('pool-type') # "replicated" | "erasure" + + # Default to replicated if pool_type isn't given + if pool_type == 'erasure': + ret = handle_erasure_pool(request=req, service=svc) + else: + ret = handle_replicated_pool(request=req, service=svc) + elif op == "create-cephfs": + ret = handle_create_cephfs(request=req, service=svc) + elif op == "create-cache-tier": + ret = handle_create_cache_tier(request=req, service=svc) + elif op == "remove-cache-tier": + ret = handle_remove_cache_tier(request=req, service=svc) + elif op == "create-erasure-profile": + ret = handle_create_erasure_profile(request=req, service=svc) + elif op == "delete-pool": + pool = req.get('name') + ret = delete_pool(service=svc, name=pool) + elif op == "rename-pool": + old_name = req.get('name') + new_name = req.get('new-name') + ret = rename_pool(service=svc, old_name=old_name, + new_name=new_name) + elif op == "snapshot-pool": + pool = req.get('name') + snapshot_name = req.get('snapshot-name') + ret = snapshot_pool(service=svc, pool_name=pool, + snapshot_name=snapshot_name) + elif op == "remove-pool-snapshot": + pool = req.get('name') + snapshot_name = req.get('snapshot-name') + ret = remove_pool_snapshot(service=svc, pool_name=pool, + snapshot_name=snapshot_name) + elif op == "set-pool-value": + ret = handle_set_pool_value(request=req, service=svc) + elif op == "rgw-region-set": + ret = handle_rgw_region_set(request=req, service=svc) + elif op == "rgw-zone-set": + ret = handle_rgw_zone_set(request=req, service=svc) + elif op == "rgw-regionmap-update": + ret = handle_rgw_regionmap_update(request=req, service=svc) + elif op == "rgw-regionmap-default": + ret = handle_rgw_regionmap_default(request=req, service=svc) + elif op == "rgw-create-user": + ret = handle_rgw_create_user(request=req, service=svc) + elif op == "move-osd-to-bucket": + ret = handle_put_osd_in_bucket(request=req, service=svc) + elif op == "add-permissions-to-key": + ret = handle_add_permissions_to_key(request=req, service=svc) + elif op == 'set-key-permissions': + ret = handle_set_key_permissions(request=req, service=svc) + elif op == "create-cephfs-client": + ret = handle_create_cephfs_client(request=req, service=svc) + else: + msg = "Unknown operation '{}'".format(op) + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + if isinstance(ret, dict) and 'exit-code' in ret: + return ret + + return {'exit-code': 0} diff --git a/ceph-proxy/lib/charms_ceph/crush_utils.py b/ceph-proxy/lib/charms_ceph/crush_utils.py new file mode 100644 index 00000000..37084bf1 --- /dev/null +++ b/ceph-proxy/lib/charms_ceph/crush_utils.py @@ -0,0 +1,154 @@ +# Copyright 2014 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
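+# A sketch of typical use (hypothetical bucket name), based on the API below:
+#   crushmap = Crushmap()
+#   crushmap.ensure_bucket_is_present('fast-ssd')
+# ensure_bucket_is_present() decompiles the live CRUSH map, appends a
+# bucket/rule pair rendered from the CRUSH_BUCKET template, then recompiles
+# and injects the updated map into the cluster.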
+
+import re
+
+from subprocess import check_output, CalledProcessError
+
+from charmhelpers.core.hookenv import (
+    log,
+    ERROR,
+)
+
+CRUSH_BUCKET = """root {name} {{
+    id {id}    # do not change unnecessarily
+    # weight 0.000
+    alg straw2
+    hash 0  # rjenkins1
+}}
+
+rule {name} {{
+    ruleset 0
+    type replicated
+    min_size 1
+    max_size 10
+    step take {name}
+    step chooseleaf firstn 0 type host
+    step emit
+}}"""
+
+# This regular expression looks for a string like:
+# root NAME {
+#     id NUMBER
+# so that we can extract NAME and ID from the crushmap
+CRUSHMAP_BUCKETS_RE = re.compile(r"root\s+(.+)\s+\{\s*id\s+(-?\d+)")
+
+# This regular expression looks for ID strings in the crushmap like:
+#     id NUMBER
+# so that we can extract the IDs from a crushmap
+CRUSHMAP_ID_RE = re.compile(r"id\s+(-?\d+)")
+
+
+class Crushmap(object):
+    """An object oriented approach to Ceph crushmap management."""
+
+    def __init__(self):
+        self._crushmap = self.load_crushmap()
+        roots = re.findall(CRUSHMAP_BUCKETS_RE, self._crushmap)
+        buckets = []
+        ids = list(map(
+            lambda x: int(x),
+            re.findall(CRUSHMAP_ID_RE, self._crushmap)))
+        ids = sorted(ids)
+        if roots != []:
+            for root in roots:
+                buckets.append(CRUSHBucket(root[0], root[1], True))
+
+        self._buckets = buckets
+        if ids != []:
+            self._ids = ids
+        else:
+            self._ids = [0]
+
+    def load_crushmap(self):
+        try:
+            crush = check_output(['ceph', 'osd', 'getcrushmap'])
+            return check_output(['crushtool', '-d', '-'],
+                                input=crush).decode('UTF-8')
+        except CalledProcessError as e:
+            log("Error occurred while loading and decompiling "
+                "CRUSH map: {}".format(e), ERROR)
+            raise
+
+    def ensure_bucket_is_present(self, bucket_name):
+        if bucket_name not in [bucket.name for bucket in self.buckets()]:
+            self.add_bucket(bucket_name)
+            self.save()
+
+    def buckets(self):
+        """Return a list of buckets that are in the Crushmap."""
+        return self._buckets
+
+    def add_bucket(self, bucket_name):
+        """Add a named bucket to Ceph"""
+        new_id = min(self._ids) - 1
+        self._ids.append(new_id)
+        self._buckets.append(CRUSHBucket(bucket_name, new_id))
+
+    def save(self):
+        """Persist Crushmap to Ceph"""
+        try:
+            crushmap = self.build_crushmap()
+            compiled = check_output(
+                ['crushtool', '-c', '/dev/stdin', '-o', '/dev/stdout'],
+                input=crushmap.encode('UTF-8'))
+            ceph_output = check_output(
+                ['ceph', 'osd', 'setcrushmap', '-i', '/dev/stdin'],
+                input=compiled).decode('UTF-8')
+            return ceph_output
+        except CalledProcessError as e:
+            log("save error: {}".format(e))
+            raise
+
+    def build_crushmap(self):
+        """Modifies the current CRUSH map to include the new buckets"""
+        tmp_crushmap = self._crushmap
+        for bucket in self._buckets:
+            if not bucket.default:
+                tmp_crushmap = "{}\n\n{}".format(
+                    tmp_crushmap,
+                    Crushmap.bucket_string(bucket.name, bucket.id))
+
+        return tmp_crushmap
+
+    @staticmethod
+    def bucket_string(name, id):
+        return CRUSH_BUCKET.format(name=name, id=id)
+
+
+class CRUSHBucket(object):
+    """CRUSH bucket description object."""
+
+    def __init__(self, name, id, default=False):
+        self.name = name
+        self.id = int(id)
+        self.default = default
+
+    def __repr__(self):
+        return "Bucket {{Name: {name}, ID: {id}}}".format(
+            name=self.name, id=self.id)
+
+    def __eq__(self, other):
+        """Override the default Equals behavior"""
+        if isinstance(other, self.__class__):
+            return self.__dict__ == other.__dict__
+        return NotImplemented
+
+    def __ne__(self, other):
+        """Define a non-equality test"""
+        if isinstance(other, self.__class__):
+            return not
self.__eq__(other) + return NotImplemented diff --git a/ceph-proxy/lib/charms_ceph/utils.py b/ceph-proxy/lib/charms_ceph/utils.py new file mode 100644 index 00000000..63dd1fae --- /dev/null +++ b/ceph-proxy/lib/charms_ceph/utils.py @@ -0,0 +1,3563 @@ +# Copyright 2017-2021 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import collections +import glob +import itertools +import json +import os +import pyudev +import random +import re +import socket +import subprocess +import sys +import time +import uuid +import functools + +from contextlib import contextmanager +from datetime import datetime + +from charmhelpers.core import hookenv +from charmhelpers.core import templating +from charmhelpers.core.host import ( + chownr, + cmp_pkgrevno, + lsb_release, + mkdir, + owner, + service_restart, + service_start, + service_stop, + CompareHostReleases, + write_file, + is_container, +) +from charmhelpers.core.hookenv import ( + cached, + config, + log, + status_set, + DEBUG, + ERROR, + WARNING, + storage_get, + storage_list, +) +from charmhelpers.fetch import ( + add_source, + apt_install, + apt_purge, + apt_update, + filter_missing_packages, + get_installed_version +) +from charmhelpers.contrib.storage.linux.ceph import ( + get_mon_map, + monitor_key_set, + monitor_key_exists, + monitor_key_get, +) +from charmhelpers.contrib.storage.linux.utils import ( + is_block_device, + is_device_mounted, +) +from charmhelpers.contrib.openstack.utils import ( + get_os_codename_install_source, +) +from charmhelpers.contrib.storage.linux import lvm +from charmhelpers.core.unitdata import kv + +CEPH_BASE_DIR = os.path.join(os.sep, 'var', 'lib', 'ceph') +OSD_BASE_DIR = os.path.join(CEPH_BASE_DIR, 'osd') +HDPARM_FILE = os.path.join(os.sep, 'etc', 'hdparm.conf') + +LEADER = 'leader' +PEON = 'peon' +QUORUM = [LEADER, PEON] + +PACKAGES = ['ceph', 'gdisk', + 'radosgw', 'xfsprogs', + 'lvm2', 'parted', 'smartmontools'] + +REMOVE_PACKAGES = [] +CHRONY_PACKAGE = 'chrony' + +CEPH_KEY_MANAGER = 'ceph' +VAULT_KEY_MANAGER = 'vault' +KEY_MANAGERS = [ + CEPH_KEY_MANAGER, + VAULT_KEY_MANAGER, +] + +LinkSpeed = { + "BASE_10": 10, + "BASE_100": 100, + "BASE_1000": 1000, + "GBASE_10": 10000, + "GBASE_40": 40000, + "GBASE_100": 100000, + "UNKNOWN": None +} + +# Mapping of adapter speed to sysctl settings +NETWORK_ADAPTER_SYSCTLS = { + # 10Gb + LinkSpeed["GBASE_10"]: { + 'net.core.rmem_default': 524287, + 'net.core.wmem_default': 524287, + 'net.core.rmem_max': 524287, + 'net.core.wmem_max': 524287, + 'net.core.optmem_max': 524287, + 'net.core.netdev_max_backlog': 300000, + 'net.ipv4.tcp_rmem': '10000000 10000000 10000000', + 'net.ipv4.tcp_wmem': '10000000 10000000 10000000', + 'net.ipv4.tcp_mem': '10000000 10000000 10000000' + }, + # Mellanox 10/40Gb + LinkSpeed["GBASE_40"]: { + 'net.ipv4.tcp_timestamps': 0, + 'net.ipv4.tcp_sack': 1, + 'net.core.netdev_max_backlog': 250000, + 'net.core.rmem_max': 4194304, + 'net.core.wmem_max': 4194304, + 'net.core.rmem_default': 4194304, + 'net.core.wmem_default': 4194304, + 
'net.core.optmem_max': 4194304,
+        'net.ipv4.tcp_rmem': '4096 87380 4194304',
+        'net.ipv4.tcp_wmem': '4096 65536 4194304',
+        'net.ipv4.tcp_low_latency': 1,
+        'net.ipv4.tcp_adv_win_scale': 1
+    }
+}
+
+
+class Partition(object):
+    def __init__(self, name, number, size, start, end, sectors, uuid):
+        """A block device partition.
+
+        :param name: Name of block device
+        :param number: Partition number
+        :param size: Capacity of the device
+        :param start: Starting block
+        :param end: Ending block
+        :param sectors: Number of blocks
+        :param uuid: UUID of the partition
+        """
+        self.name = name
+        self.number = number
+        self.size = size
+        self.start = start
+        self.end = end
+        self.sectors = sectors
+        self.uuid = uuid
+
+    def __str__(self):
+        return "number: {} start: {} end: {} sectors: {} size: {} " \
+               "name: {} uuid: {}".format(self.number, self.start,
+                                          self.end,
+                                          self.sectors, self.size,
+                                          self.name, self.uuid)
+
+    def __eq__(self, other):
+        if isinstance(other, self.__class__):
+            return self.__dict__ == other.__dict__
+        return False
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+
+def unmounted_disks():
+    """List of unmounted block devices on the current host."""
+    disks = []
+    context = pyudev.Context()
+    for device in context.list_devices(DEVTYPE='disk'):
+        if device['SUBSYSTEM'] == 'block':
+            if device.device_node is None:
+                continue
+
+            matched = False
+            for block_type in [u'dm-', u'loop', u'ram', u'nbd']:
+                if block_type in device.device_node:
+                    matched = True
+            if matched:
+                continue
+
+            disks.append(device.device_node)
+    log("Found disks: {}".format(disks))
+    return [disk for disk in disks if not is_device_mounted(disk)]
+
+
+def save_sysctls(sysctl_dict, save_location):
+    """Persist the sysctls to the hard drive.
+
+    :param sysctl_dict: dict
+    :param save_location: path to save the settings to
+    :raises: IOError if anything goes wrong with writing.
+    """
+    try:
+        # Persist the settings for reboots
+        with open(save_location, "w") as fd:
+            for key, value in sysctl_dict.items():
+                fd.write("{}={}\n".format(key, value))
+
+    except IOError as e:
+        log("Unable to persist sysctl settings to {}. Error {}".format(
+            save_location, e), level=ERROR)
+        raise
+
+
+def tune_nic(network_interface):
+    """This will set optimal sysctls for the particular network adapter.
+
+    :param network_interface: string The network adapter name.
+    """
+    speed = get_link_speed(network_interface)
+    if speed in NETWORK_ADAPTER_SYSCTLS:
+        status_set('maintenance', 'Tuning device {}'.format(
+            network_interface))
+        sysctl_file = os.path.join(
+            os.sep,
+            'etc',
+            'sysctl.d',
+            '51-ceph-osd-charm-{}.conf'.format(network_interface))
+        try:
+            log("Saving sysctl_file: {} values: {}".format(
+                sysctl_file, NETWORK_ADAPTER_SYSCTLS[speed]),
+                level=DEBUG)
+            save_sysctls(sysctl_dict=NETWORK_ADAPTER_SYSCTLS[speed],
+                         save_location=sysctl_file)
+        except IOError as e:
+            log("Write to /etc/sysctl.d/51-ceph-osd-charm-{} "
+                "failed. {}".format(network_interface, e),
+                level=ERROR)
+
+        try:
+            # Apply the settings
+            log("Applying sysctl settings", level=DEBUG)
+            subprocess.check_output(["sysctl", "-p", sysctl_file])
+        except subprocess.CalledProcessError as err:
+            log('sysctl -p {} failed with error {}'.format(sysctl_file,
+                                                           err.output),
+                level=ERROR)
+    else:
+        log("No settings found for network adapter: {}".format(
+            network_interface), level=DEBUG)
+
+
+def get_link_speed(network_interface):
+    """This will find the link speed for a given network device. Returns None
+    if an error occurs.
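+
+    Example (illustrative): a 10GbE NIC named eth0 exposes '10000' in
+    /sys/class/net/eth0/speed, which maps to LinkSpeed["GBASE_10"], the
+    key tune_nic() uses to pick a profile from NETWORK_ADAPTER_SYSCTLS.
+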
+ :param network_interface: string The network adapter interface. + :returns: LinkSpeed + """ + speed_path = os.path.join(os.sep, 'sys', 'class', 'net', + network_interface, 'speed') + # I'm not sure where else we'd check if this doesn't exist + if not os.path.exists(speed_path): + return LinkSpeed["UNKNOWN"] + + try: + with open(speed_path, 'r') as sysfs: + nic_speed = sysfs.readlines() + + # Did we actually read anything? + if not nic_speed: + return LinkSpeed["UNKNOWN"] + + # Try to find a sysctl match for this particular speed + for name, speed in LinkSpeed.items(): + if speed == int(nic_speed[0].strip()): + return speed + # Default to UNKNOWN if we can't find a match + return LinkSpeed["UNKNOWN"] + except IOError as e: + log("Unable to open {path} because of error: {error}".format( + path=speed_path, + error=e), level='error') + return LinkSpeed["UNKNOWN"] + + +def persist_settings(settings_dict): + # Write all settings to /etc/hdparm.conf + """This will persist the hard drive settings to the /etc/hdparm.conf file + + The settings_dict should be in the form of {"uuid": {"key":"value"}} + + :param settings_dict: dict of settings to save + """ + if not settings_dict: + return + + try: + templating.render(source='hdparm.conf', target=HDPARM_FILE, + context=settings_dict) + except IOError as err: + log("Unable to open {path} because of error: {error}".format( + path=HDPARM_FILE, error=err), level=ERROR) + except Exception as e: + # The templating.render can raise a jinja2 exception if the + # template is not found. Rather than polluting the import + # space of this charm, simply catch Exception + log('Unable to render {path} due to error: {error}'.format( + path=HDPARM_FILE, error=e), level=ERROR) + + +def set_max_sectors_kb(dev_name, max_sectors_size): + """This function sets the max_sectors_kb size of a given block device. + + :param dev_name: Name of the block device to query + :param max_sectors_size: int of the max_sectors_size to save + """ + max_sectors_kb_path = os.path.join('sys', 'block', dev_name, 'queue', + 'max_sectors_kb') + try: + with open(max_sectors_kb_path, 'w') as f: + f.write(max_sectors_size) + except IOError as e: + log('Failed to write max_sectors_kb to {}. Error: {}'.format( + max_sectors_kb_path, e), level=ERROR) + + +def get_max_sectors_kb(dev_name): + """This function gets the max_sectors_kb size of a given block device. + + :param dev_name: Name of the block device to query + :returns: int which is either the max_sectors_kb or 0 on error. + """ + max_sectors_kb_path = os.path.join('sys', 'block', dev_name, 'queue', + 'max_sectors_kb') + + # Read in what Linux has set by default + if os.path.exists(max_sectors_kb_path): + try: + with open(max_sectors_kb_path, 'r') as f: + max_sectors_kb = f.read().strip() + return int(max_sectors_kb) + except IOError as e: + log('Failed to read max_sectors_kb to {}. Error: {}'.format( + max_sectors_kb_path, e), level=ERROR) + # Bail. + return 0 + return 0 + + +def get_max_hw_sectors_kb(dev_name): + """This function gets the max_hw_sectors_kb for a given block device. + + :param dev_name: Name of the block device to query + :returns: int which is either the max_hw_sectors_kb or 0 on error. 
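+
+    Example (illustrative): get_max_hw_sectors_kb('sda') reads the sysfs
+    attribute sys/block/sda/queue/max_hw_sectors_kb and returns its value
+    as an int; 'sda' is a hypothetical device name.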
+ """ + max_hw_sectors_kb_path = os.path.join('sys', 'block', dev_name, 'queue', + 'max_hw_sectors_kb') + # Read in what the hardware supports + if os.path.exists(max_hw_sectors_kb_path): + try: + with open(max_hw_sectors_kb_path, 'r') as f: + max_hw_sectors_kb = f.read().strip() + return int(max_hw_sectors_kb) + except IOError as e: + log('Failed to read max_hw_sectors_kb to {}. Error: {}'.format( + max_hw_sectors_kb_path, e), level=ERROR) + return 0 + return 0 + + +def set_hdd_read_ahead(dev_name, read_ahead_sectors=256): + """This function sets the hard drive read ahead. + + :param dev_name: Name of the block device to set read ahead on. + :param read_ahead_sectors: int How many sectors to read ahead. + """ + try: + # Set the read ahead sectors to 256 + log('Setting read ahead to {} for device {}'.format( + read_ahead_sectors, + dev_name)) + subprocess.check_output(['hdparm', + '-a{}'.format(read_ahead_sectors), + dev_name]) + except subprocess.CalledProcessError as e: + log('hdparm failed with error: {}'.format(e.output), + level=ERROR) + + +def get_block_uuid(block_dev): + """This queries blkid to get the uuid for a block device. + + :param block_dev: Name of the block device to query. + :returns: The UUID of the device or None on Error. + """ + try: + block_info = str(subprocess + .check_output(['blkid', '-o', 'export', block_dev]) + .decode('UTF-8')) + for tag in block_info.split('\n'): + parts = tag.split('=') + if parts[0] == 'UUID': + return parts[1] + return None + except subprocess.CalledProcessError as err: + log('get_block_uuid failed with error: {}'.format(err.output), + level=ERROR) + return None + + +def check_max_sectors(save_settings_dict, + block_dev, + uuid): + """Tune the max_hw_sectors if needed. + + make sure that /sys/.../max_sectors_kb matches max_hw_sectors_kb or at + least 1MB for spinning disks + If the box has a RAID card with cache this could go much bigger. + + :param save_settings_dict: The dict used to persist settings + :param block_dev: A block device name: Example: /dev/sda + :param uuid: The uuid of the block device + """ + dev_name = None + path_parts = os.path.split(block_dev) + if len(path_parts) == 2: + dev_name = path_parts[1] + else: + log('Unable to determine the block device name from path: {}'.format( + block_dev)) + # Play it safe and bail + return + max_sectors_kb = get_max_sectors_kb(dev_name=dev_name) + max_hw_sectors_kb = get_max_hw_sectors_kb(dev_name=dev_name) + + if max_sectors_kb < max_hw_sectors_kb: + # OK we have a situation where the hardware supports more than Linux is + # currently requesting + config_max_sectors_kb = hookenv.config('max-sectors-kb') + if config_max_sectors_kb < max_hw_sectors_kb: + # Set the max_sectors_kb to the config.yaml value if it is less + # than the max_hw_sectors_kb + log('Setting max_sectors_kb for device {} to {}'.format( + dev_name, config_max_sectors_kb)) + save_settings_dict[ + "drive_settings"][uuid][ + "read_ahead_sect"] = config_max_sectors_kb + set_max_sectors_kb(dev_name=dev_name, + max_sectors_size=config_max_sectors_kb) + else: + # Set to the max_hw_sectors_kb + log('Setting max_sectors_kb for device {} to {}'.format( + dev_name, max_hw_sectors_kb)) + save_settings_dict[ + "drive_settings"][uuid]['read_ahead_sect'] = max_hw_sectors_kb + set_max_sectors_kb(dev_name=dev_name, + max_sectors_size=max_hw_sectors_kb) + else: + log('max_sectors_kb match max_hw_sectors_kb. 
No change needed for ' + 'device: {}'.format(block_dev)) + + +def tune_dev(block_dev): + """Try to make some intelligent decisions with HDD tuning. Future work will + include optimizing SSDs. + + This function will change the read ahead sectors and the max write + sectors for each block device. + + :param block_dev: A block device name: Example: /dev/sda + """ + uuid = get_block_uuid(block_dev) + if uuid is None: + log('block device {} uuid is None. Unable to save to ' + 'hdparm.conf'.format(block_dev), level=DEBUG) + return + save_settings_dict = {} + log('Tuning device {}'.format(block_dev)) + status_set('maintenance', 'Tuning device {}'.format(block_dev)) + set_hdd_read_ahead(block_dev) + save_settings_dict["drive_settings"] = {} + save_settings_dict["drive_settings"][uuid] = {} + save_settings_dict["drive_settings"][uuid]['read_ahead_sect'] = 256 + + check_max_sectors(block_dev=block_dev, + save_settings_dict=save_settings_dict, + uuid=uuid) + + persist_settings(settings_dict=save_settings_dict) + status_set('maintenance', 'Finished tuning device {}'.format(block_dev)) + + +def ceph_user(): + return 'ceph' + + +class CrushLocation(object): + def __init__(self, identifier, name, osd="", host="", chassis="", + rack="", row="", pdu="", pod="", room="", + datacenter="", zone="", region="", root=""): + self.identifier = identifier + self.name = name + self.osd = osd + self.host = host + self.chassis = chassis + self.rack = rack + self.row = row + self.pdu = pdu + self.pod = pod + self.room = room + self.datacenter = datacenter + self.zone = zone + self.region = region + self.root = root + + def __str__(self): + return "name: {} id: {} osd: {} host: {} chassis: {} rack: {} " \ + "row: {} pdu: {} pod: {} room: {} datacenter: {} zone: {} " \ + "region: {} root: {}".format(self.name, self.identifier, + self.osd, self.host, self.chassis, + self.rack, self.row, self.pdu, + self.pod, self.room, + self.datacenter, self.zone, + self.region, self.root) + + def __eq__(self, other): + return not self.name < other.name and not other.name < self.name + + def __ne__(self, other): + return self.name < other.name or other.name < self.name + + def __gt__(self, other): + return self.name > other.name + + def __ge__(self, other): + return not self.name < other.name + + def __le__(self, other): + return self.name < other.name + + +def get_osd_weight(osd_id): + """Returns the weight of the specified OSD. + + :returns: Float + :raises: ValueError if the monmap fails to parse. + :raises: CalledProcessError if our Ceph command fails. + """ + try: + tree = str(subprocess + .check_output(['ceph', 'osd', 'tree', '--format=json']) + .decode('UTF-8')) + try: + json_tree = json.loads(tree) + # Make sure children are present in the JSON + if not json_tree['nodes']: + return None + for device in json_tree['nodes']: + if device['type'] == 'osd' and device['name'] == osd_id: + return device['crush_weight'] + except ValueError as v: + log("Unable to parse ceph tree json: {}. Error: {}".format( + tree, v)) + raise + except subprocess.CalledProcessError as e: + log("ceph osd tree command failed with message: {}".format( + e)) + raise + + +def _filter_nodes_and_set_attributes(node, node_lookup_map, lookup_type): + """Get all nodes of the desired type, with all their attributes. + + These attributes can be direct or inherited from ancestors. 
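+
+    For example (with illustrative values): a host 'node1' placed under
+    root 'default' produces an entry such as
+    {'root': 'default', 'host': 'node1', 'name': 'node1',
+    'identifier': -2}, i.e. each ancestor contributes its type/name pair
+    to the matching node's dict.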
+ """ + attribute_dict = {node['type']: node['name']} + if node['type'] == lookup_type: + attribute_dict['name'] = node['name'] + attribute_dict['identifier'] = node['id'] + return [attribute_dict] + elif not node.get('children'): + return [attribute_dict] + else: + descendant_attribute_dicts = [ + _filter_nodes_and_set_attributes(node_lookup_map[node_id], + node_lookup_map, lookup_type) + for node_id in node.get('children', []) + ] + return [dict(attribute_dict, **descendant_attribute_dict) + for descendant_attribute_dict + in itertools.chain.from_iterable(descendant_attribute_dicts)] + + +def _flatten_roots(nodes, lookup_type='host'): + """Get a flattened list of nodes of the desired type. + + :param nodes: list of nodes defined as a dictionary of attributes and + children + :type nodes: List[Dict[int, Any]] + :param lookup_type: type of searched node + :type lookup_type: str + :returns: flattened list of nodes + :rtype: List[Dict[str, Any]] + """ + lookup_map = {node['id']: node for node in nodes} + root_attributes_dicts = [_filter_nodes_and_set_attributes(node, lookup_map, + lookup_type) + for node in nodes if node['type'] == 'root'] + # get a flattened list of roots. + return list(itertools.chain.from_iterable(root_attributes_dicts)) + + +def get_osd_tree(service): + """Returns the current OSD map in JSON. + + :returns: List. + :rtype: List[CrushLocation] + :raises: ValueError if the monmap fails to parse. + Also raises CalledProcessError if our Ceph command fails + """ + try: + tree = str(subprocess + .check_output(['ceph', '--id', service, + 'osd', 'tree', '--format=json']) + .decode('UTF-8')) + try: + json_tree = json.loads(tree) + roots = _flatten_roots(json_tree["nodes"]) + return [CrushLocation(**host) for host in roots] + except ValueError as v: + log("Unable to parse ceph tree json: {}. Error: {}".format( + tree, v)) + raise + except subprocess.CalledProcessError as e: + log("ceph osd tree command failed with message: {}".format(e)) + raise + + +def _get_child_dirs(path): + """Returns a list of directory names in the specified path. + + :param path: a full path listing of the parent directory to return child + directory names + :returns: list. A list of child directories under the parent directory + :raises: ValueError if the specified path does not exist or is not a + directory, + OSError if an error occurs reading the directory listing + """ + if not os.path.exists(path): + raise ValueError('Specified path "%s" does not exist' % path) + if not os.path.isdir(path): + raise ValueError('Specified path "%s" is not a directory' % path) + + files_in_dir = [os.path.join(path, f) for f in os.listdir(path)] + return list(filter(os.path.isdir, files_in_dir)) + + +def _get_osd_num_from_dirname(dirname): + """Parses the dirname and returns the OSD id. + + Parses a string in the form of 'ceph-{osd#}' and returns the OSD number + from the directory name. + + :param dirname: the directory name to return the OSD number from + :return int: the OSD number the directory name corresponds to + :raises ValueError: if the OSD number cannot be parsed from the provided + directory name. 
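+
+    Example (illustrative): _get_osd_num_from_dirname('ceph-12')
+    returns '12'.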
+    """
+    match = re.search(r'ceph-(?P<osd_id>\d+)', dirname)
+    if not match:
+        raise ValueError("dirname not in correct format: {}".format(dirname))
+
+    return match.group('osd_id')
+
+
+def get_crimson_osd_ids():
+    """Return a set of the OSDs that are running with the Crimson backend."""
+    rv = set()
+    try:
+        out = subprocess.check_output(['pgrep', 'crimson-osd', '-a'])
+        for line in out.decode('utf8').splitlines():
+            rv.add(line.split()[-1])
+    except Exception:
+        pass
+
+    return rv
+
+
+def get_local_osd_ids():
+    """This will list the /var/lib/ceph/osd/* directories and try
+    to split the ID off of the directory name and return it in
+    a list. Excludes Crimson OSDs from the returned list.
+
+    :returns: list. A list of OSD identifiers
+    :raises: OSError if something goes wrong with listing the directory.
+    """
+    osd_ids = []
+    crimson_osds = get_crimson_osd_ids()
+    osd_path = os.path.join(os.sep, 'var', 'lib', 'ceph', 'osd')
+    if os.path.exists(osd_path):
+        try:
+            dirs = os.listdir(osd_path)
+            for osd_dir in dirs:
+                osd_id = osd_dir.split('-')[1] if '-' in osd_dir else ''
+                if (_is_int(osd_id) and
+                        filesystem_mounted(os.path.join(
+                            os.sep, osd_path, osd_dir)) and
+                        osd_id not in crimson_osds):
+                    osd_ids.append(osd_id)
+        except OSError:
+            raise
+    return osd_ids
+
+
+def get_local_mon_ids():
+    """This will list the /var/lib/ceph/mon/* directories and try
+    to split the ID off of the directory name and return it in
+    a list.
+
+    :returns: list. A list of monitor identifiers
+    :raises: OSError if something goes wrong with listing the directory.
+    """
+    mon_ids = []
+    mon_path = os.path.join(os.sep, 'var', 'lib', 'ceph', 'mon')
+    if os.path.exists(mon_path):
+        try:
+            dirs = os.listdir(mon_path)
+            for mon_dir in dirs:
+                # Basically this takes everything after ceph- as the
+                # monitor ID
+                match = re.search(r'ceph-(?P<mon_id>.*)', mon_dir)
+                if match:
+                    mon_ids.append(match.group('mon_id'))
+        except OSError:
+            raise
+    return mon_ids
+
+
+def _is_int(v):
+    """Return True if the object v can be turned into an integer."""
+    try:
+        int(v)
+        return True
+    except ValueError:
+        return False
+
+
+def get_version():
+    """Derive Ceph release from an installed package."""
+    import apt_pkg as apt
+
+    package = "ceph"
+
+    current_ver = get_installed_version(package)
+    if not current_ver:
+        # package is known, but no version is currently installed.
+ e = 'Could not determine version of uninstalled package: %s' % package + error_out(e) + + vers = apt.upstream_version(current_ver.ver_str) + + # x.y match only for 20XX.X + # and ignore patch level for other packages + match = re.match(r'^(\d+)\.(\d+)', vers) + + if match: + vers = match.group(0) + return float(vers) + + +def error_out(msg): + log("FATAL ERROR: {}".format(msg), + level=ERROR) + sys.exit(1) + + +def is_quorum(): + asok = "/var/run/ceph/ceph-mon.{}.asok".format(socket.gethostname()) + cmd = [ + "sudo", + "-u", + ceph_user(), + "ceph", + "--admin-daemon", + asok, + "mon_status" + ] + if os.path.exists(asok): + try: + result = json.loads(str(subprocess + .check_output(cmd) + .decode('UTF-8'))) + except subprocess.CalledProcessError: + return False + except ValueError: + # Non JSON response from mon_status + return False + if result['state'] in QUORUM: + return True + else: + return False + else: + return False + + +def is_leader(): + asok = "/var/run/ceph/ceph-mon.{}.asok".format(socket.gethostname()) + cmd = [ + "sudo", + "-u", + ceph_user(), + "ceph", + "--admin-daemon", + asok, + "mon_status" + ] + if os.path.exists(asok): + try: + result = json.loads(str(subprocess + .check_output(cmd) + .decode('UTF-8'))) + except subprocess.CalledProcessError: + return False + except ValueError: + # Non JSON response from mon_status + return False + if result['state'] == LEADER: + return True + else: + return False + else: + return False + + +def manager_available(): + # if manager daemon isn't on this release, just say it is Fine + if cmp_pkgrevno('ceph', '11.0.0') < 0: + return True + cmd = ["sudo", "-u", "ceph", "ceph", "mgr", "dump", "-f", "json"] + try: + result = json.loads(subprocess.check_output(cmd).decode('UTF-8')) + return result['available'] + except subprocess.CalledProcessError as e: + log("'{}' failed: {}".format(" ".join(cmd), str(e))) + return False + except Exception: + return False + + +def wait_for_quorum(): + while not is_quorum(): + log("Waiting for quorum to be reached") + time.sleep(3) + + +def wait_for_manager(): + while not manager_available(): + log("Waiting for manager to be available") + time.sleep(5) + + +def add_bootstrap_hint(peer): + asok = "/var/run/ceph/ceph-mon.{}.asok".format(socket.gethostname()) + cmd = [ + "sudo", + "-u", + ceph_user(), + "ceph", + "--admin-daemon", + asok, + "add_bootstrap_peer_hint", + peer + ] + if os.path.exists(asok): + # Ignore any errors for this call + subprocess.call(cmd) + + +DISK_FORMATS = [ + 'xfs', + 'ext4', + 'btrfs' +] + +CEPH_PARTITIONS = [ + '89C57F98-2FE5-4DC0-89C1-5EC00CEFF2BE', # Ceph encrypted disk in creation + '45B0969E-9B03-4F30-B4C6-5EC00CEFF106', # Ceph encrypted journal + '4FBD7E29-9D25-41B8-AFD0-5EC00CEFF05D', # Ceph encrypted OSD data + '4FBD7E29-9D25-41B8-AFD0-062C0CEFF05D', # Ceph OSD data + '45B0969E-9B03-4F30-B4C6-B4B80CEFF106', # Ceph OSD journal + '89C57F98-2FE5-4DC0-89C1-F3AD0CEFF2BE', # Ceph disk in creation +] + + +def get_partition_list(dev): + """Lists the partitions of a block device. + + :param dev: Path to a block device. ex: /dev/sda + :returns: Returns a list of Partition objects. 
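+              (Illustrative note: each Partition is built from one line
+              of 'partx --raw --noheadings' output for the device.)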
+ :raises: CalledProcessException if lsblk fails + """ + partitions_list = [] + try: + partitions = get_partitions(dev) + # For each line of output + for partition in partitions: + parts = partition.split() + try: + partitions_list.append( + Partition(number=parts[0], + start=parts[1], + end=parts[2], + sectors=parts[3], + size=parts[4], + name=parts[5], + uuid=parts[6]) + ) + except IndexError: + partitions_list.append( + Partition(number=parts[0], + start=parts[1], + end=parts[2], + sectors=parts[3], + size=parts[4], + name="", + uuid=parts[5]) + ) + + return partitions_list + except subprocess.CalledProcessError: + raise + + +def is_pristine_disk(dev): + """ + Read first 2048 bytes (LBA 0 - 3) of block device to determine whether it + is actually all zeros and safe for us to use. + + Existing partitioning tools does not discern between a failure to read from + block device, failure to understand a partition table and the fact that a + block device has no partition table. Since we need to be positive about + which is which we need to read the device directly and confirm ourselves. + + :param dev: Path to block device + :type dev: str + :returns: True all 2048 bytes == 0x0, False if not + :rtype: bool + """ + want_bytes = 2048 + + try: + f = open(dev, 'rb') + except OSError as e: + log(e) + return False + + data = f.read(want_bytes) + read_bytes = len(data) + if read_bytes != want_bytes: + log('{}: short read, got {} bytes expected {}.' + .format(dev, read_bytes, want_bytes), level=WARNING) + return False + + return all(byte == 0x0 for byte in data) + + +def is_osd_disk(dev): + db = kv() + osd_devices = db.get('osd-devices', []) + if dev in osd_devices: + log('Device {} already processed by charm,' + ' skipping'.format(dev)) + return True + + partitions = get_partition_list(dev) + for partition in partitions: + try: + info = str(subprocess + .check_output(['sgdisk', '-i', partition.number, dev]) + .decode('UTF-8')) + info = info.split("\n") # IGNORE:E1103 + for line in info: + for ptype in CEPH_PARTITIONS: + sig = 'Partition GUID code: {}'.format(ptype) + if line.startswith(sig): + return True + except subprocess.CalledProcessError as e: + log("sgdisk inspection of partition {} on {} failed with " + "error: {}. 
Skipping".format(partition.minor, dev, e), + level=ERROR) + return False + + +def start_osds(devices): + # Scan for Ceph block devices + rescan_osd_devices() + if (cmp_pkgrevno('ceph', '0.56.6') >= 0 and + cmp_pkgrevno('ceph', '14.2.0') < 0): + # Use ceph-disk activate for directory based OSD's + for dev_or_path in devices: + if os.path.exists(dev_or_path) and os.path.isdir(dev_or_path): + subprocess.check_call( + ['ceph-disk', 'activate', dev_or_path]) + + +def udevadm_settle(): + cmd = ['udevadm', 'settle'] + subprocess.call(cmd) + + +def rescan_osd_devices(): + cmd = [ + 'udevadm', 'trigger', + '--subsystem-match=block', '--action=add' + ] + + subprocess.call(cmd) + + udevadm_settle() + + +_client_admin_keyring = '/etc/ceph/ceph.client.admin.keyring' + + +def is_bootstrapped(): + return os.path.exists( + '/var/lib/ceph/mon/ceph-{}/done'.format(socket.gethostname())) + + +def wait_for_bootstrap(): + while not is_bootstrapped(): + time.sleep(3) + + +def generate_monitor_secret(): + cmd = [ + 'ceph-authtool', + '/dev/stdout', + '--name=mon.', + '--gen-key' + ] + res = str(subprocess.check_output(cmd).decode('UTF-8')) + + return "{}==".format(res.split('=')[1].strip()) + + +# OSD caps taken from ceph-create-keys +_osd_bootstrap_caps = { + 'mon': [ + 'allow command osd create ...', + 'allow command osd crush set ...', + r'allow command auth add * osd allow\ * mon allow\ rwx', + 'allow command mon getmap' + ] +} + +_osd_bootstrap_caps_profile = { + 'mon': [ + 'allow profile bootstrap-osd' + ] +} + + +def parse_key(raw_key): + # get-or-create appears to have different output depending + # on whether its 'get' or 'create' + # 'create' just returns the key, 'get' is more verbose and + # needs parsing + key = None + if len(raw_key.splitlines()) == 1: + key = raw_key + else: + for element in raw_key.splitlines(): + if 'key' in element: + return element.split(' = ')[1].strip() # IGNORE:E1103 + return key + + +def get_osd_bootstrap_key(): + try: + # Attempt to get/create a key using the OSD bootstrap profile first + key = get_named_key('bootstrap-osd', + _osd_bootstrap_caps_profile) + except Exception: + # If that fails try with the older style permissions + key = get_named_key('bootstrap-osd', + _osd_bootstrap_caps) + return key + + +_radosgw_keyring = "/etc/ceph/keyring.rados.gateway" + + +def import_radosgw_key(key): + if not os.path.exists(_radosgw_keyring): + cmd = [ + "sudo", + "-u", + ceph_user(), + 'ceph-authtool', + _radosgw_keyring, + '--create-keyring', + '--name=client.radosgw.gateway', + '--add-key={}'.format(key) + ] + subprocess.check_call(cmd) + + +# OSD caps taken from ceph-create-keys +_radosgw_caps = { + 'mon': ['allow rw'], + 'osd': ['allow rwx'] +} +_upgrade_caps = { + 'mon': ['allow rwx'] +} + + +def get_radosgw_key(pool_list=None, name=None): + return get_named_key(name=name or 'radosgw.gateway', + caps=_radosgw_caps, + pool_list=pool_list) + + +def get_mds_key(name): + return create_named_keyring(entity='mds', + name=name, + caps=mds_caps) + + +_mds_bootstrap_caps_profile = { + 'mon': [ + 'allow profile bootstrap-mds' + ] +} + + +def get_mds_bootstrap_key(): + return get_named_key('bootstrap-mds', + _mds_bootstrap_caps_profile) + + +_default_caps = collections.OrderedDict([ + ('mon', ['allow r', + 'allow command "osd blacklist"', + 'allow command "osd blocklist"']), + ('osd', ['allow rwx']), +]) + +admin_caps = collections.OrderedDict([ + ('mds', ['allow *']), + ('mgr', ['allow *']), + ('mon', ['allow *']), + ('osd', ['allow *']) +]) + +mds_caps = collections.OrderedDict([ + 
('osd', ['allow *']), + ('mds', ['allow']), + ('mon', ['allow rwx']), +]) + +osd_upgrade_caps = collections.OrderedDict([ + ('mon', ['allow command "config-key"', + 'allow command "osd tree"', + 'allow command "config-key list"', + 'allow command "config-key put"', + 'allow command "config-key get"', + 'allow command "config-key exists"', + 'allow command "osd out"', + 'allow command "osd in"', + 'allow command "osd rm"', + 'allow command "auth del"', + ]) +]) + +rbd_mirror_caps = collections.OrderedDict([ + ('mon', ['allow profile rbd-mirror-peer', + 'allow command "service dump"', + 'allow command "service status"' + ]), + ('osd', ['profile rbd']), + ('mgr', ['allow r']), +]) + + +def get_rbd_mirror_key(name): + return get_named_key(name=name, caps=rbd_mirror_caps) + + +def create_named_keyring(entity, name, caps=None): + caps = caps or _default_caps + cmd = [ + "sudo", + "-u", + ceph_user(), + 'ceph', + '--name', 'mon.', + '--keyring', + '/var/lib/ceph/mon/ceph-{}/keyring'.format( + socket.gethostname() + ), + 'auth', 'get-or-create', '{entity}.{name}'.format(entity=entity, + name=name), + ] + for subsystem, subcaps in caps.items(): + cmd.extend([subsystem, '; '.join(subcaps)]) + log("Calling check_output: {}".format(cmd), level=DEBUG) + return (parse_key(str(subprocess + .check_output(cmd) + .decode('UTF-8')) + .strip())) # IGNORE:E1103 + + +def get_upgrade_key(): + return get_named_key('upgrade-osd', _upgrade_caps) + + +def is_internal_client(name): + keys = ('osd-upgrade', 'osd-removal', 'admin', 'rbd-mirror', 'mds') + return any(name.startswith(key) for key in keys) + + +def get_named_key(name, caps=None, pool_list=None): + """Retrieve a specific named cephx key. + + :param name: String Name of key to get. + :param pool_list: The list of pools to give access to + :param caps: dict of cephx capabilities + :returns: Returns a cephx key + """ + caps = caps or _default_caps + key_name = 'client.{}'.format(name) + + key = ceph_auth_get(key_name) + if key: + if is_internal_client(name): + upgrade_key_caps(key_name, caps) + return key + + log("Creating new key for {}".format(name), level=DEBUG) + cmd = [ + "sudo", + "-u", + ceph_user(), + 'ceph', + '--name', 'mon.', + '--keyring', + '/var/lib/ceph/mon/ceph-{}/keyring'.format( + socket.gethostname() + ), + 'auth', 'get-or-create', key_name, + ] + # Add capabilities + for subsystem, subcaps in caps.items(): + if subsystem == 'osd': + if pool_list: + # This will output a string similar to: + # "pool=rgw pool=rbd pool=something" + pools = " ".join(['pool={0}'.format(i) for i in pool_list]) + subcaps[0] = subcaps[0] + " " + pools + cmd.extend([subsystem, '; '.join(subcaps)]) + ceph_auth_get.cache_clear() + + log("Calling check_output: {}".format(cmd), level=DEBUG) + return parse_key(str(subprocess + .check_output(cmd) + .decode('UTF-8')) + .strip()) # IGNORE:E1103 + + +@functools.lru_cache() +def ceph_auth_get(key_name): + try: + # Does the key already exist? 
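+        # Illustrative note: 'ceph auth get client.<name>' prints a full
+        # keyring entry; parse_key() below reduces it to the bare secret.
+        # Results are memoized by functools.lru_cache and invalidated via
+        # ceph_auth_get.cache_clear() after key creation in
+        # get_named_key().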
+ output = str(subprocess.check_output( + [ + 'sudo', + '-u', ceph_user(), + 'ceph', + '--name', 'mon.', + '--keyring', + '/var/lib/ceph/mon/ceph-{}/keyring'.format( + socket.gethostname() + ), + 'auth', + 'get', + key_name, + ]).decode('UTF-8')).strip() + return parse_key(output) + except subprocess.CalledProcessError: + # Couldn't get the key + pass + + +def upgrade_key_caps(key, caps, pool_list=None): + """Upgrade key to have capabilities caps""" + if not is_leader(): + # Not the MON leader OR not clustered + return + cmd = [ + "sudo", "-u", ceph_user(), 'ceph', 'auth', 'caps', key + ] + for subsystem, subcaps in caps.items(): + if subsystem == 'osd': + if pool_list: + # This will output a string similar to: + # "pool=rgw pool=rbd pool=something" + pools = " ".join(['pool={0}'.format(i) for i in pool_list]) + subcaps[0] = subcaps[0] + " " + pools + cmd.extend([subsystem, '; '.join(subcaps)]) + subprocess.check_call(cmd) + + +@cached +def systemd(): + return CompareHostReleases(lsb_release()['DISTRIB_CODENAME']) >= 'vivid' + + +def bootstrap_monitor_cluster(secret): + """Bootstrap local Ceph mon into the Ceph cluster + + :param secret: cephx secret to use for monitor authentication + :type secret: str + :raises: Exception if Ceph mon cannot be bootstrapped + """ + hostname = socket.gethostname() + path = '/var/lib/ceph/mon/ceph-{}'.format(hostname) + done = '{}/done'.format(path) + if systemd(): + init_marker = '{}/systemd'.format(path) + else: + init_marker = '{}/upstart'.format(path) + + keyring = '/var/lib/ceph/tmp/{}.mon.keyring'.format(hostname) + + if os.path.exists(done): + log('bootstrap_monitor_cluster: mon already initialized.') + else: + # Ceph >= 0.61.3 needs this for ceph-mon fs creation + mkdir('/var/run/ceph', owner=ceph_user(), + group=ceph_user(), perms=0o755) + mkdir(path, owner=ceph_user(), group=ceph_user(), + perms=0o755) + # end changes for Ceph >= 0.61.3 + try: + _create_monitor(keyring, + secret, + hostname, + path, + done, + init_marker) + except Exception: + raise + finally: + os.unlink(keyring) + + +def _create_monitor(keyring, secret, hostname, path, done, init_marker): + """Create monitor filesystem and enable and start ceph-mon process + + :param keyring: path to temporary keyring on disk + :type keyring: str + :param secret: cephx secret to use for monitor authentication + :type: secret: str + :param hostname: hostname of the local unit + :type hostname: str + :param path: full path to Ceph mon directory + :type path: str + :param done: full path to 'done' marker for Ceph mon + :type done: str + :param init_marker: full path to 'init' marker for Ceph mon + :type init_marker: str + """ + subprocess.check_call(['ceph-authtool', keyring, + '--create-keyring', '--name=mon.', + '--add-key={}'.format(secret), + '--cap', 'mon', 'allow *']) + subprocess.check_call(['ceph-mon', '--mkfs', + '-i', hostname, + '--keyring', keyring]) + chownr('/var/log/ceph', ceph_user(), ceph_user()) + chownr(path, ceph_user(), ceph_user()) + with open(done, 'w'): + pass + with open(init_marker, 'w'): + pass + + if systemd(): + if cmp_pkgrevno('ceph', '14.0.0') >= 0: + systemd_unit = 'ceph-mon@{}'.format(socket.gethostname()) + else: + systemd_unit = 'ceph-mon' + subprocess.check_call(['systemctl', 'enable', systemd_unit]) + service_restart(systemd_unit) + else: + service_restart('ceph-mon-all') + + +def create_keyrings(): + """Create keyrings for operation of ceph-mon units + + NOTE: The quorum should be done before to execute this function. 
+ + :raises: Exception if keyrings cannot be created + """ + if cmp_pkgrevno('ceph', '14.0.0') >= 0: + # NOTE(jamespage): At Nautilus, keys are created by the + # monitors automatically and just need + # exporting. + output = str(subprocess.check_output( + [ + 'sudo', + '-u', ceph_user(), + 'ceph', + '--name', 'mon.', + '--keyring', + '/var/lib/ceph/mon/ceph-{}/keyring'.format( + socket.gethostname() + ), + 'auth', 'get', 'client.admin', + ]).decode('UTF-8')).strip() + if not output: + # NOTE: key not yet created, raise exception and retry + raise Exception + # NOTE: octopus wants newline at end of file LP: #1864706 + output += '\n' + write_file(_client_admin_keyring, output, + owner=ceph_user(), group=ceph_user(), + perms=0o400) + else: + # NOTE(jamespage): Later Ceph releases require explicit + # call to ceph-create-keys to setup the + # admin keys for the cluster; this command + # will wait for quorum in the cluster before + # returning. + # NOTE(fnordahl): Explicitly run `ceph-create-keys` for older + # Ceph releases too. This improves bootstrap + # resilience as the charm will wait for + # presence of peer units before attempting + # to bootstrap. Note that charms deploying + # ceph-mon service should disable running of + # `ceph-create-keys` service in init system. + cmd = ['ceph-create-keys', '--id', socket.gethostname()] + if cmp_pkgrevno('ceph', '12.0.0') >= 0: + # NOTE(fnordahl): The default timeout in ceph-create-keys of 600 + # seconds is not adequate. Increase timeout when + # timeout parameter available. For older releases + # we rely on retry_on_exception decorator. + # LP#1719436 + cmd.extend(['--timeout', '1800']) + subprocess.check_call(cmd) + osstat = os.stat(_client_admin_keyring) + if not osstat.st_size: + # NOTE(fnordahl): Retry will fail as long as this file exists. + # LP#1719436 + os.remove(_client_admin_keyring) + raise Exception + + +def update_monfs(): + hostname = socket.gethostname() + monfs = '/var/lib/ceph/mon/ceph-{}'.format(hostname) + if systemd(): + init_marker = '{}/systemd'.format(monfs) + else: + init_marker = '{}/upstart'.format(monfs) + if os.path.exists(monfs) and not os.path.exists(init_marker): + # Mark mon as managed by upstart so that + # it gets start correctly on reboots + with open(init_marker, 'w'): + pass + + +def get_partitions(dev): + cmd = ['partx', '--raw', '--noheadings', dev] + try: + out = str(subprocess.check_output(cmd).decode('UTF-8')).splitlines() + log("get partitions: {}".format(out), level=DEBUG) + return out + except subprocess.CalledProcessError as e: + log("Can't get info for {0}: {1}".format(dev, e.output)) + return [] + + +def get_lvs(dev): + """ + List logical volumes for the provided block device + + :param: dev: Full path to block device. + :raises subprocess.CalledProcessError: in the event that any supporting + operation failed. + :returns: list: List of logical volumes provided by the block device + """ + if not lvm.is_lvm_physical_volume(dev): + return [] + vg_name = lvm.list_lvm_volume_group(dev) + return lvm.list_logical_volumes('vg_name={}'.format(vg_name)) + + +def find_least_used_utility_device(utility_devices, lvs=False): + """ + Find a utility device which has the smallest number of partitions + among other devices in the supplied list. + + :utility_devices: A list of devices to be used for filestore journal + or bluestore wal or db. 
+ :lvs: flag to indicate whether inspection should be based on LVM LV's + :return: string device name + """ + if lvs: + usages = map(lambda a: (len(get_lvs(a)), a), utility_devices) + else: + usages = map(lambda a: (len(get_partitions(a)), a), utility_devices) + least = min(usages, key=lambda t: t[0]) + return least[1] + + +def get_devices(name): + """Merge config and Juju storage based devices + + :name: The name of the device type, e.g.: wal, osd, journal + :returns: Set(device names), which are strings + """ + if config(name): + devices = [dev.strip() for dev in config(name).split(' ')] + else: + devices = [] + storage_ids = storage_list(name) + devices.extend((storage_get('location', sid) for sid in storage_ids)) + devices = filter(os.path.exists, devices) + + return set(devices) + + +def osdize(dev, osd_format, osd_journal, ignore_errors=False, encrypt=False, + key_manager=CEPH_KEY_MANAGER, osd_id=None): + if dev.startswith('/dev'): + osdize_dev(dev, osd_format, osd_journal, + ignore_errors, encrypt, + key_manager, osd_id) + else: + if cmp_pkgrevno('ceph', '14.0.0') >= 0: + log("Directory backed OSDs can not be created on Nautilus", + level=WARNING) + return + osdize_dir(dev, encrypt) + + +def osdize_dev(dev, osd_format, osd_journal, ignore_errors=False, + encrypt=False, key_manager=CEPH_KEY_MANAGER, + osd_id=None): + """ + Prepare a block device for use as a Ceph OSD + + A block device will only be prepared once during the lifetime + of the calling charm unit; future executions will be skipped. + + :param: dev: Full path to block device to use + :param: osd_format: Format for OSD filesystem + :param: osd_journal: List of block devices to use for OSD journals + :param: ignore_errors: Don't fail in the event of any errors during + processing + :param: encrypt: Encrypt block devices using 'key_manager' + :param: key_manager: Key management approach for encryption keys + :raises subprocess.CalledProcessError: in the event that any supporting + subprocess operation failed + :raises ValueError: if an invalid key_manager is provided + """ + if key_manager not in KEY_MANAGERS: + raise ValueError('Unsupported key manager: {}'.format(key_manager)) + + db = kv() + osd_devices = db.get('osd-devices', []) + try: + if dev in osd_devices: + log('Device {} already processed by charm,' + ' skipping'.format(dev)) + return + + if not os.path.exists(dev): + log('Path {} does not exist - bailing'.format(dev)) + return + + if not is_block_device(dev): + log('Path {} is not a block device - bailing'.format(dev)) + return + + if is_osd_disk(dev): + log('Looks like {} is already an' + ' OSD data or journal, skipping.'.format(dev)) + if is_device_mounted(dev): + osd_devices.append(dev) + return + + if is_device_mounted(dev): + log('Looks like {} is in use, skipping.'.format(dev)) + return + + if is_active_bluestore_device(dev): + log('{} is in use as an active bluestore block device,' + ' skipping.'.format(dev)) + osd_devices.append(dev) + return + + if is_mapped_luks_device(dev): + log('{} is a mapped LUKS device,' + ' skipping.'.format(dev)) + return + + if cmp_pkgrevno('ceph', '12.2.4') >= 0: + cmd = _ceph_volume(dev, + osd_journal, + encrypt, + key_manager, + osd_id) + else: + cmd = _ceph_disk(dev, + osd_format, + osd_journal, + encrypt) + + try: + status_set('maintenance', 'Initializing device {}'.format(dev)) + log("osdize cmd: {}".format(cmd)) + subprocess.check_call(cmd) + except subprocess.CalledProcessError: + try: + lsblk_output = subprocess.check_output( + ['lsblk', '-P']).decode('UTF-8') + except 
subprocess.CalledProcessError as e:
+                log("Couldn't get lsblk output: {}".format(e), ERROR)
+                # Ensure the name is bound for the checks below even when
+                # lsblk itself failed.
+                lsblk_output = None
+            if ignore_errors:
+                log('Unable to initialize device: {}'.format(dev), WARNING)
+                if lsblk_output:
+                    log('lsblk output: {}'.format(lsblk_output), DEBUG)
+            else:
+                log('Unable to initialize device: {}'.format(dev), ERROR)
+                if lsblk_output:
+                    log('lsblk output: {}'.format(lsblk_output), WARNING)
+                raise
+
+        # NOTE: Record processing of device only on success to ensure that
+        #       the charm only tries to initialize a device of OSD usage
+        #       once during its lifetime.
+        osd_devices.append(dev)
+    finally:
+        db.set('osd-devices', osd_devices)
+        db.flush()
+
+
+def _ceph_disk(dev, osd_format, osd_journal, encrypt=False):
+    """
+    Prepare a device for usage as a Ceph OSD using ceph-disk
+
+    :param: dev: Full path to use for OSD block device setup,
+                 The function looks up realpath of the device
+    :param: osd_journal: List of block devices to use for OSD journals
+    :param: encrypt: Use block device encryption (unsupported)
+    :returns: list. 'ceph-disk' command and required parameters for
+                    execution by check_call
+    """
+    cmd = ['ceph-disk', 'prepare']
+
+    if encrypt:
+        cmd.append('--dmcrypt')
+
+    cmd.append('--bluestore')
+    wal = get_devices('bluestore-wal')
+    if wal:
+        cmd.append('--block.wal')
+        least_used_wal = find_least_used_utility_device(wal)
+        cmd.append(least_used_wal)
+    db = get_devices('bluestore-db')
+    if db:
+        cmd.append('--block.db')
+        least_used_db = find_least_used_utility_device(db)
+        cmd.append(least_used_db)
+
+    cmd.append(os.path.realpath(dev))
+
+    if osd_journal:
+        least_used = find_least_used_utility_device(osd_journal)
+        cmd.append(least_used)
+
+    return cmd
+
+
+def _ceph_volume(dev, osd_journal, encrypt=False, key_manager=CEPH_KEY_MANAGER,
+                 osd_id=None):
+    """
+    Prepare and activate a device for usage as a Ceph OSD using ceph-volume.
+
+    This also includes creation of all PV's, VG's and LV's required to
+    support the initialization of the OSD.
+
+    :param: dev: Full path to use for OSD block device setup
+    :param: osd_journal: List of block devices to use for OSD journals
+    :param: encrypt: Use block device encryption
+    :param: key_manager: dm-crypt Key Manager to use
+    :param: osd_id: The OSD-id to recycle, or None to create a new one
+    :raises subprocess.CalledProcessError: in the event that any supporting
+                                           LVM operation failed.
+    :returns: list.
'ceph-volume' command and required parameters for + execution by check_call + """ + cmd = ['ceph-volume', 'lvm', 'create'] + + osd_fsid = str(uuid.uuid4()) + cmd.append('--osd-fsid') + cmd.append(osd_fsid) + cmd.append('--bluestore') + main_device_type = 'block' + + if encrypt and key_manager == CEPH_KEY_MANAGER: + cmd.append('--dmcrypt') + + if osd_id is not None: + cmd.extend(['--osd-id', str(osd_id)]) + + cmd.append('--data') + cmd.append(_allocate_logical_volume(dev=dev, + lv_type=main_device_type, + osd_fsid=osd_fsid, + encrypt=encrypt, + key_manager=key_manager)) + + for extra_volume in ('wal', 'db'): + devices = get_devices('bluestore-{}'.format(extra_volume)) + if devices: + cmd.append('--block.{}'.format(extra_volume)) + least_used = find_least_used_utility_device(devices, + lvs=True) + cmd.append(_allocate_logical_volume( + dev=least_used, + lv_type=extra_volume, + osd_fsid=osd_fsid, + size='{}M'.format(calculate_volume_size(extra_volume)), + shared=True, + encrypt=encrypt, + key_manager=key_manager) + ) + + return cmd + + +def _partition_name(dev): + """ + Derive the first partition name for a block device + + :param: dev: Full path to block device. + :returns: str: Full path to first partition on block device. + """ + if dev[-1].isdigit(): + return '{}p1'.format(dev) + else: + return '{}1'.format(dev) + + +def is_active_bluestore_device(dev): + """ + Determine whether provided device is part of an active + bluestore based OSD (as its block component). + + :param: dev: Full path to block device to check for Bluestore usage. + :returns: boolean: indicating whether device is in active use. + """ + if not lvm.is_lvm_physical_volume(dev): + return False + + vg_name = lvm.list_lvm_volume_group(dev) + try: + lv_name = lvm.list_logical_volumes('vg_name={}'.format(vg_name))[0] + except IndexError: + return False + + block_symlinks = glob.glob('/var/lib/ceph/osd/ceph-*/block') + for block_candidate in block_symlinks: + if os.path.islink(block_candidate): + target = os.readlink(block_candidate) + if target.endswith(lv_name): + return True + + return False + + +def is_luks_device(dev): + """ + Determine if dev is a LUKS-formatted block device. + + :param: dev: A full path to a block device to check for LUKS header + presence + :returns: boolean: indicates whether a device is used based on LUKS header. + """ + return True if _luks_uuid(dev) else False + + +def is_mapped_luks_device(dev): + """ + Determine if dev is a mapped LUKS device + :param: dev: A full path to a block device to be checked + :returns: boolean: indicates whether a device is mapped + """ + _, dirs, _ = next(os.walk( + '/sys/class/block/{}/holders/' + .format(os.path.basename(os.path.realpath(dev)))) + ) + is_held = len(dirs) > 0 + return is_held and is_luks_device(dev) + + +def get_conf(variable): + """ + Get the value of the given configuration variable from the + cluster. + + :param variable: Ceph configuration variable + :returns: str. configured value for provided variable + + """ + return subprocess.check_output([ + 'ceph-osd', + '--show-config-value={}'.format(variable), + '--no-mon-config', + ]).strip() + + +def calculate_volume_size(lv_type): + """ + Determine the configured size for Bluestore DB/WAL or + Filestore Journal devices + + :param lv_type: volume type (db, wal or journal) + :raises KeyError: if invalid lv_type is supplied + :returns: int. 
Configured size in megabytes for volume type + """ + # lv_type -> Ceph configuration option + _config_map = { + 'db': 'bluestore_block_db_size', + 'wal': 'bluestore_block_wal_size', + 'journal': 'osd_journal_size', + } + + # default sizes in MB + _default_size = { + 'db': 1024, + 'wal': 576, + 'journal': 1024, + } + + # conversion of Ceph config units to MB + _units = { + 'db': 1048576, # Bytes -> MB + 'wal': 1048576, # Bytes -> MB + 'journal': 1, # Already in MB + } + + configured_size = get_conf(_config_map[lv_type]) + + if configured_size is None or int(configured_size) == 0: + return _default_size[lv_type] + else: + return int(configured_size) / _units[lv_type] + + +def _luks_uuid(dev): + """ + Check to see if dev is a LUKS encrypted volume, returning the UUID + of volume if it is. + + :param: dev: path to block device to check. + :returns: str. UUID of LUKS device or None if not a LUKS device + """ + try: + cmd = ['cryptsetup', 'luksUUID', dev] + return subprocess.check_output(cmd).decode('UTF-8').strip() + except subprocess.CalledProcessError: + return None + + +def _initialize_disk(dev, dev_uuid, encrypt=False, + key_manager=CEPH_KEY_MANAGER): + """ + Initialize a raw block device consuming 100% of the available + disk space. + + Function assumes that block device has already been wiped. + + :param: dev: path to block device to initialize + :param: dev_uuid: UUID to use for any dm-crypt operations + :param: encrypt: Encrypt OSD devices using dm-crypt + :param: key_manager: Key management approach for dm-crypt keys + :raises: subprocess.CalledProcessError: if any parted calls fail + :returns: str: Full path to new partition. + """ + use_vaultlocker = encrypt and key_manager == VAULT_KEY_MANAGER + + if use_vaultlocker: + # NOTE(jamespage): Check to see if already initialized as a LUKS + # volume, which indicates this is a shared block + # device for journal, db or wal volumes. + luks_uuid = _luks_uuid(dev) + if luks_uuid: + return '/dev/mapper/crypt-{}'.format(luks_uuid) + + dm_crypt = '/dev/mapper/crypt-{}'.format(dev_uuid) + + if use_vaultlocker and not os.path.exists(dm_crypt): + subprocess.check_call([ + 'vaultlocker', + 'encrypt', + '--uuid', dev_uuid, + dev, + ]) + subprocess.check_call([ + 'dd', + 'if=/dev/zero', + 'of={}'.format(dm_crypt), + 'bs=512', + 'count=1', + ]) + + if use_vaultlocker: + return dm_crypt + else: + return dev + + +def _allocate_logical_volume(dev, lv_type, osd_fsid, + size=None, shared=False, + encrypt=False, + key_manager=CEPH_KEY_MANAGER): + """ + Allocate a logical volume from a block device, ensuring any + required initialization and setup of PV's and VG's to support + the LV. + + :param: dev: path to block device to allocate from. + :param: lv_type: logical volume type to create + (data, block, journal, wal, db) + :param: osd_fsid: UUID of the OSD associate with the LV + :param: size: Size in LVM format for the device; + if unset 100% of VG + :param: shared: Shared volume group (journal, wal, db) + :param: encrypt: Encrypt OSD devices using dm-crypt + :param: key_manager: dm-crypt Key Manager to use + :raises subprocess.CalledProcessError: in the event that any supporting + LVM or parted operation fails. + :returns: str: String in the format 'vg_name/lv_name'. 
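+
+    Example (illustrative): for a non-shared 'block' volume with an
+    osd_fsid of F, the volume group is named 'ceph-F' and the logical
+    volume 'osd-block-F', so the returned string is 'ceph-F/osd-block-F'.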
+ """ + lv_name = "osd-{}-{}".format(lv_type, osd_fsid) + current_volumes = lvm.list_logical_volumes() + if shared: + dev_uuid = str(uuid.uuid4()) + else: + dev_uuid = osd_fsid + pv_dev = _initialize_disk(dev, dev_uuid, encrypt, key_manager) + + vg_name = None + if not lvm.is_lvm_physical_volume(pv_dev): + lvm.create_lvm_physical_volume(pv_dev) + if not os.path.exists(pv_dev): + # NOTE: trigger rescan to work around bug 1878752 + rescan_osd_devices() + if shared: + vg_name = 'ceph-{}-{}'.format(lv_type, + str(uuid.uuid4())) + else: + vg_name = 'ceph-{}'.format(osd_fsid) + lvm.create_lvm_volume_group(vg_name, pv_dev) + else: + vg_name = lvm.list_lvm_volume_group(pv_dev) + + if lv_name not in current_volumes: + lvm.create_logical_volume(lv_name, vg_name, size) + + return "{}/{}".format(vg_name, lv_name) + + +def osdize_dir(path, encrypt=False): + """Ask ceph-disk to prepare a directory to become an OSD. + + :param path: str. The directory to osdize + :param encrypt: bool. Should the OSD directory be encrypted at rest + :returns: None + """ + + db = kv() + osd_devices = db.get('osd-devices', []) + if path in osd_devices: + log('Device {} already processed by charm,' + ' skipping'.format(path)) + return + + for t in ['upstart', 'systemd']: + if os.path.exists(os.path.join(path, t)): + log('Path {} is already used as an OSD dir - bailing'.format(path)) + return + + if cmp_pkgrevno('ceph', "0.56.6") < 0: + log('Unable to use directories for OSDs with ceph < 0.56.6', + level=ERROR) + return + + mkdir(path, owner=ceph_user(), group=ceph_user(), perms=0o755) + chownr('/var/lib/ceph', ceph_user(), ceph_user()) + cmd = [ + 'sudo', '-u', ceph_user(), + 'ceph-disk', + 'prepare', + '--data-dir', + path + ] + if cmp_pkgrevno('ceph', '0.60') >= 0: + if encrypt: + cmd.append('--dmcrypt') + cmd.append('--bluestore') + + log("osdize dir cmd: {}".format(cmd)) + subprocess.check_call(cmd) + + # NOTE: Record processing of device only on success to ensure that + # the charm only tries to initialize a device of OSD usage + # once during its lifetime. + osd_devices.append(path) + db.set('osd-devices', osd_devices) + db.flush() + + +def filesystem_mounted(fs): + return subprocess.call(['grep', '-wqs', fs, '/proc/mounts']) == 0 + + +def get_running_osds(): + """Returns a list of the pids of the current running OSD daemons""" + cmd = ['pgrep', 'ceph-osd|crimson-osd'] + try: + result = str(subprocess.check_output(cmd).decode('UTF-8')) + return result.split() + except subprocess.CalledProcessError: + return [] + + +def get_cephfs(service): + """List the Ceph Filesystems that exist. + + :param service: The service name to run the Ceph command under + :returns: list. Returns a list of the Ceph filesystems + """ + if get_version() < 0.86: + # This command wasn't introduced until 0.86 Ceph + return [] + try: + output = str(subprocess + .check_output(["ceph", '--id', service, "fs", "ls"]) + .decode('UTF-8')) + if not output: + return [] + """ + Example subprocess output: + 'name: ip-172-31-23-165, metadata pool: ip-172-31-23-165_metadata, + data pools: [ip-172-31-23-165_data ]\n' + output: filesystems: ['ip-172-31-23-165'] + """ + filesystems = [] + for line in output.splitlines(): + parts = line.split(',') + for part in parts: + if "name" in part: + filesystems.append(part.split(' ')[1]) + return filesystems + except subprocess.CalledProcessError: + return [] + + +def wait_for_all_monitors_to_upgrade(new_version, upgrade_key): + """Fairly self explanatory name. 
+    This function will wait
+    for all monitors in the cluster to upgrade or it will
+    raise an exception after a 10 minute timeout has expired.
+
+    :param new_version: str of the version to watch
+    :param upgrade_key: the cephx key name to use
+    """
+    done = False
+    start_time = time.time()
+    monitor_list = []
+
+    mon_map = get_mon_map('admin')
+    if mon_map['monmap']['mons']:
+        for mon in mon_map['monmap']['mons']:
+            monitor_list.append(mon['name'])
+    while not done:
+        try:
+            done = all(monitor_key_exists(upgrade_key, "{}_{}_{}_done".format(
+                "mon", mon, new_version
+            )) for mon in monitor_list)
+            if done:
+                break
+            current_time = time.time()
+            if current_time > (start_time + 10 * 60):
+                raise Exception('Timed out waiting for monitors to upgrade')
+            else:
+                # Wait 30 seconds and test again if all monitors are upgraded
+                time.sleep(30)
+        except subprocess.CalledProcessError:
+            raise
+
+
+# Edge cases:
+# 1. Previous node dies on upgrade, can we retry?
+def roll_monitor_cluster(new_version, upgrade_key):
+    """This is tricky to get right so here's what we're going to do.
+
+    There are 2 possible cases: Either I'm first in line or not.
+    If I'm not first in line I'll wait a random time between 5-30 seconds
+    and test to see if the previous monitor is upgraded yet.
+
+    :param new_version: str of the version to upgrade to
+    :param upgrade_key: the cephx key name to use when upgrading
+    """
+    log('roll_monitor_cluster called with {}'.format(new_version))
+    my_name = socket.gethostname()
+    monitor_list = []
+    mon_map = get_mon_map('admin')
+    if mon_map['monmap']['mons']:
+        for mon in mon_map['monmap']['mons']:
+            monitor_list.append(mon['name'])
+    else:
+        status_set('blocked', 'Unable to get monitor cluster information')
+        sys.exit(1)
+    log('monitor_list: {}'.format(monitor_list))
+
+    # A sorted list of monitor unit names
+    mon_sorted_list = sorted(monitor_list)
+
+    # Install packages immediately but defer restarts to when it's our time.
+    upgrade_monitor(new_version, restart_daemons=False)
+    try:
+        position = mon_sorted_list.index(my_name)
+        log("upgrade position: {}".format(position))
+        if position == 0:
+            # I'm first! Roll
+            # First set a key to inform others I'm about to roll
+            lock_and_roll(upgrade_key=upgrade_key,
+                          service='mon',
+                          my_name=my_name,
+                          version=new_version)
+        else:
+            # Check if the previous node has finished
+            status_set('waiting',
+                       'Waiting on {} to finish upgrading'.format(
+                           mon_sorted_list[position - 1]))
+            wait_on_previous_node(upgrade_key=upgrade_key,
+                                  service='mon',
+                                  previous_node=mon_sorted_list[position - 1],
+                                  version=new_version)
+            lock_and_roll(upgrade_key=upgrade_key,
+                          service='mon',
+                          my_name=my_name,
+                          version=new_version)
+        # NOTE(jamespage):
+        # Wait until all monitors have upgraded before bootstrapping
+        # the ceph-mgr daemons due to use of new mgr keyring profiles
+        if new_version == 'luminous':
+            wait_for_all_monitors_to_upgrade(new_version=new_version,
+                                             upgrade_key=upgrade_key)
+            bootstrap_manager()
+
+        # NOTE(jmcvaughn):
+        # Nautilus and later binaries use msgr2 by default, but existing
+        # clusters that have been upgraded from pre-Nautilus will not
+        # automatically have msgr2 enabled. Without this, Ceph will show
+        # a warning only (with no impact to operations), but newly added units
+        # will not be able to join the cluster. Therefore, we ensure it is
+        # enabled on upgrade for all versions including and after Nautilus
+        # (to cater for previous charm versions that will not have done this).
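+        # For example (version strings illustrative): on an upgrade to
+        # 'quincy' with ceph-common 17.2.x installed,
+        # cmp_pkgrevno('ceph-common', '14.0.0') >= 0 holds, so we wait for
+        # all monitors and then call enable_msgr2() below.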
+ nautilus_or_later = cmp_pkgrevno('ceph-common', '14.0.0') >= 0 + if nautilus_or_later: + wait_for_all_monitors_to_upgrade(new_version=new_version, + upgrade_key=upgrade_key) + enable_msgr2() + except ValueError: + log("Failed to find {} in list {}.".format( + my_name, mon_sorted_list)) + status_set('blocked', 'failed to upgrade monitor') + + +# For E731 we can't assign a lambda, therefore, instead pass this. +def noop(): + pass + + +def upgrade_monitor(new_version, kick_function=None, restart_daemons=True): + """Upgrade the current Ceph monitor to the new version + + :param new_version: String version to upgrade to. + """ + if kick_function is None: + kick_function = noop + current_version = get_version() + status_set("maintenance", "Upgrading monitor") + log("Current Ceph version is {}".format(current_version)) + log("Upgrading to: {}".format(new_version)) + + # Needed to determine if whether to stop/start ceph-mgr + luminous_or_later = cmp_pkgrevno('ceph-common', '12.2.0') >= 0 + # Needed to differentiate between systemd unit names + nautilus_or_later = cmp_pkgrevno('ceph-common', '14.0.0') >= 0 + kick_function() + try: + add_source(config('source'), config('key')) + apt_update(fatal=True) + except subprocess.CalledProcessError as err: + log("Adding the Ceph source failed with message: {}".format( + err)) + status_set("blocked", "Upgrade to {} failed".format(new_version)) + sys.exit(1) + kick_function() + + try: + apt_install(packages=determine_packages(), fatal=True) + rm_packages = determine_packages_to_remove() + if rm_packages: + apt_purge(packages=rm_packages, fatal=True) + except subprocess.CalledProcessError as err: + log("Upgrading packages failed " + "with message: {}".format(err)) + status_set("blocked", "Upgrade to {} failed".format(new_version)) + sys.exit(1) + + if not restart_daemons: + log("Packages upgraded but not restarting daemons yet.") + return + + try: + if systemd(): + if nautilus_or_later: + systemd_unit = 'ceph-mon@{}'.format(socket.gethostname()) + else: + systemd_unit = 'ceph-mon' + service_stop(systemd_unit) + log("restarting ceph-mgr.target maybe: {}" + .format(luminous_or_later)) + if luminous_or_later: + service_stop('ceph-mgr.target') + else: + service_stop('ceph-mon-all') + + kick_function() + + owner = ceph_user() + + # Ensure the files and directories under /var/lib/ceph is chowned + # properly as part of the move to the Jewel release, which moved the + # ceph daemons to running as ceph:ceph instead of root:root. + if new_version == 'jewel': + # Ensure the ownership of Ceph's directories is correct + chownr(path=os.path.join(os.sep, "var", "lib", "ceph"), + owner=owner, + group=owner, + follow_links=True) + + kick_function() + + # Ensure that mon directory is user writable + hostname = socket.gethostname() + path = '/var/lib/ceph/mon/ceph-{}'.format(hostname) + mkdir(path, owner=ceph_user(), group=ceph_user(), + perms=0o755) + + if systemd(): + if nautilus_or_later: + systemd_unit = 'ceph-mon@{}'.format(socket.gethostname()) + else: + systemd_unit = 'ceph-mon' + service_restart(systemd_unit) + log("starting ceph-mgr.target maybe: {}".format(luminous_or_later)) + if luminous_or_later: + # due to BUG: #1849874 we have to force a restart to get it to + # drop the previous version of ceph-manager and start the new + # one. 
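+                # (Roughly the equivalent of running
+                # 'systemctl restart ceph-mgr.target' by hand.)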
+ service_restart('ceph-mgr.target') + else: + service_start('ceph-mon-all') + except subprocess.CalledProcessError as err: + log("Stopping ceph and upgrading packages failed " + "with message: {}".format(err)) + status_set("blocked", "Upgrade to {} failed".format(new_version)) + sys.exit(1) + + +def lock_and_roll(upgrade_key, service, my_name, version): + """Create a lock on the Ceph monitor cluster and upgrade. + + :param upgrade_key: str. The cephx key to use + :param service: str. The cephx id to use + :param my_name: str. The current hostname + :param version: str. The version we are upgrading to + """ + start_timestamp = time.time() + + log('monitor_key_set {}_{}_{}_start {}'.format( + service, + my_name, + version, + start_timestamp)) + monitor_key_set(upgrade_key, "{}_{}_{}_start".format( + service, my_name, version), start_timestamp) + + # alive indication: + alive_function = ( + lambda: monitor_key_set( + upgrade_key, "{}_{}_{}_alive" + .format(service, my_name, version), time.time())) + dog = WatchDog(kick_interval=3 * 60, + kick_function=alive_function) + + log("Rolling") + + # This should be quick + if service == 'osd': + upgrade_osd(version, kick_function=dog.kick_the_dog) + elif service == 'mon': + upgrade_monitor(version, kick_function=dog.kick_the_dog) + else: + log("Unknown service {}. Unable to upgrade".format(service), + level=ERROR) + log("Done") + + stop_timestamp = time.time() + # Set a key to inform others I am finished + log('monitor_key_set {}_{}_{}_done {}'.format(service, + my_name, + version, + stop_timestamp)) + status_set('maintenance', 'Finishing upgrade') + monitor_key_set(upgrade_key, "{}_{}_{}_done".format(service, + my_name, + version), + stop_timestamp) + + +def wait_on_previous_node(upgrade_key, service, previous_node, version): + """A lock that sleeps the current thread while waiting for the previous + node to finish upgrading. + + :param upgrade_key: + :param service: str. the cephx id to use + :param previous_node: str. The name of the previous node to wait on + :param version: str. The version we are upgrading to + :returns: None + """ + log("Previous node is: {}".format(previous_node)) + + previous_node_started_f = ( + lambda: monitor_key_exists( + upgrade_key, + "{}_{}_{}_start".format(service, previous_node, version))) + previous_node_finished_f = ( + lambda: monitor_key_exists( + upgrade_key, + "{}_{}_{}_done".format(service, previous_node, version))) + previous_node_alive_time_f = ( + lambda: monitor_key_get( + upgrade_key, + "{}_{}_{}_alive".format(service, previous_node, version))) + + # wait for 30 minutes until the previous node starts. We don't proceed + # unless we get a start condition. + try: + WatchDog.wait_until(previous_node_started_f, timeout=30 * 60) + except WatchDog.WatchDogTimeoutException: + log("Waited for previous node to start for 30 minutes. " + "It didn't start, so may have a serious issue. Continuing with " + "upgrade of this node.", + level=WARNING) + return + + # keep the time it started from this nodes' perspective. + previous_node_started_at = time.time() + log("Detected that previous node {} has started. Time now: {}" + .format(previous_node, previous_node_started_at)) + + # Now wait for the node to complete. The node may optionally be kicking + # with the *_alive key, which allows this node to wait longer as it 'knows' + # the other node is proceeding. 
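+    # The monitor keys polled here follow the layout written by
+    # lock_and_roll() on the peer (names illustrative):
+    #   <service>_<previous_node>_<version>_start  - peer began its upgrade
+    #   <service>_<previous_node>_<version>_alive  - refreshed by its WatchDog
+    #   <service>_<previous_node>_<version>_done   - peer finished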
+    try:
+        WatchDog.timed_wait(kicked_at_function=previous_node_alive_time_f,
+                            complete_function=previous_node_finished_f,
+                            wait_time=30 * 60,
+                            compatibility_wait_time=10 * 60,
+                            max_kick_interval=5 * 60)
+    except WatchDog.WatchDogDeadException:
+        # previous node was kicking, but timed out; log this condition and
+        # move on.
+        now = time.time()
+        waited = int((now - previous_node_started_at) / 60)
+        log("Previous node started, but has now not ticked for 5 minutes. "
+            "Waited total of {} mins on node {}. current time: {} > "
+            "previous node start time: {}. "
+            "Continuing with upgrade of this node."
+            .format(waited, previous_node, now, previous_node_started_at),
+            level=WARNING)
+    except WatchDog.WatchDogTimeoutException:
+        # previous node never kicked, or simply took too long; log this
+        # condition and move on.
+        now = time.time()
+        waited = int((now - previous_node_started_at) / 60)
+        log("Previous node is taking too long; assuming it has died. "
+            "Waited {} mins on node {}. current time: {} > "
+            "previous node start time: {}. "
+            "Continuing with upgrade of this node."
+            .format(waited, previous_node, now, previous_node_started_at),
+            level=WARNING)
+
+
+class WatchDog(object):
+    """Watch a dog; basically a kickable timer with a timeout between two
+    async units.
+
+    The idea is that you have an overall timeout and then can kick that
+    timeout with intermediary hits, with a max time between those kicks
+    allowed.
+
+    Note that this watchdog doesn't rely on the clock of the other side; it
+    only uses (roughly) when it detects that the other side started. All
+    timings are based on the local clock.
+
+    The kicker will not 'kick' more often than a set interval, regardless of
+    how often the kick_the_dog() function is called. The kicker provides a
+    function (lambda: -> None) that is called when the kick interval is
+    reached.
+
+    The waiter calls the static method with a check function
+    (lambda: -> Boolean) that indicates when the wait should be over and the
+    maximum interval to wait. e.g. 30 minutes with a 5 minute kick interval.
+
+    So the waiter calls timed_wait(f, 30, 3) and the kicker sets up a 3 minute
+    kick interval, or however long it is expected for the key to propagate and
+    to allow for other delays.
+
+    There is a compatibility mode where if the other side never kicks, then it
+    simply waits for the compatibility timer.
+    """
+
+    class WatchDogDeadException(Exception):
+        pass
+
+    class WatchDogTimeoutException(Exception):
+        pass
+
+    def __init__(self, kick_interval=3 * 60, kick_function=None):
+        """Initialise a new WatchDog
+
+        :param kick_interval: the interval when this side kicks the other in
+            seconds.
+        :type kick_interval: Int
+        :param kick_function: The function to call that does the kick.
+        :type kick_function: Callable[]
+        """
+        self.start_time = time.time()
+        self.last_run_func = None
+        self.last_kick_at = None
+        self.kick_interval = kick_interval
+        self.kick_f = kick_function
+
+    def kick_the_dog(self):
+        """Might call the kick_function if it's time.
+
+        This function can be called as frequently as needed, but will run the
+        self.kick_function after kick_interval seconds have passed.
+        """
+        now = time.time()
+        if (self.last_run_func is None or
+                (now - self.last_run_func > self.kick_interval)):
+            if self.kick_f is not None:
+                self.kick_f()
+            self.last_run_func = now
+            self.last_kick_at = now
+
+    @staticmethod
+    def wait_until(wait_f, timeout=10 * 60):
+        """Wait for up to timeout seconds for the passed function to return
+        True.
+
+        :param wait_f: The function to call that will end the wait.
+        :type wait_f: Callable[[], Boolean]
+        :param timeout: The time to wait in seconds.
+        :type timeout: int
+        """
+        start_time = time.time()
+        while not wait_f():
+            now = time.time()
+            if now > start_time + timeout:
+                raise WatchDog.WatchDogTimeoutException()
+            wait_time = random.randrange(5, 30)
+            log('wait_until: waiting for {} seconds'.format(wait_time))
+            time.sleep(wait_time)
+
+    @staticmethod
+    def timed_wait(kicked_at_function,
+                   complete_function,
+                   wait_time=30 * 60,
+                   compatibility_wait_time=10 * 60,
+                   max_kick_interval=5 * 60):
+        """Wait a maximum time with an intermediate 'kick' time.
+
+        This function will wait for max_kick_interval seconds unless the
+        kicked_at_function() call returns a time that is not older than
+        max_kick_interval (in seconds). i.e. the other side can signal that
+        it is still doing things during the max_kick_interval as long as it
+        kicks at least every max_kick_interval seconds.
+
+        The maximum wait is "wait_time", but the other side must keep kicking
+        during this period.
+
+        The "compatibility_wait_time" is used if the other side never kicks
+        (i.e. the kicked_at_function() always returns None). In this case
+        the function waits up to "compatibility_wait_time".
+
+        Note that the type of the return from the kicked_at_function is an
+        Optional[str], not a Float. The function will coerce this to a float
+        for the comparison. This represents the return value of
+        time.time() at the "other side". It's a string to simplify the
+        function obtaining the time value from the other side.
+
+        The function raises WatchDogTimeoutException if either the
+        compatibility_wait_time or the wait_time are exceeded.
+
+        The function raises WatchDogDeadException if the max_kick_interval is
+        exceeded.
+
+        Note that it is possible that the first kick interval is extended to
+        compatibility_wait_time if the "other side" doesn't kick immediately.
+        The best solution is for the other side to kick early and often.
+
+        :param kicked_at_function: The function to call to retrieve the time
+            that the other side 'kicked' at. None if the other side hasn't
+            kicked.
+        :type kicked_at_function: Callable[[], Optional[str]]
+        :param complete_function: The callable that returns True when done.
+        :type complete_function: Callable[[], Boolean]
+        :param wait_time: the maximum time to wait, even with kicks, in
+            seconds.
+        :type wait_time: int
+        :param compatibility_wait_time: The time to wait if no kicks are
+            received, in seconds.
+        :type compatibility_wait_time: int
+        :param max_kick_interval: The maximum time allowed between kicks
+            before the wait is over, in seconds.
+        :type max_kick_interval: int
+        :raises: WatchDog.WatchDogTimeoutException,
+            WatchDog.WatchDogDeadException
+        """
+        start_time = time.time()
+        while True:
+            if complete_function():
+                break
+            # the time when the waited-on unit last kicked.
+            kicked_at = kicked_at_function()
+            now = time.time()
+            if kicked_at is None:
+                # assume other end doesn't do alive kicks
+                if (now - start_time > compatibility_wait_time):
+                    raise WatchDog.WatchDogTimeoutException()
+            else:
+                # other side is participating in kicks; it must kick at least
+                # every 'max_kick_interval' to stay alive.
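+                # For example, with the default max_kick_interval of 5
+                # minutes: a kick recorded 10 minutes ago gives
+                # now - kicked_at = 600 > 300, so the peer is declared
+                # dead below.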
+ if (now - float(kicked_at) > max_kick_interval): + raise WatchDog.WatchDogDeadException() + if (now - start_time > wait_time): + raise WatchDog.WatchDogTimeoutException() + delay_time = random.randrange(5, 30) + log('waiting for {} seconds'.format(delay_time)) + time.sleep(delay_time) + + +def get_upgrade_position(osd_sorted_list, match_name): + """Return the upgrade position for the given OSD. + + :param osd_sorted_list: OSDs sorted + :type osd_sorted_list: [str] + :param match_name: The OSD name to match + :type match_name: str + :returns: The position of the name + :rtype: int + :raises: ValueError if name is not found + """ + for index, item in enumerate(osd_sorted_list): + if item.name == match_name: + return index + raise ValueError("OSD name '{}' not found in get_upgrade_position list" + .format(match_name)) + + +# Edge cases: +# 1. Previous node dies on upgrade, can we retry? +# 2. This assumes that the OSD failure domain is not set to OSD. +# It rolls an entire server at a time. +def roll_osd_cluster(new_version, upgrade_key): + """This is tricky to get right so here's what we're going to do. + + There's 2 possible cases: Either I'm first in line or not. + If I'm not first in line I'll wait a random time between 5-30 seconds + and test to see if the previous OSD is upgraded yet. + + TODO: If you're not in the same failure domain it's safe to upgrade + 1. Examine all pools and adopt the most strict failure domain policy + Example: Pool 1: Failure domain = rack + Pool 2: Failure domain = host + Pool 3: Failure domain = row + + outcome: Failure domain = host + + :param new_version: str of the version to upgrade to + :param upgrade_key: the cephx key name to use when upgrading + """ + log('roll_osd_cluster called with {}'.format(new_version)) + my_name = socket.gethostname() + osd_tree = get_osd_tree(service=upgrade_key) + # A sorted list of OSD unit names + osd_sorted_list = sorted(osd_tree) + log("osd_sorted_list: {}".format(osd_sorted_list)) + + try: + position = get_upgrade_position(osd_sorted_list, my_name) + log("upgrade position: {}".format(position)) + if position == 0: + # I'm first! Roll + # First set a key to inform others I'm about to roll + lock_and_roll(upgrade_key=upgrade_key, + service='osd', + my_name=my_name, + version=new_version) + else: + # Check if the previous node has finished + status_set('waiting', + 'Waiting on {} to finish upgrading'.format( + osd_sorted_list[position - 1].name)) + wait_on_previous_node( + upgrade_key=upgrade_key, + service='osd', + previous_node=osd_sorted_list[position - 1].name, + version=new_version) + lock_and_roll(upgrade_key=upgrade_key, + service='osd', + my_name=my_name, + version=new_version) + except ValueError: + log("Failed to find name {} in list {}".format( + my_name, osd_sorted_list)) + status_set('blocked', 'failed to upgrade osd') + + +def upgrade_osd(new_version, kick_function=None): + """Upgrades the current OSD + + :param new_version: str. 
The new version to upgrade to + """ + if kick_function is None: + kick_function = noop + + current_version = get_version() + status_set("maintenance", "Upgrading OSD") + log("Current Ceph version is {}".format(current_version)) + log("Upgrading to: {}".format(new_version)) + + try: + add_source(config('source'), config('key')) + apt_update(fatal=True) + except subprocess.CalledProcessError as err: + log("Adding the Ceph sources failed with message: {}".format( + err)) + status_set("blocked", "Upgrade to {} failed".format(new_version)) + sys.exit(1) + + kick_function() + + try: + # Upgrade the packages before restarting the daemons. + status_set('maintenance', 'Upgrading packages to %s' % new_version) + apt_install(packages=determine_packages(), fatal=True) + kick_function() + + # If the upgrade does not need an ownership update of any of the + # directories in the OSD service directory, then simply restart + # all of the OSDs at the same time as this will be the fastest + # way to update the code on the node. + if not dirs_need_ownership_update('osd'): + log('Restarting all OSDs to load new binaries', DEBUG) + with maintain_all_osd_states(): + if systemd(): + service_restart('ceph-osd.target') + else: + service_restart('ceph-osd-all') + return + + # Need to change the ownership of all directories which are not OSD + # directories as well. + # TODO - this should probably be moved to the general upgrade function + # and done before mon/OSD. + update_owner(CEPH_BASE_DIR, recurse_dirs=False) + non_osd_dirs = filter(lambda x: not x == 'osd', + os.listdir(CEPH_BASE_DIR)) + non_osd_dirs = map(lambda x: os.path.join(CEPH_BASE_DIR, x), + non_osd_dirs) + for i, path in enumerate(non_osd_dirs): + if i % 100 == 0: + kick_function() + update_owner(path) + + # Fast service restart wasn't an option because each of the OSD + # directories need the ownership updated for all the files on + # the OSD. Walk through the OSDs one-by-one upgrading the OSD. + for osd_dir in _get_child_dirs(OSD_BASE_DIR): + kick_function() + try: + osd_num = _get_osd_num_from_dirname(osd_dir) + _upgrade_single_osd(osd_num, osd_dir) + except ValueError as ex: + # Directory could not be parsed - junk directory? + log('Could not parse OSD directory %s: %s' % (osd_dir, ex), + WARNING) + continue + + except (subprocess.CalledProcessError, IOError) as err: + log("Stopping Ceph and upgrading packages failed " + "with message: {}".format(err)) + status_set("blocked", "Upgrade to {} failed".format(new_version)) + sys.exit(1) + + +def _upgrade_single_osd(osd_num, osd_dir): + """Upgrades the single OSD directory. + + :param osd_num: the num of the OSD + :param osd_dir: the directory of the OSD to upgrade + :raises CalledProcessError: if an error occurs in a command issued as part + of the upgrade process + :raises IOError: if an error occurs reading/writing to a file as part + of the upgrade process + """ + with maintain_osd_state(osd_num): + stop_osd(osd_num) + disable_osd(osd_num) + update_owner(osd_dir) + enable_osd(osd_num) + start_osd(osd_num) + + +def stop_osd(osd_num): + """Stops the specified OSD number. + + :param osd_num: the OSD number to stop + """ + if systemd(): + service_stop('ceph-osd@{}'.format(osd_num)) + else: + service_stop('ceph-osd', id=osd_num) + + +def start_osd(osd_num): + """Starts the specified OSD number. + + :param osd_num: the OSD number to start. 
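+
+    Under systemd this amounts to e.g. 'systemctl start ceph-osd@3'
+    (the OSD number here is illustrative).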
+ """ + if systemd(): + service_start('ceph-osd@{}'.format(osd_num)) + else: + service_start('ceph-osd', id=osd_num) + + +def disable_osd(osd_num): + """Disables the specified OSD number. + + Ensures that the specified OSD will not be automatically started at the + next reboot of the system. Due to differences between init systems, + this method cannot make any guarantees that the specified OSD cannot be + started manually. + + :param osd_num: the OSD id which should be disabled. + :raises CalledProcessError: if an error occurs invoking the systemd cmd + to disable the OSD + :raises IOError, OSError: if the attempt to read/remove the ready file in + an upstart enabled system fails + """ + if systemd(): + # When running under systemd, the individual ceph-osd daemons run as + # templated units and can be directly addressed by referring to the + # templated service name ceph-osd@. Additionally, systemd + # allows one to disable a specific templated unit by running the + # 'systemctl disable ceph-osd@' command. When disabled, the + # OSD should remain disabled until re-enabled via systemd. + # Note: disabling an already disabled service in systemd returns 0, so + # no need to check whether it is enabled or not. + cmd = ['systemctl', 'disable', 'ceph-osd@{}'.format(osd_num)] + subprocess.check_call(cmd) + else: + # Neither upstart nor the ceph-osd upstart script provides for + # disabling the starting of an OSD automatically. The specific OSD + # cannot be prevented from running manually, however it can be + # prevented from running automatically on reboot by removing the + # 'ready' file in the OSD's root directory. This is due to the + # ceph-osd-all upstart script checking for the presence of this file + # before starting the OSD. + ready_file = os.path.join(OSD_BASE_DIR, 'ceph-{}'.format(osd_num), + 'ready') + if os.path.exists(ready_file): + os.unlink(ready_file) + + +def enable_osd(osd_num): + """Enables the specified OSD number. + + Ensures that the specified osd_num will be enabled and ready to start + automatically in the event of a reboot. + + :param osd_num: the osd id which should be enabled. + :raises CalledProcessError: if the call to the systemd command issued + fails when enabling the service + :raises IOError: if the attempt to write the ready file in an upstart + enabled system fails + """ + if systemd(): + cmd = ['systemctl', 'enable', 'ceph-osd@{}'.format(osd_num)] + subprocess.check_call(cmd) + else: + # When running on upstart, the OSDs are started via the ceph-osd-all + # upstart script which will only start the OSD if it has a 'ready' + # file. Make sure that file exists. + ready_file = os.path.join(OSD_BASE_DIR, 'ceph-{}'.format(osd_num), + 'ready') + with open(ready_file, 'w') as f: + f.write('ready') + + # Make sure the correct user owns the file. It shouldn't be necessary + # as the upstart script should run with root privileges, but its better + # to have all the files matching ownership. + update_owner(ready_file) + + +def update_owner(path, recurse_dirs=True): + """Changes the ownership of the specified path. + + Changes the ownership of the specified path to the new ceph daemon user + using the system's native chown functionality. This may take awhile, + so this method will issue a set_status for any changes of ownership which + recurses into directory structures. 
+ + :param path: the path to recursively change ownership for + :param recurse_dirs: boolean indicating whether to recursively change the + ownership of all the files in a path's subtree or to + simply change the ownership of the path. + :raises CalledProcessError: if an error occurs issuing the chown system + command + """ + user = ceph_user() + user_group = '{ceph_user}:{ceph_user}'.format(ceph_user=user) + cmd = ['chown', user_group, path] + if os.path.isdir(path) and recurse_dirs: + status_set('maintenance', ('Updating ownership of %s to %s' % + (path, user))) + cmd.insert(1, '-R') + + log('Changing ownership of {path} to {user}'.format( + path=path, user=user_group), DEBUG) + start = datetime.now() + subprocess.check_call(cmd) + elapsed_time = (datetime.now() - start) + + log('Took {secs} seconds to change the ownership of path: {path}'.format( + secs=elapsed_time.total_seconds(), path=path), DEBUG) + + +def get_osd_state(osd_num, osd_goal_state=None): + """Get OSD state or loop until OSD state matches OSD goal state. + + If osd_goal_state is None, just return the current OSD state. + If osd_goal_state is not None, loop until the current OSD state matches + the OSD goal state. + + :param osd_num: the OSD id to get state for + :param osd_goal_state: (Optional) string indicating state to wait for + Defaults to None + :returns: Returns a str, the OSD state. + :rtype: str + """ + while True: + asok = "/var/run/ceph/ceph-osd.{}.asok".format(osd_num) + cmd = [ + 'ceph', + 'daemon', + asok, + 'status' + ] + try: + result = json.loads(str(subprocess + .check_output(cmd) + .decode('UTF-8'))) + except (subprocess.CalledProcessError, ValueError) as e: + log("{}".format(e), level=DEBUG) + continue + osd_state = result['state'] + log("OSD {} state: {}, goal state: {}".format( + osd_num, osd_state, osd_goal_state), level=DEBUG) + if not osd_goal_state: + return osd_state + if osd_state == osd_goal_state: + return osd_state + time.sleep(3) + + +def get_all_osd_states(osd_goal_states=None): + """Get all OSD states or loop until all OSD states match OSD goal states. + + If osd_goal_states is None, just return a dictionary of current OSD states. + If osd_goal_states is not None, loop until the current OSD states match + the OSD goal states. + + :param osd_goal_states: (Optional) dict indicating states to wait for + Defaults to None + :returns: Returns a dictionary of current OSD states. + :rtype: dict + """ + osd_states = {} + for osd_num in get_local_osd_ids(): + if not osd_goal_states: + osd_states[osd_num] = get_osd_state(osd_num) + else: + osd_states[osd_num] = get_osd_state( + osd_num, + osd_goal_state=osd_goal_states[osd_num]) + return osd_states + + +@contextmanager +def maintain_osd_state(osd_num): + """Ensure the state of an OSD is maintained. + + Ensures the state of an OSD is the same at the end of a block nested + in a with statement as it was at the beginning of the block. + + :param osd_num: the OSD id to maintain state for + """ + osd_state = get_osd_state(osd_num) + try: + yield + finally: + get_osd_state(osd_num, osd_goal_state=osd_state) + + +@contextmanager +def maintain_all_osd_states(): + """Ensure all local OSD states are maintained. + + Ensures the states of all local OSDs are the same at the end of a + block nested in a with statement as they were at the beginning of + the block. 
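+
+    Example (sketch of how upgrade_osd() above uses it)::
+
+        with maintain_all_osd_states():
+            service_restart('ceph-osd.target')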
+ """ + osd_states = get_all_osd_states() + try: + yield + finally: + get_all_osd_states(osd_goal_states=osd_states) + + +def list_pools(client='admin'): + """This will list the current pools that Ceph has + + :param client: (Optional) client id for Ceph key to use + Defaults to ``admin`` + :type client: str + :returns: Returns a list of available pools. + :rtype: list + :raises: subprocess.CalledProcessError if the subprocess fails to run. + """ + try: + pool_list = [] + pools = subprocess.check_output(['rados', '--id', client, 'lspools'], + universal_newlines=True, + stderr=subprocess.STDOUT) + for pool in pools.splitlines(): + pool_list.append(pool) + return pool_list + except subprocess.CalledProcessError as err: + log("rados lspools failed with error: {}".format(err.output)) + raise + + +def get_pool_param(pool, param, client='admin'): + """Get parameter from pool. + + :param pool: Name of pool to get variable from + :type pool: str + :param param: Name of variable to get + :type param: str + :param client: (Optional) client id for Ceph key to use + Defaults to ``admin`` + :type client: str + :returns: Value of variable on pool or None + :rtype: str or None + :raises: subprocess.CalledProcessError + """ + try: + output = subprocess.check_output( + ['ceph', '--id', client, 'osd', 'pool', 'get', pool, param], + universal_newlines=True, stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as cp: + if cp.returncode == 2 and 'ENOENT: option' in cp.output: + return None + raise + if ':' in output: + return output.split(':')[1].lstrip().rstrip() + + +def get_pool_erasure_profile(pool, client='admin'): + """Get erasure code profile for pool. + + :param pool: Name of pool to get variable from + :type pool: str + :param client: (Optional) client id for Ceph key to use + Defaults to ``admin`` + :type client: str + :returns: Erasure code profile of pool or None + :rtype: str or None + :raises: subprocess.CalledProcessError + """ + try: + return get_pool_param(pool, 'erasure_code_profile', client=client) + except subprocess.CalledProcessError as cp: + if cp.returncode == 13 and 'EACCES: pool' in cp.output: + # Not a Erasure coded pool + return None + raise + + +def get_pool_quota(pool, client='admin'): + """Get pool quota. + + :param pool: Name of pool to get variable from + :type pool: str + :param client: (Optional) client id for Ceph key to use + Defaults to ``admin`` + :type client: str + :returns: Dictionary with quota variables + :rtype: dict + :raises: subprocess.CalledProcessError + """ + output = subprocess.check_output( + ['ceph', '--id', client, 'osd', 'pool', 'get-quota', pool], + universal_newlines=True, stderr=subprocess.STDOUT) + rc = re.compile(r'\s+max\s+(\S+)\s*:\s+(\d+)') + result = {} + for line in output.splitlines(): + m = rc.match(line) + if m: + result.update({'max_{}'.format(m.group(1)): m.group(2)}) + return result + + +def get_pool_applications(pool='', client='admin'): + """Get pool applications. 
+ + :param pool: (Optional) Name of pool to get applications for + Defaults to get for all pools + :type pool: str + :param client: (Optional) client id for Ceph key to use + Defaults to ``admin`` + :type client: str + :returns: Dictionary with pool name as key + :rtype: dict + :raises: subprocess.CalledProcessError + """ + + cmd = ['ceph', '--id', client, 'osd', 'pool', 'application', 'get'] + if pool: + cmd.append(pool) + try: + output = subprocess.check_output(cmd, + universal_newlines=True, + stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as cp: + if cp.returncode == 2 and 'ENOENT' in cp.output: + return {} + raise + return json.loads(output) + + +def list_pools_detail(): + """Get detailed information about pools. + + Structure: + {'pool_name_1': {'applications': {'application': {}}, + 'parameters': {'pg_num': '42', 'size': '42'}, + 'quota': {'max_bytes': '1000', + 'max_objects': '10'}, + }, + 'pool_name_2': ... + } + + :returns: Dictionary with detailed pool information. + :rtype: dict + :raises: subproces.CalledProcessError + """ + get_params = ['pg_num', 'size'] + result = {} + applications = get_pool_applications() + for pool in list_pools(): + result[pool] = { + 'applications': applications.get(pool, {}), + 'parameters': {}, + 'quota': get_pool_quota(pool), + } + for param in get_params: + result[pool]['parameters'].update({ + param: get_pool_param(pool, param)}) + erasure_profile = get_pool_erasure_profile(pool) + if erasure_profile: + result[pool]['parameters'].update({ + 'erasure_code_profile': erasure_profile}) + return result + + +def dirs_need_ownership_update(service): + """Determines if directories still need change of ownership. + + Examines the set of directories under the /var/lib/ceph/{service} directory + and determines if they have the correct ownership or not. This is + necessary due to the upgrade from Hammer to Jewel where the daemon user + changes from root: to ceph:. + + :param service: the name of the service folder to check (e.g. OSD, mon) + :returns: boolean. True if the directories need a change of ownership, + False otherwise. + :raises IOError: if an error occurs reading the file stats from one of + the child directories. + :raises OSError: if the specified path does not exist or some other error + """ + expected_owner = expected_group = ceph_user() + path = os.path.join(CEPH_BASE_DIR, service) + for child in _get_child_dirs(path): + curr_owner, curr_group = owner(child) + + if (curr_owner == expected_owner) and (curr_group == expected_group): + continue + + # NOTE(lathiat): when config_changed runs on reboot, the OSD might not + # yet be mounted or started, and the underlying directory the OSD is + # mounted to is expected to be owned by root. So skip the check. This + # may also happen for OSD directories for OSDs that were removed. + if (service == 'osd' and + not os.path.exists(os.path.join(child, 'magic'))): + continue + + log('Directory "%s" needs its ownership updated' % child, DEBUG) + return True + + # All child directories had the expected ownership + return False + + +# A dict of valid Ceph upgrade paths. 
+# Mapping is old -> new.
+UPGRADE_PATHS = collections.OrderedDict([
+    ('firefly', 'hammer'),
+    ('hammer', 'jewel'),
+    ('jewel', 'luminous'),
+    ('luminous', 'mimic'),
+    ('mimic', 'nautilus'),
+    ('nautilus', 'octopus'),
+    ('octopus', 'pacific'),
+    ('pacific', 'quincy'),
+    ('quincy', 'reef'),
+    ('reef', 'squid'),
+])
+
+# Map UCA codenames to Ceph codenames
+UCA_CODENAME_MAP = {
+    'icehouse': 'firefly',
+    'juno': 'firefly',
+    'kilo': 'hammer',
+    'liberty': 'hammer',
+    'mitaka': 'jewel',
+    'newton': 'jewel',
+    'ocata': 'jewel',
+    'pike': 'luminous',
+    'queens': 'luminous',
+    'rocky': 'mimic',
+    'stein': 'mimic',
+    'train': 'nautilus',
+    'ussuri': 'octopus',
+    'victoria': 'octopus',
+    'wallaby': 'pacific',
+    'xena': 'pacific',
+    'yoga': 'quincy',
+    'zed': 'quincy',
+    'antelope': 'quincy',
+    'bobcat': 'reef',
+    'caracal': 'squid',
+}
+
+
+def pretty_print_upgrade_paths():
+    """Pretty print supported upgrade paths for Ceph"""
+    return ["{} -> {}".format(key, value)
+            for key, value in UPGRADE_PATHS.items()]
+
+
+def resolve_ceph_version(source):
+    """Resolve a Ceph release codename from the source configuration
+    option, based on Ubuntu Cloud Archive pockets.
+
+    :param source: source configuration option of charm
+    :returns: Ceph release codename or None if not resolvable
+    """
+    os_release = get_os_codename_install_source(source)
+    return UCA_CODENAME_MAP.get(os_release)
+
+
+def get_ceph_pg_stat():
+    """Returns the result of 'ceph pg stat'.
+
+    :returns: dict
+    """
+    try:
+        tree = str(subprocess
+                   .check_output(['ceph', 'pg', 'stat', '--format=json'])
+                   .decode('UTF-8'))
+        try:
+            json_tree = json.loads(tree)
+            if not json_tree['num_pg_by_state']:
+                return None
+            return json_tree
+        except ValueError as v:
+            log("Unable to parse ceph pg stat json: {}. Error: {}".format(
+                tree, v))
+            raise
+    except subprocess.CalledProcessError as e:
+        log("ceph pg stat command failed with message: {}".format(e))
+        raise
+
+
+def get_ceph_health():
+    """Returns the health of the cluster from a 'ceph status'
+
+    :returns: dict tree of ceph status
+    :raises: CalledProcessError if the ceph command fails. To get the
+        overall status, use get_ceph_health()['overall_status'].
+    """
+    try:
+        tree = str(subprocess
+                   .check_output(['ceph', 'status', '--format=json'])
+                   .decode('UTF-8'))
+        try:
+            json_tree = json.loads(tree)
+            # Make sure children are present in the JSON
+            if not json_tree['overall_status']:
+                return None
+
+            return json_tree
+        except ValueError as v:
+            log("Unable to parse ceph tree json: {}. Error: {}".format(
+                tree, v))
+            raise
+    except subprocess.CalledProcessError as e:
+        log("ceph status command failed with message: {}".format(e))
+        raise
+
+
+def reweight_osd(osd_num, new_weight):
+    """Changes the crush weight of an OSD to the value specified.
+
+    :param osd_num: the OSD id which should be changed
+    :param new_weight: the new weight for the OSD
+    :returns: bool. True if output looks right, else False.
+ :raises CalledProcessError: if an error occurs invoking the systemd cmd + """ + try: + cmd_result = str(subprocess + .check_output(['ceph', 'osd', 'crush', + 'reweight', "osd.{}".format(osd_num), + new_weight], + stderr=subprocess.STDOUT) + .decode('UTF-8')) + expected_result = "reweighted item id {ID} name \'osd.{ID}\'".format( + ID=osd_num) + " to {}".format(new_weight) + log(cmd_result) + if expected_result in cmd_result: + return True + return False + except subprocess.CalledProcessError as e: + log("ceph osd crush reweight command failed" + " with message: {}".format(e)) + raise + + +def determine_packages(): + """Determines packages for installation. + + :returns: list of Ceph packages + """ + packages = PACKAGES.copy() + if CompareHostReleases(lsb_release()['DISTRIB_CODENAME']) >= 'eoan': + btrfs_package = 'btrfs-progs' + else: + btrfs_package = 'btrfs-tools' + packages.append(btrfs_package) + return packages + + +def determine_packages_to_remove(): + """Determines packages for removal + + Note: if in a container, then the CHRONY_PACKAGE is removed. + + :returns: list of packages to be removed + :rtype: List[str] + """ + rm_packages = REMOVE_PACKAGES.copy() + if is_container(): + rm_packages.extend(filter_missing_packages([CHRONY_PACKAGE])) + return rm_packages + + +def bootstrap_manager(): + hostname = socket.gethostname() + path = '/var/lib/ceph/mgr/ceph-{}'.format(hostname) + keyring = os.path.join(path, 'keyring') + + if os.path.exists(keyring): + log('bootstrap_manager: mgr already initialized.') + else: + mkdir(path, owner=ceph_user(), group=ceph_user()) + subprocess.check_call(['ceph', 'auth', 'get-or-create', + 'mgr.{}'.format(hostname), 'mon', + 'allow profile mgr', 'osd', 'allow *', + 'mds', 'allow *', '--out-file', + keyring]) + chownr(path, ceph_user(), ceph_user()) + + unit = 'ceph-mgr@{}'.format(hostname) + subprocess.check_call(['systemctl', 'enable', unit]) + service_restart(unit) + + +def enable_msgr2(): + """ + Enables msgr2 + + :raises: subprocess.CalledProcessError if the command fails + """ + cmd = ['ceph', 'mon', 'enable-msgr2'] + subprocess.check_call(cmd) + + +def osd_noout(enable): + """Sets or unsets 'noout' + + :param enable: bool. True to set noout, False to unset. + :returns: bool. True if output looks right. + :raises CalledProcessError: if an error occurs invoking the systemd cmd + """ + operation = { + True: 'set', + False: 'unset', + } + try: + subprocess.check_call(['ceph', '--id', 'admin', + 'osd', operation[enable], + 'noout']) + log('running ceph osd {} noout'.format(operation[enable])) + return True + except subprocess.CalledProcessError as e: + log(e) + raise + + +class OSDConfigSetError(Exception): + """Error occurred applying OSD settings.""" + pass + + +def apply_osd_settings(settings): + """Applies the provided OSD settings + + Apply the provided settings to all local OSD unless settings are already + present. Settings stop being applied on encountering an error. + + :param settings: dict. Dictionary of settings to apply. + :returns: bool. True if commands ran successfully. + :raises: OSDConfigSetError + """ + current_settings = {} + base_cmd = 'ceph daemon osd.{osd_id} config --format=json' + get_cmd = base_cmd + ' get {key}' + set_cmd = base_cmd + ' set {key} {value}' + + def _get_cli_key(key): + return key.replace(' ', '_') + # Retrieve the current values to check keys are correct and to make this a + # noop if setting are already applied. 
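+    # For example (setting and value illustrative):
+    #   apply_osd_settings({'osd heartbeat grace': '20'})
+    # first runs 'ceph daemon osd.<id> config get osd_heartbeat_grace' and
+    # only issues the corresponding 'config set' when the value differs.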
+    for osd_id in get_local_osd_ids():
+        for key, value in sorted(settings.items()):
+            cli_key = _get_cli_key(key)
+            cmd = get_cmd.format(osd_id=osd_id, key=cli_key)
+            out = json.loads(
+                subprocess.check_output(cmd.split()).decode('UTF-8'))
+            if 'error' in out:
+                log("Error retrieving OSD setting: {}".format(out['error']),
+                    level=ERROR)
+                return False
+            current_settings[key] = out[cli_key]
+        settings_diff = {
+            k: v
+            for k, v in settings.items()
+            if str(v) != str(current_settings[k])}
+        for key, value in sorted(settings_diff.items()):
+            log("Setting {} to {}".format(key, value), level=DEBUG)
+            cmd = set_cmd.format(
+                osd_id=osd_id,
+                key=_get_cli_key(key),
+                value=value)
+            out = json.loads(
+                subprocess.check_output(cmd.split()).decode('UTF-8'))
+            if 'error' in out:
+                log("Error applying OSD setting: {}".format(out['error']),
+                    level=ERROR)
+                raise OSDConfigSetError
+    return True
+
+
+def enabled_manager_modules():
+    """Return a list of enabled manager modules.
+
+    :rtype: List[str]
+    """
+    cmd = ['ceph', 'mgr', 'module', 'ls']
+    quincy_or_later = cmp_pkgrevno('ceph-common', '17.1.0') >= 0
+    if quincy_or_later:
+        cmd.append('--format=json')
+    try:
+        modules = subprocess.check_output(cmd).decode('UTF-8')
+    except subprocess.CalledProcessError as e:
+        log("Failed to list ceph modules: {}".format(e), WARNING)
+        return []
+    modules = json.loads(modules)
+    return modules['enabled_modules']
+
+
+def is_mgr_module_enabled(module):
+    """Is a given manager module enabled.
+
+    :param module: The module name to check
+    :type module: str
+    :returns: Whether the named module is enabled
+    :rtype: bool
+    """
+    return module in enabled_manager_modules()
+
+
+is_dashboard_enabled = functools.partial(is_mgr_module_enabled, 'dashboard')
+
+
+def mgr_enable_module(module):
+    """Enable a Ceph Manager Module.
+
+    :param module: The module name to enable
+    :type module: str
+
+    :raises: subprocess.CalledProcessError
+    """
+    if not is_mgr_module_enabled(module):
+        subprocess.check_call(['ceph', 'mgr', 'module', 'enable', module])
+        return True
+    return False
+
+
+mgr_enable_dashboard = functools.partial(mgr_enable_module, 'dashboard')
+
+
+def mgr_disable_module(module):
+    """Disable a Ceph Manager Module.
+
+    :param module: The module name to disable
+    :type module: str
+
+    :raises: subprocess.CalledProcessError
+    """
+    if is_mgr_module_enabled(module):
+        subprocess.check_call(['ceph', 'mgr', 'module', 'disable', module])
+        return True
+    return False
+
+
+mgr_disable_dashboard = functools.partial(mgr_disable_module, 'dashboard')
+
+
+def ceph_config_set(name, value, who):
+    """Set a Ceph config option
+
+    :param name: key to set
+    :type name: str
+    :param value: value corresponding to key
+    :type value: str
+    :param who: Config area the key is associated with (e.g. 'dashboard')
+    :type who: str
+
+    :raises: subprocess.CalledProcessError
+    """
+    subprocess.check_call(['ceph', 'config', 'set', who, name, value])
+
+
+mgr_config_set = functools.partial(ceph_config_set, who='mgr')
+
+
+def ceph_config_get(name, who):
+    """Retrieve the value of a Ceph config option
+
+    :param name: key to lookup
+    :type name: str
+    :param who: Config area the key is associated with (e.g.
'dashboard') + :type who: str + :returns: Value associated with key + :rtype: str + :raises: subprocess.CalledProcessError + """ + return subprocess.check_output( + ['ceph', 'config', 'get', who, name]).decode('UTF-8') + + +mgr_config_get = functools.partial(ceph_config_get, who='mgr') + + +def _dashboard_set_ssl_artifact(path, artifact_name, hostname=None): + """Set SSL dashboard config option. + + :param path: Path to file + :type path: str + :param artifact_name: Option name for setting the artifact + :type artifact_name: str + :param hostname: If hostname is set artifact will only be associated with + the dashboard on that host. + :type hostname: str + :raises: subprocess.CalledProcessError + """ + cmd = ['ceph', 'dashboard', artifact_name] + if hostname: + cmd.append(hostname) + cmd.extend(['-i', path]) + log(cmd, level=DEBUG) + subprocess.check_call(cmd) + + +dashboard_set_ssl_certificate = functools.partial( + _dashboard_set_ssl_artifact, + artifact_name='set-ssl-certificate') + + +dashboard_set_ssl_certificate_key = functools.partial( + _dashboard_set_ssl_artifact, + artifact_name='set-ssl-certificate-key') diff --git a/ceph-proxy/metadata.yaml b/ceph-proxy/metadata.yaml new file mode 100644 index 00000000..efff7340 --- /dev/null +++ b/ceph-proxy/metadata.yaml @@ -0,0 +1,24 @@ +name: ceph-proxy +summary: Proxy to Juju external Ceph cluster +maintainer: OpenStack Charmers +description: | + Ceph is a distributed storage and network file system designed to provide + excellent performance, reliability, and scalability. +docs: https://discourse.charmhub.io/t/ceph-proxy-docs-index/11218 +tags: +- openstack +- storage +- file-servers +- misc +series: +- jammy +extra-bindings: + public: + cluster: +provides: + client: + interface: ceph-client + radosgw: + interface: ceph-radosgw + mds: + interface: ceph-mds diff --git a/ceph-proxy/osci.yaml b/ceph-proxy/osci.yaml new file mode 100644 index 00000000..3478aab2 --- /dev/null +++ b/ceph-proxy/osci.yaml @@ -0,0 +1,42 @@ +- project: + templates: + - charm-unit-jobs-py38 + - charm-unit-jobs-py39 + - charm-unit-jobs-py310 + - charm-functional-jobs + check: + jobs: + - jammy-yoga-ec + - lunar-antelope-ec: + voting: false + - mantic-bobcat-ec: + voting: false + vars: + needs_charm_build: true + charm_build_name: ceph-proxy + build_type: charmcraft + charmcraft_channel: 2.x/stable +- job: + name: jammy-yoga-ec + parent: func-target + dependencies: + - osci-lint + - charm-build + - name: tox-py310 + soft: true + vars: + tox_extra_args: '-- erasure-coded:jammy-yoga-ec' +- job: + name: lunar-antelope-ec + parent: func-target + dependencies: + - jammy-yoga-ec + vars: + tox_extra_args: -- erasure-coded:lunar-antelope-ec +- job: + name: mantic-bobcat-ec + parent: func-target + dependencies: + - jammy-yoga-ec + vars: + tox_extra_args: -- erasure-coded:mantic-bobcat-ec diff --git a/ceph-proxy/rename.sh b/ceph-proxy/rename.sh new file mode 100755 index 00000000..d0c35c97 --- /dev/null +++ b/ceph-proxy/rename.sh @@ -0,0 +1,13 @@ +#!/bin/bash +charm=$(grep "charm_build_name" osci.yaml | awk '{print $2}') +echo "renaming ${charm}_*.charm to ${charm}.charm" +echo -n "pwd: " +pwd +ls -al +echo "Removing bad downloaded charm maybe?" +if [[ -e "${charm}.charm" ]]; +then + rm "${charm}.charm" +fi +echo "Renaming charm here." 
+mv ${charm}_*.charm ${charm}.charm diff --git a/ceph-proxy/requirements.txt b/ceph-proxy/requirements.txt new file mode 100644 index 00000000..3b1cb7b1 --- /dev/null +++ b/ceph-proxy/requirements.txt @@ -0,0 +1,29 @@ +# This file is managed centrally by release-tools and should not be modified +# within individual charm repos. See the 'global' dir contents for available +# choices of *requirements.txt files for OpenStack Charms: +# https://github.com/openstack-charmers/release-tools +# +# TODO: Distill the func test requirements from the lint/unit test +# requirements. They are intertwined. Also, Zaza itself should specify +# all of its own requirements and if it doesn't, fix it there. +# +pbr==5.6.0 +simplejson>=2.2.0 +netifaces>=0.10.4 + +# NOTE: newer versions of cryptography require a Rust compiler to build, +# see +# * https://github.com/openstack-charmers/zaza/issues/421 +# * https://mail.python.org/pipermail/cryptography-dev/2021-January/001003.html +# +cryptography<3.4 + +# Strange import error with newer netaddr: +netaddr>0.7.16,<0.8.0 + +Jinja2>=2.6 # BSD License (3 clause) +six>=1.9.0 + +dnspython + +psutil>=1.1.1,<2.0.0 diff --git a/ceph-proxy/revision b/ceph-proxy/revision new file mode 100644 index 00000000..ffda4e73 --- /dev/null +++ b/ceph-proxy/revision @@ -0,0 +1 @@ +105 \ No newline at end of file diff --git a/ceph-proxy/setup.cfg b/ceph-proxy/setup.cfg new file mode 100644 index 00000000..37083b62 --- /dev/null +++ b/ceph-proxy/setup.cfg @@ -0,0 +1,5 @@ +[nosetests] +verbosity=2 +with-coverage=1 +cover-erase=1 +cover-package=hooks diff --git a/ceph-proxy/templates/ceph.conf b/ceph-proxy/templates/ceph.conf new file mode 100644 index 00000000..3795912f --- /dev/null +++ b/ceph-proxy/templates/ceph.conf @@ -0,0 +1,17 @@ +[global] + +auth cluster required = {{ auth_supported }} +auth service required = {{ auth_supported }} +auth client required = {{ auth_supported }} + +keyring = /etc/ceph/$cluster.$name.keyring +mon host = {{ mon_hosts }} +fsid = {{ fsid }} + +log to syslog = {{ use_syslog }} +err to syslog = {{ use_syslog }} +clog to syslog = {{ use_syslog }} +mon cluster log to syslog = {{ use_syslog }} +debug mon = {{ loglevel }}/5 +debug osd = {{ loglevel }}/5 + diff --git a/ceph-proxy/templates/ceph.keyring b/ceph-proxy/templates/ceph.keyring new file mode 100644 index 00000000..30832f94 --- /dev/null +++ b/ceph-proxy/templates/ceph.keyring @@ -0,0 +1,3 @@ +[{{ admin_user }}] + key = {{admin_key}} + diff --git a/ceph-proxy/templates/mon.keyring b/ceph-proxy/templates/mon.keyring new file mode 100644 index 00000000..b8aa5bc4 --- /dev/null +++ b/ceph-proxy/templates/mon.keyring @@ -0,0 +1,3 @@ +[{{ admin_user }}] + key = {{admin_key}} + diff --git a/ceph-proxy/test-requirements.txt b/ceph-proxy/test-requirements.txt new file mode 100644 index 00000000..43248e4c --- /dev/null +++ b/ceph-proxy/test-requirements.txt @@ -0,0 +1,50 @@ +# This file is managed centrally by release-tools and should not be modified +# within individual charm repos. See the 'global' dir contents for available +# choices of *requirements.txt files for OpenStack Charms: +# https://github.com/openstack-charmers/release-tools + +# NOTE: newer versions of cryptography require a Rust compiler to build, +# see +# * https://github.com/openstack-charmers/zaza/issues/421 +# * https://mail.python.org/pipermail/cryptography-dev/2021-January/001003.html +# +cryptography<3.4 + +requests>=2.18.4 + +stestr>=2.2.0 + +# Dependency of stestr. 
Workaround for +# https://github.com/mtreinish/stestr/issues/145 +cliff<3.0.0 + +# Dependencies of stestr. Newer versions use keywords that didn't exist in +# python 3.5 yet (e.g. "ModuleNotFoundError") +importlib-metadata<3.0.0; python_version < '3.6' +importlib-resources<3.0.0; python_version < '3.6' + +# Some Zuul nodes sometimes pull newer versions of these dependencies which +# dropped support for python 3.5: +osprofiler<2.7.0;python_version<'3.6' +stevedore<1.31.0;python_version<'3.6' +debtcollector<1.22.0;python_version<'3.6' +oslo.utils<=3.41.0;python_version<'3.6' + +coverage>=4.5.2 +pyudev # for ceph-* charm unit tests (need to fix the ceph-* charm unit tests/mocking) +git+https://github.com/openstack-charmers/zaza.git#egg=zaza +git+https://github.com/openstack-charmers/zaza-openstack-tests.git#egg=zaza.openstack + +# Needed for charm-glance: +git+https://opendev.org/openstack/tempest.git#egg=tempest;python_version>='3.8' +tempest<30.0.0;python_version<'3.8' and python_version >= '3.6' +tempest<24.0.0;python_version<'3.6' + +croniter # needed for charm-rabbitmq-server unit tests + +# icey: pyopenssl 22 introduces a requirement on newer OpenSSL which causes test +# failures. Pin pyopenssl to resolve the failure. +pyopenssl<=22.0.0 + +pydantic < 2 +cosl diff --git a/ceph-proxy/tests/bundles/jammy-antelope.yaml b/ceph-proxy/tests/bundles/jammy-antelope.yaml new file mode 100644 index 00000000..89e3da79 --- /dev/null +++ b/ceph-proxy/tests/bundles/jammy-antelope.yaml @@ -0,0 +1,214 @@ +variables: + openstack-origin: &openstack-origin cloud:jammy-antelope + +series: jammy + +comment: +- 'machines section to decide order of deployment. database sooner = faster' +machines: + '0': + constraints: mem=3072M + '1': + constraints: mem=3072M + '2': + constraints: mem=3072M + '3': + '4': + '5': + '6': + '7': + '8': + '9': + '10': + '11': + '12': + '13': + '14': + '15': + '16': + '17': + '18': + +applications: + + cinder-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + glance-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + keystone-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + + mysql-innodb-cluster: + charm: ch:mysql-innodb-cluster + num_units: 3 + to: + - '0' + - '1' + - '2' + channel: 8.0/edge + + ceph-mon: + charm: ch:ceph-mon + num_units: 3 + options: + expected-osd-count: 3 + source: *openstack-origin + to: + - '3' + - '4' + - '5' + channel: reef/edge + + ceph-osd: + charm: ch:ceph-osd + num_units: 3 + storage: + osd-devices: 10G + options: + source: *openstack-origin + to: + - '6' + - '7' + - '8' + channel: reef/edge + + ceph-proxy: + charm: ../../ceph-proxy.charm + num_units: 1 + options: + source: *openstack-origin + to: + - '9' + + ceph-radosgw: + charm: ch:ceph-radosgw + num_units: 1 + options: + source: *openstack-origin + to: + - '10' + channel: reef/edge + + cinder: + charm: ch:cinder + num_units: 1 + options: + openstack-origin: *openstack-origin + block-device: "" + ephemeral-unmount: "" + glance-api-version: 2 + overwrite: "false" + constraints: mem=2048 + to: + - '11' + channel: 2023.2/edge + + cinder-ceph: + charm: ch:cinder-ceph + options: + restrict-ceph-pools: True + channel: 2023.2/edge + + keystone: + charm: ch:keystone + num_units: 1 + options: + openstack-origin: *openstack-origin + admin-password: openstack + constraints: mem=1024 + to: + - '12' + channel: 2023.2/edge + + rabbitmq-server: + charm: ch:rabbitmq-server + num_units: 1 + constraints: mem=1024 + to: + - '13' + channel: 3.9/edge + + glance: + charm: ch:glance + 
num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '14' + channel: 2023.2/edge + + nova-compute: + charm: ch:nova-compute + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '15' + channel: 2023.2/edge + + ubuntu: # used to test mounts + charm: ch:ubuntu + num_units: 2 + to: + - '16' + - '17' + + ceph-fs: + charm: ch:ceph-fs + channel: reef/edge + num_units: 1 + to: + - '18' + +relations: + + - - 'ceph-osd:mon' + - 'ceph-mon:osd' + + - - 'ceph-proxy:radosgw' + - 'ceph-radosgw:mon' + + - - 'cinder:amqp' + - 'rabbitmq-server:amqp' + + - - 'cinder:shared-db' + - 'cinder-mysql-router:shared-db' + - - 'cinder-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'keystone:shared-db' + - 'keystone-mysql-router:shared-db' + - - 'keystone-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'cinder:identity-service' + - 'keystone:identity-service' + + - - 'cinder-ceph:storage-backend' + - 'cinder:storage-backend' + + - - 'cinder-ceph:ceph' + - 'ceph-proxy:client' + + - - 'glance:image-service' + - 'nova-compute:image-service' + + - - 'glance:identity-service' + - 'keystone:identity-service' + + - - 'glance:shared-db' + - 'glance-mysql-router:shared-db' + - - 'glance-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'nova-compute:ceph-access' + - 'cinder-ceph:ceph-access' + + - - 'nova-compute:amqp' + - 'rabbitmq-server:amqp' + + - - 'ceph-proxy:mds' + - 'ceph-fs:ceph-mds' diff --git a/ceph-proxy/tests/bundles/jammy-bobcat.yaml b/ceph-proxy/tests/bundles/jammy-bobcat.yaml new file mode 100644 index 00000000..1b9ce6ba --- /dev/null +++ b/ceph-proxy/tests/bundles/jammy-bobcat.yaml @@ -0,0 +1,214 @@ +variables: + openstack-origin: &openstack-origin cloud:jammy-bobcat + +series: jammy + +comment: +- 'machines section to decide order of deployment. 
database sooner = faster' +machines: + '0': + constraints: mem=3072M + '1': + constraints: mem=3072M + '2': + constraints: mem=3072M + '3': + '4': + '5': + '6': + '7': + '8': + '9': + '10': + '11': + '12': + '13': + '14': + '15': + '16': + '17': + '18': + +applications: + + cinder-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + glance-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + keystone-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + + mysql-innodb-cluster: + charm: ch:mysql-innodb-cluster + num_units: 3 + to: + - '0' + - '1' + - '2' + channel: 8.0/edge + + ceph-mon: + charm: ch:ceph-mon + num_units: 3 + options: + expected-osd-count: 3 + source: *openstack-origin + to: + - '3' + - '4' + - '5' + channel: reef/edge + + ceph-osd: + charm: ch:ceph-osd + num_units: 3 + storage: + osd-devices: 10G + options: + source: *openstack-origin + to: + - '6' + - '7' + - '8' + channel: reef/edge + + ceph-proxy: + charm: ../../ceph-proxy.charm + num_units: 1 + options: + source: *openstack-origin + to: + - '9' + + ceph-radosgw: + charm: ch:ceph-radosgw + num_units: 1 + options: + source: *openstack-origin + to: + - '10' + channel: reef/edge + + cinder: + charm: ch:cinder + num_units: 1 + options: + openstack-origin: *openstack-origin + block-device: "" + ephemeral-unmount: "" + glance-api-version: 2 + overwrite: "false" + constraints: mem=2048 + to: + - '11' + channel: 2023.2/edge + + cinder-ceph: + charm: ch:cinder-ceph + options: + restrict-ceph-pools: True + channel: 2023.2/edge + + keystone: + charm: ch:keystone + num_units: 1 + options: + openstack-origin: *openstack-origin + admin-password: openstack + constraints: mem=1024 + to: + - '12' + channel: 2023.2/edge + + rabbitmq-server: + charm: ch:rabbitmq-server + num_units: 1 + constraints: mem=1024 + to: + - '13' + channel: 3.9/edge + + glance: + charm: ch:glance + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '14' + channel: 2023.2/edge + + nova-compute: + charm: ch:nova-compute + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '15' + channel: 2023.2/edge + + ubuntu: # used to test mounts + charm: ch:ubuntu + num_units: 2 + to: + - '16' + - '17' + + ceph-fs: + charm: ch:ceph-fs + channel: reef/edge + num_units: 1 + to: + - '18' + +relations: + + - - 'ceph-osd:mon' + - 'ceph-mon:osd' + + - - 'ceph-proxy:radosgw' + - 'ceph-radosgw:mon' + + - - 'cinder:amqp' + - 'rabbitmq-server:amqp' + + - - 'cinder:shared-db' + - 'cinder-mysql-router:shared-db' + - - 'cinder-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'keystone:shared-db' + - 'keystone-mysql-router:shared-db' + - - 'keystone-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'cinder:identity-service' + - 'keystone:identity-service' + + - - 'cinder-ceph:storage-backend' + - 'cinder:storage-backend' + + - - 'cinder-ceph:ceph' + - 'ceph-proxy:client' + + - - 'glance:image-service' + - 'nova-compute:image-service' + + - - 'glance:identity-service' + - 'keystone:identity-service' + + - - 'glance:shared-db' + - 'glance-mysql-router:shared-db' + - - 'glance-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'nova-compute:ceph-access' + - 'cinder-ceph:ceph-access' + + - - 'nova-compute:amqp' + - 'rabbitmq-server:amqp' + + - - 'ceph-proxy:mds' + - 'ceph-fs:ceph-mds' diff --git a/ceph-proxy/tests/bundles/jammy-caracal.yaml b/ceph-proxy/tests/bundles/jammy-caracal.yaml new file mode 100644 index 00000000..cc8b24e9 --- /dev/null +++ 
b/ceph-proxy/tests/bundles/jammy-caracal.yaml @@ -0,0 +1,69 @@ +variables: + openstack-origin: &openstack-origin cloud:jammy-caracal + +series: jammy + +machines: + '0': + constraints: cores=2 mem=4G root-disk=25G virt-type=virtual-machine + '1': + constraints: cores=2 mem=4G root-disk=25G virt-type=virtual-machine + '2': + constraints: cores=2 mem=4G root-disk=25G virt-type=virtual-machine + '3': + '4': + '5': + '6': + +applications: + ceph-osd: + charm: ch:ceph-osd + num_units: 3 + storage: + osd-devices: 'loop,10G' + to: + - '0' + - '1' + - '2' + channel: latest/edge + + ceph-mon: + charm: ch:ceph-mon + num_units: 3 + options: + expected-osd-count: 3 + channel: latest/edge + to: + - '3' + - '4' + - '5' + + ceph-proxy: + charm: ../../../ceph-proxy.charm + num_units: 1 + options: + source: distro + to: + - '6' + + ceph-radosgw: + charm: ch:ceph-radosgw + num_units: 1 + channel: latest/edge + + ceph-fs: + charm: ch:ceph-fs + channel: latest/edge + num_units: 1 + to: + - '2' + +relations: + - - 'ceph-osd:mon' + - 'ceph-mon:osd' + + - - 'ceph-proxy:radosgw' + - 'ceph-radosgw:mon' + + - - 'ceph-proxy:mds' + - 'ceph-fs:ceph-mds' diff --git a/ceph-proxy/tests/bundles/jammy-yoga-ec.yaml b/ceph-proxy/tests/bundles/jammy-yoga-ec.yaml new file mode 100644 index 00000000..b24cecfe --- /dev/null +++ b/ceph-proxy/tests/bundles/jammy-yoga-ec.yaml @@ -0,0 +1,224 @@ +variables: + openstack-origin: &openstack-origin distro + +series: jammy + +comment: +- 'machines section to decide order of deployment. database sooner = faster' +machines: + '0': + constraints: mem=3072M + '1': + constraints: mem=3072M + '2': + constraints: mem=3072M + '3': + '4': + '5': + '6': + '7': + '8': + '9': + '10': + '11': + '12': + '13': + '14': + '15': + '16': + '17': + '18': + +applications: + + cinder-mysql-router: + charm: ch:mysql-router + channel: latest/edge + glance-mysql-router: + charm: ch:mysql-router + channel: latest/edge + keystone-mysql-router: + charm: ch:mysql-router + channel: latest/edge + + mysql-innodb-cluster: + charm: ch:mysql-innodb-cluster + num_units: 3 + to: + - '0' + - '1' + - '2' + channel: latest/edge + + ceph-mon: + charm: ch:ceph-mon + num_units: 3 + options: + expected-osd-count: 3 + source: *openstack-origin + to: + - '3' + - '4' + - '5' + channel: latest/edge + + ceph-osd: + charm: ch:ceph-osd + num_units: 6 + storage: + osd-devices: 10G + options: + source: *openstack-origin + to: + - '6' + - '7' + - '8' + - '16' + - '17' + - '18' + channel: latest/edge + + ceph-proxy: + charm: ../../ceph-proxy.charm + num_units: 1 + options: + source: *openstack-origin + to: + - '9' + + ceph-radosgw: + charm: ch:ceph-radosgw + num_units: 1 + options: + source: *openstack-origin + pool-type: erasure-coded + ec-profile-k: 4 + ec-profile-m: 2 + to: + - '10' + channel: latest/edge + + cinder: + charm: ch:cinder + num_units: 1 + options: + openstack-origin: *openstack-origin + block-device: "" + ephemeral-unmount: "" + glance-api-version: 2 + overwrite: "false" + constraints: mem=2048 + to: + - '11' + channel: latest/edge + + cinder-ceph: + charm: ch:cinder-ceph + options: + restrict-ceph-pools: True + pool-type: erasure-coded + ec-profile-k: 4 + ec-profile-m: 2 + ec-profile-plugin: lrc + ec-profile-locality: 3 + channel: latest/edge + + keystone: + charm: ch:keystone + num_units: 1 + options: + openstack-origin: *openstack-origin + admin-password: openstack + constraints: mem=1024 + to: + - '12' + channel: latest/edge + + rabbitmq-server: + charm: ch:rabbitmq-server + num_units: 1 + constraints: mem=1024 + to: + - 
'13' + channel: latest/edge + + glance: + charm: ch:glance + num_units: 1 + options: + openstack-origin: *openstack-origin + pool-type: erasure-coded + ec-profile-k: 4 + ec-profile-m: 2 + ec-profile-plugin: jerasure + to: + - '14' + channel: latest/edge + + nova-compute: + charm: ch:nova-compute + num_units: 1 + options: + openstack-origin: *openstack-origin + pool-type: erasure-coded + ec-profile-k: 4 + ec-profile-m: 2 + ec-profile-plugin: isa + libvirt-image-backend: rbd + to: + - '15' + channel: latest/edge + + +relations: + + - - 'ceph-osd:mon' + - 'ceph-mon:osd' + + - - 'ceph-proxy:radosgw' + - 'ceph-radosgw:mon' + + - - 'cinder:amqp' + - 'rabbitmq-server:amqp' + + - - 'cinder:shared-db' + - 'cinder-mysql-router:shared-db' + - - 'cinder-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'keystone:shared-db' + - 'keystone-mysql-router:shared-db' + - - 'keystone-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'cinder:identity-service' + - 'keystone:identity-service' + + - - 'cinder-ceph:storage-backend' + - 'cinder:storage-backend' + + - - 'cinder-ceph:ceph' + - 'ceph-proxy:client' + + - - 'glance:image-service' + - 'nova-compute:image-service' + + - - 'glance:identity-service' + - 'keystone:identity-service' + + - - 'glance:shared-db' + - 'glance-mysql-router:shared-db' + - - 'glance-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'glance:ceph' + - 'ceph-proxy:client' + + - - 'nova-compute:ceph-access' + - 'cinder-ceph:ceph-access' + + - - 'nova-compute:amqp' + - 'rabbitmq-server:amqp' + + - - 'nova-compute:ceph' + - 'ceph-proxy:client' diff --git a/ceph-proxy/tests/bundles/jammy-yoga.yaml b/ceph-proxy/tests/bundles/jammy-yoga.yaml new file mode 100644 index 00000000..12ee6212 --- /dev/null +++ b/ceph-proxy/tests/bundles/jammy-yoga.yaml @@ -0,0 +1,214 @@ +variables: + openstack-origin: &openstack-origin distro + +series: jammy + +comment: +- 'machines section to decide order of deployment. 
database sooner = faster' +machines: + '0': + constraints: mem=3072M + '1': + constraints: mem=3072M + '2': + constraints: mem=3072M + '3': + '4': + '5': + '6': + '7': + '8': + '9': + '10': + '11': + '12': + '13': + '14': + '15': + '16': + '17': + '18': + +applications: + + cinder-mysql-router: + charm: ch:mysql-router + channel: latest/edge + glance-mysql-router: + charm: ch:mysql-router + channel: latest/edge + keystone-mysql-router: + charm: ch:mysql-router + channel: latest/edge + + mysql-innodb-cluster: + charm: ch:mysql-innodb-cluster + num_units: 3 + to: + - '0' + - '1' + - '2' + channel: latest/edge + + ceph-mon: + charm: ch:ceph-mon + num_units: 3 + options: + expected-osd-count: 3 + source: *openstack-origin + to: + - '3' + - '4' + - '5' + channel: latest/edge + + ceph-osd: + charm: ch:ceph-osd + num_units: 3 + storage: + osd-devices: 10G + options: + source: *openstack-origin + to: + - '6' + - '7' + - '8' + channel: latest/edge + + ceph-proxy: + charm: ../../ceph-proxy.charm + num_units: 1 + options: + source: *openstack-origin + to: + - '9' + + ceph-radosgw: + charm: ch:ceph-radosgw + num_units: 1 + options: + source: *openstack-origin + to: + - '10' + channel: latest/edge + + cinder: + charm: ch:cinder + num_units: 1 + options: + openstack-origin: *openstack-origin + block-device: "" + ephemeral-unmount: "" + glance-api-version: 2 + overwrite: "false" + constraints: mem=2048 + to: + - '11' + channel: latest/edge + + cinder-ceph: + charm: ch:cinder-ceph + options: + restrict-ceph-pools: True + channel: latest/edge + + keystone: + charm: ch:keystone + num_units: 1 + options: + openstack-origin: *openstack-origin + admin-password: openstack + constraints: mem=1024 + to: + - '12' + channel: latest/edge + + rabbitmq-server: + charm: ch:rabbitmq-server + num_units: 1 + constraints: mem=1024 + to: + - '13' + channel: latest/edge + + glance: + charm: ch:glance + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '14' + channel: latest/edge + + nova-compute: + charm: ch:nova-compute + num_units: 1 + options: + openstack-origin: *openstack-origin + to: + - '15' + channel: latest/edge + + ubuntu: # used to test mounts + charm: ch:ubuntu + num_units: 2 + to: + - '16' + - '17' + + ceph-fs: + charm: ch:ceph-fs + channel: latest/edge + num_units: 1 + to: + - '18' + +relations: + + - - 'ceph-osd:mon' + - 'ceph-mon:osd' + + - - 'ceph-proxy:radosgw' + - 'ceph-radosgw:mon' + + - - 'cinder:amqp' + - 'rabbitmq-server:amqp' + + - - 'cinder:shared-db' + - 'cinder-mysql-router:shared-db' + - - 'cinder-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'keystone:shared-db' + - 'keystone-mysql-router:shared-db' + - - 'keystone-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'cinder:identity-service' + - 'keystone:identity-service' + + - - 'cinder-ceph:storage-backend' + - 'cinder:storage-backend' + + - - 'cinder-ceph:ceph' + - 'ceph-proxy:client' + + - - 'glance:image-service' + - 'nova-compute:image-service' + + - - 'glance:identity-service' + - 'keystone:identity-service' + + - - 'glance:shared-db' + - 'glance-mysql-router:shared-db' + - - 'glance-mysql-router:db-router' + - 'mysql-innodb-cluster:db-router' + + - - 'nova-compute:ceph-access' + - 'cinder-ceph:ceph-access' + + - - 'nova-compute:amqp' + - 'rabbitmq-server:amqp' + + - - 'ceph-proxy:mds' + - 'ceph-fs:ceph-mds' diff --git a/ceph-proxy/tests/target.py b/ceph-proxy/tests/target.py new file mode 100644 index 00000000..bbb5a35a --- /dev/null +++ b/ceph-proxy/tests/target.py @@ -0,0 
+1,268 @@ +# Copyright 2024 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import logging +import tenacity +import json +import subprocess + +import zaza +import zaza.charm_lifecycle.utils as lifecycle_utils +import zaza.model as zaza_model +import zaza.openstack.utilities.ceph as zaza_ceph +import zaza.openstack.utilities.exceptions as zaza_exceptions + + +def setup_ceph_proxy(): + """ + Configure ceph-proxy with ceph metadata. + + Fetches the admin keyring and FSID from ceph-mon and + uses those to configure ceph-proxy. + """ + raw_admin_keyring = zaza_model.run_on_leader( + "ceph-mon", 'cat /etc/ceph/ceph.client.admin.keyring')["Stdout"] + admin_keyring = [ + line for line in raw_admin_keyring.split("\n") if "key" in line + ][0].split(' = ')[-1].rstrip() + fsid = zaza_model.run_on_leader("ceph-mon", "leader-get fsid")["Stdout"] + cluster_ips = zaza_model.get_app_ips("ceph-mon") + + proxy_config = { + 'auth-supported': 'cephx', + 'admin-key': admin_keyring, + 'fsid': fsid, + 'monitor-hosts': ' '.join(cluster_ips) + } + + logging.debug('Config: {}'.format(proxy_config)) + + zaza_model.set_application_config("ceph-proxy", proxy_config) + + +class CephProxyTest(unittest.TestCase): + """Test ceph via proxy.""" + + @classmethod + def setUpClass(cls): + """Run class setup for running tests.""" + super(CephProxyTest, cls).setUpClass() + + test_config = lifecycle_utils.get_charm_config(fatal=False) + cls.target_deploy_status = test_config.get('target_deploy_status', {}) + + def test_ceph_health(self): + """Make sure ceph-proxy can communicate with ceph.""" + logging.info('Wait for idle/ready status...') + zaza_model.wait_for_application_states( + states=self.target_deploy_status) + + self.assertEqual( + zaza_model.run_on_leader("ceph-proxy", "sudo ceph health")["Code"], + "0" + ) + + def test_cinder_ceph_restrict_pool_setup(self): + """Make sure the cinder-ceph restricted pool was created.""" + try: + zaza_model.get_application('cinder-ceph') + except KeyError: + raise unittest.SkipTest("Skipping OpenStack dependent test") + logging.info('Wait for idle/ready status...') + zaza_model.wait_for_application_states( + states=self.target_deploy_status) + + for attempt in tenacity.Retrying( + wait=tenacity.wait_exponential(multiplier=2, max=32), + reraise=True, stop=tenacity.stop_after_attempt(8), + ): + with attempt: + pools = zaza_ceph.get_ceph_pools('ceph-mon/0') + if 'cinder-ceph' not in pools: + msg = ('cinder-ceph pool not found querying ceph-mon/0, ' + 'got: {}'.format(pools)) + raise zaza_exceptions.CephPoolNotFound(msg) + + # Checking for cinder-ceph specific permissions makes + # the test more robust when we add additional relations + # to ceph for other applications (such as glance and nova).
+ expected_permissions = [ + "allow rwx pool=cinder-ceph", + "allow class-read object_prefix rbd_children", + ] + cmd = "sudo ceph auth get client.cinder-ceph" + result = zaza_model.run_on_unit('ceph-mon/0', cmd) + output = result.get('Stdout').strip() + + for expected in expected_permissions: + if expected not in output: + msg = ('cinder-ceph pool restriction ({}) was not' + ' configured correctly.' + ' Found: {}'.format(expected, output)) + raise zaza_exceptions.CephPoolNotConfigured(msg) + + +class CephFSWithCephProxyTests(unittest.TestCase): + """Encapsulate CephFS tests.""" + + mounts_share = False + mount_dir = '/mnt/cephfs' + CEPH_MON = 'ceph-proxy' + + def tearDown(self): + """Cleanup after running tests.""" + if self.mounts_share: + for unit in ['ceph-osd/0', 'ceph-osd/1']: + try: + zaza.utilities.generic.run_via_ssh( + unit_name=unit, + cmd='sudo fusermount -u {0} && sudo rmdir {0}'.format( + self.mount_dir)) + except subprocess.CalledProcessError: + logging.warning( + "Failed to cleanup mounts on {}".format(unit)) + + def _mount_share(self, unit_name: str, + retry: bool = True): + self._install_dependencies(unit_name) + self._install_keyring(unit_name) + ssh_cmd = ( + 'sudo mkdir -p {0} && ' + 'sudo ceph-fuse {0}'.format(self.mount_dir) + ) + if retry: + for attempt in tenacity.Retrying( + stop=tenacity.stop_after_attempt(5), + wait=tenacity.wait_exponential(multiplier=3, + min=2, max=10)): + with attempt: + zaza.utilities.generic.run_via_ssh( + unit_name=unit_name, + cmd=ssh_cmd) + else: + zaza.utilities.generic.run_via_ssh( + unit_name=unit_name, + cmd=ssh_cmd) + self.mounts_share = True + + def _install_keyring(self, unit_name: str): + + keyring = zaza_model.run_on_leader( + self.CEPH_MON, 'cat /etc/ceph/ceph.client.admin.keyring')['Stdout'] + config = zaza_model.run_on_leader( + self.CEPH_MON, 'cat /etc/ceph/ceph.conf')['Stdout'] + commands = [ + 'sudo mkdir -p /etc/ceph', + "echo '{}' | sudo tee /etc/ceph/ceph.conf".format(config), + "echo '{}' | " + 'sudo tee /etc/ceph/ceph.client.admin.keyring'.format(keyring) + ] + for cmd in commands: + zaza.utilities.generic.run_via_ssh( + unit_name=unit_name, + cmd=cmd) + + def _install_dependencies(self, unit: str): + zaza.utilities.generic.run_via_ssh( + unit_name=unit, + cmd='sudo apt-get install -yq ceph-fuse') + + @classmethod + def setUpClass(cls): + """Run class setup for running tests.""" + super(CephFSWithCephProxyTests, cls).setUpClass() + + @tenacity.retry( + stop=tenacity.stop_after_attempt(5), + wait=tenacity.wait_exponential(multiplier=3, min=2, max=10)) + def _write_testing_file_on_instance(self, instance_name: str): + zaza.utilities.generic.run_via_ssh( + unit_name=instance_name, + cmd='echo -n "test" | sudo tee {}/test'.format(self.mount_dir)) + + @tenacity.retry( + stop=tenacity.stop_after_attempt(5), + wait=tenacity.wait_exponential(multiplier=3, min=2, max=10)) + def _verify_testing_file_on_instance(self, instance_name: str): + output = zaza_model.run_on_unit( + instance_name, 'sudo cat {}/test'.format(self.mount_dir))['Stdout'] + self.assertEqual('test', output.strip()) + + def test_cephfs_share(self): + """Test that CephFS shares can be accessed on two instances. + + 1. Spawn two servers + 2. mount it on both + 3. write a file on one + 4. read it on the other + 5. 
profit + """ + self._mount_share('ceph-osd/0') + self._mount_share('ceph-osd/1') + + self._write_testing_file_on_instance('ceph-osd/0') + self._verify_testing_file_on_instance('ceph-osd/1') + + def test_conf(self): + """Test ceph to ensure juju config options are properly set.""" + self.TESTED_UNIT = 'ceph-fs/0' + + def _get_conf(): + """get/parse ceph daemon response into dict. + + :returns dict: Current configuration of the Ceph MDS daemon + :rtype: dict + """ + cmd = "sudo ceph daemon mds.$HOSTNAME config show" + conf = zaza_model.run_on_unit(self.TESTED_UNIT, cmd) + return json.loads(conf['Stdout']) + + @tenacity.retry( + wait=tenacity.wait_exponential(multiplier=1, min=4, max=10), + stop=tenacity.stop_after_attempt(10)) + def _change_conf_check(mds_config): + """Change configs, then assert to ensure config was set. + + Doesn't return a value. + """ + zaza_model.set_application_config('ceph-fs', mds_config) + results = _get_conf() + self.assertEqual( + results['mds_cache_memory_limit'], + mds_config['mds-cache-memory-limit']) + self.assertAlmostEqual( + float(results['mds_cache_reservation']), + float(mds_config['mds-cache-reservation'])) + self.assertAlmostEqual( + float(results['mds_health_cache_threshold']), + float(mds_config['mds-health-cache-threshold'])) + + # ensure defaults are set + mds_config = {'mds-cache-memory-limit': '4294967296', + 'mds-cache-reservation': '0.05', + 'mds-health-cache-threshold': '1.5'} + _change_conf_check(mds_config) + + # change defaults + mds_config = {'mds-cache-memory-limit': '8589934592', + 'mds-cache-reservation': '0.10', + 'mds-health-cache-threshold': '2'} + _change_conf_check(mds_config) + + # Restore config to keep tests idempotent + mds_config = {'mds-cache-memory-limit': '4294967296', + 'mds-cache-reservation': '0.05', + 'mds-health-cache-threshold': '1.5'} + _change_conf_check(mds_config) diff --git a/ceph-proxy/tests/tests.yaml b/ceph-proxy/tests/tests.yaml new file mode 100644 index 00000000..04cfb599 --- /dev/null +++ b/ceph-proxy/tests/tests.yaml @@ -0,0 +1,29 @@ +charm_name: ceph-proxy + +gate_bundles: + - jammy-caracal + +smoke_bundles: + - jammy-caracal + +dev_bundles: + - jammy-caracal + +configure: + - tests.target.setup_ceph_proxy + +tests: + - zaza.charm_tests.lifecycle.refresh.CharmRefreshAll + - tests.target.CephProxyTest + - tests.target.CephFSWithCephProxyTests + +target_deploy_status: + ceph-proxy: + workload-status: blocked + workload-status-message-prefix: "Ensure FSID and admin-key are set" + ceph-radosgw: + workload-status: waiting + workload-status-message-prefix: "Incomplete relations: mon" + ceph-fs: + workload-status: waiting + workload-status-message-prefix: "'ceph-mds' incomplete" diff --git a/ceph-proxy/tox.ini b/ceph-proxy/tox.ini new file mode 100644 index 00000000..41e25414 --- /dev/null +++ b/ceph-proxy/tox.ini @@ -0,0 +1,154 @@ +# Classic charm (with zaza): ./tox.ini +# This file is managed centrally by release-tools and should not be modified +# within individual charm repos. See the 'global' dir contents for available +# choices of tox.ini for OpenStack Charms: +# https://github.com/openstack-charmers/release-tools +# +# TODO: Distill the func test requirements from the lint/unit test +# requirements. They are intertwined. Also, Zaza itself should specify +# all of its own requirements and if it doesn't, fix it there. +[tox] +envlist = pep8,py3 +skipsdist = True +# NOTE: Avoid build/test env pollution by not enabling sitepackages. 
+sitepackages = False +# NOTE: Avoid false positives by not skipping missing interpreters. +skip_missing_interpreters = False + +# NOTE: https://wiki.canonical.com/engineering/OpenStack/InstallLatestToxOnOsci +minversion = 3.18.0 + +[testenv] +setenv = VIRTUAL_ENV={envdir} + PYTHONHASHSEED=0 + TEST_JUJU3=1 + CHARM_DIR={envdir} + CHARMS_ARTIFACT_DIR={toxinidir}/.. +install_command = + pip install {opts} {packages} +commands = stestr run --slowest {posargs} +allowlist_externals = + charmcraft + {toxinidir}/rename.sh +passenv = + HOME + TERM + CS_* + OS_* + TEST_* +deps = -r{toxinidir}/test-requirements.txt + +[testenv:build] +basepython = python3 +deps = -r{toxinidir}/build-requirements.txt +commands = + charmcraft clean + charmcraft -v pack + {toxinidir}/rename.sh + +[testenv:py36] +basepython = python3.6 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py37] +basepython = python3.7 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py38] +basepython = python3.8 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py39] +basepython = python3.9 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py310] +basepython = python3.10 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py311] +basepython = python3.11 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py312] +basepython = python3.12 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:py3] +basepython = python3 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt + +[testenv:pep8] +basepython = python3 +deps = flake8 + charm-tools +commands = flake8 {posargs} unit_tests tests actions files + +[testenv:cover] +# Technique based heavily upon +# https://github.com/openstack/nova/blob/master/tox.ini +basepython = python3 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt +setenv = + {[testenv]setenv} + PYTHON=coverage run +commands = + coverage erase + stestr run --slowest {posargs} + coverage combine + coverage html -d cover + coverage xml -o cover/coverage.xml + coverage report + +[coverage:run] +branch = True +concurrency = multiprocessing +parallel = True +source = + . 
+omit = + .tox/* + */charmhelpers/* + unit_tests/* + +[testenv:venv] +basepython = python3 +commands = {posargs} + +[testenv:func-noop] +basepython = python3 +commands = + functest-run-suite --help + +[testenv:func] +basepython = python3 +commands = + functest-run-suite --keep-model + +[testenv:func-smoke] +basepython = python3 +commands = + functest-run-suite --keep-model --smoke + +[testenv:func-dev] +basepython = python3 +commands = + functest-run-suite --keep-model --dev + +[testenv:func-target] +basepython = python3 +commands = + functest-run-suite --keep-model --bundle {posargs} + +[flake8] +ignore = E402,E226,W503,W504 +exclude = */charmhelpers diff --git a/ceph-proxy/unit_tests/__init__.py b/ceph-proxy/unit_tests/__init__.py new file mode 100644 index 00000000..34acae80 --- /dev/null +++ b/ceph-proxy/unit_tests/__init__.py @@ -0,0 +1,20 @@ +import os +import sys + + +_path = os.path.dirname(os.path.realpath(__file__)) +_actions = os.path.abspath(os.path.join(_path, '../actions')) +_hooks = os.path.abspath(os.path.join(_path, '../hooks')) +_charmhelpers = os.path.abspath(os.path.join(_path, '../charmhelpers')) +_unit_tests = os.path.abspath(os.path.join(_path, '../unit_tests')) + + +def _add_path(path): + if path not in sys.path: + sys.path.insert(1, path) + + +_add_path(_actions) +_add_path(_hooks) +_add_path(_charmhelpers) +_add_path(_unit_tests) diff --git a/ceph-proxy/unit_tests/test_ceph.py b/ceph-proxy/unit_tests/test_ceph.py new file mode 100644 index 00000000..211c61b6 --- /dev/null +++ b/ceph-proxy/unit_tests/test_ceph.py @@ -0,0 +1,96 @@ +import collections +import subprocess +import unittest + +from unittest import mock + +import ceph + + +class CephTestCase(unittest.TestCase): + def setUp(self): + super(CephTestCase, self).setUp() + + @staticmethod + def populated_config_side_effect(key): + return { + 'user-keys': + 'client.cinder-ceph:AQAij2tbMNjMOhAAqInpXQLFrltDgmYid6KXbg== ' + 'client.glance:AQCnjmtbuEACMxAA7joUmgLIGI4/3LKkPzUy8g== ' + 'client.gnocchi:AQDk7qJb0csAFRAAQqPU6HchVW3PT6ymgXdI/A== ' + 'client.nova-compute-kvm:' + 'AQBkjmtb1hWxLxAA3UhxSblgFSCtHVoZ8W6rNQ== ' + 'client.radosgw.gateway:' + 'AQBljmtb65mrHhAAGy9VRkfsatWVLb9EpoWDfw==', + 'admin-user': 'client.myadmin' + }[key] + + @staticmethod + def empty_config_side_effect(key): + return { + 'user-keys': '', + 'admin-user': 'client.myadmin' + }[key] + + @mock.patch('ceph.config') + def test_config_user_key_populated(self, mock_config): + user_name = 'glance' + user_key = 'AQCnjmtbuEACMxAA7joUmgLIGI4/3LKkPzUy8g==' + + mock_config.side_effect = self.populated_config_side_effect + named_key = ceph._config_user_key(user_name) + self.assertEqual(user_key, named_key) + + @mock.patch('ceph.config') + def test_config_empty_user_key(self, mock_config): + user_name = 'cinder-ceph' + + mock_config.side_effect = self.empty_config_side_effect + named_key = ceph._config_user_key(user_name) + self.assertEqual(named_key, None) + + @mock.patch.object(ceph, 'ceph_user') + @mock.patch('subprocess.check_output') + @mock.patch('ceph.config') + def test_get_named_key_new(self, mock_config, mock_check_output, + mock_ceph_user): + mock_ceph_user.return_value = 'ceph' + user_name = 'cinder-ceph' + expected_key = 'AQCnjmtbuEACMxAA7joUmgLIGI4/3LKkPzUy8g==' + expected_output = ('[client.testuser]\n key = {}' + .format(expected_key)) + + def check_output_side_effect(cmd): + if 'get-or-create' in cmd: + return expected_output.encode('utf-8') + else: + raise subprocess.CalledProcessError(1, "") + + mock_config.side_effect = 
self.empty_config_side_effect + mock_check_output.side_effect = check_output_side_effect + named_key = ceph.get_named_key(user_name) + print(named_key) + + self.assertEqual(expected_key, named_key) + + @mock.patch('subprocess.check_output') + @mock.patch('ceph.get_unit_hostname') + @mock.patch('ceph.ceph_user') + @mock.patch('ceph.config') + def test_get_named_key_existing(self, mock_config, mock_ceph_user, + mock_get_unit_hostname, mock_check_output): + user_name = 'cinder-ceph' + expected_key = 'AQCnjmtbuEACMxAA7joUmgLIGI4/3LKkPzUy8g==' + expected_output = ('[client.testuser]\n key = {}' + .format(expected_key)) + caps = collections.OrderedDict([('mon', ['allow rw']), + ('osd', ['allow rwx'])]) + ceph_user = 'ceph' + ceph_proxy_host = 'cephproxy' + mock_get_unit_hostname.return_value = ceph_proxy_host + + mock_check_output.return_value = expected_output.encode('utf-8') + mock_config.side_effect = self.empty_config_side_effect + mock_ceph_user.return_value = ceph_user + named_key = ceph.get_named_key(user_name, caps) + self.assertEqual(named_key, expected_key) diff --git a/ceph-proxy/unit_tests/test_ceph_hooks.py b/ceph-proxy/unit_tests/test_ceph_hooks.py new file mode 100644 index 00000000..8706b4cb --- /dev/null +++ b/ceph-proxy/unit_tests/test_ceph_hooks.py @@ -0,0 +1,231 @@ +from unittest import mock +import sys + +# python-apt is not installed as part of test-requirements but is imported by +# some charmhelpers modules so create a fake import. +mock_apt = mock.MagicMock() +sys.modules['apt'] = mock_apt +mock_apt.apt_pkg = mock.MagicMock() + +mock_apt_pkg = mock.MagicMock() +sys.modules['apt_pkg'] = mock_apt_pkg +mock_apt_pkg.upstream_version = mock.MagicMock() +mock_apt_pkg.upstream_version.return_value = '10.1.2-0ubuntu1' + +import test_utils +import ceph_hooks as hooks + +CEPH_KEY = 'AQDmP6dYWto6AhAAPKMkuvdFZYPRaiboU27IsA==' +CEPH_GET_KEY = """[client.admin] + key = %s + caps mds = "allow *" + caps mon = "allow *" + caps osd = "allow *" +""" % CEPH_KEY + +TO_PATCH = [ + 'config', + 'install_alternative', + 'mkdir', + 'related_units', + 'relation_get', + 'relation_ids', + 'relation_set', + 'remote_unit', + 'render', + 'service_name', + 'log' +] + + +def fake_log(message, level=None): + print("juju-log %s: %s" % (level, message)) + + +class TestHooks(test_utils.CharmTestCase): + def setUp(self): + super(TestHooks, self).setUp(hooks, TO_PATCH) + self.service_name.return_value = 'ceph-service' + self.config.side_effect = lambda x: self.test_config.get(x) + self.remote_unit.return_value = 'client/0' + self.log.side_effect = fake_log + + @mock.patch.object(hooks.ceph, 'ceph_user') + @mock.patch.object(hooks, 'filter_installed_packages') + @mock.patch('subprocess.check_output') + @mock.patch('ceph_hooks.apt_install') + def test_radosgw_relation(self, mock_apt_install, mock_check_output, + mock_filter_installed_packages, mock_ceph_user): + mock_filter_installed_packages.return_value = [] + mock_ceph_user.return_value = 'ceph' + settings = {'ceph-public-address': '127.0.0.1:1234 [::1]:4321', + 'radosgw_key': CEPH_KEY, + 'auth': 'cephx', + 'fsid': 'some-fsid'} + + mock_check_output.return_value = CEPH_GET_KEY.encode() + self.relation_get.return_value = {} + self.test_config.set('monitor-hosts', settings['ceph-public-address']) + self.test_config.set('fsid', settings['fsid']) + self.test_config.set('admin-key', 'some-admin-key') + hooks.radosgw_relation() + self.relation_set.assert_called_with(relation_id=None, + relation_settings=settings) + mock_apt_install.assert_called_with(packages=[]) + 
+ @mock.patch('ceph.ceph_user') + @mock.patch.object(hooks, 'mds_relation_joined', autospec=True) + @mock.patch.object(hooks, 'radosgw_relation') + @mock.patch.object(hooks, 'client_relation_joined') + def test_emit_cephconf(self, mock_client_rel, mock_rgw_rel, + mock_mds_rel, mock_ceph_user): + mock_ceph_user.return_value = 'ceph-user' + self.test_config.set('monitor-hosts', '127.0.0.1:1234') + self.test_config.set('fsid', 'abc123') + self.test_config.set('admin-key', 'key123') + self.test_config.set('admin-user', 'client.myadmin') + + def c(k): + x = {'radosgw': ['rados:1'], + 'client': ['client:1'], + 'rados:1': ['rados/1'], + 'client:1': ['client/1'], + 'mds': ['mds:2'], + 'mds:2': ['mds/3'], + } + return x[k] + + self.relation_ids.side_effect = c + self.related_units.side_effect = c + + hooks.emit_cephconf() + + context = {'auth_supported': self.test_config.get('auth-supported'), + 'mon_hosts': self.test_config.get('monitor-hosts'), + 'fsid': self.test_config.get('fsid'), + 'use_syslog': str(self.test_config.get( + 'use-syslog')).lower(), + 'loglevel': self.test_config.get('loglevel')} + + dirname = '/var/lib/charm/ceph-service' + self.mkdir.assert_called_with(dirname, owner='ceph-user', + group='ceph-user') + self.render.assert_any_call('ceph.conf', + '%s/ceph.conf' % dirname, + context, perms=0o644) + self.install_alternative.assert_called_with('ceph.conf', + '/etc/ceph/ceph.conf', + '%s/ceph.conf' % dirname, + 100) + keyring_template = 'ceph.keyring' + keyring_name = 'ceph.{}.keyring'.format( + self.test_config.get('admin-user')) + context = { + 'admin_key': self.test_config.get('admin-key'), + 'admin_user': self.test_config.get('admin-user'), + } + self.render.assert_any_call(keyring_template, + '/etc/ceph/' + keyring_name, + context, owner='ceph-user', perms=0o600) + + mock_rgw_rel.assert_called_with(relid='rados:1', unit='rados/1') + mock_client_rel.assert_called_with(relid='client:1', unit='client/1') + mock_mds_rel.assert_called_with(relid='mds:2', unit='mds/3') + + @mock.patch.object(hooks.ceph, 'ceph_user') + @mock.patch('subprocess.check_output') + def test_client_relation_joined(self, mock_check_output, mock_ceph_user): + mock_check_output.return_value = CEPH_GET_KEY.encode() + mock_ceph_user.return_value = 'ceph' + self.test_config.set('monitor-hosts', '127.0.0.1:1234') + self.test_config.set('fsid', 'abc123') + self.test_config.set('admin-key', 'some-admin-key') + self.related_units.return_value = ['client/0'] + + hooks.client_relation_joined('client:1') + + data = {'key': CEPH_KEY, + 'auth': 'cephx', + 'ceph-public-address': self.test_config.get('monitor-hosts')} + + self.relation_set.assert_called_with(relation_id='client:1', + relation_settings=data) + + @mock.patch('ceph_hooks.emit_cephconf') + @mock.patch('ceph_hooks.package_install') + def test_config_get_skips_package_update(self, + mock_package_install, + mock_emit_cephconf): + previous_test_config = test_utils.TestConfig() + previous_test_config.set('source', 'distro') + previous_test_config.set('key', '') + previous = mock.MagicMock().return_value + previous.previous.side_effect = lambda x: previous_test_config.get(x) + self.config.side_effect = [previous, "distro", ""] + hooks.config_changed() + mock_package_install.assert_not_called() + mock_emit_cephconf.assert_any_call() + + @mock.patch('subprocess.check_output', autospec=True) + @mock.patch('ceph.config', autospec=True) + @mock.patch('ceph.get_mds_key', autospec=True) + @mock.patch('ceph.ceph_user', autospec=True) + def test_mds_relation_joined(self, 
ceph_user, get_mds_key, ceph_config, + check_output): + my_mds_key = '1234-key' + mds_name = 'adjusted-mayfly' + rid = 'mds:1' + ceph_user.return_value = 'ceph' + get_mds_key.return_value = my_mds_key + ceph_config.side_effect = self.test_config.get + + settings = {'ceph-public-address': '127.0.0.1:1234 [::1]:4321', + 'auth': 'cephx', + 'fsid': 'some-fsid'} + + rel_data_get = {'broker_req': 'my-uuid', + 'mds-name': mds_name} + rel_data_set = {'broker-rsp-client-0': 'foobar', + '%s_mds_key' % mds_name: my_mds_key} + rel_data_set.update(settings) + + def fake_relation_get(attribute=None, rid=None, unit=None): + if attribute: + return rel_data_get[attribute] + else: + return rel_data_get + + self.relation_get.side_effect = fake_relation_get + + # An unconfigured ceph-proxy should only log a message. + with mock.patch.object(hooks, 'log') as log: + hooks.mds_relation_joined() + log.assert_called_with( + 'MDS: FSID or admin key not provided, please configure them', + level='INFO') + + # Configure ceph-proxy with the ceph details. + self.test_config.set('monitor-hosts', settings['ceph-public-address']) + self.test_config.set('fsid', settings['fsid']) + self.test_config.set('admin-key', 'some-admin-key') + + with mock.patch.object(hooks, 'process_requests') as process_requests: + process_requests.return_value = 'foobar' + hooks.mds_relation_joined(relid=rid) + process_requests.assert_called_with('my-uuid') + self.relation_set.assert_called_with( + relation_id=rid, relation_settings=rel_data_set) + + @mock.patch('ceph_hooks.emit_cephconf') + @mock.patch('ceph_hooks.package_install') + def test_update_apt_source(self, mock_package_install, mock_emit_cephconf): + previous_test_config = test_utils.TestConfig() + previous_test_config.set('source', 'distro') + previous_test_config.set('key', '') + previous = mock.MagicMock().return_value + previous.previous.side_effect = lambda x: previous_test_config.get(x) + self.config.side_effect = [previous, "cloud:cosmic-mimic", ""] + hooks.config_changed() + mock_package_install.assert_called_with() + mock_emit_cephconf.assert_called_with() diff --git a/ceph-proxy/unit_tests/test_utils.py b/ceph-proxy/unit_tests/test_utils.py new file mode 100644 index 00000000..372b2d8a --- /dev/null +++ b/ceph-proxy/unit_tests/test_utils.py @@ -0,0 +1,121 @@ +import logging +import unittest +import os +import yaml + +from contextlib import contextmanager +from unittest.mock import patch, MagicMock + + +def load_config(): + ''' + Walk backwards from __file__ looking for config.yaml, load and return the + 'options' section. + ''' + config = None + f = __file__ + while config is None: + d = os.path.dirname(f) + if os.path.isfile(os.path.join(d, 'config.yaml')): + config = os.path.join(d, 'config.yaml') + break + f = d + + if not config: + logging.error('Could not find config.yaml in any parent directory ' + 'of %s. ' % f) + raise Exception + + return yaml.safe_load(open(config).read())['options'] + + +def get_default_config(): + ''' + Load default charm config from config.yaml and return it as a dict. + If no default is set in config.yaml, its value is None.
+ ''' + default_config = {} + config = load_config() + for k, v in config.items(): + if 'default' in v: + default_config[k] = v['default'] + else: + default_config[k] = None + return default_config + + +class CharmTestCase(unittest.TestCase): + + def setUp(self, obj, patches): + super(CharmTestCase, self).setUp() + self.patches = patches + self.obj = obj + self.test_config = TestConfig() + self.test_relation = TestRelation() + self.patch_all() + + def patch(self, method): + _m = patch.object(self.obj, method) + mock = _m.start() + self.addCleanup(_m.stop) + return mock + + def patch_all(self): + for method in self.patches: + setattr(self, method, self.patch(method)) + + +class TestConfig(object): + + def __init__(self): + self.config = get_default_config() + + def get(self, attr=None): + if not attr: + return self.get_all() + try: + return self.config[attr] + except KeyError: + return None + + def get_all(self): + return self.config + + def set(self, attr, value): + if attr not in self.config: + raise KeyError + self.config[attr] = value + + +class TestRelation(object): + + def __init__(self, relation_data={}): + self.relation_data = relation_data + + def set(self, relation_data): + self.relation_data = relation_data + + def get(self, attr=None, unit=None, rid=None): + if attr is None: + return self.relation_data + elif attr in self.relation_data: + return self.relation_data[attr] + return None + + +@contextmanager +def patch_open(): + '''Patch open() to allow mocking both open() itself and the file that is + yielded. + + Yields the mock for "open" and "file", respectively.''' + mock_open = MagicMock(spec=open) + mock_file = MagicMock(spec=file) # noqa - transitional py2 py3 + + @contextmanager + def stub_open(*args, **kwargs): + mock_open(*args, **kwargs) + yield mock_file + + with patch('__builtin__.open', stub_open): + yield mock_open, mock_file diff --git a/ceph-radosgw/.gitignore b/ceph-radosgw/.gitignore new file mode 100644 index 00000000..813cc4a8 --- /dev/null +++ b/ceph-radosgw/.gitignore @@ -0,0 +1,13 @@ +bin +.coverage +.testrepository +.tox +tags +*.sw[nop] +*.charm +*.pyc +.idea +.unit-state.db +func-results.json +.stestr/ +**/__pycache__ diff --git a/ceph-radosgw/.gitreview b/ceph-radosgw/.gitreview new file mode 100644 index 00000000..fb258f86 --- /dev/null +++ b/ceph-radosgw/.gitreview @@ -0,0 +1,6 @@ +[gerrit] +host=review.opendev.org +port=29418 +project=openstack/charm-ceph-radosgw.git + +defaultbranch=stable/squid-jammy diff --git a/ceph-radosgw/.project b/ceph-radosgw/.project new file mode 100644 index 00000000..51d6166f --- /dev/null +++ b/ceph-radosgw/.project @@ -0,0 +1,17 @@ +<?xml version="1.0" encoding="UTF-8"?> +<projectDescription> + <name>ceph-radosgw</name> + <comment></comment> + <projects> + </projects> + <buildSpec> + <buildCommand> + <name>org.python.pydev.PyDevBuilder</name> + <arguments> + </arguments> + </buildCommand> + </buildSpec> + <natures> + <nature>org.python.pydev.pythonNature</nature> + </natures> +</projectDescription> diff --git a/ceph-radosgw/.pydevproject b/ceph-radosgw/.pydevproject new file mode 100644 index 00000000..03181631 --- /dev/null +++ b/ceph-radosgw/.pydevproject @@ -0,0 +1,15 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<?eclipse-pydev version="1.0"?><pydev_project> +<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property> +<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property> +<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH"> +<path>/${PROJECT_DIR_NAME}/lib</path> +<path>/${PROJECT_DIR_NAME}/hooks</path> +<path>/${PROJECT_DIR_NAME}/unit_tests</path> +<path>/${PROJECT_DIR_NAME}/actions</path> +</pydev_pathproperty> +</pydev_project> diff --git a/ceph-radosgw/.stestr.conf b/ceph-radosgw/.stestr.conf new file mode 100644 index 00000000..5fcccaca --- /dev/null +++ b/ceph-radosgw/.stestr.conf @@ -0,0 +1,3 @@ +[DEFAULT] +test_path=./unit_tests +top_dir=./ diff --git a/ceph-radosgw/.zuul.yaml b/ceph-radosgw/.zuul.yaml new file mode 100644 index 00000000..77259668 --- /dev/null +++ b/ceph-radosgw/.zuul.yaml @@ -0,0 +1,4 @@ +- project: + templates: + - openstack-python3-charm-zed-jobs + -
openstack-python3-charm-jobs diff --git a/ceph-radosgw/LICENSE b/ceph-radosgw/LICENSE new file mode 100644 index 00000000..d6456956 --- /dev/null +++ b/ceph-radosgw/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/ceph-radosgw/Makefile b/ceph-radosgw/Makefile new file mode 100644 index 00000000..03a08092 --- /dev/null +++ b/ceph-radosgw/Makefile @@ -0,0 +1,27 @@ +#!/usr/bin/make +PYTHON := /usr/bin/env python3 + +lint: + @tox -e pep8 + +test: + @echo Starting unit tests... + @tox -e py27 + +functional_test: + @echo Starting Amulet tests... + @tox -e func27 + +bin/charm_helpers_sync.py: + @mkdir -p bin + @curl -o bin/charm_helpers_sync.py https://raw.githubusercontent.com/juju/charm-helpers/master/tools/charm_helpers_sync/charm_helpers_sync.py + +bin/git_sync.py: + @mkdir -p bin + @wget -O bin/git_sync.py https://raw.githubusercontent.com/CanonicalLtd/git-sync/master/git_sync.py + +sync: bin/charm_helpers_sync.py + @$(PYTHON) bin/charm_helpers_sync.py -c charm-helpers-hooks.yaml + +ceph-sync: bin/git_sync.py + $(PYTHON) bin/git_sync.py -d lib -s https://github.com/openstack/charms.ceph.git diff --git a/ceph-radosgw/README.md b/ceph-radosgw/README.md new file mode 100644 index 00000000..b6dd248c --- /dev/null +++ b/ceph-radosgw/README.md @@ -0,0 +1,277 @@ +# Overview + +[Ceph][ceph-upstream] is a unified, distributed storage system designed for +excellent performance, reliability, and scalability. + +The ceph-radosgw charm deploys the RADOS Gateway, an S3- and Swift-compatible +HTTP gateway. The deployment is done within the context of an existing Ceph +cluster. + +# Usage + +## Configuration + +This section covers common and/or important configuration options. See file +`config.yaml` for the full list of options, along with their descriptions and +default values. See the [Juju documentation][juju-docs-config-apps] for details +on configuring applications. + +#### `pool-type` + +The `pool-type` option dictates the storage pool type. See section 'Ceph pool +type' for more information. + +#### `source` + +The `source` option sets the software sources. A common value is an OpenStack +UCA release (e.g. 'cloud:xenial-queens' or 'cloud:bionic-ussuri'). See [Ceph +and the UCA][cloud-archive-ceph]. The underlying host's existing apt sources +will be used if this option is not specified (this behaviour can be explicitly +chosen by using the value of 'distro'). + +## Ceph pool type + +Ceph storage pools can be configured to ensure data resiliency either through +replication or by erasure coding. This charm supports both types via the +`pool-type` configuration option, which can take on the values of 'replicated' +and 'erasure-coded'. The default value is 'replicated'. + +For this charm, the pool type will be associated with Object storage. + +> **Note**: Erasure-coded pools are supported starting with Ceph Luminous. + +### Replicated pools + +Replicated pools use a simple replication strategy in which each written object +is copied, in full, to multiple OSDs within the cluster. + +The `ceph-osd-replication-count` option sets the replica count for any object +stored within the rgw pools. Increasing this value increases data resilience at +the cost of consuming more real storage in the Ceph cluster. The default value +is '3'.
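+ +For example, a minimal sketch of setting this option at deploy time (the value '3' shown here is illustrative; it matches the charm default): + + juju deploy ceph-radosgw --config ceph-osd-replication-count=3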
+ +> **Important**: The `ceph-osd-replication-count` option must be set prior to + adding the relation to the ceph-mon application. Otherwise, the pool's + configuration will need to be set by interfacing with the cluster directly. + +### Erasure coded pools + +Erasure coded pools use a technique that allows for the same resiliency as +replicated pools, yet reduces the amount of space required. Written data is +split into data chunks and error correction chunks, which are both distributed +throughout the cluster. + +> **Note**: Erasure coded pools require more memory and CPU cycles than + replicated pools do. + +When using erasure coded pools for Object storage multiple pools will be +created: one erasure coded pool ('rgw.buckets.data' for storing actual RGW +data) and several replicated pools (for storing RGW omap metadata). The +`ceph-osd-replication-count` configuration option only applies to the metadata +(replicated) pools. + +Erasure coded pools can be configured via options whose names begin with the +`ec-` prefix. + +> **Important**: It is strongly recommended to tailor the `ec-profile-k` and + `ec-profile-m` options to the needs of the given environment. These latter + options have default values of '1' and '2' respectively, which result in the + same space requirements as those of a replicated pool. + +See [Ceph Erasure Coding][cdg-ceph-erasure-coding] in the [OpenStack Charms +Deployment Guide][cdg] for more information. + +## Ceph BlueStore compression + +This charm supports [BlueStore inline compression][ceph-bluestore-compression] +for its associated Ceph storage pool(s). The feature is enabled by assigning a +compression mode via the `bluestore-compression-mode` configuration option. The +default behaviour is to disable compression. + +The efficiency of compression depends heavily on what type of data is stored +in the pool and the charm provides a set of configuration options to fine tune +the compression behaviour. + +> **Note**: BlueStore compression is supported starting with Ceph Mimic. + +## Deployment + +Ceph RADOS Gateway is often containerised. Here a single unit is deployed to a +new container on machine '1' within an existing Ceph cluster: + + juju deploy --to lxd:1 ceph-radosgw + juju add-relation ceph-radosgw:mon ceph-mon:radosgw + +If the RADOS Gateway is being integrated into OpenStack then a relation to the +keystone application is needed: + + juju add-relation ceph-radosgw:identity-service keystone:identity-service + +Expose the service: + + juju expose ceph-radosgw + +> **Note**: The `expose` command is only required if the backing cloud blocks + traffic by default. In general, MAAS is the only cloud type that does not + employ firewalling. + +The Gateway can be accessed over port 80 (as per `juju status ceph-radosgw` +output). + +## Multi-site replication + +The charm supports native replication between multiple RADOS Gateway +deployments. This is documented under [Ceph RADOS Gateway multisite +replication][cdg-ceph-radosgw-multisite] in the [OpenStack Charms Deployment +Guide][cdg]. + +## Tenant namespacing + +By default, Ceph RADOS Gateway puts all tenant buckets into the same global +namespace, disallowing multiple tenants to have buckets with the same name. 
+Tenant namespacing can be enabled in this charm by deploying with configuration
+like:
+
+    ceph-radosgw:
+      charm: cs:ceph-radosgw
+      num_units: 1
+      options:
+        namespace-tenants: True
+
+Enabling tenant namespacing will place all tenant buckets into their own
+namespace under their tenant id, as well as adding the tenant's ID parameter to
+the Keystone endpoint registration to allow seamless integration with OpenStack.
+Tenant namespacing cannot be toggled on in an existing installation, as doing
+so would remove tenant access to existing buckets; toggling this option on an
+already deployed RADOS Gateway therefore has no effect.
+
+## Access
+
+For security reasons the charm is not designed to administer the Ceph cluster.
+A user (e.g. 'ubuntu') for the Ceph Object Gateway service will need to be
+created manually:
+
+    juju ssh ceph-mon/0 'sudo radosgw-admin user create \
+        --uid="ubuntu" --display-name="Charmed Ceph"'
+
+## Keystone integration (Swift)
+
+Ceph RGW supports Keystone authentication of Swift requests. This is enabled
+by adding a relation to an existing keystone application:
+
+    juju add-relation ceph-radosgw:identity-service keystone:identity-service
+
+## High availability
+
+When more than one unit is deployed with the [hacluster][hacluster-charm]
+application the charm will bring up an HA active/active cluster.
+
+There are two mutually exclusive high availability options: using virtual IP(s)
+or DNS. In both cases the hacluster subordinate charm is used to provide the
+Corosync and Pacemaker backend HA functionality.
+
+See [OpenStack high availability][cdg-ha-apps] in the [OpenStack Charms
+Deployment Guide][cdg] for details.
+
+## S3 Interface Support
+
+This charm provides [s3 charm interface support][s3spec]. This means
+it can act as a provider for applications wishing to make use of S3
+object storage via this relation. An application that implements the
+s3 requirer side of this relation can be related to ceph-radosgw.
+Using the mysql-operator charm as an example:
+
+    juju add-relation ceph-radosgw:s3 mysql:s3-parameters
+
+Upon forming that relation, ceph-radosgw will create a bucket for use
+by the requirer, and transmit access information back to the requirer.
+The requirer can then use this to connect to the S3 endpoint to
+store application data.
+
+Only a single bucket will be created per requirer application. If an
+application relation is removed, the bucket *will* be preserved. If
+subsequently the application reestablishes the relation, the bucket
+will be reused.
+
+
+
+## Network spaces
+
+This charm supports the use of Juju [network spaces][juju-docs-spaces] (Juju
+`v.2.0`). This feature optionally allows specific types of the application's
+network traffic to be bound to subnets that the underlying hardware is
+connected to.
+
+> **Note**: Spaces must be configured in the backing cloud prior to deployment.
+
+API endpoints can be bound to distinct network spaces supporting the network
+separation of public, internal and admin endpoints.
+ +For example, providing that spaces 'public-space', 'internal-space', and +'admin-space' exist, the deploy command above could look like this: + + juju deploy ceph-radosgw \ + --bind "public=public-space internal=internal-space admin=admin-space" + +Alternatively, configuration can be provided as part of a bundle: + +```yaml + ceph-radosgw: + charm: cs:ceph-radosgw + num_units: 1 + bindings: + public: public-space + internal: internal-space + admin: admin-space +``` + +> **Note**: Existing ceph-radosgw units configured with the `os-admin-network`, + `os-internal-network`, `os-public-network`, `os-public-hostname`, + `os-internal-hostname`, or `os-admin-hostname` options will continue to + honour them. Furthermore, these options override any space bindings, if set. + +## Actions + +This section lists Juju [actions][juju-docs-actions] supported by the charm. +Actions allow specific operations to be performed on a per-unit basis. To +display action descriptions run `juju actions ceph-radosgw`. If the charm is +not deployed then see file `actions.yaml`. + +* `pause` +* `promote` +* `readonly` +* `readwrite` +* `resume` +* `tidydefaults` +* `enable-buckets-sync` +* `disable-buckets-sync` +* `reset-buckets-sync` + +# Documentation + +The OpenStack Charms project maintains two documentation guides: + +* [OpenStack Charm Guide][cg]: for project information, including development + and support notes +* [OpenStack Charms Deployment Guide][cdg]: for charm usage information + +# Bugs + +Please report bugs on [Launchpad][lp-bugs-charm-ceph-radosgw]. + + + +[juju-docs-actions]: https://jaas.ai/docs/actions +[ceph-upstream]: https://ceph.io +[hacluster-charm]: https://jaas.ai/hacluster +[cg]: https://docs.openstack.org/charm-guide +[cdg]: https://docs.openstack.org/project-deploy-guide/charm-deployment-guide +[cdg-ha-apps]: https://docs.openstack.org/project-deploy-guide/charm-deployment-guide/latest/app-ha.html#ha-applications +[cloud-archive-ceph]: https://wiki.ubuntu.com/OpenStack/CloudArchive#Ceph_and_the_UCA +[juju-docs-config-apps]: https://juju.is/docs/configuring-applications +[cdg-ceph-erasure-coding]: https://docs.openstack.org/project-deploy-guide/charm-deployment-guide/latest/app-erasure-coding.html +[lp-bugs-charm-ceph-radosgw]: https://bugs.launchpad.net/charm-ceph-radosgw/+filebug +[juju-docs-spaces]: https://jaas.ai/docs/spaces +[cdg-ceph-radosgw-multisite]: https://docs.openstack.org/project-deploy-guide/charm-deployment-guide/latest/app-rgw-multisite.html +[ceph-bluestore-compression]: https://docs.ceph.com/en/latest/rados/configuration/bluestore-config-ref/#inline-compression +[s3spec]: https://github.com/canonical/charm-relation-interfaces/tree/main/interfaces/s3/v0 diff --git a/ceph-radosgw/actions.yaml b/ceph-radosgw/actions.yaml new file mode 100644 index 00000000..5b24635a --- /dev/null +++ b/ceph-radosgw/actions.yaml @@ -0,0 +1,47 @@ +pause: + description: Pause the ceph-radosgw unit. +resume: + description: Resume the ceph-radosgw unit. +promote: + description: Promote the zone associated with the local units to master/default (multi-site). +readonly: + description: Mark the zone associated with the local units as read only (multi-site). +readwrite: + description: Mark the zone associated with the local units as read/write (multi-site). +tidydefaults: + description: Delete default zone and zonegroup configuration (multi-site). +force-enable-multisite: + description: Reconfigure provided Zone and Zonegroup for migration to multisite. 
+  params:
+    zone:
+      type: string
+      description: Existing Zone to be reconfigured as the 'zone' config value.
+    zonegroup:
+      type: string
+      description: Existing Zonegroup to be reconfigured as the 'zonegroup' config value.
+enable-buckets-sync:
+  description: |
+    Enable buckets sync in the multi-site replication. This is meant to be
+    used only when the default zonegroup sync policy is not "enabled", but it is
+    "allowed".
+  params:
+    buckets:
+      type: string
+      description: Comma-separated list of bucket names to enable syncing.
+disable-buckets-sync:
+  description: |
+    Forbid buckets sync in the multi-site replication. This is useful when you
+    want to disable syncing for some buckets, but you want to sync all the
+    other buckets.
+  params:
+    buckets:
+      type: string
+      description: Comma-separated list of bucket names to disable syncing.
+reset-buckets-sync:
+  description: |
+    Reset buckets sync policy. After this is executed, the buckets will be
+    synced according to the default zone group sync policy.
+  params:
+    buckets:
+      type: string
+      description: Comma-separated list of bucket names to reset sync policy.
diff --git a/ceph-radosgw/actions/actions.py b/ceph-radosgw/actions/actions.py
new file mode 100755
index 00000000..bced130b
--- /dev/null
+++ b/ceph-radosgw/actions/actions.py
@@ -0,0 +1,435 @@
+#!/usr/bin/env python3
+#
+# Copyright 2016 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import subprocess
+import sys
+import uuid
+
+sys.path.append('hooks/')
+
+import multisite
+
+from charmhelpers.core.hookenv import (
+    action_fail,
+    config,
+    is_leader,
+    leader_set,
+    action_set,
+    action_get,
+    log,
+    ERROR,
+    DEBUG,
+)
+from charmhelpers.contrib.openstack.ip import (
+    canonical_url,
+    PUBLIC,
+)
+from utils import (
+    pause_unit_helper,
+    resume_unit_helper,
+    register_configs,
+    listen_port,
+    service_name,
+)
+from charmhelpers.core.host import (
+    service_restart,
+)
+
+DEFAULT_SYNC_POLICY_ID = 'default'
+
+
+def pause(args):
+    """Pause the ceph-radosgw services.
+    @raises Exception should the service fail to stop.
+    """
+    pause_unit_helper(register_configs())
+
+
+def resume(args):
+    """Resume the ceph-radosgw services.
+    @raises Exception should the service fail to start."""
+    resume_unit_helper(register_configs())
+
+
+def promote(args):
+    """Promote zone associated with local RGW units to master/default"""
+    zone = config('zone')
+    zonegroup = config('zonegroup')
+    if not is_leader():
+        action_fail('This action can only be executed on leader unit.')
+        return
+    if not zone:
+        action_fail('No zone configuration set, not promoting')
+        return
+    try:
+        multisite.modify_zone(zone,
+                              default=True, master=True)
+        multisite.update_period(zonegroup=zonegroup, zone=zone)
+        leader_set(restart_nonce=str(uuid.uuid4()))
+        service_restart(service_name())
+        action_set(
+            values={'message': 'zone:{} promoted to '
+                    'master/default'.format(zone)}
+        )
+    except subprocess.CalledProcessError as cpe:
+        action_fail('Unable to promote zone:{} '
+                    'to master: {}'.format(zone, cpe.output))
+
+
+def readonly(args):
+    """Mark zone associated with local RGW units as read only"""
+    zone = config('zone')
+    if not zone:
+        action_fail('No zone configuration set, not marking read only')
+        return
+    try:
+        multisite.modify_zone(zone, readonly=True)
+        multisite.update_period()
+        action_set(
+            values={
+                'message': 'zone:{} marked as read only'.format(zone)
+            }
+        )
+    except subprocess.CalledProcessError as cpe:
+        action_fail('Unable to mark zone:{} '
+                    'as read only: {}'.format(zone, cpe.output))
+
+
+def readwrite(args):
+    """Mark zone associated with local RGW units as read write"""
+    zone = config('zone')
+    if not zone:
+        action_fail('No zone configuration set, not marking read write')
+        return
+    try:
+        multisite.modify_zone(zone, readonly=False)
+        multisite.update_period()
+        action_set(
+            values={
+                'message': 'zone:{} marked as read write'.format(zone)
+            }
+        )
+    except subprocess.CalledProcessError as cpe:
+        action_fail('Unable to mark zone:{} '
+                    'as read write: {}'.format(zone, cpe.output))
+
+
+def tidydefaults(args):
+    """Delete default zone and zonegroup metadata"""
+    zone = config('zone')
+    if not zone:
+        action_fail('No zone configuration set, not deleting defaults')
+        return
+    try:
+        multisite.tidy_defaults()
+        action_set(
+            values={
+                'message': 'default zone and zonegroup deleted'
+            }
+        )
+    except subprocess.CalledProcessError as cpe:
+        action_fail('Unable to delete default zone and zonegroup'
+                    ': {} - {}'.format(zone, cpe.output))
+
+
+def force_enable_multisite(args):
+    """Configure provided zone and zonegroup according to Multisite Config
+
+    In a situation where multiple zones or zonegroups are configured on the
+    primary site, the decision of which pair to use in the multisite system
+    is taken through this action. It takes the provided parameters (zone name
+    and zonegroup name) and renames/modifies them appropriately.
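+
+    Example invocation (hypothetical unit and zone/zonegroup names)::
+
+        juju run-action ceph-radosgw/0 force-enable-multisite \
+            zone=existing-zone zonegroup=existing-zonegroup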
+    """
+    public_url = '{}:{}'.format(
+        canonical_url(register_configs(), PUBLIC),
+        listen_port(),
+    )
+    current_zone = action_get("zone")
+    current_zonegroup = action_get("zonegroup")
+    endpoints = [public_url]
+    realm = config('realm')
+    new_zone = config('zone')
+    new_zonegroup = config('zonegroup')
+
+    log("zone:{}, zonegroup:{}, endpoints:{}, realm:{}, new_zone:{}, "
+        "new_zonegroup:{}".format(
+            current_zone, current_zonegroup, endpoints,
+            realm, new_zone, new_zonegroup
+        ), level=DEBUG)
+
+    if not is_leader():
+        action_fail('This action can only be executed on leader unit.')
+        return
+
+    if not all((realm, new_zonegroup, new_zone)):
+        action_fail("Missing required charm configurations realm({}), "
+                    "zonegroup({}) and zone({}).".format(
+                        realm, new_zonegroup, new_zone
+                    ))
+        return
+
+    if current_zone not in multisite.list_zones():
+        action_fail('Provided zone {} does not exist.'.format(current_zone))
+        return
+
+    if current_zonegroup not in multisite.list_zonegroups():
+        action_fail('Provided zonegroup {} does not exist.'
+                    .format(current_zonegroup))
+        return
+
+    try:
+        # Rename chosen zonegroup/zone as per charm config value.
+        rename_result = multisite.rename_multisite_config(
+            [current_zonegroup],
+            new_zonegroup,
+            [current_zone], new_zone
+        )
+        if rename_result is None:
+            action_fail('Failed to rename zone {} or zonegroup {}.'
+                        .format(current_zone, current_zonegroup))
+            return
+
+        # Configure zonegroup/zone as master for multisite.
+        modify_result = multisite.modify_multisite_config(
+            new_zone, new_zonegroup,
+            realm=realm,
+            endpoints=endpoints
+        )
+        if modify_result is None:
+            action_fail('Failed to configure zone {} or zonegroup {}.'
+                        .format(new_zone, new_zonegroup))
+            return
+
+        leader_set(restart_nonce=str(uuid.uuid4()))
+        service_restart(service_name())
+        action_set(
+            values={
+                'message': 'Multisite Configuration Resolved'
+            }
+        )
+    except subprocess.CalledProcessError as cpe:
+        message = "Failed to configure zone ({}) and zonegroup ({})".format(
+            current_zone, current_zonegroup
+        )
+        log(message, level=ERROR)
+        action_fail(message + " : {}".format(cpe.output))
+
+
+def is_multisite_sync_policy_action_allowed():
+    """Check if the current Juju unit is allowed to run sync policy actions.
+
+    This method checks if the current Juju unit is allowed to execute
+    the Juju actions to configure Multisite sync policies:
+    * enable-buckets-sync
+    * disable-buckets-sync
+    * reset-buckets-sync
+    These Juju actions are allowed to run only on the leader unit of the
+    primary RGW zone.
+
+    :return: Whether the current Juju unit is allowed to run the Multisite
+        sync policy Juju actions.
+    :rtype: Boolean
+    """
+    if not is_leader():
+        action_fail("This action can only be executed on leader unit.")
+        return False
+
+    realm = config('realm')
+    zone = config('zone')
+    zonegroup = config('zonegroup')
+
+    if not all((realm, zonegroup, zone)):
+        action_fail("Missing required charm configurations realm({}), "
+                    "zonegroup({}) and zone({}).".format(
+                        realm, zonegroup, zone
+                    ))
+        return False
+
+    if not multisite.is_multisite_configured(zone=zone, zonegroup=zonegroup):
+        action_fail("Multisite is not configured")
+        return False
+
+    zonegroup_info = multisite.get_zonegroup_info(zonegroup)
+    if zonegroup_info is None:
+        action_fail("Failed to fetch zonegroup ({}) info".format(zonegroup))
+        return False
+
+    zone_info = multisite.get_zone_info(zone)
+    if zone_info is None:
+        action_fail("Failed to fetch zone ({}) info".format(zone))
+        return False
+
+    if zonegroup_info['master_zone'] != zone_info['id']:
+        action_fail('This action can only be executed on primary RGW '
+                    'application units.')
+        return False
+
+    return True
+
+
+def update_buckets_sync_policy(buckets, sync_policy_state):
+    """Update the sync policy state for all the given buckets.
+
+    This method gets a list of bucket names and a sync policy state to set
+    for all of them. The sync policy state can be one of the following:
+    "allowed", "enabled", or "forbidden". Validation for the sync policy
+    state is done in the "multisite.create_sync_group" module method.
+
+    The sync policy state is set by creating a bucket-level sync group with
+    the given state, followed by a sync group pipe that matches all the source
+    and destination buckets. If the bucket already has a sync group, it is
+    updated with the new state.
+
+    :param buckets: List of bucket names.
+ :type buckets: list + """ + zone = config('zone') + zonegroup = config('zonegroup') + existing_buckets = multisite.list_buckets(zonegroup=zonegroup, zone=zone) + messages = [] + for bucket in buckets: + if bucket in existing_buckets: + multisite.remove_sync_group( + bucket=bucket, + group_id=DEFAULT_SYNC_POLICY_ID) + message = 'Reset "{}" bucket sync policy'.format(bucket) + else: + message = ('Bucket "{}" does not exist in the zonegroup "{}" and ' + 'zone "{}"'.format(bucket, zonegroup, zone)) + log(message) + messages.append(message) + action_set( + values={ + 'message': '\n'.join(messages) + } + ) + + +def enable_buckets_sync(args): + """Enable sync for the given buckets""" + if not is_multisite_sync_policy_action_allowed(): + return + try: + update_buckets_sync_policy( + buckets=action_get('buckets').split(','), + sync_policy_state=multisite.SYNC_POLICY_ENABLED, + ) + except subprocess.CalledProcessError as cpe: + message = "Failed to enable sync for the given buckets" + log(message, level=ERROR) + action_fail(message + " : {}".format(cpe.output)) + + +def disable_buckets_sync(args): + """Disable sync for the given buckets""" + if not is_multisite_sync_policy_action_allowed(): + return + try: + update_buckets_sync_policy( + buckets=action_get('buckets').split(','), + sync_policy_state=multisite.SYNC_POLICY_FORBIDDEN, + ) + except subprocess.CalledProcessError as cpe: + message = "Failed to disable sync for the given buckets" + log(message, level=ERROR) + action_fail(message + " : {}".format(cpe.output)) + + +def reset_buckets_sync(args): + """Reset sync policy for the given buckets""" + if not is_multisite_sync_policy_action_allowed(): + return + try: + reset_buckets_sync_policy(buckets=action_get('buckets').split(',')) + except subprocess.CalledProcessError as cpe: + message = "Failed to reset sync for the given buckets" + log(message, level=ERROR) + action_fail(message + " : {}".format(cpe.output)) + + +# A dictionary of all the defined actions to callables (which take +# parsed arguments). 
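+# Each action is exposed to Juju as a symlink to this file (e.g.
+# actions/promote -> actions.py), so main() below can dispatch on the
+# basename of the script it was invoked as.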
+ACTIONS = { + "pause": pause, + "resume": resume, + "promote": promote, + "readonly": readonly, + "readwrite": readwrite, + "tidydefaults": tidydefaults, + "force-enable-multisite": force_enable_multisite, + "enable-buckets-sync": enable_buckets_sync, + "disable-buckets-sync": disable_buckets_sync, + "reset-buckets-sync": reset_buckets_sync, +} + + +def main(args): + action_name = os.path.basename(args[0]) + try: + action = ACTIONS[action_name] + except KeyError: + return "Action %s undefined" % action_name + else: + try: + action(args) + except Exception as e: + action_fail(str(e)) + + +if __name__ == "__main__": + sys.exit(main(sys.argv)) diff --git a/ceph-radosgw/actions/disable-buckets-sync b/ceph-radosgw/actions/disable-buckets-sync new file mode 120000 index 00000000..405a394e --- /dev/null +++ b/ceph-radosgw/actions/disable-buckets-sync @@ -0,0 +1 @@ +actions.py \ No newline at end of file diff --git a/ceph-radosgw/actions/enable-buckets-sync b/ceph-radosgw/actions/enable-buckets-sync new file mode 120000 index 00000000..405a394e --- /dev/null +++ b/ceph-radosgw/actions/enable-buckets-sync @@ -0,0 +1 @@ +actions.py \ No newline at end of file diff --git a/ceph-radosgw/actions/force-enable-multisite b/ceph-radosgw/actions/force-enable-multisite new file mode 120000 index 00000000..405a394e --- /dev/null +++ b/ceph-radosgw/actions/force-enable-multisite @@ -0,0 +1 @@ +actions.py \ No newline at end of file diff --git a/ceph-radosgw/actions/pause b/ceph-radosgw/actions/pause new file mode 120000 index 00000000..405a394e --- /dev/null +++ b/ceph-radosgw/actions/pause @@ -0,0 +1 @@ +actions.py \ No newline at end of file diff --git a/ceph-radosgw/actions/promote b/ceph-radosgw/actions/promote new file mode 120000 index 00000000..405a394e --- /dev/null +++ b/ceph-radosgw/actions/promote @@ -0,0 +1 @@ +actions.py \ No newline at end of file diff --git a/ceph-radosgw/actions/readonly b/ceph-radosgw/actions/readonly new file mode 120000 index 00000000..405a394e --- /dev/null +++ b/ceph-radosgw/actions/readonly @@ -0,0 +1 @@ +actions.py \ No newline at end of file diff --git a/ceph-radosgw/actions/readwrite b/ceph-radosgw/actions/readwrite new file mode 120000 index 00000000..405a394e --- /dev/null +++ b/ceph-radosgw/actions/readwrite @@ -0,0 +1 @@ +actions.py \ No newline at end of file diff --git a/ceph-radosgw/actions/reset-buckets-sync b/ceph-radosgw/actions/reset-buckets-sync new file mode 120000 index 00000000..405a394e --- /dev/null +++ b/ceph-radosgw/actions/reset-buckets-sync @@ -0,0 +1 @@ +actions.py \ No newline at end of file diff --git a/ceph-radosgw/actions/resume b/ceph-radosgw/actions/resume new file mode 120000 index 00000000..405a394e --- /dev/null +++ b/ceph-radosgw/actions/resume @@ -0,0 +1 @@ +actions.py \ No newline at end of file diff --git a/ceph-radosgw/actions/tidydefaults b/ceph-radosgw/actions/tidydefaults new file mode 120000 index 00000000..405a394e --- /dev/null +++ b/ceph-radosgw/actions/tidydefaults @@ -0,0 +1 @@ +actions.py \ No newline at end of file diff --git a/ceph-radosgw/bindep.txt b/ceph-radosgw/bindep.txt new file mode 100644 index 00000000..bdbe8d56 --- /dev/null +++ b/ceph-radosgw/bindep.txt @@ -0,0 +1,3 @@ +libffi-dev [platform:dpkg] +libxml2-dev [platform:dpkg] +libxslt1-dev [platform:dpkg] diff --git a/ceph-radosgw/bundles/bionic-rocky-multisite.yaml b/ceph-radosgw/bundles/bionic-rocky-multisite.yaml new file mode 100644 index 00000000..ab5ecf59 --- /dev/null +++ b/ceph-radosgw/bundles/bionic-rocky-multisite.yaml @@ -0,0 +1,73 @@ +options: + 
source: &source cloud:bionic-rocky +series: bionic +applications: + east-ceph-radosgw: + charm: cs:~openstack-charmers-next/ceph-radosgw-multisite + num_units: 1 + options: + source: *source + realm: testrealm + zonegroup: testzonegroup + zone: east-1 + region: east-1 + east-ceph-osd: + charm: cs:~openstack-charmers-next/ceph-osd + num_units: 3 + storage: + osd-devices: 'cinder,10G' + options: + source: *source + east-ceph-mon: + charm: cs:~openstack-charmers-next/ceph-mon + num_units: 3 + options: + source: *source + west-ceph-radosgw: + charm: cs:~openstack-charmers-next/ceph-radosgw-multisite + num_units: 1 + options: + source: *source + realm: testrealm + zonegroup: testzonegroup + zone: west-1 + region: west-1 + west-ceph-osd: + charm: cs:~openstack-charmers-next/ceph-osd + num_units: 3 + storage: + osd-devices: 'cinder,10G' + options: + source: *source + west-ceph-mon: + charm: cs:~openstack-charmers-next/ceph-mon + num_units: 3 + options: + source: *source + percona-cluster: + charm: cs:~openstack-charmers-next/percona-cluster + num_units: 1 + keystone: + expose: True + charm: cs:~openstack-charmers-next/keystone + num_units: 1 + options: + openstack-origin: *source + region: "east-1 west-1" +relations: +- - keystone:shared-db + - percona-cluster:shared-db +- - east-ceph-osd:mon + - east-ceph-mon:osd +- - east-ceph-radosgw:mon + - east-ceph-mon:radosgw +- - east-ceph-radosgw:identity-service + - keystone:identity-service +- - west-ceph-osd:mon + - west-ceph-mon:osd +- - west-ceph-radosgw:mon + - west-ceph-mon:radosgw +- - west-ceph-radosgw:identity-service + - keystone:identity-service +- - west-ceph-radosgw:master + - east-ceph-radosgw:slave diff --git a/ceph-radosgw/bundles/us-east.yaml b/ceph-radosgw/bundles/us-east.yaml new file mode 100644 index 00000000..9483897c --- /dev/null +++ b/ceph-radosgw/bundles/us-east.yaml @@ -0,0 +1,41 @@ +machines: + '0': + constraints: + '1': + constraints: + '2': + constraints: +series: bionic +applications: + ceph-mon: + charm: 'cs:ceph-mon' + num_units: 3 + options: + expected-osd-count: 9 + to: + - lxd:0 + - lxd:1 + - lxd:2 + ceph-osd: + charm: 'cs:ceph-osd' + num_units: 3 + options: + osd-devices: "/dev/disk/by-dname/bcache1 /dev/disk/by-dname/bcache2 /dev/disk/by-dname/bcache3" + to: + - 0 + - 1 + - 2 + rgw-us-east: + charm: 'cs:ceph-radosgw' + num_units: 1 + options: + realm: replicated + zone: us-east + zonegroup: us + to: + - lxd:0 +relations: + - - 'ceph-mon:osd' + - 'ceph-osd:mon' + - - 'rgw-us-east:mon' + - 'ceph-mon:radosgw' diff --git a/ceph-radosgw/bundles/us-west.yaml b/ceph-radosgw/bundles/us-west.yaml new file mode 100644 index 00000000..5a87c37a --- /dev/null +++ b/ceph-radosgw/bundles/us-west.yaml @@ -0,0 +1,41 @@ +machines: + '0': + constraints: + '1': + constraints: + '2': + constraints: +series: bionic +applications: + ceph-mon: + charm: 'cs:ceph-mon' + num_units: 3 + options: + expected-osd-count: 9 + to: + - lxd:0 + - lxd:1 + - lxd:2 + ceph-osd: + charm: 'cs:ceph-osd' + num_units: 3 + options: + osd-devices: "/dev/disk/by-dname/bcache1 /dev/disk/by-dname/bcache2 /dev/disk/by-dname/bcache3" + to: + - 0 + - 1 + - 2 + rgw-us-west: + charm: 'cs:ceph-radosgw' + num_units: 1 + options: + realm: replicated + zone: us-west + zonegroup: us + to: + - lxd:0 +relations: + - - 'ceph-mon:osd' + - 'ceph-osd:mon' + - - 'rgw-us-west:mon' + - 'ceph-mon:radosgw' diff --git a/ceph-radosgw/charm-helpers-hooks.yaml b/ceph-radosgw/charm-helpers-hooks.yaml new file mode 100644 index 00000000..03ff1064 --- /dev/null +++ 
b/ceph-radosgw/charm-helpers-hooks.yaml
@@ -0,0 +1,19 @@
+repo: https://github.com/juju/charm-helpers
+destination: hooks/charmhelpers
+include:
+  - core
+  - cli
+  - osplatform
+  - fetch
+  - contrib.python
+  - contrib.storage.linux
+  - contrib.hahelpers:
+    - apache
+    - cluster
+  - payload.execd
+  - contrib.network.ip
+  - contrib.openstack|inc=*
+  - contrib.charmsupport
+  - contrib.hardening|inc=*
+  - contrib.hardware
+  - contrib.openstack.policyd
diff --git a/ceph-radosgw/charmcraft.yaml b/ceph-radosgw/charmcraft.yaml
new file mode 100644
index 00000000..20675a9e
--- /dev/null
+++ b/ceph-radosgw/charmcraft.yaml
@@ -0,0 +1,21 @@
+type: charm
+
+parts:
+  charm:
+    plugin: dump
+    source: .
+
+base: ubuntu@22.04
+platforms:
+  amd64:
+    build-on: amd64
+    build-for: amd64
+  arm64:
+    build-on: arm64
+    build-for: arm64
+  ppc64el:
+    build-on: ppc64el
+    build-for: ppc64el
+  s390x:
+    build-on: s390x
+    build-for: s390x
diff --git a/ceph-radosgw/config.yaml b/ceph-radosgw/config.yaml
new file mode 100644
index 00000000..45be3aae
--- /dev/null
+++ b/ceph-radosgw/config.yaml
@@ -0,0 +1,567 @@
+options:
+  loglevel:
+    type: int
+    default: 1
+    description: RadosGW debug level. Max is 20.
+  source:
+    type: string
+    default: caracal
+    description: |
+      Optional repository from which to install. May be one of the following:
+      distro (default), ppa:somecustom/ppa, a deb url sources entry,
+      or a supported Ubuntu Cloud Archive e.g.
+      .
+      cloud:-
+      cloud:-/updates
+      cloud:-/staging
+      cloud:-/proposed
+      .
+      See https://wiki.ubuntu.com/OpenStack/CloudArchive for info on which
+      cloud archives are available and supported.
+      .
+      Note that a minimum ceph version of 0.48.2 is required for use with this
+      charm which is NOT provided by the packages in the main Ubuntu archive
+      for precise but is provided in the Ubuntu cloud archive.
+  key:
+    type: string
+    default:
+    description: |
+      Key ID to import to the apt keyring to support use with arbitrary source
+      configuration from outside of Launchpad archives or PPAs.
+  harden:
+    type: string
+    default:
+    description: |
+      Apply system hardening. Supports a space-delimited list of modules
+      to run. Supported modules currently include os, ssh, apache and mysql.
+  config-flags:
+    type: string
+    default:
+    description: |
+      User provided Ceph configuration. Supports a string representation of
+      a python dictionary where each top-level key represents a section in
+      the ceph.conf template. You may only use sections supported in the
+      template.
+      .
+      WARNING: this is not the recommended way to configure the underlying
+      services that this charm installs and is used at the user's own risk.
+      This option is mainly provided as a stop-gap for users that either
+      want to test the effect of modifying some config or who have found
+      a critical bug in the way the charm has configured their services
+      and need it fixed immediately. We ask that whenever this is used,
+      that the user consider opening a bug on this charm at
+      http://bugs.launchpad.net/charms providing an explanation of why the
+      config was needed so that we may consider it for inclusion as a
+      natively supported config in the charm.
+  port:
+    type: int
+    default:
+    description: |
+      The port that the RADOS Gateway will listen on.
+      .
+      The default is 80 when no TLS is configured and 443 when TLS is
+      configured.
+  prefer-ipv6:
+    type: boolean
+    default: False
+    description: |
+      If True enables IPv6 support. The charm will expect network interfaces
+      to be configured with an IPv6 address.
If set to False (default) IPv4
+      is expected.
+      .
+      NOTE: these charms do not currently support IPv6 privacy extension. In
+      order for this charm to function correctly, the privacy extension must be
+      disabled and a non-temporary address must be configured/available on
+      your network interface.
+  pool-prefix:
+    type: string
+    default:
+    description: |
+      DEPRECATED, use zone instead - pool name can be inherited from the zone config
+      option. The rados gateway stores objects in many different pools. If you
+      would like to have multiple rados gateways each pointing to a separate
+      set of pools, set this prefix. The charm will then set up a new set of pools.
+      If your prefix has a dash in it, it will be used to split the prefix into
+      region and zone. Please read the documentation on federated rados gateways
+      for more information on region and zone.
+  restrict-ceph-pools:
+    type: boolean
+    default: False
+    description: |
+      Optionally restrict Ceph key permissions to access pools as required.
+  ceph-osd-replication-count:
+    type: int
+    default: 3
+    description: |
+      This value dictates the number of replicas ceph must make of any object
+      it stores within RGW pools. Note that once the RGW pools have been
+      created, changing this value will not have any effect (although it can be
+      changed in ceph by manually configuring your ceph cluster).
+  rgw-buckets-pool-weight:
+    type: int
+    default: 20
+    description: |
+      Defines a relative weighting of the pool as a percentage of the total
+      amount of data in the Ceph cluster. This effectively weights the number
+      of placement groups for the pool created to be appropriately portioned
+      to the amount of data expected. For example, if the amount of data loaded
+      into the RADOS Gateway/S3 interface is expected to be reserved for or
+      consume 20% of the data in the Ceph cluster, then this value would be
+      specified as 20.
+  rgw-lightweight-pool-pg-num:
+    type: int
+    default: -1
+    description: |
+      When the RADOS Gateway is installed it, by default, creates pools with
+      pg_num 8 which, in the majority of cases, is suboptimal. A few rgw pools
+      tend to carry more data than others e.g. .rgw.buckets tends to be larger
+      than most. So, for pools with greater requirements than others the charm
+      will apply the optimal value i.e. corresponding to the number of OSDs
+      up+in the cluster at the time the pool is created. For others it will use
+      this value which can be altered depending on how big your cluster is. Note
+      that once a pool has been created, changes to this setting will be
+      ignored. Setting this value to -1 enables the number of placement
+      groups to be calculated based on the Ceph placement group calculator.
+  relaxed-s3-bucket-names:
+    type: boolean
+    default: false
+    description: |
+      Enables relaxed S3 bucket names rules for US region buckets. This
+      allows for bucket names with any combination of letters, numbers,
+      periods, dashes and underscores up to 255 characters long, as long
+      as bucket names are unique and not formatted as IP addresses.
+
+      https://docs.ceph.com/en/latest/radosgw/s3/bucketops/
+  pool-type:
+    type: string
+    default: replicated
+    description: |
+      Ceph pool type to use for storage - valid values include ‘replicated’
+      and ‘erasure-coded’.
+  ec-profile-name:
+    type: string
+    default:
+    description: |
+      Name for the EC profile to be created for the EC pools. If not defined
+      a profile name will be generated based on the name of the pool used by
+      the application.
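+  # Illustrative example (not a recommendation): an erasure-coded object
+  # store with four data chunks and two coding chunks would combine
+  # pool-type=erasure-coded with ec-profile-k=4 and ec-profile-m=2 below.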
+  ec-rbd-metadata-pool:
+    type: string
+    default:
+    description: |
+      Name of the metadata pool to be created (for RBD use-cases). If not
+      defined a metadata pool name will be generated based on the name of
+      the data pool used by the application. The metadata pool is always
+      replicated, not erasure coded.
+  ec-profile-k:
+    type: int
+    default: 1
+    description: |
+      Number of data chunks that will be used for EC data pool. K+M factors
+      should never be greater than the number of available zones (or hosts)
+      for balancing.
+  ec-profile-m:
+    type: int
+    default: 2
+    description: |
+      Number of coding chunks that will be used for EC data pool. K+M factors
+      should never be greater than the number of available zones (or hosts)
+      for balancing.
+  ec-profile-locality:
+    type: int
+    default:
+    description: |
+      (lrc plugin - l) Group the coding and data chunks into sets of size l.
+      For instance, for k=4 and m=2, when l=3 two groups of three are created.
+      Each set can be recovered without reading chunks from another set. Note
+      that using the lrc plugin does incur more raw storage usage than isa or
+      jerasure in order to reduce the cost of recovery operations.
+  ec-profile-crush-locality:
+    type: string
+    default:
+    description: |
+      (lrc plugin) The type of the crush bucket in which each set of chunks
+      defined by l will be stored. For instance, if it is set to rack, each
+      group of l chunks will be placed in a different rack. It is used to
+      create a CRUSH rule step such as step choose rack. If it is not set,
+      no such grouping is done.
+  ec-profile-durability-estimator:
+    type: int
+    default:
+    description: |
+      (shec plugin - c) The number of parity chunks each of which includes
+      each data chunk in its calculation range. The number is used as a
+      durability estimator. For instance, if c=2, 2 OSDs can be down
+      without losing data.
+  ec-profile-helper-chunks:
+    type: int
+    default:
+    description: |
+      (clay plugin - d) Number of OSDs requested to send data during
+      recovery of a single chunk. d needs to be chosen such that
+      k+1 <= d <= k+m-1. The larger the d, the better the savings.
+  ec-profile-scalar-mds:
+    type: string
+    default:
+    description: |
+      (clay plugin) specifies the plugin that is used as a building
+      block in the layered construction. It can be one of jerasure,
+      isa, shec (defaults to jerasure).
+  ec-profile-plugin:
+    type: string
+    default: jerasure
+    description: |
+      EC plugin to use for this application's pool. The following plugins
+      are acceptable: jerasure, lrc, isa, shec, clay.
+  ec-profile-technique:
+    type: string
+    default:
+    description: |
+      EC profile technique used for this application's pool - will be
+      validated based on the plugin configured via ec-profile-plugin.
+      Supported techniques are ‘reed_sol_van’, ‘reed_sol_r6_op’,
+      ‘cauchy_orig’, ‘cauchy_good’, ‘liber8tion’ for jerasure,
+      ‘reed_sol_van’, ‘cauchy’ for isa and ‘single’, ‘multiple’
+      for shec.
+  ec-profile-device-class:
+    type: string
+    default:
+    description: |
+      Device class from CRUSH map to use for placement groups for
+      erasure profile - valid values: ssd, hdd or nvme (or leave
+      unset to not use a device class).
+  # Keystone integration
+  operator-roles:
+    type: string
+    default: "Member,member"
+    description: |
+      Comma-separated list of Swift operator roles; used when integrating with
+      OpenStack Keystone.
+  admin-roles:
+    type: string
+    default: "Admin"
+    description: |
+      Comma-separated list of Swift admin roles; used when integrating with
+      OpenStack Keystone.
Admin roles can set the user quota amount.
+  region:
+    type: string
+    default: RegionOne
+    description: |
+      OpenStack region that the RADOS gateway supports; used when integrating
+      with OpenStack Keystone.
+  cache-size:
+    type: int
+    default: 500
+    description: Number of keystone tokens to hold in local cache.
+  # HA config
+  use-syslog:
+    type: boolean
+    default: False
+    description: |
+      If set to True, supporting services will log to syslog.
+  dns-ha:
+    type: boolean
+    default: False
+    description: |
+      Use DNS HA with MAAS 2.0. Note: if this is set, do not set the vip
+      settings below.
+  vip:
+    type: string
+    default:
+    description: |
+      Virtual IP(s) to use to front API services in HA configuration.
+      .
+      If multiple networks are being used, a VIP should be provided for each
+      network, separated by spaces.
+  ha-bindiface:
+    type: string
+    default: eth0
+    description: |
+      Default network interface on which the HA cluster will bind for
+      communication with the other members of the HA Cluster.
+  ha-mcastport:
+    type: int
+    default: 5414
+    description: |
+      Default multicast port number that will be used to communicate between
+      HA Cluster nodes.
+  # Network config (by default all access is over 'private-address')
+  os-admin-network:
+    type: string
+    default:
+    description: |
+      The IP address and netmask of the OpenStack Admin network (e.g.
+      192.168.0.0/24)
+      .
+      This network will be used for admin endpoints.
+  os-internal-network:
+    type: string
+    default:
+    description: |
+      The IP address and netmask of the OpenStack Internal network (e.g.
+      192.168.0.0/24)
+      .
+      This network will be used for internal endpoints.
+  os-public-network:
+    type: string
+    default:
+    description: |
+      The IP address and netmask of the OpenStack Public network (e.g.
+      192.168.0.0/24)
+      .
+      This network will be used for public endpoints.
+  os-public-hostname:
+    type: string
+    default:
+    description: |
+      The hostname or address of the public endpoints created for ceph-radosgw
+      in the keystone identity provider.
+      .
+      This value will be used for public endpoints. For example, an
+      os-public-hostname set to 'files.example.com' will create
+      the following public endpoint for the ceph-radosgw:
+      .
+      https://files.example.com:80/swift/v1
+  os-internal-hostname:
+    type: string
+    default:
+    description: |
+      The hostname or address of the internal endpoints created for
+      ceph-radosgw in the keystone identity provider.
+      .
+      This value will be used for internal endpoints. For example, an
+      os-internal-hostname set to 'files.internal.example.com' will create
+      the following internal endpoint for the ceph-radosgw:
+      .
+      https://files.internal.example.com:80/swift/v1
+  os-admin-hostname:
+    type: string
+    default:
+    description: |
+      The hostname or address of the admin endpoints created for ceph-radosgw
+      in the keystone identity provider.
+      .
+      This value will be used for admin endpoints. For example, an
+      os-admin-hostname set to 'files.admin.example.com' will create
+      the following admin endpoint for the ceph-radosgw:
+      .
+      https://files.admin.example.com:80/swift/v1
+  # Monitoring config
+  nagios_context:
+    type: string
+    default: "juju"
+    description: |
+      Used by the nrpe-external-master subordinate charm.
+      A string that will be prepended to instance name to set the host name
+      in nagios. So for instance the hostname would be something like:
+      .
+      juju-myservice-0
+      .
+      If you're running multiple environments with the same services in them
+      this allows you to differentiate between them.
+  nagios_servicegroups:
+    type: string
+    default: ""
+    description: |
+      A comma-separated list of nagios servicegroups. If left empty,
+      the nagios_context will be used as the servicegroup
+  # HAProxy Parameters
+  haproxy-server-timeout:
+    type: int
+    default:
+    description: |
+      Server timeout configuration in ms for haproxy, used in HA
+      configurations. If not provided, default value of 90000ms is used.
+  haproxy-client-timeout:
+    type: int
+    default:
+    description: |
+      Client timeout configuration in ms for haproxy, used in HA
+      configurations. If not provided, default value of 90000ms is used.
+  haproxy-queue-timeout:
+    type: int
+    default:
+    description: |
+      Queue timeout configuration in ms for haproxy, used in HA
+      configurations. If not provided, default value of 9000ms is used.
+  haproxy-connect-timeout:
+    type: int
+    default:
+    description: |
+      Connect timeout configuration in ms for haproxy, used in HA
+      configurations. If not provided, default value of 9000ms is used.
+
+  # External SSL Parameters
+  ssl_cert:
+    type: string
+    default:
+    description: |
+      SSL certificate to install and use for API ports. Setting this value
+      and ssl_key will enable reverse proxying, point this application's
+      entry in the Keystone catalog to use https, and override any
+      certificate and key issued by Keystone (if it is configured to do so).
+  ssl_key:
+    type: string
+    default:
+    description: SSL key to use with certificate specified as ssl_cert.
+  ssl_ca:
+    type: string
+    default:
+    description: |
+      SSL CA to use with the certificate and key provided - this is only
+      required if you are providing a privately signed ssl_cert and ssl_key.
+  # Multi Site Options
+  realm:
+    type: string
+    default:
+    description: |
+      Name of RADOS Gateway Realm to create for multi-site replication. Setting
+      this option will enable support for multi-site replication, at which
+      point the zonegroup and zone options must also be provided.
+  zonegroup:
+    type: string
+    default:
+    description: |
+      Name of RADOS Gateway Zone Group to create for multi-site replication.
+  zone:
+    type: string
+    default: default
+    description: |
+      Name of RADOS Gateway Zone to create for multi-site replication. This
+      option must be specific to the local site e.g. us-west or us-east.
+  sync-policy-state:
+    type: string
+    default: enabled
+    description: |
+      This setting is used by the primary ceph-radosgw in multi-site
+      replication.
+
+      By default, all the buckets are synced from a primary RGW zone to the
+      secondary zone. This config option allows us to have selective buckets
+      sync. If this is set, it will be used as the default policy state for
+      all the buckets in the zonegroup.
+
+      Valid values are:
+        * enabled - sync is allowed and enabled
+        * allowed - sync is allowed
+        * forbidden - sync is not allowed
+  sync-policy-flow-type:
+    type: string
+    default: symmetrical
+    description: |
+      This setting is used by the secondary ceph-radosgw in multi-site
+      replication, and it's effective only when 'sync-policy-state' config is
+      set on the primary ceph-radosgw.
+
+      Valid values are:
+        * directional - data is only synced in one direction, from primary to
+          secondary.
+        * symmetrical - data is synced in both directions.
+  namespace-tenants:
+    type: boolean
+    default: False
+    description: |
+      Enable tenant namespacing. If tenant namespacing is enabled, keystone
+      tenants will be implicitly added to a matching tenant in radosgw, in
+      addition to updating the catalog URL to allow radosgw to support
+      publicly-readable containers and temporary URLs.
This namespacing
+      also allows multiple tenants to create buckets with the same names,
+      as the bucket names are namespaced into the tenant namespaces in the
+      RADOS gateway.
+
+      This configuration option will not be enabled on a charm upgrade, and
+      cannot be toggled on in an existing installation as it will remove
+      tenant access to existing buckets.
+  bluestore-compression-algorithm:
+    type: string
+    default:
+    description: |
+      Compressor to use (if any) for pools requested by this charm.
+      .
+      NOTE: The ceph-osd charm sets a global default for this value (defaults
+      to 'lz4' unless configured by the end user) which will be used unless
+      specified for individual pools.
+  bluestore-compression-mode:
+    type: string
+    default:
+    description: |
+      Policy for using compression on pools requested by this charm.
+      .
+      'none' means never use compression.
+      'passive' means use compression when clients hint that data is
+      compressible.
+      'aggressive' means use compression unless clients hint that
+      data is not compressible.
+      'force' means use compression under all circumstances even if the clients
+      hint that the data is not compressible.
+  bluestore-compression-required-ratio:
+    type: float
+    default:
+    description: |
+      The ratio of the size of the data chunk after compression relative to the
+      original size must be at least this small in order to store the
+      compressed version on pools requested by this charm.
+  bluestore-compression-min-blob-size:
+    type: int
+    default:
+    description: |
+      Chunks smaller than this are never compressed on pools requested by
+      this charm.
+  bluestore-compression-min-blob-size-hdd:
+    type: int
+    default:
+    description: |
+      Value of bluestore compression min blob size for rotational media on
+      pools requested by this charm.
+  bluestore-compression-min-blob-size-ssd:
+    type: int
+    default:
+    description: |
+      Value of bluestore compression min blob size for solid state media on
+      pools requested by this charm.
+  bluestore-compression-max-blob-size:
+    type: int
+    default:
+    description: |
+      Chunks larger than this are broken into smaller blobs (of at most
+      bluestore compression max blob size) before being compressed on pools
+      requested by this charm.
+  bluestore-compression-max-blob-size-hdd:
+    type: int
+    default:
+    description: |
+      Value of bluestore compression max blob size for rotational media on
+      pools requested by this charm.
+  bluestore-compression-max-blob-size-ssd:
+    type: int
+    default:
+    description: |
+      Value of bluestore compression max blob size for solid state media on
+      pools requested by this charm.
+  rgw-swift-versioning-enabled:
+    type: boolean
+    default: False
+    description: |
+      If True, swift object versioning will be enabled for radosgw.
+
+      NOTE: X-Versions-Location is the only versioning-related header that
+      radosgw interprets. X-History-Location, supported by native OpenStack
+      Swift, is currently not supported by radosgw.
+  http-frontend:
+    type: string
+    default:
+    description: |
+      Frontend HTTP engine to use for the Ceph RADOS Gateway; for Octopus and
+      later this defaults to 'beast' and for older releases (and on architectures
+      where beast is not supported) 'civetweb'. Civetweb support was removed in
+      Ceph Quincy.
+  virtual-hosted-bucket-enabled:
+    type: boolean
+    default: false
+    description: |
+      If true, radosgw is configured to allow the use of virtual hosted bucket
+      names. This also requires the creation of a DNS CNAME to point all wildcard
+      subdomains (*.radosgw.domain) to the radosgw IP (or VIP).
The host name part + (radosgw.domain) is taken from os-public-hostname so it must have a value too. + + https://docs.ceph.com/en/latest/radosgw/s3/commons/ diff --git a/ceph-radosgw/copyright b/ceph-radosgw/copyright new file mode 100644 index 00000000..e0dee89e --- /dev/null +++ b/ceph-radosgw/copyright @@ -0,0 +1,16 @@ +Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0 + +Files: * +Copyright: Copyright 2011, Canonical Ltd., All Rights Reserved. +License: Apache-2.0 + Licensed under the Apache License, Version 2.0 (the "License"); you may + not use this file except in compliance with the License. You may obtain + a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + License for the specific language governing permissions and limitations + under the License. diff --git a/ceph-radosgw/files/www/s3gw.fcgi b/ceph-radosgw/files/www/s3gw.fcgi new file mode 100755 index 00000000..e766fcb9 --- /dev/null +++ b/ceph-radosgw/files/www/s3gw.fcgi @@ -0,0 +1,2 @@ +#!/bin/sh +exec /usr/bin/radosgw -c /etc/ceph/ceph.conf -n client.radosgw.gateway diff --git a/ceph-radosgw/hardening.yaml b/ceph-radosgw/hardening.yaml new file mode 100644 index 00000000..314bb385 --- /dev/null +++ b/ceph-radosgw/hardening.yaml @@ -0,0 +1,5 @@ +# Overrides file for contrib.hardening. See README.hardening in +# contrib.hardening for info on how to use this file. +ssh: + server: + use_pam: 'yes' # juju requires this diff --git a/ceph-radosgw/hooks/ceph_radosgw_context.py b/ceph-radosgw/hooks/ceph_radosgw_context.py new file mode 100644 index 00000000..babc0a74 --- /dev/null +++ b/ceph-radosgw/hooks/ceph_radosgw_context.py @@ -0,0 +1,355 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
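+
+# Context generators used to render the RADOS Gateway configuration
+# (ceph.conf, HAProxy and Apache frontends) from the charm's templates.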
+ +import os +import re +import socket +import tempfile +import shutil + +import multisite +from charmhelpers.contrib.openstack import context +from charmhelpers.contrib.hahelpers.cluster import ( + determine_api_port, + determine_apache_port, + https, +) +from charmhelpers.core.host import ( + cmp_pkgrevno, + arch, +) +from charmhelpers.core.hookenv import ( + DEBUG, + WARNING, + ERROR, + config, + log, + related_units, + relation_get, + relation_ids, + unit_public_ip, + leader_get, +) +from charmhelpers.contrib.network.ip import ( + format_ipv6_addr, + get_ipv6_addr, +) +from charmhelpers.contrib.storage.linux.ceph import CephConfContext + +import utils + + +BEAST_FRONTEND = 'beast' +CIVETWEB_FRONTEND = 'civetweb' +SUPPORTED_FRONTENDS = (BEAST_FRONTEND, CIVETWEB_FRONTEND) +UNSUPPORTED_BEAST_ARCHS = ('s390x', 'riscv64') + + +class ApacheSSLContext(context.ApacheSSLContext): + interfaces = ['https'] + service_namespace = 'ceph-radosgw' + + def __call__(self): + self.external_ports = [utils.listen_port()] + ctx = super(ApacheSSLContext, self).__call__() + ctx['virtual_hosted_bucket_enabled'] = \ + config('virtual-hosted-bucket-enabled') + return ctx + + +class HAProxyContext(context.HAProxyContext): + + def __call__(self): + ctxt = super(HAProxyContext, self).__call__() + port = utils.listen_port() + service = 'cephradosgw-server' + + # Apache ports + a_cephradosgw_api = determine_apache_port(port, singlenode_mode=True) + + port_mapping = { + service: [port, a_cephradosgw_api] + } + + ctxt['cephradosgw_bind_port'] = determine_api_port( + port, + singlenode_mode=True, + ) + + # for haproxy.conf + backend_options = { + service: [{ + 'option': 'httpchk GET /swift/healthcheck', + }] + } + + ctxt['service_ports'] = port_mapping + ctxt['backend_options'] = backend_options + ctxt['https'] = https() + + return ctxt + + +class IdentityServiceContext(context.IdentityServiceContext): + interfaces = ['identity-service'] + + def __call__(self): + ctxt = super(IdentityServiceContext, self).__call__() + if not ctxt: + return + + if cmp_pkgrevno('radosgw', "10.2.0") >= 0: + ctxt['auth_keystone_v3_supported'] = True + + if (not ctxt.get('admin_domain_id') and + float(ctxt.get('api_version', '2.0')) < 3): + ctxt.pop('admin_domain_id') + + ctxt['auth_type'] = 'keystone' + if cmp_pkgrevno('radosgw', '15.0.0') < 0: + ctxt['keystone_revocation_parameter_supported'] = True + if cmp_pkgrevno('radosgw', "11.0.0") >= 0: + ctxt['user_roles'] = config('operator-roles') + ctxt['admin_roles'] = config('admin-roles') + else: + ctxt['user_roles'] = config('operator-roles') + if config('admin-roles'): + ctxt['user_roles'] += (',' + config('admin-roles')) + ctxt['cache_size'] = config('cache-size') + ctxt['namespace_tenants'] = leader_get('namespace_tenants') == 'True' + if self.context_complete(ctxt): + return ctxt + return {} + + +def ensure_host_resolvable_v6(hostname): + """Ensure that we can resolve our hostname to an IPv6 address by adding it + to /etc/hosts if it is not already resolvable. 
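+
+    The update is applied to a temporary copy of /etc/hosts which is then
+    renamed into place, so readers never observe a partially written file.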
+    """
+    try:
+        socket.getaddrinfo(hostname, None, socket.AF_INET6)
+    except socket.gaierror:
+        log("Host '%s' is not ipv6 resolvable - adding to /etc/hosts" %
+            hostname, level=DEBUG)
+    else:
+        log("Host '%s' appears to be ipv6 resolvable" % (hostname),
+            level=DEBUG)
+        return
+
+    # This must be the backend address used by haproxy
+    host_addr = get_ipv6_addr(exc_list=[config('vip')])[0]
+    dtmp = tempfile.mkdtemp()
+    try:
+        tmp_hosts = os.path.join(dtmp, 'hosts')
+        shutil.copy('/etc/hosts', tmp_hosts)
+        with open(tmp_hosts, 'a+') as fd:
+            # 'a+' positions the stream at end-of-file; rewind so the
+            # duplicate-entry check below actually reads the existing lines.
+            fd.seek(0)
+            lines = fd.readlines()
+            for line in lines:
+                key = r"^%s\s+" % (host_addr)
+                if re.search(key, line):
+                    break
+            else:
+                fd.write("%s\t%s\n" % (host_addr, hostname))
+
+        os.rename(tmp_hosts, '/etc/hosts')
+    finally:
+        shutil.rmtree(dtmp)
+
+
+def resolve_http_frontend():
+    """Automatically determine the HTTP frontend configuration
+
+    Determines the best HTTP frontend configuration based
+    on the Ceph release in use and the architecture of the
+    machine being used.
+
+    :returns http frontend configuration to use.
+    :rtype: str
+    """
+    octopus_or_later = cmp_pkgrevno('radosgw', '15.2.0') >= 0
+    pacific_or_later = cmp_pkgrevno('radosgw', '16.2.0') >= 0
+    if octopus_or_later:
+        # Pacific or later supports beast on all architectures
+        # but octopus does not support s390x or riscv64
+        if not pacific_or_later and arch() in UNSUPPORTED_BEAST_ARCHS:
+            return CIVETWEB_FRONTEND
+        else:
+            return BEAST_FRONTEND
+    return CIVETWEB_FRONTEND
+
+
+def validate_http_frontend(frontend_config):
+    """Validate HTTP frontend configuration
+
+    :param frontend_config: user provided config value
+    :type: str
+    :raises: ValueError if the provided config is not valid
+    """
+    mimic_or_later = cmp_pkgrevno('radosgw', '13.2.0') >= 0
+    pacific_or_later = cmp_pkgrevno('radosgw', '16.2.0') >= 0
+    quincy_or_later = cmp_pkgrevno('radosgw', '17.0.0') >= 0
+    if frontend_config not in SUPPORTED_FRONTENDS:
+        e = ('Please provide either civetweb or beast for '
+             'http-frontend configuration')
+        log(e, level=ERROR)
+        raise ValueError(e)
+    if frontend_config == BEAST_FRONTEND:
+        if not mimic_or_later:
+            e = ('Use of the beast HTTP frontend requires Ceph '
+                 'mimic or later.')
+            log(e, level=ERROR)
+            raise ValueError(e)
+        if not pacific_or_later and arch() in UNSUPPORTED_BEAST_ARCHS:
+            e = ('Use of the beast HTTP frontend on {} requires Ceph '
+                 'pacific or later.'.format(arch()))
+            log(e, level=ERROR)
+            raise ValueError(e)
+    if frontend_config == CIVETWEB_FRONTEND and quincy_or_later:
+        e = 'Civetweb frontend is not supported after Ceph Pacific.'
+ log(e, level=ERROR) + raise ValueError(e) + + +class MonContext(context.CephContext): + interfaces = ['mon'] + + def __call__(self): + if not relation_ids(self.interfaces[0]): + return {} + + host = socket.gethostname() + systemd_rgw = False + + mon_hosts = [] + auths = [] + fsid = None + + for rid in relation_ids(self.interfaces[0]): + for unit in related_units(rid): + if fsid is None: + fsid = relation_get('fsid', rid=rid, unit=unit) + _auth = relation_get('auth', rid=rid, unit=unit) + if _auth: + auths.append(_auth) + + ceph_pub_addr = relation_get('ceph-public-address', rid=rid, + unit=unit) + unit_priv_addr = relation_get('private-address', rid=rid, + unit=unit) + ceph_addr = ceph_pub_addr or unit_priv_addr + ceph_addr = format_ipv6_addr(ceph_addr) or ceph_addr + if ceph_addr: + mon_hosts.append(ceph_addr) + if relation_get('rgw.{}_key'.format(host), rid=rid, unit=unit): + systemd_rgw = True + + if len(set(auths)) != 1: + e = ("Inconsistent or absent auth returned by mon units. Setting " + "auth_supported to 'none'") + log(e, level=WARNING) + auth = 'none' + else: + auth = auths[0] + + # /etc/init.d/radosgw mandates that a dns name is used for this + # parameter so ensure that address is resolvable + if config('prefer-ipv6'): + ensure_host_resolvable_v6(host) + + port = determine_api_port(utils.listen_port(), singlenode_mode=True) + if config('prefer-ipv6'): + port = "[::]:%s" % (port) + + http_frontend = config('http-frontend') + if not http_frontend: + http_frontend = resolve_http_frontend() + else: + validate_http_frontend(http_frontend) + + mon_hosts.sort() + ctxt = { + 'auth_supported': auth, + 'mon_hosts': ' '.join(mon_hosts), + 'hostname': host, + 'old_auth': cmp_pkgrevno('radosgw', "0.51") < 0, + 'systemd_rgw': systemd_rgw, + 'use_syslog': str(config('use-syslog')).lower(), + 'loglevel': config('loglevel'), + 'port': port, + 'ipv6': config('prefer-ipv6'), + 'virtual_hosted_bucket_enabled': + config('virtual-hosted-bucket-enabled'), + # The public unit IP is only used in case the authentication is + # *Not* keystone - in which case it is used to make sure the + # storage endpoint returned by the built-in auth is the HAproxy + # (since it defaults to the port the service runs on, and that is + # not available externally). ~tribaal + 'unit_public_ip': unit_public_ip(), + 'fsid': fsid, + 'rgw_swift_versioning': config('rgw-swift-versioning-enabled'), + 'relaxed_s3_bucket_names': config('relaxed-s3-bucket-names'), + 'frontend': http_frontend, + 'behind_https_proxy': https(), + } + if config('virtual-hosted-bucket-enabled'): + if config('os-public-hostname'): + ctxt['public_hostname'] = config('os-public-hostname') + else: + log("When virtual_hosted_bucket_enabled is true, " + "os_public_hostname must have a value.", level=WARNING) + + # NOTE(dosaboy): these sections must correspond to what is supported in + # the config template. 
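+        # For example (illustrative), a user-provided setting under the
+        # permitted 'client.radosgw.gateway' section surfaces in the
+        # context as 'client_radosgw_gateway' after the dot-to-underscore
+        # rewrite below.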
+ sections = ['global', 'client.radosgw.gateway'] + user_provided = CephConfContext(permitted_sections=sections)() + user_provided = {k.replace('.', '_'): user_provided[k] + for k in user_provided} + ctxt.update(user_provided) + + if self.context_complete(ctxt): + # Multi-site zone configuration is optional, + # so add after assessment + zone = config('zone') + zonegroup = config('zonegroup') + realm = config('realm') + log("config: zone {} zonegroup {} realm {}" + .format(zone, zonegroup, realm), level=DEBUG) + if zone in multisite.plain_list('zone'): + ctxt['rgw_zone'] = zone + if zonegroup in multisite.plain_list('zonegroup'): + ctxt['rgw_zonegroup'] = zonegroup + if realm in multisite.plain_list('realm'): + ctxt['rgw_realm'] = realm + return ctxt + + return {} + + def context_complete(self, ctxt): + """Overridden here to ensure the context is actually complete. + + We set `key` and `auth` to None here, by default, to ensure + that the context will always evaluate to incomplete until the + Ceph relation has actually sent these details; otherwise, + there is a potential race condition between the relation + appearing and the first unit actually setting this data on the + relation. + + :param ctxt: The current context members + :type ctxt: Dict[str, ANY] + :returns: True if the context is complete + :rtype: bool + """ + if 'fsid' not in ctxt: + return False + return context.OSContextGenerator.context_complete(self, ctxt) diff --git a/ceph-radosgw/hooks/ceph_rgw.py b/ceph-radosgw/hooks/ceph_rgw.py new file mode 100644 index 00000000..85067dd6 --- /dev/null +++ b/ceph-radosgw/hooks/ceph_rgw.py @@ -0,0 +1,226 @@ +# +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import subprocess + +import charmhelpers.contrib.openstack.context as ch_context + +from charmhelpers.core.hookenv import ( + config, + service_name, +) + +from charmhelpers.core.host import ( + mkdir, + symlink, +) +from charmhelpers.contrib.storage.linux.ceph import ( + CephBrokerRq, +) + +CEPH_DIR = '/etc/ceph' +CEPH_RADOSGW_DIR = '/var/lib/ceph/radosgw' +_radosgw_keyring = "keyring.rados.gateway" +CEPH_POOL_APP_NAME = 'rgw' + + +def import_radosgw_key(key, name=None): + if name: + keyring_path = os.path.join(CEPH_RADOSGW_DIR, + 'ceph-{}'.format(name), + 'keyring') + link_path = os.path.join(CEPH_DIR, + 'ceph.client.{}.keyring'.format(name)) + owner = group = 'ceph' + else: + keyring_path = os.path.join(CEPH_DIR, _radosgw_keyring) + link_path = None + owner = group = 'root' + + exists = os.path.exists(keyring_path) + if not exists: + mkdir(path=os.path.dirname(keyring_path), + owner=owner, group=group, perms=0o750) + + cmd = ['ceph-authtool', keyring_path] + if not exists: + cmd.append('--create-keyring') + cmd.extend([ + '--name=client.{}'.format(name or 'radosgw.gateway'), + '--add-key={}'.format(key) + ]) + subprocess.check_call(cmd) + if not exists: + cmd = [ + 'chown', + '{}:{}'.format(owner, group), + keyring_path + ] + subprocess.check_call(cmd) + # NOTE: add a link to the keyring in /var/lib/ceph + # to /etc/ceph so we can use it for radosgw-admin + # operations for multi-site configuration + if link_path: + symlink(keyring_path, link_path) + + return not exists + + +def normalize_pool_name(pool): + return pool[1:] if pool.startswith('.') else pool + + +def get_create_rgw_pools_rq(prefix=None): + """Pre-create RGW pools so that they have the correct settings. + + If a prefix is provided it will be prepended to each pool name. + + When RGW creates its own pools it will create them with non-optimal + settings (LP: #1476749). + + NOTE: see http://docs.ceph.com/docs/master/radosgw/config-ref/#pools and + http://docs.ceph.com/docs/master/radosgw/config/#create-pools for + list of supported/required pools. 
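+
+    Example (illustrative; 'zone1' is an arbitrary prefix)::
+
+        rq = get_create_rgw_pools_rq(prefix='zone1')
+        # yields ops creating e.g. the 'zone1.rgw.buckets.data' pool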
+ """ + def _add_light_pool(rq, pool, pg_num, prefix=None): + # Per the Ceph PG Calculator, all of the lightweight pools get 0.10% + # of the data by default and only the .rgw.buckets.* get higher values + weights = { + '.rgw.buckets.index': 3.00, + '.rgw.buckets.extra': 1.00 + } + w = weights.get(pool, 0.10) + if prefix: + pool = "{prefix}{pool}".format(prefix=prefix, pool=pool) + + pool = normalize_pool_name(pool) + if pg_num > 0: + rq.add_op_create_pool(name=pool, replica_count=replicas, + pg_num=pg_num, group='objects', + app_name=CEPH_POOL_APP_NAME) + else: + rq.add_op_create_pool(name=pool, replica_count=replicas, + weight=w, group='objects', + app_name=CEPH_POOL_APP_NAME) + + rq = CephBrokerRq() + replicas = config('ceph-osd-replication-count') + + prefix = prefix or 'default' + # Buckets likely to contain the most data and therefore + # requiring the most PGs + heavy = [ + '.rgw.buckets.data' + ] + bucket_weight = config('rgw-buckets-pool-weight') + bluestore_compression = ch_context.CephBlueStoreCompressionContext() + + if config('pool-type') == 'erasure-coded': + # General EC plugin config + plugin = config('ec-profile-plugin') + technique = config('ec-profile-technique') + device_class = config('ec-profile-device-class') + bdm_k = config('ec-profile-k') + bdm_m = config('ec-profile-m') + # LRC plugin config + bdm_l = config('ec-profile-locality') + crush_locality = config('ec-profile-crush-locality') + # SHEC plugin config + bdm_c = config('ec-profile-durability-estimator') + # CLAY plugin config + bdm_d = config('ec-profile-helper-chunks') + scalar_mds = config('ec-profile-scalar-mds') + # Profile name + service = service_name() + profile_name = ( + config('ec-profile-name') or "{}-profile".format(service) + ) + rq.add_op_create_erasure_profile( + name=profile_name, + k=bdm_k, m=bdm_m, + lrc_locality=bdm_l, + lrc_crush_locality=crush_locality, + shec_durability_estimator=bdm_c, + clay_helper_chunks=bdm_d, + clay_scalar_mds=scalar_mds, + device_class=device_class, + erasure_type=plugin, + erasure_technique=technique + ) + + for pool in heavy: + pool = "{prefix}{pool}".format(prefix=prefix, pool=pool) + # NOTE(fnordahl): once we deprecate Python 3.5 support we can do + # the unpacking of the BlueStore compression arguments as part of + # the function arguments. Until then we need to build the dict + # prior to the function call. + kwargs = { + 'name': normalize_pool_name(pool), + 'erasure_profile': profile_name, + 'weight': bucket_weight, + 'group': "objects", + 'app_name': CEPH_POOL_APP_NAME, + } + kwargs.update(bluestore_compression.get_kwargs()) + rq.add_op_create_erasure_pool(**kwargs) + else: + for pool in heavy: + pool = "{prefix}{pool}".format(prefix=prefix, pool=pool) + # NOTE(fnordahl): once we deprecate Python 3.5 support we can do + # the unpacking of the BlueStore compression arguments as part of + # the function arguments. Until then we need to build the dict + # prior to the function call. 
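+            # Sketch of the post-3.5 form the NOTE above refers to:
+            #     rq.add_op_create_replicated_pool(
+            #         name=normalize_pool_name(pool), replica_count=replicas,
+            #         weight=bucket_weight, group='objects',
+            #         app_name=CEPH_POOL_APP_NAME,
+            #         **bluestore_compression.get_kwargs())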
+ kwargs = { + 'name': normalize_pool_name(pool), + 'replica_count': replicas, + 'weight': bucket_weight, + 'group': 'objects', + 'app_name': CEPH_POOL_APP_NAME, + } + kwargs.update(bluestore_compression.get_kwargs()) + rq.add_op_create_replicated_pool(**kwargs) + + # NOTE: we want these pools to have a smaller pg_num/pgp_num than the + # others since they are not expected to contain as much data + light = [ + '.rgw.control', + '.rgw.data.root', + '.rgw.gc', + '.rgw.log', + '.rgw.intent-log', + '.rgw.meta', + '.rgw.otp', + '.rgw.usage', + '.rgw.users.keys', + '.rgw.users.email', + '.rgw.users.swift', + '.rgw.users.uid', + '.rgw.buckets.extra', + '.rgw.buckets.index', + ] + pg_num = config('rgw-lightweight-pool-pg-num') + for pool in light: + _add_light_pool(rq, pool, pg_num, prefix) + + # RadosGW creates this pool automatically from Quincy on. + # _add_light_pool(rq, '.rgw.root', pg_num) + + if config('restrict-ceph-pools'): + rq.add_op_request_access_to_group(name="objects", + permission='rwx', + key_name='radosgw.gateway') + + return rq diff --git a/ceph-radosgw/hooks/certificates-relation-broken b/ceph-radosgw/hooks/certificates-relation-broken new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/certificates-relation-broken @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/certificates-relation-changed b/ceph-radosgw/hooks/certificates-relation-changed new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/certificates-relation-changed @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/certificates-relation-departed b/ceph-radosgw/hooks/certificates-relation-departed new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/certificates-relation-departed @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/certificates-relation-joined b/ceph-radosgw/hooks/certificates-relation-joined new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/certificates-relation-joined @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/charmhelpers/__init__.py b/ceph-radosgw/hooks/charmhelpers/__init__.py new file mode 100644 index 00000000..ddf30450 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/__init__.py @@ -0,0 +1,84 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Bootstrap charm-helpers, installing its dependencies if necessary using +# only standard libraries. +import functools +import inspect +import subprocess + + +try: + import yaml # NOQA:F401 +except ImportError: + subprocess.check_call(['apt-get', 'install', '-y', 'python3-yaml']) + import yaml # NOQA:F401 + + +# Holds a list of mapping of mangled function names that have been deprecated +# using the @deprecate decorator below. This is so that the warning is only +# printed once for each usage of the function. 
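+# Keys are the mangled function names, values are simply True; entries are
+# added by the wrapped_f closure inside deprecate() below.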
+__deprecated_functions = {} + + +def deprecate(warning, date=None, log=None): + """Add a deprecation warning the first time the function is used. + + The date which is a string in semi-ISO8660 format indicates the year-month + that the function is officially going to be removed. + + usage: + + @deprecate('use core/fetch/add_source() instead', '2017-04') + def contributed_add_source_thing(...): + ... + + And it then prints to the log ONCE that the function is deprecated. + The reason for passing the logging function (log) is so that hookenv.log + can be used for a charm if needed. + + :param warning: String to indicate what is to be used instead. + :param date: Optional string in YYYY-MM format to indicate when the + function will definitely (probably) be removed. + :param log: The log function to call in order to log. If None, logs to + stdout + """ + def wrap(f): + + @functools.wraps(f) + def wrapped_f(*args, **kwargs): + try: + module = inspect.getmodule(f) + file = inspect.getsourcefile(f) + lines = inspect.getsourcelines(f) + f_name = "{}-{}-{}..{}-{}".format( + module.__name__, file, lines[0], lines[-1], f.__name__) + except (IOError, TypeError): + # assume it was local, so just use the name of the function + f_name = f.__name__ + if f_name not in __deprecated_functions: + __deprecated_functions[f_name] = True + s = "DEPRECATION WARNING: Function {} is being removed".format( + f.__name__) + if date: + s = "{} on/around {}".format(s, date) + if warning: + s = "{} : {}".format(s, warning) + if log: + log(s) + else: + print(s) + return f(*args, **kwargs) + return wrapped_f + return wrap diff --git a/ceph-radosgw/hooks/charmhelpers/cli/__init__.py b/ceph-radosgw/hooks/charmhelpers/cli/__init__.py new file mode 100644 index 00000000..2b0c4b7a --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/cli/__init__.py @@ -0,0 +1,187 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
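+
+# Example (illustrative) of how the sibling modules in this package
+# register commands with the CommandLine instance defined below, e.g.
+# cli/host.py does exactly this:
+#
+#     from . import cmdline
+#
+#     @cmdline.subcommand()
+#     def mounts():
+#         "List mounts"
+#         return host.mounts()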
+ +import inspect +import argparse +import sys + +import charmhelpers.core.unitdata + + +class OutputFormatter(object): + def __init__(self, outfile=sys.stdout): + self.formats = ( + "raw", + "json", + "py", + "yaml", + "csv", + "tab", + ) + self.outfile = outfile + + def add_arguments(self, argument_parser): + formatgroup = argument_parser.add_mutually_exclusive_group() + choices = self.supported_formats + formatgroup.add_argument("--format", metavar='FMT', + help="Select output format for returned data, " + "where FMT is one of: {}".format(choices), + choices=choices, default='raw') + for fmt in self.formats: + fmtfunc = getattr(self, fmt) + formatgroup.add_argument("-{}".format(fmt[0]), + "--{}".format(fmt), action='store_const', + const=fmt, dest='format', + help=fmtfunc.__doc__) + + @property + def supported_formats(self): + return self.formats + + def raw(self, output): + """Output data as raw string (default)""" + if isinstance(output, (list, tuple)): + output = '\n'.join(map(str, output)) + self.outfile.write(str(output)) + + def py(self, output): + """Output data as a nicely-formatted python data structure""" + import pprint + pprint.pprint(output, stream=self.outfile) + + def json(self, output): + """Output data in JSON format""" + import json + json.dump(output, self.outfile) + + def yaml(self, output): + """Output data in YAML format""" + import yaml + yaml.safe_dump(output, self.outfile) + + def csv(self, output): + """Output data as excel-compatible CSV""" + import csv + csvwriter = csv.writer(self.outfile) + csvwriter.writerows(output) + + def tab(self, output): + """Output data in excel-compatible tab-delimited format""" + import csv + csvwriter = csv.writer(self.outfile, dialect=csv.excel_tab) + csvwriter.writerows(output) + + def format_output(self, output, fmt='raw'): + fmtfunc = getattr(self, fmt) + fmtfunc(output) + + +class CommandLine(object): + argument_parser = None + subparsers = None + formatter = None + exit_code = 0 + + def __init__(self): + if not self.argument_parser: + self.argument_parser = argparse.ArgumentParser(description='Perform common charm tasks') + if not self.formatter: + self.formatter = OutputFormatter() + self.formatter.add_arguments(self.argument_parser) + if not self.subparsers: + self.subparsers = self.argument_parser.add_subparsers(help='Commands') + + def subcommand(self, command_name=None): + """ + Decorate a function as a subcommand. Use its arguments as the + command-line arguments""" + def wrapper(decorated): + cmd_name = command_name or decorated.__name__ + subparser = self.subparsers.add_parser(cmd_name, + description=decorated.__doc__) + for args, kwargs in describe_arguments(decorated): + subparser.add_argument(*args, **kwargs) + subparser.set_defaults(func=decorated) + return decorated + return wrapper + + def test_command(self, decorated): + """ + Subcommand is a boolean test function, so bool return values should be + converted to a 0/1 exit code. + """ + decorated._cli_test_command = True + return decorated + + def no_output(self, decorated): + """ + Subcommand is not expected to return a value, so don't print a spurious None. + """ + decorated._cli_no_output = True + return decorated + + def subcommand_builder(self, command_name, description=None): + """ + Decorate a function that builds a subcommand. 
Builders should accept a + single argument (the subparser instance) and return the function to be + run as the command.""" + def wrapper(decorated): + subparser = self.subparsers.add_parser(command_name) + func = decorated(subparser) + subparser.set_defaults(func=func) + subparser.description = description or func.__doc__ + return wrapper + + def run(self): + "Run cli, processing arguments and executing subcommands." + arguments = self.argument_parser.parse_args() + argspec = inspect.getfullargspec(arguments.func) + vargs = [] + for arg in argspec.args: + vargs.append(getattr(arguments, arg)) + if argspec.varargs: + vargs.extend(getattr(arguments, argspec.varargs)) + output = arguments.func(*vargs) + if getattr(arguments.func, '_cli_test_command', False): + self.exit_code = 0 if output else 1 + output = '' + if getattr(arguments.func, '_cli_no_output', False): + output = '' + self.formatter.format_output(output, arguments.format) + if charmhelpers.core.unitdata._KV: + charmhelpers.core.unitdata._KV.flush() + + +cmdline = CommandLine() + + +def describe_arguments(func): + """ + Analyze a function's signature and return a data structure suitable for + passing in as arguments to an argparse parser's add_argument() method.""" + + argspec = inspect.getfullargspec(func) + # we should probably raise an exception somewhere if func includes **kwargs + if argspec.defaults: + positional_args = argspec.args[:-len(argspec.defaults)] + keyword_names = argspec.args[-len(argspec.defaults):] + for arg, default in zip(keyword_names, argspec.defaults): + yield ('--{}'.format(arg),), {'default': default} + else: + positional_args = argspec.args + + for arg in positional_args: + yield (arg,), {} + if argspec.varargs: + yield (argspec.varargs,), {'nargs': '*'} diff --git a/ceph-radosgw/hooks/charmhelpers/cli/benchmark.py b/ceph-radosgw/hooks/charmhelpers/cli/benchmark.py new file mode 100644 index 00000000..303af14b --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/cli/benchmark.py @@ -0,0 +1,34 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . 
import cmdline +from charmhelpers.contrib.benchmark import Benchmark + + +@cmdline.subcommand(command_name='benchmark-start') +def start(): + Benchmark.start() + + +@cmdline.subcommand(command_name='benchmark-finish') +def finish(): + Benchmark.finish() + + +@cmdline.subcommand_builder('benchmark-composite', description="Set the benchmark composite score") +def service(subparser): + subparser.add_argument("value", help="The composite score.") + subparser.add_argument("units", help="The units the composite score represents, i.e., 'reads/sec'.") + subparser.add_argument("direction", help="'asc' if a lower score is better, 'desc' if a higher score is better.") + return Benchmark.set_composite_score diff --git a/ceph-radosgw/hooks/charmhelpers/cli/commands.py b/ceph-radosgw/hooks/charmhelpers/cli/commands.py new file mode 100644 index 00000000..b9310565 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/cli/commands.py @@ -0,0 +1,30 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This module loads sub-modules into the python runtime so they can be +discovered via the inspect module. In order to prevent flake8 from (rightfully) +telling us these are unused modules, throw a ' # noqa' at the end of each import +so that the warning is suppressed. +""" + +from . import CommandLine # noqa + +""" +Import the sub-modules which have decorated subcommands to register with chlp. +""" +from . import host # noqa +from . import benchmark # noqa +from . import unitdata # noqa +from . import hookenv # noqa diff --git a/ceph-radosgw/hooks/charmhelpers/cli/hookenv.py b/ceph-radosgw/hooks/charmhelpers/cli/hookenv.py new file mode 100644 index 00000000..bd72f448 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/cli/hookenv.py @@ -0,0 +1,21 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import cmdline +from charmhelpers.core import hookenv + + +cmdline.subcommand('relation-id')(hookenv.relation_id._wrapped) +cmdline.subcommand('service-name')(hookenv.service_name) +cmdline.subcommand('remote-service-name')(hookenv.remote_service_name._wrapped) diff --git a/ceph-radosgw/hooks/charmhelpers/cli/host.py b/ceph-radosgw/hooks/charmhelpers/cli/host.py new file mode 100644 index 00000000..40396849 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/cli/host.py @@ -0,0 +1,29 @@ +# Copyright 2014-2015 Canonical Limited. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import cmdline +from charmhelpers.core import host + + +@cmdline.subcommand() +def mounts(): + "List mounts" + return host.mounts() + + +@cmdline.subcommand_builder('service', description="Control system services") +def service(subparser): + subparser.add_argument("action", help="The action to perform (start, stop, etc...)") + subparser.add_argument("service_name", help="Name of the service to control") + return host.service diff --git a/ceph-radosgw/hooks/charmhelpers/cli/unitdata.py b/ceph-radosgw/hooks/charmhelpers/cli/unitdata.py new file mode 100644 index 00000000..acce846f --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/cli/unitdata.py @@ -0,0 +1,46 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import cmdline +from charmhelpers.core import unitdata + + +@cmdline.subcommand_builder('unitdata', description="Store and retrieve data") +def unitdata_cmd(subparser): + nested = subparser.add_subparsers() + + get_cmd = nested.add_parser('get', help='Retrieve data') + get_cmd.add_argument('key', help='Key to retrieve the value of') + get_cmd.set_defaults(action='get', value=None) + + getrange_cmd = nested.add_parser('getrange', help='Retrieve multiple data') + getrange_cmd.add_argument('key', metavar='prefix', + help='Prefix of the keys to retrieve') + getrange_cmd.set_defaults(action='getrange', value=None) + + set_cmd = nested.add_parser('set', help='Store data') + set_cmd.add_argument('key', help='Key to set') + set_cmd.add_argument('value', help='Value to store') + set_cmd.set_defaults(action='set') + + def _unitdata_cmd(action, key, value): + if action == 'get': + return unitdata.kv().get(key) + elif action == 'getrange': + return unitdata.kv().getrange(key) + elif action == 'set': + unitdata.kv().set(key, value) + unitdata.kv().flush() + return '' + return _unitdata_cmd diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/__init__.py b/ceph-radosgw/hooks/charmhelpers/contrib/__init__.py new file mode 100644 index 00000000..d7567b86 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/charmsupport/__init__.py b/ceph-radosgw/hooks/charmhelpers/contrib/charmsupport/__init__.py new file mode 100644 index 00000000..d7567b86 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/charmsupport/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/charmsupport/nrpe.py b/ceph-radosgw/hooks/charmhelpers/contrib/charmsupport/nrpe.py new file mode 100644 index 00000000..ac002bc6 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/charmsupport/nrpe.py @@ -0,0 +1,576 @@ +# Copyright 2012-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Compatibility with the nrpe-external-master charm""" +# +# Authors: +# Matthew Wedgwood + +import glob +import grp +import json +import os +import pwd +import re +import shlex +import shutil +import subprocess +import yaml + +from charmhelpers.core.hookenv import ( + application_name, + config, + ERROR, + hook_name, + local_unit, + log, + relation_get, + relation_ids, + relation_set, + relations_of_type, +) + +from charmhelpers.core.host import service +from charmhelpers.core import host + +# This module adds compatibility with the nrpe-external-master and plain nrpe +# subordinate charms. To use it in your charm: +# +# 1. Update metadata.yaml +# +# provides: +# (...) +# nrpe-external-master: +# interface: nrpe-external-master +# scope: container +# +# and/or +# +# provides: +# (...) +# local-monitors: +# interface: local-monitors +# scope: container + +# +# 2. Add the following to config.yaml +# +# nagios_context: +# default: "juju" +# type: string +# description: | +# Used by the nrpe subordinate charms. +# A string that will be prepended to instance name to set the host name +# in nagios. So for instance the hostname would be something like: +# juju-myservice-0 +# If you're running multiple environments with the same services in them +# this allows you to differentiate between them. 
+# nagios_servicegroups: +# default: "" +# type: string +# description: | +# A comma-separated list of nagios servicegroups. +# If left empty, the nagios_context will be used as the servicegroup +# +# 3. Add custom checks (Nagios plugins) to files/nrpe-external-master +# +# 4. Update your hooks.py with something like this: +# +# from charmsupport.nrpe import NRPE +# (...) +# def update_nrpe_config(): +# nrpe_compat = NRPE() +# nrpe_compat.add_check( +# shortname = "myservice", +# description = "Check MyService", +# check_cmd = "check_http -w 2 -c 10 http://localhost" +# ) +# nrpe_compat.add_check( +# "myservice_other", +# "Check for widget failures", +# check_cmd = "/srv/myapp/scripts/widget_check" +# ) +# nrpe_compat.write() +# +# def config_changed(): +# (...) +# update_nrpe_config() +# +# def nrpe_external_master_relation_changed(): +# update_nrpe_config() +# +# def local_monitors_relation_changed(): +# update_nrpe_config() +# +# 4.a If your charm is a subordinate charm set primary=False +# +# from charmsupport.nrpe import NRPE +# (...) +# def update_nrpe_config(): +# nrpe_compat = NRPE(primary=False) +# +# 5. ln -s hooks.py nrpe-external-master-relation-changed +# ln -s hooks.py local-monitors-relation-changed + + +class CheckException(Exception): + pass + + +class Check(object): + shortname_re = '[A-Za-z0-9-_.@]+$' + service_template = (""" +#--------------------------------------------------- +# This file is Juju managed +#--------------------------------------------------- +define service {{ + use active-service + host_name {nagios_hostname} + service_description {nagios_hostname}[{shortname}] """ + """{description} + check_command check_nrpe!{command} + servicegroups {nagios_servicegroup} +{service_config_overrides} +}} +""") + + def __init__(self, shortname, description, check_cmd, max_check_attempts=None): + super(Check, self).__init__() + # XXX: could be better to calculate this from the service name + if not re.match(self.shortname_re, shortname): + raise CheckException("shortname must match {}".format( + Check.shortname_re)) + self.shortname = shortname + self.command = "check_{}".format(shortname) + # Note: a set of invalid characters is defined by the + # Nagios server config + # The default is: illegal_object_name_chars=`~!$%^&*"|'<>?,()= + self.description = description + self.check_cmd = self._locate_cmd(check_cmd) + self.max_check_attempts = max_check_attempts + + def _get_check_filename(self): + return os.path.join(NRPE.nrpe_confdir, '{}.cfg'.format(self.command)) + + def _get_service_filename(self, hostname): + return os.path.join(NRPE.nagios_exportdir, + 'service__{}_{}.cfg'.format(hostname, self.command)) + + def _locate_cmd(self, check_cmd): + search_path = ( + '/usr/lib/nagios/plugins', + '/usr/local/lib/nagios/plugins', + ) + parts = shlex.split(check_cmd) + for path in search_path: + if os.path.exists(os.path.join(path, parts[0])): + command = os.path.join(path, parts[0]) + if len(parts) > 1: + safe_args = [shlex.quote(arg) for arg in parts[1:]] + command += " " + " ".join(safe_args) + return command + log('Check command not found: {}'.format(parts[0])) + return '' + + def _remove_service_files(self): + if not os.path.exists(NRPE.nagios_exportdir): + return + for f in os.listdir(NRPE.nagios_exportdir): + if f.endswith('_{}.cfg'.format(self.command)): + os.remove(os.path.join(NRPE.nagios_exportdir, f)) + + def remove(self, hostname): + nrpe_check_file = self._get_check_filename() + if os.path.exists(nrpe_check_file): + os.remove(nrpe_check_file) + 
self._remove_service_files() + + def write(self, nagios_context, hostname, nagios_servicegroups): + nrpe_check_file = self._get_check_filename() + with open(nrpe_check_file, 'w') as nrpe_check_config: + nrpe_check_config.write("# check {}\n".format(self.shortname)) + if nagios_servicegroups: + nrpe_check_config.write( + "# The following header was added automatically by juju\n") + nrpe_check_config.write( + "# Modifying it will affect nagios monitoring and alerting\n") + nrpe_check_config.write( + "# servicegroups: {}\n".format(nagios_servicegroups)) + nrpe_check_config.write("command[{}]={}\n".format( + self.command, self.check_cmd)) + + if not os.path.exists(NRPE.nagios_exportdir): + log('Not writing service config as {} is not accessible'.format( + NRPE.nagios_exportdir)) + else: + self.write_service_config(nagios_context, hostname, + nagios_servicegroups) + + def write_service_config(self, nagios_context, hostname, + nagios_servicegroups): + self._remove_service_files() + + if self.max_check_attempts: + service_config_overrides = ' max_check_attempts {}'.format( + self.max_check_attempts + ) # Note indentation is here rather than in the template to avoid trailing spaces + else: + service_config_overrides = '' # empty string to avoid printing 'None' + templ_vars = { + 'nagios_hostname': hostname, + 'nagios_servicegroup': nagios_servicegroups, + 'description': self.description, + 'shortname': self.shortname, + 'command': self.command, + 'service_config_overrides': service_config_overrides, + } + nrpe_service_text = Check.service_template.format(**templ_vars) + nrpe_service_file = self._get_service_filename(hostname) + with open(nrpe_service_file, 'w') as nrpe_service_config: + nrpe_service_config.write(str(nrpe_service_text)) + + def run(self): + subprocess.call(self.check_cmd) + + +class NRPE(object): + nagios_logdir = '/var/log/nagios' + nagios_exportdir = '/var/lib/nagios/export' + nrpe_confdir = '/etc/nagios/nrpe.d' + homedir = '/var/lib/nagios' # home dir provided by nagios-nrpe-server + + def __init__(self, hostname=None, primary=True): + super(NRPE, self).__init__() + self.config = config() + self.primary = primary + self.nagios_context = self.config['nagios_context'] + if 'nagios_servicegroups' in self.config and self.config['nagios_servicegroups']: + self.nagios_servicegroups = self.config['nagios_servicegroups'] + else: + self.nagios_servicegroups = self.nagios_context + self.unit_name = local_unit().replace('/', '-') + if hostname: + self.hostname = hostname + else: + nagios_hostname = get_nagios_hostname() + if nagios_hostname: + self.hostname = nagios_hostname + else: + self.hostname = "{}-{}".format(self.nagios_context, self.unit_name) + self.checks = [] + # Iff in an nrpe-external-master relation hook, set primary status + relation = relation_ids('nrpe-external-master') + if relation: + log("Setting charm primary status {}".format(primary)) + for rid in relation: + relation_set(relation_id=rid, relation_settings={'primary': self.primary}) + self.remove_check_queue = set() + + @classmethod + def does_nrpe_conf_dir_exist(cls): + """Return True if th nrpe_confdif directory exists.""" + return os.path.isdir(cls.nrpe_confdir) + + def add_check(self, *args, **kwargs): + shortname = None + if kwargs.get('shortname') is None: + if len(args) > 0: + shortname = args[0] + else: + shortname = kwargs['shortname'] + + self.checks.append(Check(*args, **kwargs)) + try: + self.remove_check_queue.remove(shortname) + except KeyError: + pass + + def remove_check(self, *args, **kwargs): + if 
kwargs.get('shortname') is None:
+            raise ValueError('shortname of check must be specified')
+
+        # Use sensible defaults if they're not specified - these are not
+        # actually used during removal, but they're required for constructing
+        # the Check object; check_disk is chosen because it's part of the
+        # nagios-plugins-basic package.
+        if kwargs.get('check_cmd') is None:
+            kwargs['check_cmd'] = 'check_disk'
+        if kwargs.get('description') is None:
+            kwargs['description'] = ''
+
+        check = Check(*args, **kwargs)
+        check.remove(self.hostname)
+        self.remove_check_queue.add(kwargs['shortname'])
+
+    def write(self):
+        try:
+            nagios_uid = pwd.getpwnam('nagios').pw_uid
+            nagios_gid = grp.getgrnam('nagios').gr_gid
+        except Exception:
+            log("Nagios user not set up, nrpe checks not updated")
+            return
+
+        if not os.path.exists(NRPE.nagios_logdir):
+            os.mkdir(NRPE.nagios_logdir)
+            os.chown(NRPE.nagios_logdir, nagios_uid, nagios_gid)
+
+        nrpe_monitors = {}
+        monitors = {"monitors": {"remote": {"nrpe": nrpe_monitors}}}
+
+        # check that the charm can write to the conf dir. If not, then nagios
+        # probably isn't installed, and we can defer.
+        if not self.does_nrpe_conf_dir_exist():
+            return
+
+        for nrpecheck in self.checks:
+            nrpecheck.write(self.nagios_context, self.hostname,
+                            self.nagios_servicegroups)
+            nrpe_monitors[nrpecheck.shortname] = {
+                "command": nrpecheck.command,
+            }
+            # If we were passed max_check_attempts, add that to the relation data
+            if nrpecheck.max_check_attempts is not None:
+                nrpe_monitors[nrpecheck.shortname]['max_check_attempts'] = nrpecheck.max_check_attempts
+
+        # update-status hooks are configured to fire every 5 minutes by
+        # default. When nagios-nrpe-server is restarted, the nagios server
+        # reports checks failing, causing unnecessary alerts. Let's not
+        # restart on update-status hooks.
+        if not hook_name() == 'update-status':
+            service('restart', 'nagios-nrpe-server')
+
+        monitor_ids = relation_ids("local-monitors") + \
+            relation_ids("nrpe-external-master")
+        for rid in monitor_ids:
+            reldata = relation_get(unit=local_unit(), rid=rid)
+            if 'monitors' in reldata:
+                # update the existing set of monitors with the new data
+                old_monitors = yaml.safe_load(reldata['monitors'])
+                old_nrpe_monitors = old_monitors['monitors']['remote']['nrpe']
+                # remove keys that are in the remove_check_queue
+                old_nrpe_monitors = {k: v for k, v in old_nrpe_monitors.items()
+                                     if k not in self.remove_check_queue}
+                # update/add nrpe_monitors
+                old_nrpe_monitors.update(nrpe_monitors)
+                old_monitors['monitors']['remote']['nrpe'] = old_nrpe_monitors
+                # write back to the relation
+                relation_set(relation_id=rid, monitors=yaml.dump(old_monitors))
+            else:
+                # write a brand new set of monitors, since none exist yet
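+                # e.g. (illustrative) the serialized payload has the shape:
+                #   monitors:
+                #     remote:
+                #       nrpe:
+                #         myservice: {command: check_myservice}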
+                relation_set(relation_id=rid, monitors=yaml.dump(monitors))
+
+        self.remove_check_queue.clear()
+
+
+def get_nagios_hostcontext(relation_name='nrpe-external-master'):
+    """
+    Query relation with nrpe subordinate, return the nagios_host_context
+
+    :param str relation_name: Name of relation nrpe sub joined to
+    """
+    for rel in relations_of_type(relation_name):
+        if 'nagios_host_context' in rel:
+            return rel['nagios_host_context']
+
+
+def get_nagios_hostname(relation_name='nrpe-external-master'):
+    """
+    Query relation with nrpe subordinate, return the nagios_hostname
+
+    :param str relation_name: Name of relation nrpe sub joined to
+    """
+    for rel in relations_of_type(relation_name):
+        if 'nagios_hostname' in rel:
+            return rel['nagios_hostname']
+
+
+def get_nagios_unit_name(relation_name='nrpe-external-master'):
+    """
+    Return the nagios unit name prepended with host_context if needed
+
+    :param str relation_name: Name of relation nrpe sub joined to
+    """
+    host_context = get_nagios_hostcontext(relation_name)
+    if host_context:
+        unit = "%s:%s" % (host_context, local_unit())
+    else:
+        unit = local_unit()
+    return unit
+
+
+def add_init_service_checks(nrpe, services, unit_name, immediate_check=True):
+    """
+    Add checks for each service in list
+
+    :param NRPE nrpe: NRPE object to add check to
+    :param list services: List of services to check
+    :param str unit_name: Unit name to use in check description
+    :param bool immediate_check: For sysv init, run the service check immediately
+    """
+    # check_haproxy is redundant in the presence of check_crm; see LP Bug#1880601
+    # for details. Just remove check_haproxy if haproxy is added as an lsb
+    # resource in hacluster.
+    for rid in relation_ids("ha"):
+        ha_resources = relation_get("json_resources", rid=rid, unit=local_unit())
+        if ha_resources:
+            try:
+                ha_resources_parsed = json.loads(ha_resources)
+            except ValueError as e:
+                log('Could not parse JSON from ha resources. {}'.format(e), level=ERROR)
+                raise
+            if "lsb:haproxy" in ha_resources_parsed.values():
+                if "haproxy" in services:
+                    log("removed check_haproxy. This service will be monitored by check_crm")
+                    services.remove("haproxy")
+    for svc in services:
+        # Don't add a check for these services from neutron-gateway
+        if svc in ['ext-port', 'os-charm-phy-nic-mtu']:
+            continue
+
+        upstart_init = '/etc/init/%s.conf' % svc
+        sysv_init = '/etc/init.d/%s' % svc
+
+        if host.init_is_systemd(service_name=svc):
+            nrpe.add_check(
+                shortname=svc,
+                description='process check {%s}' % unit_name,
+                check_cmd='check_systemd.py %s' % svc
+            )
+        elif os.path.exists(upstart_init):
+            nrpe.add_check(
+                shortname=svc,
+                description='process check {%s}' % unit_name,
+                check_cmd='check_upstart_job %s' % svc
+            )
+        elif os.path.exists(sysv_init):
+            cronpath = '/etc/cron.d/nagios-service-check-%s' % svc
+            checkpath = '%s/service-check-%s.txt' % (nrpe.homedir, svc)
+            croncmd = (
+                '/usr/local/lib/nagios/plugins/check_exit_status.pl '
+                '-e -s /etc/init.d/%s status' % svc
+            )
+            cron_file = '*/5 * * * * root %s > %s\n' % (croncmd, checkpath)
+            f = open(cronpath, 'w')
+            f.write(cron_file)
+            f.close()
+            nrpe.add_check(
+                shortname=svc,
+                description='service check {%s}' % unit_name,
+                check_cmd='check_status_file.py -f %s' % checkpath,
+            )
+            # if /var/lib/nagios doesn't exist open(checkpath, 'w') will fail
+            # (LP: #1670223).
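+            # e.g. (illustrative) for a service 'foo' the cron entry written
+            # above reads:
+            #   */5 * * * * root /usr/local/lib/nagios/plugins/check_exit_status.pl -e -s /etc/init.d/foo status > /var/lib/nagios/service-check-foo.txt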
+ if immediate_check and os.path.isdir(nrpe.homedir): + f = open(checkpath, 'w') + subprocess.call( + croncmd.split(), + stdout=f, + stderr=subprocess.STDOUT + ) + f.close() + os.chmod(checkpath, 0o644) + + +def copy_nrpe_checks(nrpe_files_dir=None): + """ + Copy the nrpe checks into place + + """ + NAGIOS_PLUGINS = '/usr/local/lib/nagios/plugins' + if nrpe_files_dir is None: + # determine if "charmhelpers" is in CHARMDIR or CHARMDIR/hooks + for segment in ['.', 'hooks']: + nrpe_files_dir = os.path.abspath(os.path.join( + os.getenv('CHARM_DIR'), + segment, + 'charmhelpers', + 'contrib', + 'openstack', + 'files')) + if os.path.isdir(nrpe_files_dir): + break + else: + raise RuntimeError("Couldn't find charmhelpers directory") + if not os.path.exists(NAGIOS_PLUGINS): + os.makedirs(NAGIOS_PLUGINS) + for fname in glob.glob(os.path.join(nrpe_files_dir, "check_*")): + if os.path.isfile(fname): + shutil.copy2(fname, + os.path.join(NAGIOS_PLUGINS, os.path.basename(fname))) + + +def add_haproxy_checks(nrpe, unit_name): + """ + Add checks for each service in list + + :param NRPE nrpe: NRPE object to add check to + :param str unit_name: Unit name to use in check description + """ + nrpe.add_check( + shortname='haproxy_servers', + description='Check HAProxy {%s}' % unit_name, + check_cmd='check_haproxy.sh') + nrpe.add_check( + shortname='haproxy_queue', + description='Check HAProxy queue depth {%s}' % unit_name, + check_cmd='check_haproxy_queue_depth.sh') + + +def remove_deprecated_check(nrpe, deprecated_services): + """ + Remove checks for deprecated services in list + + :param nrpe: NRPE object to remove check from + :type nrpe: NRPE + :param deprecated_services: List of deprecated services that are removed + :type deprecated_services: list + """ + for dep_svc in deprecated_services: + log('Deprecated service: {}'.format(dep_svc)) + nrpe.remove_check(shortname=dep_svc) + + +def add_deferred_restarts_check(nrpe): + """ + Add NRPE check for services with deferred restarts. + + :param NRPE nrpe: NRPE object to add check to + """ + unit_name = local_unit().replace('/', '-') + shortname = unit_name + '_deferred_restarts' + check_cmd = 'check_deferred_restarts.py --application {}'.format( + application_name()) + + log('Adding deferred restarts nrpe check: {}'.format(shortname)) + nrpe.add_check( + shortname=shortname, + description='Check deferred service restarts {}'.format(unit_name), + check_cmd=check_cmd) + + +def remove_deferred_restarts_check(nrpe): + """ + Remove NRPE check for services with deferred service restarts. + + :param NRPE nrpe: NRPE object to remove check from + """ + unit_name = local_unit().replace('/', '-') + shortname = unit_name + '_deferred_restarts' + check_cmd = 'check_deferred_restarts.py --application {}'.format( + application_name()) + + log('Removing deferred restarts nrpe check: {}'.format(shortname)) + nrpe.remove_check( + shortname=shortname, + description='Check deferred service restarts {}'.format(unit_name), + check_cmd=check_cmd) diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/charmsupport/volumes.py b/ceph-radosgw/hooks/charmhelpers/contrib/charmsupport/volumes.py new file mode 100644 index 00000000..f7c6fbdc --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/charmsupport/volumes.py @@ -0,0 +1,173 @@ +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+'''
+Functions for managing volumes in juju units. One volume is supported per unit.
+Subordinates may have their own storage, provided it is on its own partition.
+
+Configuration stanzas::
+
+  volume-ephemeral:
+    type: boolean
+    default: true
+    description: >
+      If false, a volume is mounted as specified in "volume-map"
+      If true, ephemeral storage will be used, meaning that log data
+      will only exist as long as the machine. YOU HAVE BEEN WARNED.
+  volume-map:
+    type: string
+    default: {}
+    description: >
+      YAML map of units to device names, e.g:
+        "{ rsyslog/0: /dev/vdb, rsyslog/1: /dev/vdb }"
+      Service units will raise a configure-error if volume-ephemeral
+      is 'true' and no volume-map value is set. Use 'juju set' to set a
+      value and 'juju resolved' to complete configuration.
+
+Usage::
+
+    from charmsupport.volumes import configure_volume, VolumeConfigurationError
+    from charmsupport.hookenv import log, ERROR
+    def pre_mount_hook():
+        stop_service('myservice')
+    def post_mount_hook():
+        start_service('myservice')
+
+    if __name__ == '__main__':
+        try:
+            configure_volume(before_change=pre_mount_hook,
+                             after_change=post_mount_hook)
+        except VolumeConfigurationError:
+            log('Storage could not be configured', ERROR)
+
+'''
+
+# XXX: Known limitations
+# - fstab is neither consulted nor updated
+
+import os
+from charmhelpers.core import hookenv
+from charmhelpers.core import host
+import yaml
+
+
+MOUNT_BASE = '/srv/juju/volumes'
+
+
+class VolumeConfigurationError(Exception):
+    '''Volume configuration data is missing or invalid'''
+    pass
+
+
+def get_config():
+    '''Gather and sanity-check volume configuration data'''
+    volume_config = {}
+    config = hookenv.config()
+
+    errors = False
+
+    if config.get('volume-ephemeral') in (True, 'True', 'true', 'Yes', 'yes'):
+        volume_config['ephemeral'] = True
+    else:
+        volume_config['ephemeral'] = False
+
+    volume_map = {}
+    try:
+        volume_map = yaml.safe_load(config.get('volume-map', '{}'))
+    except yaml.YAMLError as e:
+        hookenv.log("Error parsing YAML volume-map: {}".format(e),
+                    hookenv.ERROR)
+        errors = True
+    if volume_map is None:
+        # probably an empty string
+        volume_map = {}
+    elif not isinstance(volume_map, dict):
+        hookenv.log("Volume-map should be a dictionary, not {}".format(
+            type(volume_map)))
+        errors = True
+
+    volume_config['device'] = volume_map.get(os.environ['JUJU_UNIT_NAME'])
+    if volume_config['device'] and volume_config['ephemeral']:
+        # asked for ephemeral storage but also defined a volume ID
+        hookenv.log('A volume is defined for this unit, but ephemeral '
+                    'storage was requested', hookenv.ERROR)
+        errors = True
+    elif not volume_config['device'] and not volume_config['ephemeral']:
+        # asked for permanent storage but did not define volume ID
+        hookenv.log('Ephemeral storage was requested, but there is no volume '
+                    'defined for this unit.', hookenv.ERROR)
+        errors = True
+
+    unit_mount_name = hookenv.local_unit().replace('/', '-')
+    volume_config['mountpoint'] = os.path.join(MOUNT_BASE, unit_mount_name)
+
+    if errors:
+        return None
+    return volume_config
+
+
+def mount_volume(config):
+    if os.path.exists(config['mountpoint']):
+
if not os.path.isdir(config['mountpoint']): + hookenv.log('Not a directory: {}'.format(config['mountpoint'])) + raise VolumeConfigurationError() + else: + host.mkdir(config['mountpoint']) + if os.path.ismount(config['mountpoint']): + unmount_volume(config) + if not host.mount(config['device'], config['mountpoint'], persist=True): + raise VolumeConfigurationError() + + +def unmount_volume(config): + if os.path.ismount(config['mountpoint']): + if not host.umount(config['mountpoint'], persist=True): + raise VolumeConfigurationError() + + +def managed_mounts(): + '''List of all mounted managed volumes''' + return filter(lambda mount: mount[0].startswith(MOUNT_BASE), host.mounts()) + + +def configure_volume(before_change=lambda: None, after_change=lambda: None): + '''Set up storage (or don't) according to the charm's volume configuration. + Returns the mount point or "ephemeral". before_change and after_change + are optional functions to be called if the volume configuration changes. + ''' + + config = get_config() + if not config: + hookenv.log('Failed to read volume configuration', hookenv.CRITICAL) + raise VolumeConfigurationError() + + if config['ephemeral']: + if os.path.ismount(config['mountpoint']): + before_change() + unmount_volume(config) + after_change() + return 'ephemeral' + else: + # persistent storage + if os.path.ismount(config['mountpoint']): + mounts = dict(managed_mounts()) + if mounts.get(config['mountpoint']) != config['device']: + before_change() + unmount_volume(config) + mount_volume(config) + after_change() + else: + before_change() + mount_volume(config) + after_change() + return config['mountpoint'] diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hahelpers/__init__.py b/ceph-radosgw/hooks/charmhelpers/contrib/hahelpers/__init__.py new file mode 100644 index 00000000..d7567b86 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hahelpers/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hahelpers/apache.py b/ceph-radosgw/hooks/charmhelpers/contrib/hahelpers/apache.py new file mode 100644 index 00000000..a54702bc --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hahelpers/apache.py @@ -0,0 +1,90 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# Copyright 2012 Canonical Ltd. 
+# +# This file is sourced from lp:openstack-charm-helpers +# +# Authors: +# James Page +# Adam Gandelman +# + +import os + +from charmhelpers.core import host +from charmhelpers.core.hookenv import ( + config as config_get, + relation_get, + relation_ids, + related_units as relation_list, + log, + INFO, +) + +# This file contains the CA cert from the charms ssl_ca configuration +# option, in future the file name should be updated reflect that. +CONFIG_CA_CERT_FILE = 'keystone_juju_ca_cert' + + +def get_cert(cn=None): + # TODO: deal with multiple https endpoints via charm config + cert = config_get('ssl_cert') + key = config_get('ssl_key') + if not (cert and key): + log("Inspecting identity-service relations for SSL certificate.", + level=INFO) + cert = key = None + if cn: + ssl_cert_attr = 'ssl_cert_{}'.format(cn) + ssl_key_attr = 'ssl_key_{}'.format(cn) + else: + ssl_cert_attr = 'ssl_cert' + ssl_key_attr = 'ssl_key' + for r_id in relation_ids('identity-service'): + for unit in relation_list(r_id): + if not cert: + cert = relation_get(ssl_cert_attr, + rid=r_id, unit=unit) + if not key: + key = relation_get(ssl_key_attr, + rid=r_id, unit=unit) + return (cert, key) + + +def get_ca_cert(): + ca_cert = config_get('ssl_ca') + if ca_cert is None: + log("Inspecting identity-service relations for CA SSL certificate.", + level=INFO) + for r_id in (relation_ids('identity-service') + + relation_ids('identity-credentials')): + for unit in relation_list(r_id): + if ca_cert is None: + ca_cert = relation_get('ca_cert', + rid=r_id, unit=unit) + return ca_cert + + +def retrieve_ca_cert(cert_file): + cert = None + if os.path.isfile(cert_file): + with open(cert_file, 'rb') as crt: + cert = crt.read() + return cert + + +def install_ca_cert(ca_cert): + host.install_ca_cert(ca_cert, CONFIG_CA_CERT_FILE) diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hahelpers/cluster.py b/ceph-radosgw/hooks/charmhelpers/contrib/hahelpers/cluster.py new file mode 100644 index 00000000..7b309256 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hahelpers/cluster.py @@ -0,0 +1,455 @@ +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# Copyright 2012 Canonical Ltd. +# +# Authors: +# James Page +# Adam Gandelman +# + +""" +Helpers for clustering and determining "cluster leadership" and other +clustering-related helpers. 
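+
+e.g. (illustrative; the resource name and the helper called are placeholders)::
+
+    if is_elected_leader('res_rgw_vip'):
+        configure_shared_resource()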
+""" + +import functools +import subprocess +import os +import time + +from socket import gethostname as get_unit_hostname + +from charmhelpers.core.hookenv import ( + log, + relation_ids, + related_units as relation_list, + relation_get, + config as config_get, + INFO, + DEBUG, + WARNING, + unit_get, + is_leader as juju_is_leader, + status_set, +) +from charmhelpers.core.host import ( + modulo_distribution, +) +from charmhelpers.core.decorators import ( + retry_on_exception, +) +from charmhelpers.core.strutils import ( + bool_from_string, +) + +DC_RESOURCE_NAME = 'DC' + + +class HAIncompleteConfig(Exception): + pass + + +class HAIncorrectConfig(Exception): + pass + + +class CRMResourceNotFound(Exception): + pass + + +class CRMDCNotFound(Exception): + pass + + +def is_elected_leader(resource): + """ + Returns True if the charm executing this is the elected cluster leader. + + It relies on two mechanisms to determine leadership: + 1. If juju is sufficiently new and leadership election is supported, + the is_leader command will be used. + 2. If the charm is part of a corosync cluster, call corosync to + determine leadership. + 3. If the charm is not part of a corosync cluster, the leader is + determined as being "the alive unit with the lowest unit number". In + other words, the oldest surviving unit. + """ + try: + return juju_is_leader() + except NotImplementedError: + log('Juju leadership election feature not enabled' + ', using fallback support', + level=WARNING) + + if is_clustered(): + if not is_crm_leader(resource): + log('Deferring action to CRM leader.', level=INFO) + return False + else: + peers = peer_units() + if peers and not oldest_peer(peers): + log('Deferring action to oldest service unit.', level=INFO) + return False + return True + + +def is_clustered(): + for r_id in (relation_ids('ha') or []): + for unit in (relation_list(r_id) or []): + clustered = relation_get('clustered', + rid=r_id, + unit=unit) + if clustered: + return True + return False + + +def is_crm_dc(): + """ + Determine leadership by querying the pacemaker Designated Controller + """ + cmd = ['crm', 'status'] + try: + status = subprocess.check_output( + cmd, stderr=subprocess.STDOUT).decode('utf-8') + except subprocess.CalledProcessError as ex: + raise CRMDCNotFound(str(ex)) + + current_dc = '' + for line in status.split('\n'): + if line.startswith('Current DC'): + # Current DC: juju-lytrusty-machine-2 (168108163) + # - partition with quorum + current_dc = line.split(':')[1].split()[0] + if current_dc == get_unit_hostname(): + return True + elif current_dc == 'NONE': + raise CRMDCNotFound('Current DC: NONE') + + return False + + +@retry_on_exception(5, base_delay=2, + exc_type=(CRMResourceNotFound, CRMDCNotFound)) +def is_crm_leader(resource, retry=False): + """ + Returns True if the charm calling this is the elected corosync leader, + as returned by calling the external "crm" command. + + We allow this operation to be retried to avoid the possibility of getting a + false negative. See LP #1396246 for more info. 
+    """
+    if resource == DC_RESOURCE_NAME:
+        return is_crm_dc()
+    cmd = ['crm', 'resource', 'show', resource]
+    try:
+        status = subprocess.check_output(
+            cmd, stderr=subprocess.STDOUT).decode('utf-8')
+    except subprocess.CalledProcessError:
+        status = None
+
+    if status and get_unit_hostname() in status:
+        return True
+
+    if status and "resource %s is NOT running" % (resource) in status:
+        raise CRMResourceNotFound("CRM resource %s not found" % (resource))
+
+    return False
+
+
+def is_leader(resource):
+    log("is_leader is deprecated. Please consider using is_crm_leader "
+        "instead.", level=WARNING)
+    return is_crm_leader(resource)
+
+
+def peer_units(peer_relation="cluster"):
+    peers = []
+    for r_id in (relation_ids(peer_relation) or []):
+        for unit in (relation_list(r_id) or []):
+            peers.append(unit)
+    return peers
+
+
+def peer_ips(peer_relation='cluster', addr_key='private-address'):
+    '''Return a dict of peers and their private-address'''
+    peers = {}
+    for r_id in relation_ids(peer_relation):
+        for unit in relation_list(r_id):
+            peers[unit] = relation_get(addr_key, rid=r_id, unit=unit)
+    return peers
+
+
+def oldest_peer(peers):
+    """Determines who the oldest peer is by comparing unit numbers."""
+    local_unit_no = int(os.getenv('JUJU_UNIT_NAME').split('/')[1])
+    for peer in peers:
+        remote_unit_no = int(peer.split('/')[1])
+        if remote_unit_no < local_unit_no:
+            return False
+    return True
+
+
+def eligible_leader(resource):
+    log("eligible_leader is deprecated. Please consider using "
+        "is_elected_leader instead.", level=WARNING)
+    return is_elected_leader(resource)
+
+
+def https():
+    '''
+    Determines whether enough data has been provided in configuration
+    or relation data to configure HTTPS.
+
+    returns: boolean
+    '''
+    use_https = config_get('use-https')
+    if use_https and bool_from_string(use_https):
+        return True
+    if config_get('ssl_cert') and config_get('ssl_key'):
+        return True
+    # Local import to avoid circular dependency.
+    import charmhelpers.contrib.openstack.cert_utils as cert_utils
+    if (
+        cert_utils.get_certificate_request() and not
+        cert_utils.get_requests_for_local_unit("certificates")
+    ):
+        return False
+    for r_id in relation_ids('certificates'):
+        for unit in relation_list(r_id):
+            ca = relation_get('ca', rid=r_id, unit=unit)
+            if ca:
+                return True
+    for r_id in relation_ids('identity-service'):
+        for unit in relation_list(r_id):
+            # TODO - needs fixing for new helper as ssl_cert/key suffixes with CN
+            rel_state = [
+                relation_get('https_keystone', rid=r_id, unit=unit),
+                relation_get('ca_cert', rid=r_id, unit=unit),
+            ]
+            # NOTE: works around (LP: #1203241)
+            if (None not in rel_state) and ('' not in rel_state):
+                return True
+    return False
+
+
+def determine_api_port(public_port, singlenode_mode=False):
+    '''
+    Determine correct API server listening port based on
+    existence of HTTPS reverse proxy and/or haproxy.
+
+    public_port: int: standard public port for given service
+
+    singlenode_mode: boolean: Shuffle ports when only a single unit is present
+
+    returns: int: the correct listening port for the API service
+    '''
+    i = 0
+    if singlenode_mode:
+        i += 1
+    elif len(peer_units()) > 0 or is_clustered():
+        i += 1
+    if https():
+        i += 1
+    return public_port - (i * 10)
+
+
+def determine_apache_port(public_port, singlenode_mode=False):
+    '''
+    Description: Determine correct apache listening port based on public IP +
+    state of the cluster.
+
+    public_port: int: standard public port for given service
+
+    singlenode_mode: boolean: Shuffle ports when only a single unit is present
+
+    returns: int: the correct listening port for the HAProxy service
+    '''
+    i = 0
+    if singlenode_mode:
+        i += 1
+    elif len(peer_units()) > 0 or is_clustered():
+        i += 1
+    return public_port - (i * 10)
+
+
+determine_apache_port_single = functools.partial(
+    determine_apache_port, singlenode_mode=True)
+
+
+def get_hacluster_config(exclude_keys=None):
+    '''
+    Obtains all relevant configuration from charm configuration required
+    for initiating a relation to hacluster:
+
+        ha-bindiface, ha-mcastport, vip, os-internal-hostname,
+        os-admin-hostname, os-public-hostname, os-access-hostname
+
+    param: exclude_keys: list of setting key(s) to be excluded.
+    returns: dict: A dict containing settings keyed by setting name.
+    raises: HAIncorrectConfig or HAIncompleteConfig if settings are missing
+            or incorrect.
+    '''
+    settings = ['ha-bindiface', 'ha-mcastport', 'vip', 'os-internal-hostname',
+                'os-admin-hostname', 'os-public-hostname', 'os-access-hostname']
+    conf = {}
+    for setting in settings:
+        if exclude_keys and setting in exclude_keys:
+            continue
+
+        conf[setting] = config_get(setting)
+
+    if not valid_hacluster_config():
+        raise HAIncorrectConfig('Insufficient or incorrect config data to '
+                                'configure hacluster.')
+    return conf
+
+
+def valid_hacluster_config():
+    '''
+    Check that either vip or dns-ha is set. If dns-ha then one of os-*-hostname
+    must be set.
+
+    Note: ha-bindiface and ha-mcastport both have defaults and will always
+    be set. We only care that either vip or dns-ha is set.
+
+    :returns: boolean: valid config returns true.
+    raises: HAIncorrectConfig if settings conflict.
+    raises: HAIncompleteConfig if settings are missing.
+    '''
+    vip = config_get('vip')
+    dns = config_get('dns-ha')
+    if not (bool(vip) ^ bool(dns)):
+        msg = ('HA: Either vip or dns-ha must be set but not both in order to '
+               'use high availability')
+        status_set('blocked', msg)
+        raise HAIncorrectConfig(msg)
+
+    # If dns-ha then one of os-*-hostname must be set
+    if dns:
+        dns_settings = ['os-internal-hostname', 'os-admin-hostname',
+                        'os-public-hostname', 'os-access-hostname']
+        # At this point it is unknown if one or all of the possible
+        # network spaces are in HA. Validate at least one is set which is
+        # the minimum required.
+        for setting in dns_settings:
+            if config_get(setting):
+                log('DNS HA: At least one hostname is set {}: {}'
+                    ''.format(setting, config_get(setting)),
+                    level=DEBUG)
+                return True
+
+        msg = ('DNS HA: At least one os-*-hostname(s) must be set to use '
+               'DNS HA')
+        status_set('blocked', msg)
+        raise HAIncompleteConfig(msg)
+
+    log('VIP HA: VIP is set {}'.format(vip), level=DEBUG)
+    return True
+
+
+def canonical_url(configs, vip_setting='vip'):
+    '''
+    Returns the correct HTTP URL to this host given the state of HTTPS
+    configuration and hacluster.
+
+    :configs    : OSTemplateRenderer: A config templating object to inspect
+                  for a complete https context.
+
+    :vip_setting:                str: Setting in charm config that specifies
+                                      VIP address.
+    '''
+    scheme = 'http'
+    if 'https' in configs.complete_contexts():
+        scheme = 'https'
+    if is_clustered():
+        addr = config_get(vip_setting)
+    else:
+        addr = unit_get('private-address')
+    return '%s://%s' % (scheme, addr)
+
+
+def distributed_wait(modulo=None, wait=None, operation_name='operation'):
+    ''' Distribute operations by waiting based on modulo_distribution
+
+    If modulo and/or wait are not set, check config_get for those values.
+ If config values are not set, default to modulo=3 and wait=30. + + :param modulo: int The modulo number creates the group distribution + :param wait: int The constant time wait value + :param operation_name: string Operation name for status message + i.e. 'restart' + :side effect: Calls config_get() + :side effect: Calls log() + :side effect: Calls status_set() + :side effect: Calls time.sleep() + ''' + if modulo is None: + modulo = config_get('modulo-nodes') or 3 + if wait is None: + wait = config_get('known-wait') or 30 + if juju_is_leader(): + # The leader should never wait + calculated_wait = 0 + else: + # non_zero_wait=True guarantees the non-leader who gets modulo 0 + # will still wait + calculated_wait = modulo_distribution(modulo=modulo, wait=wait, + non_zero_wait=True) + msg = "Waiting {} seconds for {} ...".format(calculated_wait, + operation_name) + log(msg, DEBUG) + status_set('maintenance', msg) + time.sleep(calculated_wait) + + +def get_managed_services_and_ports(services, external_ports, + external_services=None, + port_conv_f=determine_apache_port_single): + """Get the services and ports managed by this charm. + + Return only the services and corresponding ports that are managed by this + charm. This excludes haproxy when there is a relation with hacluster. This + is because this charm passes responsibility for stopping and starting + haproxy to hacluster. + + Similarly, if a relation with hacluster exists then the ports returned by + this method correspond to those managed by the apache server rather than + haproxy. + + :param services: List of services. + :type services: List[str] + :param external_ports: List of ports managed by external services. + :type external_ports: List[int] + :param external_services: List of services to be removed if ha relation is + present. + :type external_services: List[str] + :param port_conv_f: Function to apply to ports to calculate the ports + managed by services controlled by this charm. + :type port_convert_func: f() + :returns: A tuple containing a list of services first followed by a list of + ports. + :rtype: Tuple[List[str], List[int]] + """ + if external_services is None: + external_services = ['haproxy'] + if relation_ids('ha'): + for svc in external_services: + try: + services.remove(svc) + except ValueError: + pass + external_ports = [port_conv_f(p) for p in external_ports] + return services, external_ports diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/README.hardening.md b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/README.hardening.md new file mode 100644 index 00000000..91280c03 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/README.hardening.md @@ -0,0 +1,38 @@ +# Juju charm-helpers hardening library + +## Description + +This library provides multiple implementations of system and application +hardening that conform to the standards of http://hardening.io/. + +Current implementations include: + + * OS + * SSH + * MySQL + * Apache + +## Requirements + +* Juju Charms + +## Usage + +1. Synchronise this library into your charm and add the harden() decorator + (from contrib.hardening.harden) to any functions or methods you want to use + to trigger hardening of your application/system. + +2. Add a config option called 'harden' to your charm config.yaml and set it to + a space-delimited list of hardening modules you want to run e.g. "os ssh" + +3. 
Override any config defaults (contrib.hardening.defaults) by adding a file
+   called hardening.yaml to your charm root containing the name(s) of the
+   modules whose settings you want to override at root level and then any
+   settings with overrides e.g.
+
+       os:
+           general:
+               desktop_enable: True
+
+4. Now just run your charm as usual and hardening will be applied each time the
+   hook runs.
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/__init__.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/__init__.py
new file mode 100644
index 00000000..30a3e943
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/apache/__init__.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/apache/__init__.py
new file mode 100644
index 00000000..58bebd84
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/apache/__init__.py
@@ -0,0 +1,17 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from os import path
+
+TEMPLATES_DIR = path.join(path.dirname(__file__), 'templates')
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/apache/checks/__init__.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/apache/checks/__init__.py
new file mode 100644
index 00000000..3bc2ebd4
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/apache/checks/__init__.py
@@ -0,0 +1,29 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
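Reviewer note on the port arithmetic in cluster.py's determine_api_port()/determine_apache_port() earlier in this patch: every frontend layered in front of the API service (haproxy when clustered, apache when terminating TLS) shifts the actual listener down by 10 from the public port. A minimal standalone sketch of that logic; the port value and helper name are illustrative, not part of this patch:

```python
# Sketch of the determine_api_port() shifting scheme (illustrative only).

def api_listen_port(public_port, clustered=False, https=False):
    """Mirror of determine_api_port()'s arithmetic for illustration."""
    shift = 0
    if clustered:   # haproxy sits in front of the API
        shift += 1
    if https:       # apache/TLS terminator sits in front as well
        shift += 1
    return public_port - (shift * 10)


PUBLIC_PORT = 9292  # hypothetical public port for some API service

assert api_listen_port(PUBLIC_PORT) == 9292                        # direct
assert api_listen_port(PUBLIC_PORT, clustered=True) == 9282        # haproxy -> api
assert api_listen_port(PUBLIC_PORT, clustered=True,
                       https=True) == 9272                         # apache -> haproxy -> api
```

determine_apache_port() applies the same scheme minus the https shift, since apache itself is the layer being configured.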
+ +from charmhelpers.core.hookenv import ( + log, + DEBUG, +) +from charmhelpers.contrib.hardening.apache.checks import config + + +def run_apache_checks(): + log("Starting Apache hardening checks.", level=DEBUG) + checks = config.get_audits() + for check in checks: + log("Running '%s' check" % (check.__class__.__name__), level=DEBUG) + check.ensure_compliance() + + log("Apache hardening checks complete.", level=DEBUG) diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/apache/checks/config.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/apache/checks/config.py new file mode 100644 index 00000000..e81a5f0b --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/apache/checks/config.py @@ -0,0 +1,101 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import re +import subprocess + + +from charmhelpers.core.hookenv import ( + log, + INFO, +) +from charmhelpers.contrib.hardening.audits.file import ( + FilePermissionAudit, + DirectoryPermissionAudit, + NoReadWriteForOther, + TemplatedFile, + DeletedFile +) +from charmhelpers.contrib.hardening.audits.apache import DisabledModuleAudit +from charmhelpers.contrib.hardening.apache import TEMPLATES_DIR +from charmhelpers.contrib.hardening import utils + + +def get_audits(): + """Get Apache hardening config audits. + + :returns: dictionary of audits + """ + if subprocess.call(['which', 'apache2'], stdout=subprocess.PIPE) != 0: + log("Apache server does not appear to be installed on this node - " + "skipping apache hardening", level=INFO) + return [] + + context = ApacheConfContext() + settings = utils.get_settings('apache') + audits = [ + FilePermissionAudit(paths=os.path.join( + settings['common']['apache_dir'], 'apache2.conf'), + user='root', group='root', mode=0o0640), + + TemplatedFile(os.path.join(settings['common']['apache_dir'], + 'mods-available/alias.conf'), + context, + TEMPLATES_DIR, + mode=0o0640, + user='root', + service_actions=[{'service': 'apache2', + 'actions': ['restart']}]), + + TemplatedFile(os.path.join(settings['common']['apache_dir'], + 'conf-enabled/99-hardening.conf'), + context, + TEMPLATES_DIR, + mode=0o0640, + user='root', + service_actions=[{'service': 'apache2', + 'actions': ['restart']}]), + + DirectoryPermissionAudit(settings['common']['apache_dir'], + user='root', + group='root', + mode=0o0750), + + DisabledModuleAudit(settings['hardening']['modules_to_disable']), + + NoReadWriteForOther(settings['common']['apache_dir']), + + DeletedFile(['/var/www/html/index.html']) + ] + + return audits + + +class ApacheConfContext(object): + """Defines the set of key/value pairs to set in a apache config file. + + This context, when called, will return a dictionary containing the + key/value pairs of setting to specify in the + /etc/apache/conf-enabled/hardening.conf file. 
+ """ + def __call__(self): + settings = utils.get_settings('apache') + ctxt = settings['hardening'] + + out = subprocess.check_output(['apache2', '-v']).decode('utf-8') + ctxt['apache_version'] = re.search(r'.+version: Apache/(.+?)\s.+', + out).group(1) + ctxt['apache_icondir'] = '/usr/share/apache2/icons/' + return ctxt diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/apache/templates/99-hardening.conf b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/apache/templates/99-hardening.conf new file mode 100644 index 00000000..22b68041 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/apache/templates/99-hardening.conf @@ -0,0 +1,32 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### + + + + # http://httpd.apache.org/docs/2.4/upgrading.html + {% if apache_version > '2.2' -%} + Require all granted + {% else -%} + Order Allow,Deny + Deny from all + {% endif %} + + + + + Options -Indexes -FollowSymLinks + AllowOverride None + + + + Options -Indexes -FollowSymLinks + AllowOverride None + + +TraceEnable {{ traceenable }} +ServerTokens {{ servertokens }} + +SSLHonorCipherOrder {{ honor_cipher_order }} +SSLCipherSuite {{ cipher_suite }} diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/apache/templates/__init__.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/apache/templates/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/apache/templates/alias.conf b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/apache/templates/alias.conf new file mode 100644 index 00000000..e46a58a3 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/apache/templates/alias.conf @@ -0,0 +1,31 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### + + # + # Aliases: Add here as many aliases as you need (with no limit). The format is + # Alias fakename realname + # + # Note that if you include a trailing / on fakename then the server will + # require it to be present in the URL. So "/icons" isn't aliased in this + # example, only "/icons/". If the fakename is slash-terminated, then the + # realname must also be slash terminated, and if the fakename omits the + # trailing slash, the realname must also omit it. + # + # We include the /icons/ alias for FancyIndexed directory listings. If + # you do not use FancyIndexing, you may comment this out. + # + Alias /icons/ "{{ apache_icondir }}/" + + + Options -Indexes -MultiViews -FollowSymLinks + AllowOverride None +{% if apache_version == '2.4' -%} + Require all granted +{% else -%} + Order allow,deny + Allow from all +{% endif %} + + diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/audits/__init__.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/audits/__init__.py new file mode 100644 index 00000000..6dd5b05f --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/audits/__init__.py @@ -0,0 +1,54 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class BaseAudit(object): # NO-QA + """Base class for hardening checks. + + The lifecycle of a hardening check is to first check to see if the system + is in compliance for the specified check. If it is not in compliance, the + check method will return a value which will be supplied to the. + """ + def __init__(self, *args, **kwargs): + self.unless = kwargs.get('unless', None) + super(BaseAudit, self).__init__() + + def ensure_compliance(self): + """Checks to see if the current hardening check is in compliance or + not. + + If the check that is performed is not in compliance, then an exception + should be raised. + """ + pass + + def _take_action(self): + """Determines whether to perform the action or not. + + Checks whether or not an action should be taken. This is determined by + the truthy value for the unless parameter. If unless is a callback + method, it will be invoked with no parameters in order to determine + whether or not the action should be taken. Otherwise, the truthy value + of the unless attribute will determine if the action should be + performed. + """ + # Do the action if there isn't an unless override. + if self.unless is None: + return True + + # Invoke the callback if there is one. + if hasattr(self.unless, '__call__'): + return not self.unless() + + return not self.unless diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/audits/apache.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/audits/apache.py new file mode 100644 index 00000000..31db8f62 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/audits/apache.py @@ -0,0 +1,101 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +import subprocess + +from charmhelpers.core.hookenv import ( + log, + INFO, + ERROR, +) + +from charmhelpers.contrib.hardening.audits import BaseAudit + + +class DisabledModuleAudit(BaseAudit): + """Audits Apache2 modules. + + Determines if the apache2 modules are enabled. If the modules are enabled + then they are removed in the ensure_compliance. + """ + def __init__(self, modules): + if modules is None: + self.modules = [] + elif isinstance(modules, str): + self.modules = [modules] + else: + self.modules = modules + + def ensure_compliance(self): + """Ensures that the modules are not loaded.""" + if not self.modules: + return + + try: + loaded_modules = self._get_loaded_modules() + non_compliant_modules = [] + for module in self.modules: + if module in loaded_modules: + log("Module '%s' is enabled but should not be." 
% + (module), level=INFO) + non_compliant_modules.append(module) + + if len(non_compliant_modules) == 0: + return + + for module in non_compliant_modules: + self._disable_module(module) + self._restart_apache() + except subprocess.CalledProcessError as e: + log('Error occurred auditing apache module compliance. ' + 'This may have been already reported. ' + 'Output is: %s' % e.output, level=ERROR) + + @staticmethod + def _get_loaded_modules(): + """Returns the modules which are enabled in Apache.""" + output = subprocess.check_output(['apache2ctl', '-M']).decode('utf-8') + modules = [] + for line in output.splitlines(): + # Each line of the enabled module output looks like: + # module_name (static|shared) + # Plus a header line at the top of the output which is stripped + # out by the regex. + matcher = re.search(r'^ (\S*)_module (\S*)', line) + if matcher: + modules.append(matcher.group(1)) + return modules + + @staticmethod + def _disable_module(module): + """Disables the specified module in Apache.""" + try: + subprocess.check_call(['a2dismod', module]) + except subprocess.CalledProcessError as e: + # Note: catch error here to allow the attempt of disabling + # multiple modules in one go rather than failing after the + # first module fails. + log('Error occurred disabling module %s. ' + 'Output is: %s' % (module, e.output), level=ERROR) + + @staticmethod + def _restart_apache(): + """Restarts the apache process""" + subprocess.check_output(['service', 'apache2', 'restart']) + + @staticmethod + def is_ssl_enabled(): + """Check if SSL module is enabled or not""" + return 'ssl' in DisabledModuleAudit._get_loaded_modules() diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/audits/apt.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/audits/apt.py new file mode 100644 index 00000000..1b22925b --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/audits/apt.py @@ -0,0 +1,101 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
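The DisabledModuleAudit above shells out to `apache2ctl -M` and scrapes module names with a regex. A self-contained sketch of that parsing step; the sample output below is made up, not captured from a real host:

```python
# Sketch: parsing `apache2ctl -M` style output the way
# DisabledModuleAudit._get_loaded_modules() does (sample text is illustrative).
import re

SAMPLE_OUTPUT = """Loaded Modules:
 core_module (static)
 cgi_module (shared)
 ssl_module (shared)
"""

modules = []
for line in SAMPLE_OUTPUT.splitlines():
    # Matches lines like ' cgi_module (shared)'; the header line is skipped.
    matcher = re.search(r'^ (\S*)_module (\S*)', line)
    if matcher:
        modules.append(matcher.group(1))

print(modules)  # ['core', 'cgi', 'ssl']
```

Given that parse, `DisabledModuleAudit(['cgi', 'cgid']).ensure_compliance()` runs `a2dismod` for each blacklisted module found loaded and then restarts apache2, as implemented above.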
+ +from charmhelpers.fetch import ( + apt_cache, + apt_purge +) +from charmhelpers.core.hookenv import ( + log, + DEBUG, + WARNING, +) +from charmhelpers.contrib.hardening.audits import BaseAudit +from charmhelpers.fetch import ubuntu_apt_pkg as apt_pkg + + +class AptConfig(BaseAudit): + + def __init__(self, config, **kwargs): + self.config = config + + def verify_config(self): + apt_pkg.init() + for cfg in self.config: + value = apt_pkg.config.get(cfg['key'], cfg.get('default', '')) + if value and value != cfg['expected']: + log("APT config '%s' has unexpected value '%s' " + "(expected='%s')" % + (cfg['key'], value, cfg['expected']), level=WARNING) + + def ensure_compliance(self): + self.verify_config() + + +class RestrictedPackages(BaseAudit): + """Class used to audit restricted packages on the system.""" + + def __init__(self, pkgs, **kwargs): + super(RestrictedPackages, self).__init__(**kwargs) + if isinstance(pkgs, str) or not hasattr(pkgs, '__iter__'): + self.pkgs = pkgs.split() + else: + self.pkgs = pkgs + + def ensure_compliance(self): + cache = apt_cache() + + for p in self.pkgs: + if p not in cache: + continue + + pkg = cache[p] + if not self.is_virtual_package(pkg): + if not pkg.current_ver: + log("Package '%s' is not installed." % pkg.name, + level=DEBUG) + continue + else: + log("Restricted package '%s' is installed" % pkg.name, + level=WARNING) + self.delete_package(cache, pkg) + else: + log("Checking restricted virtual package '%s' provides" % + pkg.name, level=DEBUG) + self.delete_package(cache, pkg) + + def delete_package(self, cache, pkg): + """Deletes the package from the system. + + Deletes the package form the system, properly handling virtual + packages. + + :param cache: the apt cache + :param pkg: the package to remove + """ + if self.is_virtual_package(pkg): + log("Package '%s' appears to be virtual - purging provides" % + pkg.name, level=DEBUG) + for _p in pkg.provides_list: + self.delete_package(cache, _p[2].parent_pkg) + elif not pkg.current_ver: + log("Package '%s' not installed" % pkg.name, level=DEBUG) + return + else: + log("Purging package '%s'" % pkg.name, level=DEBUG) + apt_purge(pkg.name) + + def is_virtual_package(self, pkg): + return (pkg.get('has_provides', False) and + not pkg.get('has_versions', False)) diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/audits/file.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/audits/file.py new file mode 100644 index 00000000..84cc2494 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/audits/file.py @@ -0,0 +1,549 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
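A sketch of how the apt audits above are meant to be driven from charm code. This only runs inside a charm hook environment with charmhelpers available; the APT key and package names are illustrative (the packages mirror the defaults/os.yaml blacklist later in this patch):

```python
# Sketch: wiring up the apt audits (illustrative settings, hook context assumed).
from charmhelpers.contrib.hardening.audits.apt import (
    AptConfig,
    RestrictedPackages,
)

audits = [
    # Warn if an APT option differs from the expected value.
    AptConfig([{'key': 'APT::Get::AllowUnauthenticated',
                'expected': 'false'}]),
    # Purge any of these packages found installed, recursing through
    # virtual packages via their providers.
    RestrictedPackages(pkgs=['telnet-server', 'rsh-server']),
]

for audit in audits:
    audit.ensure_compliance()
```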
+ +import grp +import os +import pwd +import re + +from subprocess import ( + CalledProcessError, + check_output, + check_call, +) +from traceback import format_exc +from stat import ( + S_ISGID, + S_ISUID +) + +from charmhelpers.core.hookenv import ( + log, + DEBUG, + INFO, + WARNING, + ERROR, +) +from charmhelpers.core import unitdata +from charmhelpers.core.host import file_hash +from charmhelpers.contrib.hardening.audits import BaseAudit +from charmhelpers.contrib.hardening.templating import ( + get_template_path, + render_and_write, +) +from charmhelpers.contrib.hardening import utils + + +class BaseFileAudit(BaseAudit): + """Base class for file audits. + + Provides api stubs for compliance check flow that must be used by any class + that implemented this one. + """ + + def __init__(self, paths, always_comply=False, *args, **kwargs): + """ + :param paths: string path of list of paths of files we want to apply + compliance checks are criteria to. + :param always_comply: if true compliance criteria is always applied + else compliance is skipped for non-existent + paths. + """ + super(BaseFileAudit, self).__init__(*args, **kwargs) + self.always_comply = always_comply + if isinstance(paths, str) or not hasattr(paths, '__iter__'): + self.paths = [paths] + else: + self.paths = paths + + def ensure_compliance(self): + """Ensure that the all registered files comply to registered criteria. + """ + for p in self.paths: + if os.path.exists(p): + if self.is_compliant(p): + continue + + log('File %s is not in compliance.' % p, level=INFO) + else: + if not self.always_comply: + log("Non-existent path '%s' - skipping compliance check" + % (p), level=INFO) + continue + + if self._take_action(): + log("Applying compliance criteria to '%s'" % (p), level=INFO) + self.comply(p) + + def is_compliant(self, path): + """Audits the path to see if it is compliance. + + :param path: the path to the file that should be checked. + """ + raise NotImplementedError + + def comply(self, path): + """Enforces the compliance of a path. + + :param path: the path to the file that should be enforced. + """ + raise NotImplementedError + + @classmethod + def _get_stat(cls, path): + """Returns the Posix st_stat information for the specified file path. + + :param path: the path to get the st_stat information for. + :returns: an st_stat object for the path or None if the path doesn't + exist. + """ + return os.stat(path) + + +class FilePermissionAudit(BaseFileAudit): + """Implements an audit for file permissions and ownership for a user. + + This class implements functionality that ensures that a specific user/group + will own the file(s) specified and that the permissions specified are + applied properly to the file. + """ + def __init__(self, paths, user, group=None, mode=0o600, **kwargs): + self.user = user + self.group = group + self.mode = mode + super(FilePermissionAudit, self).__init__(paths, user, group, mode, + **kwargs) + + @property + def user(self): + return self._user + + @user.setter + def user(self, name): + try: + user = pwd.getpwnam(name) + except KeyError: + log('Unknown user %s' % name, level=ERROR) + user = None + self._user = user + + @property + def group(self): + return self._group + + @group.setter + def group(self, name): + try: + group = None + if name: + group = grp.getgrnam(name) + else: + group = grp.getgrgid(self.user.pw_gid) + except KeyError: + log('Unknown group %s' % name, level=ERROR) + self._group = group + + def is_compliant(self, path): + """Checks if the path is in compliance. 
+ + Used to determine if the path specified meets the necessary + requirements to be in compliance with the check itself. + + :param path: the file path to check + :returns: True if the path is compliant, False otherwise. + """ + stat = self._get_stat(path) + user = self.user + group = self.group + + compliant = True + if stat.st_uid != user.pw_uid or stat.st_gid != group.gr_gid: + log('File %s is not owned by %s:%s.' % (path, user.pw_name, + group.gr_name), + level=INFO) + compliant = False + + # POSIX refers to the st_mode bits as corresponding to both the + # file type and file permission bits, where the least significant 12 + # bits (o7777) are the suid (11), sgid (10), sticky bits (9), and the + # file permission bits (8-0) + perms = stat.st_mode & 0o7777 + if perms != self.mode: + log('File %s has incorrect permissions, currently set to %s' % + (path, oct(stat.st_mode & 0o7777)), level=INFO) + compliant = False + + return compliant + + def comply(self, path): + """Issues a chown and chmod to the file paths specified.""" + utils.ensure_permissions(path, self.user.pw_name, self.group.gr_name, + self.mode) + + +class DirectoryPermissionAudit(FilePermissionAudit): + """Performs a permission check for the specified directory path.""" + + def __init__(self, paths, user, group=None, mode=0o600, + recursive=True, **kwargs): + super(DirectoryPermissionAudit, self).__init__(paths, user, group, + mode, **kwargs) + self.recursive = recursive + + def is_compliant(self, path): + """Checks if the directory is compliant. + + Used to determine if the path specified and all of its children + directories are in compliance with the check itself. + + :param path: the directory path to check + :returns: True if the directory tree is compliant, otherwise False. + """ + if not os.path.isdir(path): + log('Path specified %s is not a directory.' % path, level=ERROR) + raise ValueError("%s is not a directory." % path) + + if not self.recursive: + return super(DirectoryPermissionAudit, self).is_compliant(path) + + compliant = True + for root, dirs, _ in os.walk(path): + if len(dirs) > 0: + continue + + if not super(DirectoryPermissionAudit, self).is_compliant(root): + compliant = False + continue + + return compliant + + def comply(self, path): + for root, dirs, _ in os.walk(path): + if len(dirs) > 0: + super(DirectoryPermissionAudit, self).comply(root) + + +class ReadOnly(BaseFileAudit): + """Audits that files and folders are read only.""" + def __init__(self, paths, *args, **kwargs): + super(ReadOnly, self).__init__(paths=paths, *args, **kwargs) + + def is_compliant(self, path): + try: + output = check_output(['find', path, '-perm', '-go+w', + '-type', 'f']).strip() + + # The find above will find any files which have permission sets + # which allow too broad of write access. As such, the path is + # compliant if there is no output. + if output: + return False + + return True + except CalledProcessError as e: + log('Error occurred checking finding writable files for %s. ' + 'Error information is: command %s failed with returncode ' + '%d and output %s.\n%s' % (path, e.cmd, e.returncode, e.output, + format_exc(e)), level=ERROR) + return False + + def comply(self, path): + try: + check_output(['chmod', 'go-w', '-R', path]) + except CalledProcessError as e: + log('Error occurred removing writeable permissions for %s. 
'
+                'Error information is: command %s failed with returncode '
+                '%d and output %s.\n%s' % (path, e.cmd, e.returncode, e.output,
+                                           format_exc(e)), level=ERROR)
+
+
+class NoReadWriteForOther(BaseFileAudit):
+    """Ensures that files found under the base path are not readable or
+    writable by anyone other than the owner or the group.
+    """
+    def __init__(self, paths):
+        super(NoReadWriteForOther, self).__init__(paths)
+
+    def is_compliant(self, path):
+        try:
+            cmd = ['find', path, '-perm', '-o+r', '-type', 'f', '-o',
+                   '-perm', '-o+w', '-type', 'f']
+            output = check_output(cmd).strip()
+
+            # The find above here will find any files which have read or
+            # write permissions for other, meaning there is too broad of access
+            # to read/write the file. As such, the path is compliant if there's
+            # no output.
+            if output:
+                return False
+
+            return True
+        except CalledProcessError as e:
+            log('Error occurred while finding files which are readable or '
+                'writable to the world in %s. '
+                'Command output is: %s.' % (path, e.output), level=ERROR)
+
+    def comply(self, path):
+        try:
+            check_output(['chmod', '-R', 'o-rw', path])
+        except CalledProcessError as e:
+            log('Error occurred attempting to change modes of files under '
+                'path %s. Output of command is: %s' % (path, e.output))
+
+
+class NoSUIDSGIDAudit(BaseFileAudit):
+    """Audits that specified files do not have SUID/SGID bits set."""
+    def __init__(self, paths, *args, **kwargs):
+        super(NoSUIDSGIDAudit, self).__init__(paths=paths, *args, **kwargs)
+
+    def is_compliant(self, path):
+        stat = self._get_stat(path)
+        if (stat.st_mode & (S_ISGID | S_ISUID)) != 0:
+            return False
+
+        return True
+
+    def comply(self, path):
+        try:
+            log('Removing suid/sgid from %s.' % path, level=DEBUG)
+            check_output(['chmod', '-s', path])
+        except CalledProcessError as e:
+            log('Error occurred removing suid/sgid from %s.'
+                'Error information is: command %s failed with returncode '
+                '%d and output %s.\n%s' % (path, e.cmd, e.returncode, e.output,
+                                           format_exc(e)), level=ERROR)
+
+
+class TemplatedFile(BaseFileAudit):
+    """The TemplatedFile audit renders and audits the contents of a templated
+    file.
+
+    This audit renders a file from a template, sets the appropriate file
+    permissions, then generates a hashsum with which to check whether the
+    content has changed.
+    """
+    def __init__(self, path, context, template_dir, mode, user='root',
+                 group='root', service_actions=None, **kwargs):
+        self.context = context
+        self.user = user
+        self.group = group
+        self.mode = mode
+        self.template_dir = template_dir
+        self.service_actions = service_actions
+        super(TemplatedFile, self).__init__(paths=path, always_comply=True,
+                                            **kwargs)
+
+    def is_compliant(self, path):
+        """Determines if the templated file is compliant.
+
+        A templated file is only compliant if it has not changed (as
+        determined by its sha256 hashsum) AND its file permissions are set
+        appropriately.
+
+        :param path: the path to check compliance.
+ """ + same_templates = self.templates_match(path) + same_content = self.contents_match(path) + same_permissions = self.permissions_match(path) + + if same_content and same_permissions and same_templates: + return True + + return False + + def run_service_actions(self): + """Run any actions on services requested.""" + if not self.service_actions: + return + + for svc_action in self.service_actions: + name = svc_action['service'] + actions = svc_action['actions'] + log("Running service '%s' actions '%s'" % (name, actions), + level=DEBUG) + for action in actions: + cmd = ['service', name, action] + try: + check_call(cmd) + except CalledProcessError as exc: + log("Service name='%s' action='%s' failed - %s" % + (name, action, exc), level=WARNING) + + def comply(self, path): + """Ensures the contents and the permissions of the file. + + :param path: the path to correct + """ + dirname = os.path.dirname(path) + if not os.path.exists(dirname): + os.makedirs(dirname) + + self.pre_write() + render_and_write(self.template_dir, path, self.context()) + utils.ensure_permissions(path, self.user, self.group, self.mode) + self.run_service_actions() + self.save_checksum(path) + self.post_write() + + def pre_write(self): + """Invoked prior to writing the template.""" + pass + + def post_write(self): + """Invoked after writing the template.""" + pass + + def templates_match(self, path): + """Determines if the template files are the same. + + The template file equality is determined by the hashsum of the + template files themselves. If there is no hashsum, then the content + cannot be sure to be the same so treat it as if they changed. + Otherwise, return whether or not the hashsums are the same. + + :param path: the path to check + :returns: boolean + """ + template_path = get_template_path(self.template_dir, path) + key = 'hardening:template:%s' % template_path + template_checksum = file_hash(template_path) + kv = unitdata.kv() + stored_tmplt_checksum = kv.get(key) + if not stored_tmplt_checksum: + kv.set(key, template_checksum) + kv.flush() + log('Saved template checksum for %s.' % template_path, + level=DEBUG) + # Since we don't have a template checksum, then assume it doesn't + # match and return that the template is different. + return False + elif stored_tmplt_checksum != template_checksum: + kv.set(key, template_checksum) + kv.flush() + log('Updated template checksum for %s.' % template_path, + level=DEBUG) + return False + + # Here the template hasn't changed based upon the calculated + # checksum of the template and what was previously stored. + return True + + def contents_match(self, path): + """Determines if the file content is the same. + + This is determined by comparing hashsum of the file contents and + the saved hashsum. If there is no hashsum, then the content cannot + be sure to be the same so treat them as if they are not the same. + Otherwise, return True if the hashsums are the same, False if they + are not the same. + + :param path: the file to check. + """ + checksum = file_hash(path) + + kv = unitdata.kv() + stored_checksum = kv.get('hardening:%s' % path) + if not stored_checksum: + # If the checksum hasn't been generated, return False to ensure + # the file is written and the checksum stored. + log('Checksum for %s has not been calculated.' % path, level=DEBUG) + return False + elif stored_checksum != checksum: + log('Checksum mismatch for %s.' 
% path, level=DEBUG)
+            return False
+
+        return True
+
+    def permissions_match(self, path):
+        """Determines if the file owner and permissions match.
+
+        :param path: the path to check.
+        """
+        audit = FilePermissionAudit(path, self.user, self.group, self.mode)
+        return audit.is_compliant(path)
+
+    def save_checksum(self, path):
+        """Calculates and saves the checksum for the path specified.
+
+        :param path: the path of the file to save the checksum.
+        """
+        checksum = file_hash(path)
+        kv = unitdata.kv()
+        kv.set('hardening:%s' % path, checksum)
+        kv.flush()
+
+
+class DeletedFile(BaseFileAudit):
+    """Audit to ensure that a file is deleted."""
+    def __init__(self, paths):
+        super(DeletedFile, self).__init__(paths)
+
+    def is_compliant(self, path):
+        return not os.path.exists(path)
+
+    def comply(self, path):
+        os.remove(path)
+
+
+class FileContentAudit(BaseFileAudit):
+    """Audit the contents of a file."""
+    def __init__(self, paths, cases, **kwargs):
+        # Cases we expect to pass
+        self.pass_cases = cases.get('pass', [])
+        # Cases we expect to fail
+        self.fail_cases = cases.get('fail', [])
+        super(FileContentAudit, self).__init__(paths, **kwargs)
+
+    def is_compliant(self, path):
+        """
+        Given a set of content matching cases i.e. tuple(regex, bool) where
+        bool value denotes whether or not regex is expected to match, check
+        that all cases match as expected with the contents of the file. Cases
+        can be expected to pass or fail.
+
+        :param path: Path of file to check.
+        :returns: Boolean value representing whether or not all cases are
+                  found to be compliant.
+        """
+        log("Auditing contents of file '%s'" % (path), level=DEBUG)
+        with open(path, 'r') as fd:
+            contents = fd.read()
+
+        matches = 0
+        for pattern in self.pass_cases:
+            key = re.compile(pattern, flags=re.MULTILINE)
+            results = re.search(key, contents)
+            if results:
+                matches += 1
+            else:
+                log("Pattern '%s' was expected to pass but instead it failed"
+                    % (pattern), level=WARNING)
+
+        for pattern in self.fail_cases:
+            key = re.compile(pattern, flags=re.MULTILINE)
+            results = re.search(key, contents)
+            if not results:
+                matches += 1
+            else:
+                log("Pattern '%s' was expected to fail but instead it passed"
+                    % (pattern), level=WARNING)
+
+        total = len(self.pass_cases) + len(self.fail_cases)
+        log("Checked %s cases and %s passed" % (total, matches), level=DEBUG)
+        return matches == total
+
+    def comply(self, *args, **kwargs):
+        """NOOP since we just issue warnings. This is to avoid the
+        NotImplementedError.
+        """
+        log("Not applying any compliance criteria, only checks.", level=INFO)
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/defaults/__init__.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/defaults/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/defaults/apache.yaml b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/defaults/apache.yaml
new file mode 100644
index 00000000..0f940d4c
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/defaults/apache.yaml
@@ -0,0 +1,16 @@
+# NOTE: this file contains the default configuration for the 'apache' hardening
+#       code. If you want to override any settings you must add them to a file
+#       called hardening.yaml in the root directory of your charm using the
+#       name 'apache' as the root key followed by any of the following with new
+#       values.
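A usage sketch for the FileContentAudit class above: 'pass' regexes must match the file contents and 'fail' regexes must not. Since comply() is a no-op, this audit only logs warnings. The path and patterns here are illustrative, not taken from this patch:

```python
# Sketch: content-auditing sshd_config with FileContentAudit
# (illustrative path/patterns; runs in a charm hook context).
from charmhelpers.contrib.hardening.audits.file import FileContentAudit

audit = FileContentAudit(
    paths='/etc/ssh/sshd_config',
    cases={
        # Expected to match the file:
        'pass': [r'^PermitRootLogin\s+no'],
        # Expected NOT to match the file:
        'fail': [r'^PasswordAuthentication\s+yes'],
    })

# Logs a WARNING for each case that does not behave as expected.
audit.ensure_compliance()
```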
+ +common: + apache_dir: '/etc/apache2' + +hardening: + traceenable: 'off' + allowed_http_methods: "GET POST" + modules_to_disable: [ cgi, cgid ] + servertokens: 'Prod' + honor_cipher_order: 'on' + cipher_suite: 'ALL:+MEDIUM:+HIGH:!LOW:!MD5:!RC4:!eNULL:!aNULL:!3DES' diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/defaults/apache.yaml.schema b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/defaults/apache.yaml.schema new file mode 100644 index 00000000..c112137c --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/defaults/apache.yaml.schema @@ -0,0 +1,12 @@ +# NOTE: this schema must contain all valid keys from it's associated defaults +# file. It is used to validate user-provided overrides. +common: + apache_dir: + traceenable: + +hardening: + allowed_http_methods: + modules_to_disable: + servertokens: + honor_cipher_order: + cipher_suite: diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/defaults/mysql.yaml b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/defaults/mysql.yaml new file mode 100644 index 00000000..682d22bf --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/defaults/mysql.yaml @@ -0,0 +1,38 @@ +# NOTE: this file contains the default configuration for the 'mysql' hardening +# code. If you want to override any settings you must add them to a file +# called hardening.yaml in the root directory of your charm using the +# name 'mysql' as the root key followed by any of the following with new +# values. + +hardening: + mysql-conf: /etc/mysql/my.cnf + hardening-conf: /etc/mysql/conf.d/hardening.cnf + +security: + # @see http://www.symantec.com/connect/articles/securing-mysql-step-step + # @see http://dev.mysql.com/doc/refman/5.7/en/server-options.html#option_mysqld_chroot + chroot: None + + # @see http://dev.mysql.com/doc/refman/5.7/en/server-options.html#option_mysqld_safe-user-create + safe-user-create: 1 + + # @see http://dev.mysql.com/doc/refman/5.7/en/server-options.html#option_mysqld_secure-auth + secure-auth: 1 + + # @see http://dev.mysql.com/doc/refman/5.7/en/server-options.html#option_mysqld_symbolic-links + skip-symbolic-links: 1 + + # @see http://dev.mysql.com/doc/refman/5.7/en/server-options.html#option_mysqld_skip-show-database + skip-show-database: True + + # @see http://dev.mysql.com/doc/refman/5.7/en/server-system-variables.html#sysvar_local_infile + local-infile: 0 + + # @see https://dev.mysql.com/doc/refman/5.7/en/server-options.html#option_mysqld_allow-suspicious-udfs + allow-suspicious-udfs: 0 + + # @see https://dev.mysql.com/doc/refman/5.7/en/server-system-variables.html#sysvar_automatic_sp_privileges + automatic-sp-privileges: 0 + + # @see https://dev.mysql.com/doc/refman/5.7/en/server-options.html#option_mysqld_secure-file-priv + secure-file-priv: /tmp diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/defaults/mysql.yaml.schema b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/defaults/mysql.yaml.schema new file mode 100644 index 00000000..2edf325c --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/defaults/mysql.yaml.schema @@ -0,0 +1,15 @@ +# NOTE: this schema must contain all valid keys from it's associated defaults +# file. It is used to validate user-provided overrides. 
+hardening: + mysql-conf: + hardening-conf: +security: + chroot: + safe-user-create: + secure-auth: + skip-symbolic-links: + skip-show-database: + local-infile: + allow-suspicious-udfs: + automatic-sp-privileges: + secure-file-priv: diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/defaults/os.yaml b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/defaults/os.yaml new file mode 100644 index 00000000..9a8627b5 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/defaults/os.yaml @@ -0,0 +1,68 @@ +# NOTE: this file contains the default configuration for the 'os' hardening +# code. If you want to override any settings you must add them to a file +# called hardening.yaml in the root directory of your charm using the +# name 'os' as the root key followed by any of the following with new +# values. + +general: + desktop_enable: False # (type:boolean) + +environment: + extra_user_paths: [] + umask: 027 + root_path: / + +auth: + pw_max_age: 60 + # discourage password cycling + pw_min_age: 7 + retries: 5 + lockout_time: 600 + timeout: 60 + allow_homeless: False # (type:boolean) + pam_passwdqc_enable: True # (type:boolean) + pam_passwdqc_options: 'min=disabled,disabled,16,12,8' + root_ttys: + console + tty1 + tty2 + tty3 + tty4 + tty5 + tty6 + uid_min: 1000 + gid_min: 1000 + sys_uid_min: 100 + sys_uid_max: 999 + sys_gid_min: 100 + sys_gid_max: 999 + chfn_restrict: + +security: + users_allow: [] + suid_sgid_enforce: True # (type:boolean) + # user-defined blacklist and whitelist + suid_sgid_blacklist: [] + suid_sgid_whitelist: [] + # if this is True, remove any suid/sgid bits from files that were not in the whitelist + suid_sgid_dry_run_on_unknown: False # (type:boolean) + suid_sgid_remove_from_unknown: False # (type:boolean) + # remove packages with known issues + packages_clean: True # (type:boolean) + packages_list: + xinetd + inetd + ypserv + telnet-server + rsh-server + rsync + kernel_enable_module_loading: True # (type:boolean) + kernel_enable_core_dump: False # (type:boolean) + ssh_tmout: 300 + +sysctl: + kernel_secure_sysrq: 244 # 4 + 16 + 32 + 64 + 128 + kernel_enable_sysrq: False # (type:boolean) + forwarding: False # (type:boolean) + ipv6_enable: False # (type:boolean) + arp_restricted: True # (type:boolean) diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/defaults/os.yaml.schema b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/defaults/os.yaml.schema new file mode 100644 index 00000000..cc3b9c20 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/defaults/os.yaml.schema @@ -0,0 +1,43 @@ +# NOTE: this schema must contain all valid keys from it's associated defaults +# file. It is used to validate user-provided overrides. 
+general: + desktop_enable: +environment: + extra_user_paths: + umask: + root_path: +auth: + pw_max_age: + pw_min_age: + retries: + lockout_time: + timeout: + allow_homeless: + pam_passwdqc_enable: + pam_passwdqc_options: + root_ttys: + uid_min: + gid_min: + sys_uid_min: + sys_uid_max: + sys_gid_min: + sys_gid_max: + chfn_restrict: +security: + users_allow: + suid_sgid_enforce: + suid_sgid_blacklist: + suid_sgid_whitelist: + suid_sgid_dry_run_on_unknown: + suid_sgid_remove_from_unknown: + packages_clean: + packages_list: + kernel_enable_module_loading: + kernel_enable_core_dump: + ssh_tmout: +sysctl: + kernel_secure_sysrq: + kernel_enable_sysrq: + forwarding: + ipv6_enable: + arp_restricted: diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/defaults/ssh.yaml b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/defaults/ssh.yaml new file mode 100644 index 00000000..cd529bca --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/defaults/ssh.yaml @@ -0,0 +1,49 @@ +# NOTE: this file contains the default configuration for the 'ssh' hardening +# code. If you want to override any settings you must add them to a file +# called hardening.yaml in the root directory of your charm using the +# name 'ssh' as the root key followed by any of the following with new +# values. + +common: + service_name: 'ssh' + network_ipv6_enable: False # (type:boolean) + ports: [22] + remote_hosts: [] + +client: + package: 'openssh-client' + cbc_required: False # (type:boolean) + weak_hmac: False # (type:boolean) + weak_kex: False # (type:boolean) + roaming: False + password_authentication: 'no' + +server: + host_key_files: ['/etc/ssh/ssh_host_rsa_key', '/etc/ssh/ssh_host_dsa_key', + '/etc/ssh/ssh_host_ecdsa_key'] + cbc_required: False # (type:boolean) + weak_hmac: False # (type:boolean) + weak_kex: False # (type:boolean) + allow_root_with_key: False # (type:boolean) + allow_tcp_forwarding: 'no' + allow_agent_forwarding: 'no' + allow_x11_forwarding: 'no' + use_privilege_separation: 'sandbox' + listen_to: ['0.0.0.0'] + use_pam: 'no' + package: 'openssh-server' + password_authentication: 'no' + alive_interval: '600' + alive_count: '3' + sftp_enable: False # (type:boolean) + sftp_group: 'sftponly' + sftp_chroot: '/home/%u' + deny_users: [] + allow_users: [] + deny_groups: [] + allow_groups: [] + print_motd: 'no' + print_last_log: 'no' + use_dns: 'no' + max_auth_tries: 2 + max_sessions: 10 diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/defaults/ssh.yaml.schema b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/defaults/ssh.yaml.schema new file mode 100644 index 00000000..d05e054b --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/defaults/ssh.yaml.schema @@ -0,0 +1,42 @@ +# NOTE: this schema must contain all valid keys from it's associated defaults +# file. It is used to validate user-provided overrides. 
+common: + service_name: + network_ipv6_enable: + ports: + remote_hosts: +client: + package: + cbc_required: + weak_hmac: + weak_kex: + roaming: + password_authentication: +server: + host_key_files: + cbc_required: + weak_hmac: + weak_kex: + allow_root_with_key: + allow_tcp_forwarding: + allow_agent_forwarding: + allow_x11_forwarding: + use_privilege_separation: + listen_to: + use_pam: + package: + password_authentication: + alive_interval: + alive_count: + sftp_enable: + sftp_group: + sftp_chroot: + deny_users: + allow_users: + deny_groups: + allow_groups: + print_motd: + print_last_log: + use_dns: + max_auth_tries: + max_sessions: diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/harden.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/harden.py new file mode 100644 index 00000000..45ad076d --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/harden.py @@ -0,0 +1,93 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import OrderedDict + +from charmhelpers.core.hookenv import ( + config, + log, + DEBUG, + WARNING, +) +from charmhelpers.contrib.hardening.host.checks import run_os_checks +from charmhelpers.contrib.hardening.ssh.checks import run_ssh_checks +from charmhelpers.contrib.hardening.mysql.checks import run_mysql_checks +from charmhelpers.contrib.hardening.apache.checks import run_apache_checks + +_DISABLE_HARDENING_FOR_UNIT_TEST = False + + +def harden(overrides=None): + """Hardening decorator. + + This is the main entry point for running the hardening stack. In order to + run modules of the stack you must add this decorator to charm hook(s) and + ensure that your charm config.yaml contains the 'harden' option set to + one or more of the supported modules. Setting these will cause the + corresponding hardening code to be run when the hook fires. + + This decorator can and should be applied to more than one hook or function + such that hardening modules are called multiple times. This is because + subsequent calls will perform auditing checks that will report any changes + to resources hardened by the first run (and possibly perform compliance + actions as a result of any detected infractions). + + :param overrides: Optional list of stack modules used to override those + provided with 'harden' config. + :returns: Returns value returned by decorated function once executed. + """ + if overrides is None: + overrides = [] + + def _harden_inner1(f): + _logged = False + + def _harden_inner2(*args, **kwargs): + # knock out hardening via a config var; normally it won't get + # disabled. 
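+            # (_DISABLE_HARDENING_FOR_UNIT_TEST is a module-level flag
+            # intended to be flipped only by the unit test suite.)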
+ nonlocal _logged + if _DISABLE_HARDENING_FOR_UNIT_TEST: + return f(*args, **kwargs) + if not _logged: + log("Hardening function '%s'" % (f.__name__), level=DEBUG) + _logged = True + RUN_CATALOG = OrderedDict([('os', run_os_checks), + ('ssh', run_ssh_checks), + ('mysql', run_mysql_checks), + ('apache', run_apache_checks)]) + + enabled = overrides[:] or (config("harden") or "").split() + if enabled: + modules_to_run = [] + # modules will always be performed in the following order + for module, func in RUN_CATALOG.items(): + if module in enabled: + enabled.remove(module) + modules_to_run.append(func) + + if enabled: + log("Unknown hardening modules '%s' - ignoring" % + (', '.join(enabled)), level=WARNING) + + for hardener in modules_to_run: + log("Executing hardening module '%s'" % + (hardener.__name__), level=DEBUG) + hardener() + else: + log("No hardening applied to '%s'" % (f.__name__), level=DEBUG) + + return f(*args, **kwargs) + return _harden_inner2 + + return _harden_inner1 diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/__init__.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/__init__.py new file mode 100644 index 00000000..58bebd84 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from os import path + +TEMPLATES_DIR = path.join(path.dirname(__file__), 'templates') diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/__init__.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/__init__.py new file mode 100644 index 00000000..0e7e409f --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/__init__.py @@ -0,0 +1,48 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
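The decorator defined in `harden.py` above is the single entry point for the stack: a charm opts in by decorating its hooks and setting the `harden` config option to the modules it wants. A minimal sketch (the hook name is hypothetical):

```python
# Hypothetical charm hook using the @harden decorator defined above.
# With charm config 'harden' set to e.g. "os ssh", the os and ssh
# hardening modules run before the hook body; unknown module names
# are logged at WARNING level and ignored.
from charmhelpers.contrib.hardening.harden import harden

@harden()
def config_changed():
    pass  # normal hook logic runs after the hardening checks
```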
+ +from charmhelpers.core.hookenv import ( + log, + DEBUG, +) +from charmhelpers.contrib.hardening.host.checks import ( + apt, + limits, + login, + minimize_access, + pam, + profile, + securetty, + suid_sgid, + sysctl +) + + +def run_os_checks(): + log("Starting OS hardening checks.", level=DEBUG) + checks = apt.get_audits() + checks.extend(limits.get_audits()) + checks.extend(login.get_audits()) + checks.extend(minimize_access.get_audits()) + checks.extend(pam.get_audits()) + checks.extend(profile.get_audits()) + checks.extend(securetty.get_audits()) + checks.extend(suid_sgid.get_audits()) + checks.extend(sysctl.get_audits()) + + for check in checks: + log("Running '%s' check" % (check.__class__.__name__), level=DEBUG) + check.ensure_compliance() + + log("OS hardening checks complete.", level=DEBUG) diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/apt.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/apt.py new file mode 100644 index 00000000..7ce41b00 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/apt.py @@ -0,0 +1,37 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from charmhelpers.contrib.hardening.utils import get_settings +from charmhelpers.contrib.hardening.audits.apt import ( + AptConfig, + RestrictedPackages, +) + + +def get_audits(): + """Get OS hardening apt audits. + + :returns: dictionary of audits + """ + audits = [AptConfig([{'key': 'APT::Get::AllowUnauthenticated', + 'expected': 'false'}])] + + settings = get_settings('os') + clean_packages = settings['security']['packages_clean'] + if clean_packages: + security_packages = settings['security']['packages_list'] + if security_packages: + audits.append(RestrictedPackages(security_packages)) + + return audits diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/limits.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/limits.py new file mode 100644 index 00000000..e94f5ebe --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/limits.py @@ -0,0 +1,53 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from charmhelpers.contrib.hardening.audits.file import ( + DirectoryPermissionAudit, + TemplatedFile, +) +from charmhelpers.contrib.hardening.host import TEMPLATES_DIR +from charmhelpers.contrib.hardening import utils + + +def get_audits(): + """Get OS hardening security limits audits. 
+ + :returns: dictionary of audits + """ + audits = [] + settings = utils.get_settings('os') + + # Ensure that the /etc/security/limits.d directory is only writable + # by the root user, but others can execute and read. + audits.append(DirectoryPermissionAudit('/etc/security/limits.d', + user='root', group='root', + mode=0o755)) + + # If core dumps are not enabled, then don't allow core dumps to be + # created as they may contain sensitive information. + if not settings['security']['kernel_enable_core_dump']: + audits.append(TemplatedFile('/etc/security/limits.d/10.hardcore.conf', + SecurityLimitsContext(), + template_dir=TEMPLATES_DIR, + user='root', group='root', mode=0o0440)) + return audits + + +class SecurityLimitsContext(object): + + def __call__(self): + settings = utils.get_settings('os') + ctxt = {'disable_core_dump': + not settings['security']['kernel_enable_core_dump']} + return ctxt diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/login.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/login.py new file mode 100644 index 00000000..fd500c8b --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/login.py @@ -0,0 +1,63 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from charmhelpers.contrib.hardening.audits.file import TemplatedFile +from charmhelpers.contrib.hardening.host import TEMPLATES_DIR +from charmhelpers.contrib.hardening import utils + + +def get_audits(): + """Get OS hardening login.defs audits. + + :returns: dictionary of audits + """ + audits = [TemplatedFile('/etc/login.defs', LoginContext(), + template_dir=TEMPLATES_DIR, + user='root', group='root', mode=0o0444)] + return audits + + +class LoginContext(object): + + def __call__(self): + settings = utils.get_settings('os') + + # Octal numbers in yaml end up being turned into decimal, + # so check if the umask is entered as a string (e.g. '027') + # or as an octal umask as we know it (e.g. 002). If its not + # a string assume it to be octal and turn it into an octal + # string. 
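+        # (With a YAML 1.1 loader such as PyYAML, the default
+        # 'umask: 027' arrives here as the integer 23.)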
+        umask = settings['environment']['umask']
+        if not isinstance(umask, str):
+            # Re-encode the integer as a zero-prefixed octal string
+            # (e.g. 23 -> '027'); bare oct() would render '0o27' on
+            # Python 3, which login.defs cannot parse.
+            umask = '0%o' % umask
+
+        ctxt = {
+            'additional_user_paths':
+            settings['environment']['extra_user_paths'],
+            'umask': umask,
+            'pwd_max_age': settings['auth']['pw_max_age'],
+            'pwd_min_age': settings['auth']['pw_min_age'],
+            'uid_min': settings['auth']['uid_min'],
+            'sys_uid_min': settings['auth']['sys_uid_min'],
+            'sys_uid_max': settings['auth']['sys_uid_max'],
+            'gid_min': settings['auth']['gid_min'],
+            'sys_gid_min': settings['auth']['sys_gid_min'],
+            'sys_gid_max': settings['auth']['sys_gid_max'],
+            'login_retries': settings['auth']['retries'],
+            'login_timeout': settings['auth']['timeout'],
+            'chfn_restrict': settings['auth']['chfn_restrict'],
+            'allow_login_without_home': settings['auth']['allow_homeless']
+        }
+
+        return ctxt
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/minimize_access.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/minimize_access.py
new file mode 100644
index 00000000..6e64be00
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/minimize_access.py
@@ -0,0 +1,50 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from charmhelpers.contrib.hardening.audits.file import (
+    FilePermissionAudit,
+    ReadOnly,
+)
+from charmhelpers.contrib.hardening import utils
+
+
+def get_audits():
+    """Get OS hardening access audits.
+
+    :returns: dictionary of audits
+    """
+    audits = []
+    settings = utils.get_settings('os')
+
+    # Remove write permissions from $PATH folders for all regular users.
+    # This prevents changing system-wide commands from normal users.
+    path_folders = {'/usr/local/sbin',
+                    '/usr/local/bin',
+                    '/usr/sbin',
+                    '/usr/bin',
+                    '/bin'}
+    extra_user_paths = settings['environment']['extra_user_paths']
+    path_folders.update(extra_user_paths)
+    audits.append(ReadOnly(path_folders))
+
+    # Only allow the root user to have access to the shadow file.
+    audits.append(FilePermissionAudit('/etc/shadow', 'root', 'root', 0o0600))
+
+    if 'change_user' not in settings['security']['users_allow']:
+        # su should only be accessible to user and group root, unless it is
+        # expressly defined to allow users to change to root via the
+        # security_users_allow config option.
+        audits.append(FilePermissionAudit('/bin/su', 'root', 'root', 0o750))
+
+    return audits
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/pam.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/pam.py
new file mode 100644
index 00000000..9b38d5f0
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/pam.py
@@ -0,0 +1,132 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from subprocess import ( + check_output, + CalledProcessError, +) + +from charmhelpers.core.hookenv import ( + log, + DEBUG, + ERROR, +) +from charmhelpers.fetch import ( + apt_install, + apt_purge, + apt_update, +) +from charmhelpers.contrib.hardening.audits.file import ( + TemplatedFile, + DeletedFile, +) +from charmhelpers.contrib.hardening import utils +from charmhelpers.contrib.hardening.host import TEMPLATES_DIR + + +def get_audits(): + """Get OS hardening PAM authentication audits. + + :returns: dictionary of audits + """ + audits = [] + + settings = utils.get_settings('os') + + if settings['auth']['pam_passwdqc_enable']: + audits.append(PasswdqcPAM('/etc/passwdqc.conf')) + + if settings['auth']['retries']: + audits.append(Tally2PAM('/usr/share/pam-configs/tally2')) + else: + audits.append(DeletedFile('/usr/share/pam-configs/tally2')) + + return audits + + +class PasswdqcPAMContext(object): + + def __call__(self): + ctxt = {} + settings = utils.get_settings('os') + + ctxt['auth_pam_passwdqc_options'] = \ + settings['auth']['pam_passwdqc_options'] + + return ctxt + + +class PasswdqcPAM(TemplatedFile): + """The PAM Audit verifies the linux PAM settings.""" + def __init__(self, path): + super(PasswdqcPAM, self).__init__(path=path, + template_dir=TEMPLATES_DIR, + context=PasswdqcPAMContext(), + user='root', + group='root', + mode=0o0640) + + def pre_write(self): + # Always remove? + for pkg in ['libpam-ccreds', 'libpam-cracklib']: + log("Purging package '%s'" % pkg, level=DEBUG), + apt_purge(pkg) + + apt_update(fatal=True) + for pkg in ['libpam-passwdqc']: + log("Installing package '%s'" % pkg, level=DEBUG), + apt_install(pkg) + + def post_write(self): + """Updates the PAM configuration after the file has been written""" + try: + check_output(['pam-auth-update', '--package']) + except CalledProcessError as e: + log('Error calling pam-auth-update: %s' % e, level=ERROR) + + +class Tally2PAMContext(object): + + def __call__(self): + ctxt = {} + settings = utils.get_settings('os') + + ctxt['auth_lockout_time'] = settings['auth']['lockout_time'] + ctxt['auth_retries'] = settings['auth']['retries'] + + return ctxt + + +class Tally2PAM(TemplatedFile): + """The PAM Audit verifies the linux PAM settings.""" + def __init__(self, path): + super(Tally2PAM, self).__init__(path=path, + template_dir=TEMPLATES_DIR, + context=Tally2PAMContext(), + user='root', + group='root', + mode=0o0640) + + def pre_write(self): + # Always remove? 
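+        # (Presumably removed so cached credentials cannot bypass the
+        # lockout policy configured below.)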
+ apt_purge('libpam-ccreds') + apt_update(fatal=True) + apt_install('libpam-modules') + + def post_write(self): + """Updates the PAM configuration after the file has been written""" + try: + check_output(['pam-auth-update', '--package']) + except CalledProcessError as e: + log('Error calling pam-auth-update: %s' % e, level=ERROR) diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/profile.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/profile.py new file mode 100644 index 00000000..2727428d --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/profile.py @@ -0,0 +1,49 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from charmhelpers.contrib.hardening.audits.file import TemplatedFile +from charmhelpers.contrib.hardening.host import TEMPLATES_DIR +from charmhelpers.contrib.hardening import utils + + +def get_audits(): + """Get OS hardening profile audits. + + :returns: dictionary of audits + """ + audits = [] + + settings = utils.get_settings('os') + # If core dumps are not enabled, then don't allow core dumps to be + # created as they may contain sensitive information. + if not settings['security']['kernel_enable_core_dump']: + audits.append(TemplatedFile('/etc/profile.d/pinerolo_profile.sh', + ProfileContext(), + template_dir=TEMPLATES_DIR, + mode=0o0755, user='root', group='root')) + if settings['security']['ssh_tmout']: + audits.append(TemplatedFile('/etc/profile.d/99-hardening.sh', + ProfileContext(), + template_dir=TEMPLATES_DIR, + mode=0o0644, user='root', group='root')) + return audits + + +class ProfileContext(object): + + def __call__(self): + settings = utils.get_settings('os') + ctxt = {'ssh_tmout': + settings['security']['ssh_tmout']} + return ctxt diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/securetty.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/securetty.py new file mode 100644 index 00000000..34cd0217 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/securetty.py @@ -0,0 +1,37 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from charmhelpers.contrib.hardening.audits.file import TemplatedFile +from charmhelpers.contrib.hardening.host import TEMPLATES_DIR +from charmhelpers.contrib.hardening import utils + + +def get_audits(): + """Get OS hardening Secure TTY audits. 
+ + :returns: dictionary of audits + """ + audits = [] + audits.append(TemplatedFile('/etc/securetty', SecureTTYContext(), + template_dir=TEMPLATES_DIR, + mode=0o0400, user='root', group='root')) + return audits + + +class SecureTTYContext(object): + + def __call__(self): + settings = utils.get_settings('os') + ctxt = {'ttys': settings['auth']['root_ttys']} + return ctxt diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/suid_sgid.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/suid_sgid.py new file mode 100644 index 00000000..bcbe3fde --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/suid_sgid.py @@ -0,0 +1,129 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import subprocess + +from charmhelpers.core.hookenv import ( + log, + INFO, +) +from charmhelpers.contrib.hardening.audits.file import NoSUIDSGIDAudit +from charmhelpers.contrib.hardening import utils + + +BLACKLIST = ['/usr/bin/rcp', '/usr/bin/rlogin', '/usr/bin/rsh', + '/usr/libexec/openssh/ssh-keysign', + '/usr/lib/openssh/ssh-keysign', + '/sbin/netreport', + '/usr/sbin/usernetctl', + '/usr/sbin/userisdnctl', + '/usr/sbin/pppd', + '/usr/bin/lockfile', + '/usr/bin/mail-lock', + '/usr/bin/mail-unlock', + '/usr/bin/mail-touchlock', + '/usr/bin/dotlockfile', + '/usr/bin/arping', + '/usr/sbin/uuidd', + '/usr/bin/mtr', + '/usr/lib/evolution/camel-lock-helper-1.2', + '/usr/lib/pt_chown', + '/usr/lib/eject/dmcrypt-get-device', + '/usr/lib/mc/cons.saver'] + +WHITELIST = ['/bin/mount', '/bin/ping', '/bin/su', '/bin/umount', + '/sbin/pam_timestamp_check', '/sbin/unix_chkpwd', '/usr/bin/at', + '/usr/bin/gpasswd', '/usr/bin/locate', '/usr/bin/newgrp', + '/usr/bin/passwd', '/usr/bin/ssh-agent', + '/usr/libexec/utempter/utempter', '/usr/sbin/lockdev', + '/usr/sbin/sendmail.sendmail', '/usr/bin/expiry', + '/bin/ping6', '/usr/bin/traceroute6.iputils', + '/sbin/mount.nfs', '/sbin/umount.nfs', + '/sbin/mount.nfs4', '/sbin/umount.nfs4', + '/usr/bin/crontab', + '/usr/bin/wall', '/usr/bin/write', + '/usr/bin/screen', + '/usr/bin/mlocate', + '/usr/bin/chage', '/usr/bin/chfn', '/usr/bin/chsh', + '/bin/fusermount', + '/usr/bin/pkexec', + '/usr/bin/sudo', '/usr/bin/sudoedit', + '/usr/sbin/postdrop', '/usr/sbin/postqueue', + '/usr/sbin/suexec', + '/usr/lib/squid/ncsa_auth', '/usr/lib/squid/pam_auth', + '/usr/kerberos/bin/ksu', + '/usr/sbin/ccreds_validate', + '/usr/bin/Xorg', + '/usr/bin/X', + '/usr/lib/dbus-1.0/dbus-daemon-launch-helper', + '/usr/lib/vte/gnome-pty-helper', + '/usr/lib/libvte9/gnome-pty-helper', + '/usr/lib/libvte-2.90-9/gnome-pty-helper'] + + +def get_audits(): + """Get OS hardening suid/sgid audits. + + :returns: dictionary of audits + """ + checks = [] + settings = utils.get_settings('os') + if not settings['security']['suid_sgid_enforce']: + log("Skipping suid/sgid hardening", level=INFO) + return checks + + # Build the blacklist and whitelist of files for suid/sgid checks. + # There are a total of 4 lists: + # 1. 
the system blacklist
+    # 2. the system whitelist
+    # 3. the user blacklist
+    # 4. the user whitelist
+    #
+    # The blacklist is the set of paths which should NOT have the suid/sgid
+    # bit set and the whitelist is the set of paths which MAY have the
+    # suid/sgid bit set. The user whitelist/blacklist effectively overrides
+    # the system whitelist/blacklist.
+    u_b = settings['security']['suid_sgid_blacklist']
+    u_w = settings['security']['suid_sgid_whitelist']
+
+    blacklist = set(BLACKLIST) - set(u_w + u_b)
+    whitelist = set(WHITELIST) - set(u_b + u_w)
+
+    checks.append(NoSUIDSGIDAudit(blacklist))
+
+    dry_run = settings['security']['suid_sgid_dry_run_on_unknown']
+
+    if settings['security']['suid_sgid_remove_from_unknown'] or dry_run:
+        # If the policy is a dry_run (e.g. complain only) or remove unknown
+        # suid/sgid bits then find all of the paths which have the suid/sgid
+        # bit set and then remove the whitelisted paths.
+        root_path = settings['environment']['root_path']
+        unknown_paths = find_paths_with_suid_sgid(root_path) - set(whitelist)
+        checks.append(NoSUIDSGIDAudit(unknown_paths, unless=dry_run))
+
+    return checks
+
+
+def find_paths_with_suid_sgid(root_path):
+    """Finds all paths/files which have an suid/sgid bit enabled.
+
+    Starting with the root_path, this will recursively find all paths which
+    have an suid or sgid bit set.
+    """
+    cmd = ['find', root_path, '-perm', '-4000', '-o', '-perm', '-2000',
+           '-type', 'f', '!', '-path', '/proc/*', '-print']
+
+    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    out, _ = p.communicate()
+    # Decode so the result is a set of str paths on Python 3 as well.
+    return set(out.decode('utf-8').split('\n'))
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/sysctl.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/sysctl.py
new file mode 100644
index 00000000..8a57d83d
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/checks/sysctl.py
@@ -0,0 +1,208 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
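The precedence rules implemented in `suid_sgid.py` above are easiest to see with small inputs; a worked sketch (made-up paths):

```python
# Worked example of the list arithmetic in suid_sgid.py above. Any
# path named in either user list is dropped from both built-in lists,
# so the user's own policy takes precedence over the system defaults.
BLACKLIST = {'/usr/bin/rcp', '/usr/bin/rsh'}
WHITELIST = {'/bin/su', '/usr/bin/passwd'}

u_b = ['/usr/bin/passwd']   # user blacklist
u_w = ['/usr/bin/rsh']      # user whitelist

blacklist = BLACKLIST - set(u_w + u_b)   # -> {'/usr/bin/rcp'}
whitelist = WHITELIST - set(u_b + u_w)   # -> {'/bin/su'}
```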
+ +import os +import platform +import re +import subprocess + +from charmhelpers.core.hookenv import ( + log, + INFO, + WARNING, +) +from charmhelpers.contrib.hardening import utils +from charmhelpers.contrib.hardening.audits.file import ( + FilePermissionAudit, + TemplatedFile, +) +from charmhelpers.contrib.hardening.host import TEMPLATES_DIR + + +SYSCTL_DEFAULTS = """net.ipv4.ip_forward=%(net_ipv4_ip_forward)s +net.ipv6.conf.all.forwarding=%(net_ipv6_conf_all_forwarding)s +net.ipv4.conf.all.rp_filter=1 +net.ipv4.conf.default.rp_filter=1 +net.ipv4.icmp_echo_ignore_broadcasts=1 +net.ipv4.icmp_ignore_bogus_error_responses=1 +net.ipv4.icmp_ratelimit=100 +net.ipv4.icmp_ratemask=88089 +net.ipv6.conf.all.disable_ipv6=%(net_ipv6_conf_all_disable_ipv6)s +net.ipv4.tcp_timestamps=%(net_ipv4_tcp_timestamps)s +net.ipv4.conf.all.arp_ignore=%(net_ipv4_conf_all_arp_ignore)s +net.ipv4.conf.all.arp_announce=%(net_ipv4_conf_all_arp_announce)s +net.ipv4.tcp_rfc1337=1 +net.ipv4.tcp_syncookies=1 +net.ipv4.conf.all.shared_media=1 +net.ipv4.conf.default.shared_media=1 +net.ipv4.conf.all.accept_source_route=0 +net.ipv4.conf.default.accept_source_route=0 +net.ipv4.conf.all.accept_redirects=0 +net.ipv4.conf.default.accept_redirects=0 +net.ipv6.conf.all.accept_redirects=0 +net.ipv6.conf.default.accept_redirects=0 +net.ipv4.conf.all.secure_redirects=0 +net.ipv4.conf.default.secure_redirects=0 +net.ipv4.conf.all.send_redirects=0 +net.ipv4.conf.default.send_redirects=0 +net.ipv4.conf.all.log_martians=0 +net.ipv6.conf.default.router_solicitations=0 +net.ipv6.conf.default.accept_ra_rtr_pref=0 +net.ipv6.conf.default.accept_ra_pinfo=0 +net.ipv6.conf.default.accept_ra_defrtr=0 +net.ipv6.conf.default.autoconf=0 +net.ipv6.conf.default.dad_transmits=0 +net.ipv6.conf.default.max_addresses=1 +net.ipv6.conf.all.accept_ra=0 +net.ipv6.conf.default.accept_ra=0 +kernel.modules_disabled=%(kernel_modules_disabled)s +kernel.sysrq=%(kernel_sysrq)s +fs.suid_dumpable=%(fs_suid_dumpable)s +kernel.randomize_va_space=2 +""" + + +def get_audits(): + """Get OS hardening sysctl audits. + + :returns: dictionary of audits + """ + audits = [] + settings = utils.get_settings('os') + + # Apply the sysctl settings which are configured to be applied. + audits.append(SysctlConf()) + # Make sure that only root has access to the sysctl.conf file, and + # that it is read-only. 
+    audits.append(FilePermissionAudit('/etc/sysctl.conf',
+                                      user='root',
+                                      group='root', mode=0o0440))
+    # If module loading is not enabled, then ensure that the modules
+    # file has the appropriate permissions and rebuild the initramfs
+    if not settings['security']['kernel_enable_module_loading']:
+        audits.append(ModulesTemplate())
+
+    return audits
+
+
+class ModulesContext(object):
+
+    def __call__(self):
+        settings = utils.get_settings('os')
+        with open('/proc/cpuinfo', 'r') as fd:
+            cpuinfo = fd.readlines()
+
+        # Default to an empty vendor string in case /proc/cpuinfo does
+        # not report a vendor_id (e.g. on some non-x86 platforms).
+        vendor = ''
+        for line in cpuinfo:
+            match = re.search(r"^vendor_id\s+:\s+(.+)", line)
+            if match:
+                vendor = match.group(1)
+
+        if vendor == "GenuineIntel":
+            vendor = "intel"
+        elif vendor == "AuthenticAMD":
+            vendor = "amd"
+
+        ctxt = {'arch': platform.processor(),
+                'cpuVendor': vendor,
+                'desktop_enable': settings['general']['desktop_enable']}
+
+        return ctxt
+
+
+class ModulesTemplate(TemplatedFile):
+
+    def __init__(self):
+        # Subclass TemplatedFile (rather than object) so the audit can
+        # actually render and write the modules file; the keyword it
+        # expects is template_dir.
+        super(ModulesTemplate, self).__init__('/etc/initramfs-tools/modules',
+                                              ModulesContext(),
+                                              template_dir=TEMPLATES_DIR,
+                                              user='root', group='root',
+                                              mode=0o0440)
+
+    def post_write(self):
+        subprocess.check_call(['update-initramfs', '-u'])
+
+
+class SysCtlHardeningContext(object):
+    def __call__(self):
+        settings = utils.get_settings('os')
+        ctxt = {'sysctl': {}}
+
+        log("Applying sysctl settings", level=INFO)
+        extras = {'net_ipv4_ip_forward': 0,
+                  'net_ipv6_conf_all_forwarding': 0,
+                  'net_ipv6_conf_all_disable_ipv6': 1,
+                  'net_ipv4_tcp_timestamps': 0,
+                  'net_ipv4_conf_all_arp_ignore': 0,
+                  'net_ipv4_conf_all_arp_announce': 0,
+                  'kernel_sysrq': 0,
+                  'fs_suid_dumpable': 0,
+                  'kernel_modules_disabled': 1}
+
+        if settings['sysctl']['ipv6_enable']:
+            extras['net_ipv6_conf_all_disable_ipv6'] = 0
+
+        if settings['sysctl']['forwarding']:
+            extras['net_ipv4_ip_forward'] = 1
+            extras['net_ipv6_conf_all_forwarding'] = 1
+
+        if settings['sysctl']['arp_restricted']:
+            extras['net_ipv4_conf_all_arp_ignore'] = 1
+            extras['net_ipv4_conf_all_arp_announce'] = 2
+
+        if settings['security']['kernel_enable_module_loading']:
+            extras['kernel_modules_disabled'] = 0
+
+        if settings['sysctl']['kernel_enable_sysrq']:
+            sysrq_val = settings['sysctl']['kernel_secure_sysrq']
+            extras['kernel_sysrq'] = sysrq_val
+
+        if settings['security']['kernel_enable_core_dump']:
+            extras['fs_suid_dumpable'] = 1
+
+        settings.update(extras)
+        for d in (SYSCTL_DEFAULTS % settings).split():
+            d = d.strip().partition('=')
+            key = d[0].strip()
+            path = os.path.join('/proc/sys', key.replace('.', '/'))
+            if not os.path.exists(path):
+                log("Skipping '%s' since '%s' does not exist" % (key, path),
+                    level=WARNING)
+                continue
+
+            ctxt['sysctl'][key] = d[2] or None
+
+        return {
+            'sysctl_settings': [(k, v) for k, v in ctxt['sysctl'].items()]
+        }
+
+
+class SysctlConf(TemplatedFile):
+    """An audit check for sysctl settings."""
+    def __init__(self):
+        self.conffile = '/etc/sysctl.d/99-juju-hardening.conf'
+        super(SysctlConf, self).__init__(self.conffile,
+                                         SysCtlHardeningContext(),
+                                         template_dir=TEMPLATES_DIR,
+                                         user='root', group='root',
+                                         mode=0o0440)
+
+    def post_write(self):
+        try:
+            subprocess.check_call(['sysctl', '-p', self.conffile])
+        except subprocess.CalledProcessError as e:
+            # NOTE: on some systems if sysctl cannot apply all settings it
+            # will return non-zero as well.
+ log("sysctl command returned an error (maybe some " + "keys could not be set) - %s" % (e), + level=WARNING) diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/10.hardcore.conf b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/10.hardcore.conf new file mode 100644 index 00000000..0014191f --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/10.hardcore.conf @@ -0,0 +1,8 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### +{% if disable_core_dump -%} +# Prevent core dumps for all users. These are usually only needed by developers and may contain sensitive information. +* hard core 0 +{% endif %} \ No newline at end of file diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/99-hardening.sh b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/99-hardening.sh new file mode 100644 index 00000000..616cef46 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/99-hardening.sh @@ -0,0 +1,5 @@ +TMOUT={{ tmout }} +readonly TMOUT +export TMOUT + +readonly HISTFILE diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/99-juju-hardening.conf b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/99-juju-hardening.conf new file mode 100644 index 00000000..101f1e1d --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/99-juju-hardening.conf @@ -0,0 +1,7 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### +{% for key, value in sysctl_settings -%} +{{ key }}={{ value }} +{% endfor -%} diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/__init__.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/login.defs b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/login.defs new file mode 100644 index 00000000..7d107637 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/login.defs @@ -0,0 +1,349 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### +# +# /etc/login.defs - Configuration control definitions for the login package. +# +# Three items must be defined: MAIL_DIR, ENV_SUPATH, and ENV_PATH. +# If unspecified, some arbitrary (and possibly incorrect) value will +# be assumed. All other items are optional - if not specified then +# the described action or option will be inhibited. +# +# Comment lines (lines beginning with "#") and blank lines are ignored. +# +# Modified for Linux. --marekm + +# REQUIRED for useradd/userdel/usermod +# Directory where mailboxes reside, _or_ name of file, relative to the +# home directory. If you _do_ define MAIL_DIR and MAIL_FILE, +# MAIL_DIR takes precedence. 
+# +# Essentially: +# - MAIL_DIR defines the location of users mail spool files +# (for mbox use) by appending the username to MAIL_DIR as defined +# below. +# - MAIL_FILE defines the location of the users mail spool files as the +# fully-qualified filename obtained by prepending the user home +# directory before $MAIL_FILE +# +# NOTE: This is no more used for setting up users MAIL environment variable +# which is, starting from shadow 4.0.12-1 in Debian, entirely the +# job of the pam_mail PAM modules +# See default PAM configuration files provided for +# login, su, etc. +# +# This is a temporary situation: setting these variables will soon +# move to /etc/default/useradd and the variables will then be +# no more supported +MAIL_DIR /var/mail +#MAIL_FILE .mail + +# +# Enable logging and display of /var/log/faillog login failure info. +# This option conflicts with the pam_tally PAM module. +# +FAILLOG_ENAB yes + +# +# Enable display of unknown usernames when login failures are recorded. +# +# WARNING: Unknown usernames may become world readable. +# See #290803 and #298773 for details about how this could become a security +# concern +LOG_UNKFAIL_ENAB no + +# +# Enable logging of successful logins +# +LOG_OK_LOGINS yes + +# +# Enable "syslog" logging of su activity - in addition to sulog file logging. +# SYSLOG_SG_ENAB does the same for newgrp and sg. +# +SYSLOG_SU_ENAB yes +SYSLOG_SG_ENAB yes + +# +# If defined, all su activity is logged to this file. +# +#SULOG_FILE /var/log/sulog + +# +# If defined, file which maps tty line to TERM environment parameter. +# Each line of the file is in a format something like "vt100 tty01". +# +#TTYTYPE_FILE /etc/ttytype + +# +# If defined, login failures will be logged here in a utmp format +# last, when invoked as lastb, will read /var/log/btmp, so... +# +FTMP_FILE /var/log/btmp + +# +# If defined, the command name to display when running "su -". For +# example, if this is defined as "su" then a "ps" will display the +# command is "-su". If not defined, then "ps" would display the +# name of the shell actually being run, e.g. something like "-sh". +# +SU_NAME su + +# +# If defined, file which inhibits all the usual chatter during the login +# sequence. If a full pathname, then hushed mode will be enabled if the +# user's name or shell are found in the file. If not a full pathname, then +# hushed mode will be enabled if the file exists in the user's home directory. +# +HUSHLOGIN_FILE .hushlogin +#HUSHLOGIN_FILE /etc/hushlogins + +# +# *REQUIRED* The default PATH settings, for superuser and normal users. +# +# (they are minimal, add the rest in the shell startup files) +ENV_SUPATH PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin +ENV_PATH PATH=/usr/local/bin:/usr/bin:/bin{% if additional_user_paths %}{{ additional_user_paths }}{% endif %} + +# +# Terminal permissions +# +# TTYGROUP Login tty will be assigned this group ownership. +# TTYPERM Login tty will be set to this permission. +# +# If you have a "write" program which is "setgid" to a special group +# which owns the terminals, define TTYGROUP to the group number and +# TTYPERM to 0620. Otherwise leave TTYGROUP commented out and assign +# TTYPERM to either 622 or 600. +# +# In Debian /usr/bin/bsd-write or similar programs are setgid tty +# However, the default and recommended value for TTYPERM is still 0600 +# to not allow anyone to write to anyone else console or terminal + +# Users can still allow other people to write them by issuing +# the "mesg y" command. 
+ +TTYGROUP tty +TTYPERM 0600 + +# +# Login configuration initializations: +# +# ERASECHAR Terminal ERASE character ('\010' = backspace). +# KILLCHAR Terminal KILL character ('\025' = CTRL/U). +# UMASK Default "umask" value. +# +# The ERASECHAR and KILLCHAR are used only on System V machines. +# +# UMASK is the default umask value for pam_umask and is used by +# useradd and newusers to set the mode of the new home directories. +# 022 is the "historical" value in Debian for UMASK +# 027, or even 077, could be considered better for privacy +# There is no One True Answer here : each sysadmin must make up his/her +# mind. +# +# If USERGROUPS_ENAB is set to "yes", that will modify this UMASK default value +# for private user groups, i. e. the uid is the same as gid, and username is +# the same as the primary group name: for these, the user permissions will be +# used as group permissions, e. g. 022 will become 002. +# +# Prefix these values with "0" to get octal, "0x" to get hexadecimal. +# +ERASECHAR 0177 +KILLCHAR 025 +UMASK {{ umask }} + +# Enable setting of the umask group bits to be the same as owner bits (examples: `022` -> `002`, `077` -> `007`) for non-root users, if the uid is the same as gid, and username is the same as the primary group name. +# If set to yes, userdel will remove the user´s group if it contains no more members, and useradd will create by default a group with the name of the user. +USERGROUPS_ENAB yes + +# +# Password aging controls: +# +# PASS_MAX_DAYS Maximum number of days a password may be used. +# PASS_MIN_DAYS Minimum number of days allowed between password changes. +# PASS_WARN_AGE Number of days warning given before a password expires. +# +PASS_MAX_DAYS {{ pwd_max_age }} +PASS_MIN_DAYS {{ pwd_min_age }} +PASS_WARN_AGE 7 + +# +# Min/max values for automatic uid selection in useradd +# +UID_MIN {{ uid_min }} +UID_MAX 60000 +# System accounts +SYS_UID_MIN {{ sys_uid_min }} +SYS_UID_MAX {{ sys_uid_max }} + +# Min/max values for automatic gid selection in groupadd +GID_MIN {{ gid_min }} +GID_MAX 60000 +# System accounts +SYS_GID_MIN {{ sys_gid_min }} +SYS_GID_MAX {{ sys_gid_max }} + +# +# Max number of login retries if password is bad. This will most likely be +# overridden by PAM, since the default pam_unix module has it's own built +# in of 3 retries. However, this is a safe fallback in case you are using +# an authentication module that does not enforce PAM_MAXTRIES. +# +LOGIN_RETRIES {{ login_retries }} + +# +# Max time in seconds for login +# +LOGIN_TIMEOUT {{ login_timeout }} + +# +# Which fields may be changed by regular users using chfn - use +# any combination of letters "frwh" (full name, room number, work +# phone, home phone). If not defined, no changes are allowed. +# For backward compatibility, "yes" = "rwh" and "no" = "frwh". +# +{% if chfn_restrict %} +CHFN_RESTRICT {{ chfn_restrict }} +{% endif %} + +# +# Should login be allowed if we can't cd to the home directory? +# Default in no. +# +DEFAULT_HOME {% if allow_login_without_home %} yes {% else %} no {% endif %} + +# +# If defined, this command is run when removing a user. +# It should remove any at/cron/print jobs etc. owned by +# the user to be removed (passed as the first argument). +# +#USERDEL_CMD /usr/sbin/userdel_local + +# +# Enable setting of the umask group bits to be the same as owner bits +# (examples: 022 -> 002, 077 -> 007) for non-root users, if the uid is +# the same as gid, and username is the same as the primary group name. 
+# +# If set to yes, userdel will remove the user´s group if it contains no +# more members, and useradd will create by default a group with the name +# of the user. +# +USERGROUPS_ENAB yes + +# +# Instead of the real user shell, the program specified by this parameter +# will be launched, although its visible name (argv[0]) will be the shell's. +# The program may do whatever it wants (logging, additional authentication, +# banner, ...) before running the actual shell. +# +# FAKE_SHELL /bin/fakeshell + +# +# If defined, either full pathname of a file containing device names or +# a ":" delimited list of device names. Root logins will be allowed only +# upon these devices. +# +# This variable is used by login and su. +# +#CONSOLE /etc/consoles +#CONSOLE console:tty01:tty02:tty03:tty04 + +# +# List of groups to add to the user's supplementary group set +# when logging in on the console (as determined by the CONSOLE +# setting). Default is none. +# +# Use with caution - it is possible for users to gain permanent +# access to these groups, even when not logged in on the console. +# How to do it is left as an exercise for the reader... +# +# This variable is used by login and su. +# +#CONSOLE_GROUPS floppy:audio:cdrom + +# +# If set to "yes", new passwords will be encrypted using the MD5-based +# algorithm compatible with the one used by recent releases of FreeBSD. +# It supports passwords of unlimited length and longer salt strings. +# Set to "no" if you need to copy encrypted passwords to other systems +# which don't understand the new algorithm. Default is "no". +# +# This variable is deprecated. You should use ENCRYPT_METHOD. +# +MD5_CRYPT_ENAB no + +# +# If set to MD5 , MD5-based algorithm will be used for encrypting password +# If set to SHA256, SHA256-based algorithm will be used for encrypting password +# If set to SHA512, SHA512-based algorithm will be used for encrypting password +# If set to DES, DES-based algorithm will be used for encrypting password (default) +# Overrides the MD5_CRYPT_ENAB option +# +# Note: It is recommended to use a value consistent with +# the PAM modules configuration. +# +ENCRYPT_METHOD SHA512 + +# +# Only used if ENCRYPT_METHOD is set to SHA256 or SHA512. +# +# Define the number of SHA rounds. +# With a lot of rounds, it is more difficult to brute forcing the password. +# But note also that it more CPU resources will be needed to authenticate +# users. +# +# If not specified, the libc will choose the default number of rounds (5000). +# The values must be inside the 1000-999999999 range. +# If only one of the MIN or MAX values is set, then this value will be used. +# If MIN > MAX, the highest value will be used. +# +# SHA_CRYPT_MIN_ROUNDS 5000 +# SHA_CRYPT_MAX_ROUNDS 5000 + +################# OBSOLETED BY PAM ############## +# # +# These options are now handled by PAM. Please # +# edit the appropriate file in /etc/pam.d/ to # +# enable the equivelants of them. +# +############### + +#MOTD_FILE +#DIALUPS_CHECK_ENAB +#LASTLOG_ENAB +#MAIL_CHECK_ENAB +#OBSCURE_CHECKS_ENAB +#PORTTIME_CHECKS_ENAB +#SU_WHEEL_ONLY +#CRACKLIB_DICTPATH +#PASS_CHANGE_TRIES +#PASS_ALWAYS_WARN +#ENVIRON_FILE +#NOLOGINS_FILE +#ISSUE_FILE +#PASS_MIN_LEN +#PASS_MAX_LEN +#ULIMIT +#ENV_HZ +#CHFN_AUTH +#CHSH_AUTH +#FAIL_DELAY + +################# OBSOLETED ####################### +# # +# These options are no more handled by shadow. # +# # +# Shadow utilities will display a warning if they # +# still appear. 
# +# # +################################################### + +# CLOSE_SESSIONS +# LOGIN_STRING +# NO_PASSWORD_CONSOLE +# QMAIL_DIR + + + diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/modules b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/modules new file mode 100644 index 00000000..ef0354ee --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/modules @@ -0,0 +1,117 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### +# /etc/modules: kernel modules to load at boot time. +# +# This file contains the names of kernel modules that should be loaded +# at boot time, one per line. Lines beginning with "#" are ignored. +# Parameters can be specified after the module name. + +# Arch +# ---- +# +# Modules for certains builds, contains support modules and some CPU-specific optimizations. + +{% if arch == "x86_64" -%} +# Optimize for x86_64 cryptographic features +twofish-x86_64-3way +twofish-x86_64 +aes-x86_64 +salsa20-x86_64 +blowfish-x86_64 +{% endif -%} + +{% if cpuVendor == "intel" -%} +# Intel-specific optimizations +ghash-clmulni-intel +aesni-intel +kvm-intel +{% endif -%} + +{% if cpuVendor == "amd" -%} +# AMD-specific optimizations +kvm-amd +{% endif -%} + +kvm + + +# Crypto +# ------ + +# Some core modules which comprise strong cryptography. +blowfish_common +blowfish_generic +ctr +cts +lrw +lzo +rmd160 +rmd256 +rmd320 +serpent +sha512_generic +twofish_common +twofish_generic +xts +zlib + + +# Drivers +# ------- + +# Basics +lp +rtc +loop + +# Filesystems +ext2 +btrfs + +{% if desktop_enable -%} +# Desktop +psmouse +snd +snd_ac97_codec +snd_intel8x0 +snd_page_alloc +snd_pcm +snd_timer +soundcore +usbhid +{% endif -%} + +# Lib +# --- +xz + + +# Net +# --- + +# All packets needed for netfilter rules (ie iptables, ebtables). +ip_tables +x_tables +iptable_filter +iptable_nat + +# Targets +ipt_LOG +ipt_REJECT + +# Modules +xt_connlimit +xt_tcpudp +xt_recent +xt_limit +xt_conntrack +nf_conntrack +nf_conntrack_ipv4 +nf_defrag_ipv4 +xt_state +nf_nat + +# Addons +xt_pknock \ No newline at end of file diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/passwdqc.conf b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/passwdqc.conf new file mode 100644 index 00000000..f98d14e5 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/passwdqc.conf @@ -0,0 +1,11 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. 
+############################################################################### +Name: passwdqc password strength enforcement +Default: yes +Priority: 1024 +Conflicts: cracklib +Password-Type: Primary +Password: + requisite pam_passwdqc.so {{ auth_pam_passwdqc_options }} diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/pinerolo_profile.sh b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/pinerolo_profile.sh new file mode 100644 index 00000000..fd2de791 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/pinerolo_profile.sh @@ -0,0 +1,8 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### +# Disable core dumps via soft limits for all users. Compliance to this setting +# is voluntary and can be modified by users up to a hard limit. This setting is +# a sane default. +ulimit -S -c 0 > /dev/null 2>&1 diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/securetty b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/securetty new file mode 100644 index 00000000..15b18d4e --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/securetty @@ -0,0 +1,11 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### +# A list of TTYs, from which root can log in +# see `man securetty` for reference +{% if ttys -%} +{% for tty in ttys -%} +{{ tty }} +{% endfor -%} +{% endif -%} diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/tally2 b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/tally2 new file mode 100644 index 00000000..d9620299 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/host/templates/tally2 @@ -0,0 +1,14 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### +Name: tally2 lockout after failed attempts enforcement +Default: yes +Priority: 1024 +Conflicts: cracklib +Auth-Type: Primary +Auth-Initial: + required pam_tally2.so deny={{ auth_retries }} onerr=fail unlock_time={{ auth_lockout_time }} +Account-Type: Primary +Account-Initial: + required pam_tally2.so diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/mysql/__init__.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/mysql/__init__.py new file mode 100644 index 00000000..58bebd84 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/mysql/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from os import path + +TEMPLATES_DIR = path.join(path.dirname(__file__), 'templates') diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/mysql/checks/__init__.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/mysql/checks/__init__.py new file mode 100644 index 00000000..1990d851 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/mysql/checks/__init__.py @@ -0,0 +1,29 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from charmhelpers.core.hookenv import ( + log, + DEBUG, +) +from charmhelpers.contrib.hardening.mysql.checks import config + + +def run_mysql_checks(): + log("Starting MySQL hardening checks.", level=DEBUG) + checks = config.get_audits() + for check in checks: + log("Running '%s' check" % (check.__class__.__name__), level=DEBUG) + check.ensure_compliance() + + log("MySQL hardening checks complete.", level=DEBUG) diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/mysql/checks/config.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/mysql/checks/config.py new file mode 100644 index 00000000..8bf9f36c --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/mysql/checks/config.py @@ -0,0 +1,86 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import subprocess + +from charmhelpers.core.hookenv import ( + log, + WARNING, +) +from charmhelpers.contrib.hardening.audits.file import ( + FilePermissionAudit, + DirectoryPermissionAudit, + TemplatedFile, +) +from charmhelpers.contrib.hardening.mysql import TEMPLATES_DIR +from charmhelpers.contrib.hardening import utils + + +def get_audits(): + """Get MySQL hardening config audits. 
+ + :returns: dictionary of audits + """ + if subprocess.call(['which', 'mysql'], stdout=subprocess.PIPE) != 0: + log("MySQL does not appear to be installed on this node - " + "skipping mysql hardening", level=WARNING) + return [] + + settings = utils.get_settings('mysql') + hardening_settings = settings['hardening'] + my_cnf = hardening_settings['mysql-conf'] + + audits = [ + FilePermissionAudit(paths=[my_cnf], user='root', + group='root', mode=0o0600), + + TemplatedFile(hardening_settings['hardening-conf'], + MySQLConfContext(), + TEMPLATES_DIR, + mode=0o0750, + user='mysql', + group='root', + service_actions=[{'service': 'mysql', + 'actions': ['restart']}]), + + # MySQL and Percona charms do not allow configuration of the + # data directory, so use the default. + DirectoryPermissionAudit('/var/lib/mysql', + user='mysql', + group='mysql', + recursive=False, + mode=0o755), + + DirectoryPermissionAudit('/etc/mysql', + user='root', + group='root', + recursive=False, + mode=0o700), + ] + + return audits + + +class MySQLConfContext(object): + """Defines the set of key/value pairs to set in a mysql config file. + + This context, when called, will return a dictionary containing the + key/value pairs of setting to specify in the + /etc/mysql/conf.d/hardening.cnf file. + """ + def __call__(self): + settings = utils.get_settings('mysql') + return { + 'mysql_settings': [(k, v) for k, v in settings['security'].items()] + } diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/mysql/templates/__init__.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/mysql/templates/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/mysql/templates/hardening.cnf b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/mysql/templates/hardening.cnf new file mode 100644 index 00000000..8242586c --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/mysql/templates/hardening.cnf @@ -0,0 +1,12 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### +[mysqld] +{% for setting, value in mysql_settings -%} +{% if value == 'True' -%} +{{ setting }} +{% elif value != 'None' and value != None -%} +{{ setting }} = {{ value }} +{% endif -%} +{% endfor -%} diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/ssh/__init__.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/ssh/__init__.py new file mode 100644 index 00000000..58bebd84 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/ssh/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from os import path + +TEMPLATES_DIR = path.join(path.dirname(__file__), 'templates') diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/ssh/checks/__init__.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/ssh/checks/__init__.py new file mode 100644 index 00000000..edaf484b --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/ssh/checks/__init__.py @@ -0,0 +1,29 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from charmhelpers.core.hookenv import ( + log, + DEBUG, +) +from charmhelpers.contrib.hardening.ssh.checks import config + + +def run_ssh_checks(): + log("Starting SSH hardening checks.", level=DEBUG) + checks = config.get_audits() + for check in checks: + log("Running '%s' check" % (check.__class__.__name__), level=DEBUG) + check.ensure_compliance() + + log("SSH hardening checks complete.", level=DEBUG) diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/ssh/checks/config.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/ssh/checks/config.py new file mode 100644 index 00000000..41bed2d1 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/ssh/checks/config.py @@ -0,0 +1,435 @@ +# Copyright 2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from charmhelpers.contrib.network.ip import ( + get_address_in_network, + get_iface_addr, + is_ip, +) +from charmhelpers.core.hookenv import ( + log, + DEBUG, +) +from charmhelpers.fetch import ( + apt_install, + apt_update, +) +from charmhelpers.core.host import ( + lsb_release, + CompareHostReleases, +) +from charmhelpers.contrib.hardening.audits.file import ( + TemplatedFile, + FileContentAudit, +) +from charmhelpers.contrib.hardening.ssh import TEMPLATES_DIR +from charmhelpers.contrib.hardening import utils + + +def get_audits(): + """Get SSH hardening config audits. 
+
+    :returns: list of audits
+    """
+    audits = [SSHConfig(), SSHDConfig(), SSHConfigFileContentAudit(),
+              SSHDConfigFileContentAudit()]
+    return audits
+
+
+class SSHConfigContext(object):
+
+    type = 'client'
+
+    def get_macs(self, allow_weak_mac):
+        if allow_weak_mac:
+            weak_macs = 'weak'
+        else:
+            weak_macs = 'default'
+
+        default = 'hmac-sha2-512,hmac-sha2-256,hmac-ripemd160'
+        macs = {'default': default,
+                'weak': default + ',hmac-sha1'}
+
+        default = ('hmac-sha2-512-etm@openssh.com,'
+                   'hmac-sha2-256-etm@openssh.com,'
+                   'hmac-ripemd160-etm@openssh.com,umac-128-etm@openssh.com,'
+                   'hmac-sha2-512,hmac-sha2-256,hmac-ripemd160')
+        macs_66 = {'default': default,
+                   'weak': default + ',hmac-sha1'}
+
+        # Use newer MACs on Ubuntu Trusty and above
+        _release = lsb_release()['DISTRIB_CODENAME'].lower()
+        if CompareHostReleases(_release) >= 'trusty':
+            log("Detected Ubuntu 14.04 or newer, using new macs", level=DEBUG)
+            macs = macs_66
+
+        return macs[weak_macs]
+
+    def get_kexs(self, allow_weak_kex):
+        if allow_weak_kex:
+            weak_kex = 'weak'
+        else:
+            weak_kex = 'default'
+
+        default = 'diffie-hellman-group-exchange-sha256'
+        weak = (default + ',diffie-hellman-group14-sha1,'
+                'diffie-hellman-group-exchange-sha1,'
+                'diffie-hellman-group1-sha1')
+        kex = {'default': default,
+               'weak': weak}
+
+        default = ('curve25519-sha256@libssh.org,'
+                   'diffie-hellman-group-exchange-sha256')
+        weak = (default + ',diffie-hellman-group14-sha1,'
+                'diffie-hellman-group-exchange-sha1,'
+                'diffie-hellman-group1-sha1')
+        kex_66 = {'default': default,
+                  'weak': weak}
+
+        # Use newer kex on Ubuntu Trusty and above
+        _release = lsb_release()['DISTRIB_CODENAME'].lower()
+        if CompareHostReleases(_release) >= 'trusty':
+            log('Detected Ubuntu 14.04 or newer, using new key exchange '
+                'algorithms', level=DEBUG)
+            kex = kex_66
+
+        return kex[weak_kex]
+
+    def get_ciphers(self, cbc_required):
+        if cbc_required:
+            weak_ciphers = 'weak'
+        else:
+            weak_ciphers = 'default'
+
+        default = 'aes256-ctr,aes192-ctr,aes128-ctr'
+        cipher = {'default': default,
+                  'weak': default + ',aes256-cbc,aes192-cbc,aes128-cbc'}
+
+        default = ('chacha20-poly1305@openssh.com,aes256-gcm@openssh.com,'
+                   'aes128-gcm@openssh.com,aes256-ctr,aes192-ctr,aes128-ctr')
+        ciphers_66 = {'default': default,
+                      'weak': default + ',aes256-cbc,aes192-cbc,aes128-cbc'}
+
+        # Use newer ciphers on Ubuntu Trusty and above
+        _release = lsb_release()['DISTRIB_CODENAME'].lower()
+        if CompareHostReleases(_release) >= 'trusty':
+            log('Detected Ubuntu 14.04 or newer, using new ciphers',
+                level=DEBUG)
+            cipher = ciphers_66
+
+        return cipher[weak_ciphers]
+
+    def get_listening(self, listen=['0.0.0.0']):
+        """Returns a list of addresses SSH can listen on
+
+        Turns input into a sensible list of IPs SSH can listen on. Input
+        must be a python list of interface names, IPs and/or CIDRs.
+
+        :param listen: list of IPs, CIDRs, interface names
+
+        :returns: list of IPs available on the host
+        """
+        if listen == ['0.0.0.0']:
+            return listen
+
+        value = []
+        for network in listen:
+            try:
+                ip = get_address_in_network(network=network, fatal=True)
+            except ValueError:
+                if is_ip(network):
+                    ip = network
+                else:
+                    try:
+                        ip = get_iface_addr(iface=network, fatal=False)[0]
+                    except IndexError:
+                        continue
+            value.append(ip)
+        if value == []:
+            return ['0.0.0.0']
+        return value
+
+    def __call__(self):
+        settings = utils.get_settings('ssh')
+        if settings['common']['network_ipv6_enable']:
+            addr_family = 'any'
+        else:
+            addr_family = 'inet'
+
+        ctxt = {
+            'addr_family': addr_family,
+            'remote_hosts': settings['common']['remote_hosts'],
+            'password_auth_allowed':
+            settings['client']['password_authentication'],
+            'ports': settings['common']['ports'],
+            'ciphers': self.get_ciphers(settings['client']['cbc_required']),
+            'macs': self.get_macs(settings['client']['weak_hmac']),
+            'kexs': self.get_kexs(settings['client']['weak_kex']),
+            'roaming': settings['client']['roaming'],
+        }
+        return ctxt
+
+
+class SSHConfig(TemplatedFile):
+    def __init__(self):
+        path = '/etc/ssh/ssh_config'
+        super(SSHConfig, self).__init__(path=path,
+                                        template_dir=TEMPLATES_DIR,
+                                        context=SSHConfigContext(),
+                                        user='root',
+                                        group='root',
+                                        mode=0o0644)
+
+    def pre_write(self):
+        settings = utils.get_settings('ssh')
+        apt_update(fatal=True)
+        apt_install(settings['client']['package'])
+        if not os.path.exists('/etc/ssh'):
+            os.makedirs('/etc/ssh')
+        # NOTE: don't recurse
+        utils.ensure_permissions('/etc/ssh', 'root', 'root', 0o0755,
+                                 maxdepth=0)
+
+    def post_write(self):
+        # NOTE: don't recurse
+        utils.ensure_permissions('/etc/ssh', 'root', 'root', 0o0755,
+                                 maxdepth=0)
+
+
+class SSHDConfigContext(SSHConfigContext):
+
+    type = 'server'
+
+    def __call__(self):
+        settings = utils.get_settings('ssh')
+        if settings['common']['network_ipv6_enable']:
+            addr_family = 'any'
+        else:
+            addr_family = 'inet'
+
+        ctxt = {
+            'ssh_ip': self.get_listening(settings['server']['listen_to']),
+            'password_auth_allowed':
+            settings['server']['password_authentication'],
+            'ports': settings['common']['ports'],
+            'addr_family': addr_family,
+            'ciphers': self.get_ciphers(settings['server']['cbc_required']),
+            'macs': self.get_macs(settings['server']['weak_hmac']),
+            'kexs': self.get_kexs(settings['server']['weak_kex']),
+            'host_key_files': settings['server']['host_key_files'],
+            'allow_root_with_key': settings['server']['allow_root_with_key'],
+            'password_authentication':
+            settings['server']['password_authentication'],
+            'use_priv_sep': settings['server']['use_privilege_separation'],
+            'use_pam': settings['server']['use_pam'],
+            'allow_x11_forwarding': settings['server']['allow_x11_forwarding'],
+            'print_motd': settings['server']['print_motd'],
+            'print_last_log': settings['server']['print_last_log'],
+            'client_alive_interval':
+            settings['server']['alive_interval'],
+            'client_alive_count': settings['server']['alive_count'],
+            'allow_tcp_forwarding': settings['server']['allow_tcp_forwarding'],
+            'allow_agent_forwarding':
+            settings['server']['allow_agent_forwarding'],
+            'deny_users': settings['server']['deny_users'],
+            'allow_users': settings['server']['allow_users'],
+            'deny_groups': settings['server']['deny_groups'],
+            'allow_groups': settings['server']['allow_groups'],
+            'use_dns': settings['server']['use_dns'],
+            'sftp_enable': settings['server']['sftp_enable'],
+            'sftp_group': settings['server']['sftp_group'],
+            'sftp_chroot': settings['server']['sftp_chroot'],
+            'max_auth_tries': settings['server']['max_auth_tries'],
+            'max_sessions': settings['server']['max_sessions'],
+        }
+        return ctxt
+
+
+class SSHDConfig(TemplatedFile):
+    def __init__(self):
+        path = '/etc/ssh/sshd_config'
+        super(SSHDConfig, self).__init__(path=path,
+                                         template_dir=TEMPLATES_DIR,
+                                         context=SSHDConfigContext(),
+                                         user='root',
+                                         group='root',
+                                         mode=0o0600,
+                                         service_actions=[{'service': 'ssh',
+                                                           'actions':
+                                                           ['restart']}])
+
+    def pre_write(self):
+        settings = utils.get_settings('ssh')
+        apt_update(fatal=True)
+        apt_install(settings['server']['package'])
+        if not os.path.exists('/etc/ssh'):
+            os.makedirs('/etc/ssh')
+        # NOTE: don't recurse
+        utils.ensure_permissions('/etc/ssh', 'root', 'root', 0o0755,
+                                 maxdepth=0)
+
+    def post_write(self):
+        # NOTE: don't recurse
+        utils.ensure_permissions('/etc/ssh', 'root', 'root', 0o0755,
+                                 maxdepth=0)
+
+
+class SSHConfigFileContentAudit(FileContentAudit):
+    def __init__(self):
+        self.path = '/etc/ssh/ssh_config'
+        super(SSHConfigFileContentAudit, self).__init__(self.path, {})
+
+    def is_compliant(self, *args, **kwargs):
+        self.pass_cases = []
+        self.fail_cases = []
+        settings = utils.get_settings('ssh')
+
+        _release = lsb_release()['DISTRIB_CODENAME'].lower()
+        if CompareHostReleases(_release) >= 'trusty':
+            if not settings['server']['weak_hmac']:
+                self.pass_cases.append(r'^MACs.+,hmac-ripemd160$')
+            else:
+                self.pass_cases.append(r'^MACs.+,hmac-sha1$')
+
+            if settings['server']['weak_kex']:
+                self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha256[,\s]?')  # noqa
+                self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group14-sha1[,\s]?')  # noqa
+                self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha1[,\s]?')  # noqa
+                self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group1-sha1[,\s]?')  # noqa
+            else:
+                self.pass_cases.append(r'^KexAlgorithms.+,diffie-hellman-group-exchange-sha256$')  # noqa
+                self.fail_cases.append(r'^KexAlgorithms.*diffie-hellman-group14-sha1[,\s]?')  # noqa
+
+            if settings['server']['cbc_required']:
+                self.pass_cases.append(r'^Ciphers\s.*-cbc[,\s]?')
+                self.fail_cases.append(r'^Ciphers\s.*aes128-ctr[,\s]?')
+                self.fail_cases.append(r'^Ciphers\s.*aes192-ctr[,\s]?')
+                self.fail_cases.append(r'^Ciphers\s.*aes256-ctr[,\s]?')
+            else:
+                self.fail_cases.append(r'^Ciphers\s.*-cbc[,\s]?')
+                self.pass_cases.append(r'^Ciphers\schacha20-poly1305@openssh.com,.+')  # noqa
+                self.pass_cases.append(r'^Ciphers\s.*aes128-ctr$')
+                self.pass_cases.append(r'^Ciphers\s.*aes192-ctr[,\s]?')
+                self.pass_cases.append(r'^Ciphers\s.*aes256-ctr[,\s]?')
+        else:
+            if not settings['client']['weak_hmac']:
+                self.fail_cases.append(r'^MACs.+,hmac-sha1$')
+            else:
+                self.pass_cases.append(r'^MACs.+,hmac-sha1$')
+
+            if settings['client']['weak_kex']:
+                self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha256[,\s]?')  # noqa
+                self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group14-sha1[,\s]?')  # noqa
+                self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha1[,\s]?')  # noqa
+                self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group1-sha1[,\s]?')  # noqa
+            else:
+                self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha256$')  # noqa
+                self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group14-sha1[,\s]?')  # noqa
+                self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha1[,\s]?')  # noqa
+                self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group1-sha1[,\s]?')  # noqa
+
+            if
settings['client']['cbc_required']: + self.pass_cases.append(r'^Ciphers\s.*-cbc[,\s]?') + self.fail_cases.append(r'^Ciphers\s.*aes128-ctr[,\s]?') + self.fail_cases.append(r'^Ciphers\s.*aes192-ctr[,\s]?') + self.fail_cases.append(r'^Ciphers\s.*aes256-ctr[,\s]?') + else: + self.fail_cases.append(r'^Ciphers\s.*-cbc[,\s]?') + self.pass_cases.append(r'^Ciphers\s.*aes128-ctr[,\s]?') + self.pass_cases.append(r'^Ciphers\s.*aes192-ctr[,\s]?') + self.pass_cases.append(r'^Ciphers\s.*aes256-ctr[,\s]?') + + if settings['client']['roaming']: + self.pass_cases.append(r'^UseRoaming yes$') + else: + self.fail_cases.append(r'^UseRoaming yes$') + + return super(SSHConfigFileContentAudit, self).is_compliant(*args, + **kwargs) + + +class SSHDConfigFileContentAudit(FileContentAudit): + def __init__(self): + self.path = '/etc/ssh/sshd_config' + super(SSHDConfigFileContentAudit, self).__init__(self.path, {}) + + def is_compliant(self, *args, **kwargs): + self.pass_cases = [] + self.fail_cases = [] + settings = utils.get_settings('ssh') + + _release = lsb_release()['DISTRIB_CODENAME'].lower() + if CompareHostReleases(_release) >= 'trusty': + if not settings['server']['weak_hmac']: + self.pass_cases.append(r'^MACs.+,hmac-ripemd160$') + else: + self.pass_cases.append(r'^MACs.+,hmac-sha1$') + + if settings['server']['weak_kex']: + self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha256[,\s]?') # noqa + self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group14-sha1[,\s]?') # noqa + self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha1[,\s]?') # noqa + self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group1-sha1[,\s]?') # noqa + else: + self.pass_cases.append(r'^KexAlgorithms.+,diffie-hellman-group-exchange-sha256$') # noqa + self.fail_cases.append(r'^KexAlgorithms.*diffie-hellman-group14-sha1[,\s]?') # noqa + + if settings['server']['cbc_required']: + self.pass_cases.append(r'^Ciphers\s.*-cbc[,\s]?') + self.fail_cases.append(r'^Ciphers\s.*aes128-ctr[,\s]?') + self.fail_cases.append(r'^Ciphers\s.*aes192-ctr[,\s]?') + self.fail_cases.append(r'^Ciphers\s.*aes256-ctr[,\s]?') + else: + self.fail_cases.append(r'^Ciphers\s.*-cbc[,\s]?') + self.pass_cases.append(r'^Ciphers\schacha20-poly1305@openssh.com,.+') # noqa + self.pass_cases.append(r'^Ciphers\s.*aes128-ctr$') + self.pass_cases.append(r'^Ciphers\s.*aes192-ctr[,\s]?') + self.pass_cases.append(r'^Ciphers\s.*aes256-ctr[,\s]?') + else: + if not settings['server']['weak_hmac']: + self.pass_cases.append(r'^MACs.+,hmac-ripemd160$') + else: + self.pass_cases.append(r'^MACs.+,hmac-sha1$') + + if settings['server']['weak_kex']: + self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha256[,\s]?') # noqa + self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group14-sha1[,\s]?') # noqa + self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha1[,\s]?') # noqa + self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group1-sha1[,\s]?') # noqa + else: + self.pass_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha256$') # noqa + self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group14-sha1[,\s]?') # noqa + self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group-exchange-sha1[,\s]?') # noqa + self.fail_cases.append(r'^KexAlgorithms\sdiffie-hellman-group1-sha1[,\s]?') # noqa + + if settings['server']['cbc_required']: + self.pass_cases.append(r'^Ciphers\s.*-cbc[,\s]?') + self.fail_cases.append(r'^Ciphers\s.*aes128-ctr[,\s]?') + 
self.fail_cases.append(r'^Ciphers\s.*aes192-ctr[,\s]?') + self.fail_cases.append(r'^Ciphers\s.*aes256-ctr[,\s]?') + else: + self.fail_cases.append(r'^Ciphers\s.*-cbc[,\s]?') + self.pass_cases.append(r'^Ciphers\s.*aes128-ctr[,\s]?') + self.pass_cases.append(r'^Ciphers\s.*aes192-ctr[,\s]?') + self.pass_cases.append(r'^Ciphers\s.*aes256-ctr[,\s]?') + + if settings['server']['sftp_enable']: + self.pass_cases.append(r'^Subsystem\ssftp') + else: + self.fail_cases.append(r'^Subsystem\ssftp') + + return super(SSHDConfigFileContentAudit, self).is_compliant(*args, + **kwargs) diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/ssh/templates/__init__.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/ssh/templates/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/ssh/templates/ssh_config b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/ssh/templates/ssh_config new file mode 100644 index 00000000..9742d8e2 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/ssh/templates/ssh_config @@ -0,0 +1,70 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### +# This is the ssh client system-wide configuration file. See +# ssh_config(5) for more information. This file provides defaults for +# users, and the values can be changed in per-user configuration files +# or on the command line. + +# Configuration data is parsed as follows: +# 1. command line options +# 2. user-specific file +# 3. system-wide file +# Any configuration value is only changed the first time it is set. +# Thus, host-specific definitions should be at the beginning of the +# configuration file, and defaults at the end. + +# Site-wide defaults for some commonly used options. For a comprehensive +# list of available options, their meanings and defaults, please see the +# ssh_config(5) man page. + +# Restrict the following configuration to be limited to this Host. 
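+# For example (illustrative): rendered with remote_hosts=['bastion1',
+# 'bastion2'] the block below becomes "Host bastion1 bastion2", scoping all
+# following options to those hosts; with an empty remote_hosts no Host line
+# is emitted and the options apply to every host.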
+{% if remote_hosts -%} +Host {{ ' '.join(remote_hosts) }} +{% endif %} +ForwardAgent no +ForwardX11 no +ForwardX11Trusted yes +RhostsRSAAuthentication no +RSAAuthentication yes +PasswordAuthentication {{ password_auth_allowed }} +HostbasedAuthentication no +GSSAPIAuthentication no +GSSAPIDelegateCredentials no +GSSAPIKeyExchange no +GSSAPITrustDNS no +BatchMode no +CheckHostIP yes +AddressFamily {{ addr_family }} +ConnectTimeout 0 +StrictHostKeyChecking ask +IdentityFile ~/.ssh/identity +IdentityFile ~/.ssh/id_rsa +IdentityFile ~/.ssh/id_dsa +# The port at the destination should be defined +{% for port in ports -%} +Port {{ port }} +{% endfor %} +Protocol 2 +Cipher 3des +{% if ciphers -%} +Ciphers {{ ciphers }} +{%- endif %} +{% if macs -%} +MACs {{ macs }} +{%- endif %} +{% if kexs -%} +KexAlgorithms {{ kexs }} +{%- endif %} +EscapeChar ~ +Tunnel no +TunnelDevice any:any +PermitLocalCommand no +VisualHostKey no +RekeyLimit 1G 1h +SendEnv LANG LC_* +HashKnownHosts yes +{% if roaming -%} +UseRoaming {{ roaming }} +{% endif %} diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/ssh/templates/sshd_config b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/ssh/templates/sshd_config new file mode 100644 index 00000000..5f87298a --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/ssh/templates/sshd_config @@ -0,0 +1,159 @@ +############################################################################### +# WARNING: This configuration file is maintained by Juju. Local changes may +# be overwritten. +############################################################################### +# Package generated configuration file +# See the sshd_config(5) manpage for details + +# What ports, IPs and protocols we listen for +{% for port in ports -%} +Port {{ port }} +{% endfor -%} +AddressFamily {{ addr_family }} +# Use these options to restrict which interfaces/protocols sshd will bind to +{% if ssh_ip -%} +{% for ip in ssh_ip -%} +ListenAddress {{ ip }} +{% endfor %} +{%- else -%} +ListenAddress :: +ListenAddress 0.0.0.0 +{% endif -%} +Protocol 2 +{% if ciphers -%} +Ciphers {{ ciphers }} +{% endif -%} +{% if macs -%} +MACs {{ macs }} +{% endif -%} +{% if kexs -%} +KexAlgorithms {{ kexs }} +{% endif -%} +# HostKeys for protocol version 2 +{% for keyfile in host_key_files -%} +HostKey {{ keyfile }} +{% endfor -%} + +# Privilege Separation is turned on for security +{% if use_priv_sep -%} +UsePrivilegeSeparation {{ use_priv_sep }} +{% endif -%} + +# Lifetime and size of ephemeral version 1 server key +KeyRegenerationInterval 3600 +ServerKeyBits 1024 + +# Logging +SyslogFacility AUTH +LogLevel VERBOSE + +# Authentication: +LoginGraceTime 30s +{% if allow_root_with_key -%} +PermitRootLogin without-password +{% else -%} +PermitRootLogin no +{% endif %} +PermitTunnel no +PermitUserEnvironment no +StrictModes yes + +RSAAuthentication yes +PubkeyAuthentication yes +AuthorizedKeysFile %h/.ssh/authorized_keys + +# Don't read the user's ~/.rhosts and ~/.shosts files +IgnoreRhosts yes +# For this to work you will also need host keys in /etc/ssh_known_hosts +RhostsRSAAuthentication no +# similar for protocol version 2 +HostbasedAuthentication no +# Uncomment if you don't trust ~/.ssh/known_hosts for RhostsRSAAuthentication +IgnoreUserKnownHosts yes + +# To enable empty passwords, change to yes (NOT RECOMMENDED) +PermitEmptyPasswords no + +# Change to yes to enable challenge-response passwords (beware issues with +# some PAM modules and threads) +ChallengeResponseAuthentication no + +# Change 
to no to disable tunnelled clear text passwords
+PasswordAuthentication {{ password_authentication }}
+
+# Kerberos options
+KerberosAuthentication no
+KerberosGetAFSToken no
+KerberosOrLocalPasswd no
+KerberosTicketCleanup yes
+
+# GSSAPI options
+GSSAPIAuthentication no
+GSSAPICleanupCredentials yes
+
+X11Forwarding {{ allow_x11_forwarding }}
+X11DisplayOffset 10
+X11UseLocalhost yes
+GatewayPorts no
+PrintMotd {{ print_motd }}
+PrintLastLog {{ print_last_log }}
+TCPKeepAlive no
+UseLogin no
+
+ClientAliveInterval {{ client_alive_interval }}
+ClientAliveCountMax {{ client_alive_count }}
+AllowTcpForwarding {{ allow_tcp_forwarding }}
+AllowAgentForwarding {{ allow_agent_forwarding }}
+
+MaxStartups 10:30:100
+#Banner /etc/issue.net
+
+# Allow client to pass locale environment variables
+AcceptEnv LANG LC_*
+
+# Set this to 'yes' to enable PAM authentication, account processing,
+# and session processing. If this is enabled, PAM authentication will
+# be allowed through the ChallengeResponseAuthentication and
+# PasswordAuthentication. Depending on your PAM configuration,
+# PAM authentication via ChallengeResponseAuthentication may bypass
+# the setting of "PermitRootLogin without-password".
+# If you just want the PAM account and session checks to run without
+# PAM authentication, then enable this but set PasswordAuthentication
+# and ChallengeResponseAuthentication to 'no'.
+UsePAM {{ use_pam }}
+
+{% if deny_users -%}
+DenyUsers {{ deny_users }}
+{% endif -%}
+{% if allow_users -%}
+AllowUsers {{ allow_users }}
+{% endif -%}
+{% if deny_groups -%}
+DenyGroups {{ deny_groups }}
+{% endif -%}
+{% if allow_groups -%}
+AllowGroups {{ allow_groups }}
+{% endif -%}
+UseDNS {{ use_dns }}
+MaxAuthTries {{ max_auth_tries }}
+MaxSessions {{ max_sessions }}
+
+{% if sftp_enable -%}
+# Configuration, in case SFTP is used
+## override default of no subsystems
+## Subsystem sftp /opt/app/openssh5/libexec/sftp-server
+Subsystem sftp internal-sftp -l VERBOSE
+
+## These lines must appear at the *end* of sshd_config
+Match Group {{ sftp_group }}
+ForceCommand internal-sftp -l VERBOSE
+ChrootDirectory {{ sftp_chroot }}
+{% else -%}
+# Configuration, in case SFTP is used
+## override default of no subsystems
+## Subsystem sftp /opt/app/openssh5/libexec/sftp-server
+## These lines must appear at the *end* of sshd_config
+Match Group sftponly
+ForceCommand internal-sftp -l VERBOSE
+ChrootDirectory /sftpchroot/home/%u
+{% endif %}
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/templating.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/templating.py
new file mode 100644
index 00000000..4dee5465
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/templating.py
@@ -0,0 +1,69 @@
+# Copyright 2016 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
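+
+# Example usage sketch (illustrative; the context values are invented, but
+# the mysql hardening.cnf template added in this patch iterates
+# 'mysql_settings' pairs):
+#
+#     from charmhelpers.contrib.hardening.mysql import TEMPLATES_DIR
+#     render_and_write(TEMPLATES_DIR, '/etc/mysql/conf.d/hardening.cnf',
+#                      {'mysql_settings': [('automatic_sp_privileges',
+#                                           'False')]})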
+
+import os
+
+from charmhelpers.core.hookenv import (
+    log,
+    DEBUG,
+    WARNING,
+)
+
+try:
+    from jinja2 import FileSystemLoader, Environment
+except ImportError:
+    from charmhelpers.fetch import apt_install
+    from charmhelpers.fetch import apt_update
+    apt_update(fatal=True)
+    apt_install('python3-jinja2', fatal=True)
+    from jinja2 import FileSystemLoader, Environment
+
+
+# NOTE: function separated from main rendering code to facilitate easier
+# mocking in unit tests.
+def write(path, data):
+    with open(path, 'wb') as out:
+        out.write(data)
+
+
+def get_template_path(template_dir, path):
+    """Returns the template file which would be used to render the path.
+
+    The path to the template file is returned.
+    :param template_dir: the directory the templates are located in
+    :param path: the file path to be written to.
+    :returns: path to the template file
+    """
+    return os.path.join(template_dir, os.path.basename(path))
+
+
+def render_and_write(template_dir, path, context):
+    """Renders the specified template into the file.
+
+    :param template_dir: the directory to load the template from
+    :param path: the path to write the templated contents to
+    :param context: the parameters to pass to the rendering engine
+    """
+    env = Environment(loader=FileSystemLoader(template_dir))
+    template_file = os.path.basename(path)
+    template = env.get_template(template_file)
+    log('Rendering from template: %s' % template.name, level=DEBUG)
+    rendered_content = template.render(context)
+    if not rendered_content:
+        log("Render returned None - skipping '%s'" % path,
+            level=WARNING)
+        return
+
+    write(path, rendered_content.encode('utf-8').strip())
+    log('Wrote template %s' % path, level=DEBUG)
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardening/utils.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/utils.py
new file mode 100644
index 00000000..f93851a9
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardening/utils.py
@@ -0,0 +1,154 @@
+# Copyright 2016-2021 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import glob
+import grp
+import os
+import pwd
+import yaml
+
+from charmhelpers.core.hookenv import (
+    log,
+    DEBUG,
+    INFO,
+    WARNING,
+    ERROR,
+)
+
+
+# Global settings cache. Since each hook fire entails a fresh module import it
+# is safe to hold this in memory and not risk missing config changes (since
+# they will result in a new hook fire and thus re-import).
+__SETTINGS__ = {}
+
+
+def _get_defaults(modules):
+    """Load the default config for the provided modules.
+
+    :param modules: stack modules config defaults to lookup.
+    :returns: modules default config dictionary.
+    """
+    default = os.path.join(os.path.dirname(__file__),
+                           'defaults/%s.yaml' % (modules))
+    return yaml.safe_load(open(default))
+
+
+def _get_schema(modules):
+    """Load the config schema for the provided modules.
+
+    NOTE: this schema is intended to have a 1-1 relationship with the keys
+    in the default config and is used as a means to verify valid overrides
+    provided by the user.
+
+    :param modules: stack modules config schema to lookup.
+    :returns: modules default schema dictionary.
+    """
+    schema = os.path.join(os.path.dirname(__file__),
+                          'defaults/%s.yaml.schema' % (modules))
+    return yaml.safe_load(open(schema))
+
+
+def _get_user_provided_overrides(modules):
+    """Load user-provided config overrides.
+
+    :param modules: stack modules to lookup in user overrides yaml file.
+    :returns: overrides dictionary.
+    """
+    overrides = os.path.join(os.environ['JUJU_CHARM_DIR'],
+                             'hardening.yaml')
+    if os.path.exists(overrides):
+        log("Found user-provided config overrides file '%s'" %
+            (overrides), level=DEBUG)
+        settings = yaml.safe_load(open(overrides))
+        if settings and settings.get(modules):
+            log("Applying '%s' overrides" % (modules), level=DEBUG)
+            return settings.get(modules)
+
+        log("No overrides found for '%s'" % (modules), level=DEBUG)
+    else:
+        log("No hardening config overrides file '%s' found in charm "
+            "root dir" % (overrides), level=DEBUG)
+
+    return {}
+
+
+def _apply_overrides(settings, overrides, schema):
+    """Get overrides config overlaid onto modules defaults.
+
+    :param settings: modules default config to apply overrides on top of.
+    :param overrides: user-provided config overrides.
+    :param schema: config schema used to validate override keys.
+    :returns: dictionary of modules config with user overrides applied.
+    """
+    if overrides:
+        for k, v in overrides.items():
+            if k in schema:
+                if schema[k] is None:
+                    settings[k] = v
+                elif type(schema[k]) is dict:
+                    settings[k] = _apply_overrides(settings[k], overrides[k],
+                                                   schema[k])
+                else:
+                    msg = ("Unexpected type found in schema '%s'" %
+                           type(schema[k]))
+                    log(msg, level=ERROR)
+                    raise Exception(msg)
+            else:
+                log("Unknown override key '%s' - ignoring" % (k), level=INFO)
+
+    return settings
+
+
+def get_settings(modules):
+    global __SETTINGS__
+    if modules in __SETTINGS__:
+        return __SETTINGS__[modules]
+
+    schema = _get_schema(modules)
+    settings = _get_defaults(modules)
+    overrides = _get_user_provided_overrides(modules)
+    __SETTINGS__[modules] = _apply_overrides(settings, overrides, schema)
+    return __SETTINGS__[modules]
+
+
+def ensure_permissions(path, user, group, permissions, maxdepth=-1):
+    """Ensure permissions for path.
+
+    If path is a file, apply to file and return. If path is a directory,
+    apply recursively (if required) to directory contents and return.
+
+    :param path: path of the file or directory to act on
+    :param user: user name
+    :param group: group name
+    :param permissions: octal permissions
+    :param maxdepth: maximum recursion depth. A negative maxdepth allows
+                     infinite recursion and maxdepth=0 means no recursion.
+    :returns: None
+    """
+    if not os.path.exists(path):
+        log("File '%s' does not exist - cannot set permissions" % (path),
+            level=WARNING)
+        return
+
+    _user = pwd.getpwnam(user)
+    os.chown(path, _user.pw_uid, grp.getgrnam(group).gr_gid)
+    os.chmod(path, permissions)
+
+    if maxdepth == 0:
+        log("Max recursion depth reached - skipping further recursion",
+            level=DEBUG)
+        return
+    elif maxdepth > 0:
+        maxdepth -= 1
+
+    if os.path.isdir(path):
+        contents = glob.glob("%s/*" % (path))
+        for c in contents:
+            ensure_permissions(c, user=user, group=group,
+                               permissions=permissions, maxdepth=maxdepth)
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardware/__init__.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardware/__init__.py
new file mode 100644
index 00000000..474a8f3b
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardware/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2022 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/hardware/pci.py b/ceph-radosgw/hooks/charmhelpers/contrib/hardware/pci.py new file mode 100644 index 00000000..f6b1789a --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/hardware/pci.py @@ -0,0 +1,288 @@ +#!/usr/bin/env python3 +# +# Copyright 2016-2022 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import glob +import itertools +import logging +import os +import re +import shlex +import subprocess +import typing + + +def format_pci_addr(pci_addr: str) -> str: + """Format a PCI address with 0 fill for parts + + :param: pci_addr: unformatted PCI address + :type: str + :returns: formatted PCI address + :rtype: str + """ + domain, bus, slot_func = pci_addr.split(":") + slot, func = slot_func.split(".") + return "{}:{}:{}.{}".format( + domain.zfill(4), bus.zfill(2), slot.zfill(2), func + ) + + +def get_sysnet_interfaces_and_macs() -> list: + """Catalog interface information from local system + + each device dict contains: + + interface: logical name + mac_address: MAC address + pci_address: PCI address + state: Current interface state (up/down) + sriov: Boolean indicating whether interface is an SR-IOV + capable device. 
+        sriov_totalvfs: Total VF capacity of device
+        sriov_numvfs: Configured VF capacity of device
+
+    :returns: array of dict objects containing details of each interface
+    :rtype: list
+    """
+    net_devs = []
+    for sdir in itertools.chain(
+            glob.glob("/sys/bus/pci/devices/*/net/../"),
+            glob.glob("/sys/bus/pci/devices/*/virtio*/net/../")):
+        fq_path = os.path.realpath(sdir)
+        path = fq_path.split("/")
+        if "virtio" in path[-1]:
+            pci_address = path[-2]
+        else:
+            pci_address = path[-1]
+        ifname = get_sysnet_interface(sdir)
+        if not ifname:
+            logging.warning("Unable to determine interface name for PCI "
+                            "device {}".format(pci_address))
+            continue
+        device = {
+            "interface": ifname,
+            "mac_address": get_sysnet_mac(sdir, ifname),
+            "pci_address": pci_address,
+            "state": get_sysnet_device_state(sdir, ifname),
+            "sriov": is_sriov(sdir),
+        }
+        if device["sriov"]:
+            device["sriov_totalvfs"] = get_sriov_totalvfs(sdir)
+            device["sriov_numvfs"] = get_sriov_numvfs(sdir)
+        net_devs.append(device)
+
+    return net_devs
+
+
+def get_sysnet_mac(sysdir: str, ifname: str) -> str:
+    """Determine MAC address for a device
+
+    :param: sysdir: path to device /sys directory
+    :type: str
+    :param: ifname: interface name
+    :type: str
+    :returns: MAC address of device
+    :rtype: str
+    """
+    mac_addr_file = os.path.join(sysdir, "net", ifname, "address")
+    with open(mac_addr_file, "r") as f:
+        read_data = f.read()
+    return read_data.strip()
+
+
+def get_sysnet_device_state(sysdir: str, ifname: str) -> str:
+    """Read operational state of a device
+
+    :param: sysdir: path to device /sys directory
+    :type: str
+    :param: ifname: interface name
+    :type: str
+    :returns: current device state
+    :rtype: str
+    """
+    state_file = os.path.join(sysdir, "net", ifname, "operstate")
+    with open(state_file, "r") as f:
+        read_data = f.read()
+    return read_data.strip()
+
+
+def is_sriov(sysdir: str) -> bool:
+    """Determine whether a device is SR-IOV capable
+
+    :param: sysdir: path to device /sys directory
+    :type: str
+    :returns: whether device is SR-IOV capable or not
+    :rtype: bool
+    """
+    return os.path.exists(os.path.join(sysdir, "sriov_totalvfs"))
+
+
+def get_sriov_totalvfs(sysdir: str) -> int:
+    """Read total VF capacity for a device
+
+    :param: sysdir: path to device /sys directory
+    :type: str
+    :returns: number of VF's the device supports
+    :rtype: int
+    """
+    sriov_totalvfs_file = os.path.join(sysdir, "sriov_totalvfs")
+    with open(sriov_totalvfs_file, "r") as f:
+        read_data = f.read()
+    return int(read_data.strip())
+
+
+def get_sriov_numvfs(sysdir: str) -> int:
+    """Read configured VF capacity for a device
+
+    :param: sysdir: path to device /sys directory
+    :type: str
+    :returns: number of VF's the device is configured with
+    :rtype: int
+    """
+    sriov_numvfs_file = os.path.join(sysdir, "sriov_numvfs")
+    with open(sriov_numvfs_file, "r") as f:
+        read_data = f.read()
+    return int(read_data.strip())
+
+
+# https://github.com/libvirt/libvirt/commit/5b1c525b1f3608156884aed0dc5e925306c1e260
+PF_PHYS_PORT_NAME_REGEX = re.compile(r"(p[0-9]+$)|(p[0-9]+s[0-9]+$)",
+                                     re.IGNORECASE)
+
+
+def _phys_port_name_is_pf(sysnetdir: str) -> typing.Optional[bool]:
+    try:
+        with open(os.path.join(sysnetdir, "phys_port_name"), "r") as fin:
+            return (PF_PHYS_PORT_NAME_REGEX.match(fin.read().strip())
+                    is not None)
+    except OSError:
+        return
+
+
+def get_sysnet_interface(sysdir: str) -> typing.Optional[str]:
+    sysnetdir = os.path.join(sysdir, "net")
+    netdevs = os.listdir(sysnetdir)
+    # Return early in case the PCI device only has one netdev
+    if len(netdevs) == 1:
+        return netdevs[0]
+
+    # When a PCI device has multiple netdevs we need to figure
out which one + # represents the PF + for netdev in netdevs: + if _phys_port_name_is_pf(os.path.join(sysnetdir, netdev)): + return netdev + + +def get_pci_ethernet_addresses() -> list: + """Generate list of PCI addresses for all network adapters + + :returns: list of PCI addresses + :rtype: list + """ + cmd = ["lspci", "-m", "-D"] + lspci_output = subprocess.check_output(cmd).decode("UTF-8") + pci_addresses = [] + for line in lspci_output.split("\n"): + columns = shlex.split(line) + if len(columns) > 1 and columns[1] == "Ethernet controller": + pci_address = columns[0] + pci_addresses.append(format_pci_addr(pci_address)) + return pci_addresses + + +class PCINetDevice(object): + def __init__(self, pci_address): + self.pci_address = pci_address + self.interface_name = None + self.mac_address = None + self.state = None + self.sriov = False + self.sriov_totalvfs = None + self.sriov_numvfs = None + self.update_attributes() + + def update_attributes(self): + self.update_interface_info() + + def update_interface_info(self): + net_devices = get_sysnet_interfaces_and_macs() + for interface in net_devices: + if self.pci_address == interface["pci_address"]: + self.interface_name = interface["interface"] + self.mac_address = interface["mac_address"] + self.state = interface["state"] + self.sriov = interface["sriov"] + if self.sriov: + self.sriov_totalvfs = interface["sriov_totalvfs"] + self.sriov_numvfs = interface["sriov_numvfs"] + + def _set_sriov_numvfs(self, numvfs: int): + sdevice = os.path.join( + "/sys/bus/pci/devices", self.pci_address, "sriov_numvfs" + ) + with open(sdevice, "w") as sh: + sh.write(str(numvfs)) + self.update_attributes() + + def set_sriov_numvfs(self, numvfs: int) -> bool: + """Set the number of VF devices for a SR-IOV PF + + Assuming the device is an SR-IOV device, this function will attempt + to change the number of VF's created by the PF. + + @param numvfs: integer to set the current number of VF's to + @returns boolean indicating whether any changes where made + """ + if self.sriov and numvfs != self.sriov_numvfs: + # NOTE(fnordahl): run-time change of numvfs is disallowed + # without resetting to 0 first. + self._set_sriov_numvfs(0) + self._set_sriov_numvfs(numvfs) + return True + return False + + +class PCINetDevices(object): + def __init__(self): + self.pci_devices = [ + PCINetDevice(dev) for dev in get_pci_ethernet_addresses() + ] + + def update_devices(self): + for pcidev in self.pci_devices: + pcidev.update_attributes() + + def get_macs(self) -> list: + macs = [] + for pcidev in self.pci_devices: + if pcidev.mac_address: + macs.append(pcidev.mac_address) + return macs + + def get_device_from_mac(self, mac: str) -> PCINetDevice: + for pcidev in self.pci_devices: + if pcidev.mac_address == mac: + return pcidev + return None + + def get_device_from_pci_address(self, pci_addr: str) -> PCINetDevice: + for pcidev in self.pci_devices: + if pcidev.pci_address == pci_addr: + return pcidev + return None + + def get_device_from_interface_name( + self, interface_name: str + ) -> PCINetDevice: + for pcidev in self.pci_devices: + if pcidev.interface_name == interface_name: + return pcidev + return None diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/network/__init__.py b/ceph-radosgw/hooks/charmhelpers/contrib/network/__init__.py new file mode 100644 index 00000000..d7567b86 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/network/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2014-2015 Canonical Limited. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/network/ip.py b/ceph-radosgw/hooks/charmhelpers/contrib/network/ip.py new file mode 100644 index 00000000..f3b4864f --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/network/ip.py @@ -0,0 +1,628 @@ +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import glob +import re +import subprocess +import socket +import ssl + +from functools import partial + +from charmhelpers.fetch import apt_install, apt_update +from charmhelpers.core.hookenv import ( + config, + log, + network_get_primary_address, + unit_get, + WARNING, + NoNetworkBinding, +) + +from charmhelpers.core.host import ( + lsb_release, + CompareHostReleases, +) + +try: + import netifaces +except ImportError: + apt_update(fatal=True) + apt_install('python3-netifaces', fatal=True) + import netifaces + +try: + import netaddr +except ImportError: + apt_update(fatal=True) + apt_install('python3-netaddr', fatal=True) + import netaddr + + +def _validate_cidr(network): + try: + netaddr.IPNetwork(network) + except (netaddr.core.AddrFormatError, ValueError): + raise ValueError("Network (%s) is not in CIDR presentation format" % + network) + + +def no_ip_found_error_out(network): + errmsg = ("No IP address found in network(s): %s" % network) + raise ValueError(errmsg) + + +def _get_ipv6_network_from_address(address): + """Get an netaddr.IPNetwork for the given IPv6 address + :param address: a dict as returned by netifaces.ifaddresses + :returns netaddr.IPNetwork: None if the address is a link local or loopback + address + """ + if address['addr'].startswith('fe80') or address['addr'] == "::1": + return None + + prefix = address['netmask'].split("/") + if len(prefix) > 1: + netmask = prefix[1] + else: + netmask = address['netmask'] + return netaddr.IPNetwork("%s/%s" % (address['addr'], + netmask)) + + +def get_address_in_network(network, fallback=None, fatal=False): + """Get an IPv4 or IPv6 address within the network from the host. + + :param network (str): CIDR presentation format. For example, + '192.168.1.0/24'. Supports multiple networks as a space-delimited list. + :param fallback (str): If no address is found, return fallback. + :param fatal (boolean): If no address is found, fallback is not + set and fatal is True then exit(1). 
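+
+    Example (illustrative)::
+
+        # yields an address such as '192.168.1.42' if a local interface
+        # holds one within the network, otherwise the fallback
+        get_address_in_network('192.168.1.0/24', fallback='10.0.0.1')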
+ """ + if network is None: + if fallback is not None: + return fallback + + if fatal: + no_ip_found_error_out(network) + else: + return None + + networks = network.split() or [network] + for network in networks: + _validate_cidr(network) + network = netaddr.IPNetwork(network) + for iface in netifaces.interfaces(): + try: + addresses = netifaces.ifaddresses(iface) + except ValueError: + # If an instance was deleted between + # netifaces.interfaces() run and now, its interfaces are gone + continue + if network.version == 4 and netifaces.AF_INET in addresses: + for addr in addresses[netifaces.AF_INET]: + cidr = netaddr.IPNetwork("%s/%s" % (addr['addr'], + addr['netmask'])) + if cidr in network: + return str(cidr.ip) + + if network.version == 6 and netifaces.AF_INET6 in addresses: + for addr in addresses[netifaces.AF_INET6]: + cidr = _get_ipv6_network_from_address(addr) + if cidr and cidr in network: + return str(cidr.ip) + + if fallback is not None: + return fallback + + if fatal: + no_ip_found_error_out(network) + + return None + + +def is_ipv6(address): + """Determine whether provided address is IPv6 or not.""" + try: + address = netaddr.IPAddress(address) + except netaddr.AddrFormatError: + # probably a hostname - so not an address at all! + return False + + return address.version == 6 + + +def is_address_in_network(network, address): + """ + Determine whether the provided address is within a network range. + + :param network (str): CIDR presentation format. For example, + '192.168.1.0/24'. + :param address: An individual IPv4 or IPv6 address without a net + mask or subnet prefix. For example, '192.168.1.1'. + :returns boolean: Flag indicating whether address is in network. + """ + try: + network = netaddr.IPNetwork(network) + except (netaddr.core.AddrFormatError, ValueError): + raise ValueError("Network (%s) is not in CIDR presentation format" % + network) + + try: + address = netaddr.IPAddress(address) + except (netaddr.core.AddrFormatError, ValueError): + raise ValueError("Address (%s) is not in correct presentation format" % + address) + + if address in network: + return True + else: + return False + + +def _get_for_address(address, key): + """Retrieve an attribute of or the physical interface that + the IP address provided could be bound to. + + :param address (str): An individual IPv4 or IPv6 address without a net + mask or subnet prefix. For example, '192.168.1.1'. + :param key: 'iface' for the physical interface name or an attribute + of the configured interface, for example 'netmask'. + :returns str: Requested attribute or None if address is not bindable. 
+ """ + address = netaddr.IPAddress(address) + for iface in netifaces.interfaces(): + addresses = netifaces.ifaddresses(iface) + if address.version == 4 and netifaces.AF_INET in addresses: + addr = addresses[netifaces.AF_INET][0]['addr'] + netmask = addresses[netifaces.AF_INET][0]['netmask'] + network = netaddr.IPNetwork("%s/%s" % (addr, netmask)) + cidr = network.cidr + if address in cidr: + if key == 'iface': + return iface + else: + return addresses[netifaces.AF_INET][0][key] + + if address.version == 6 and netifaces.AF_INET6 in addresses: + for addr in addresses[netifaces.AF_INET6]: + network = _get_ipv6_network_from_address(addr) + if not network: + continue + + cidr = network.cidr + if address in cidr: + if key == 'iface': + return iface + elif key == 'netmask' and cidr: + return str(cidr).split('/')[1] + else: + return addr[key] + return None + + +get_iface_for_address = partial(_get_for_address, key='iface') + + +get_netmask_for_address = partial(_get_for_address, key='netmask') + + +def resolve_network_cidr(ip_address): + ''' + Resolves the full address cidr of an ip_address based on + configured network interfaces + ''' + netmask = get_netmask_for_address(ip_address) + return str(netaddr.IPNetwork("%s/%s" % (ip_address, netmask)).cidr) + + +def format_ipv6_addr(address): + """If address is IPv6, wrap it in '[]' otherwise return None. + + This is required by most configuration files when specifying IPv6 + addresses. + """ + if is_ipv6(address): + return "[%s]" % address + + return None + + +def is_ipv6_disabled(): + try: + result = subprocess.check_output( + ['sysctl', 'net.ipv6.conf.all.disable_ipv6'], + stderr=subprocess.STDOUT, + universal_newlines=True) + except subprocess.CalledProcessError: + return True + + return "net.ipv6.conf.all.disable_ipv6 = 1" in result + + +def get_iface_addr(iface='eth0', inet_type='AF_INET', inc_aliases=False, + fatal=True, exc_list=None): + """Return the assigned IP address for a given interface, if any. + + :param iface: network interface on which address(es) are expected to + be found. + :param inet_type: inet address family + :param inc_aliases: include alias interfaces in search + :param fatal: if True, raise exception if address not found + :param exc_list: list of addresses to ignore + :return: list of ip addresses + """ + # Extract nic if passed /dev/ethX + if '/' in iface: + iface = iface.split('/')[-1] + + if not exc_list: + exc_list = [] + + try: + inet_num = getattr(netifaces, inet_type) + except AttributeError: + raise Exception("Unknown inet type '%s'" % str(inet_type)) + + interfaces = netifaces.interfaces() + if inc_aliases: + ifaces = [] + for _iface in interfaces: + if iface == _iface or _iface.split(':')[0] == iface: + ifaces.append(_iface) + + if fatal and not ifaces: + raise Exception("Invalid interface '%s'" % iface) + + ifaces.sort() + else: + if iface not in interfaces: + if fatal: + raise Exception("Interface '%s' not found " % (iface)) + else: + return [] + + else: + ifaces = [iface] + + addresses = [] + for netiface in ifaces: + net_info = netifaces.ifaddresses(netiface) + if inet_num in net_info: + for entry in net_info[inet_num]: + if 'addr' in entry and entry['addr'] not in exc_list: + addresses.append(entry['addr']) + + if fatal and not addresses: + raise Exception("Interface '%s' doesn't have any %s addresses." 
% + (iface, inet_type)) + + return sorted(addresses) + + +get_ipv4_addr = partial(get_iface_addr, inet_type='AF_INET') + + +def get_iface_from_addr(addr): + """Work out on which interface the provided address is configured.""" + for iface in netifaces.interfaces(): + addresses = netifaces.ifaddresses(iface) + for inet_type in addresses: + for _addr in addresses[inet_type]: + _addr = _addr['addr'] + # link local + ll_key = re.compile("(.+)%.*") + raw = re.match(ll_key, _addr) + if raw: + _addr = raw.group(1) + + if _addr == addr: + log("Address '%s' is configured on iface '%s'" % + (addr, iface)) + return iface + + msg = "Unable to infer net iface on which '%s' is configured" % (addr) + raise Exception(msg) + + +def sniff_iface(f): + """Ensure decorated function is called with a value for iface. + + If no iface provided, inject net iface inferred from unit private address. + """ + def iface_sniffer(*args, **kwargs): + if not kwargs.get('iface', None): + kwargs['iface'] = get_iface_from_addr(unit_get('private-address')) + + return f(*args, **kwargs) + + return iface_sniffer + + +@sniff_iface +def get_ipv6_addr(iface=None, inc_aliases=False, fatal=True, exc_list=None, + dynamic_only=True): + """Get assigned IPv6 address for a given interface. + + Returns list of addresses found. If no address found, returns empty list. + + If iface is None, we infer the current primary interface by doing a reverse + lookup on the unit private-address. + + We currently only support scope global IPv6 addresses i.e. non-temporary + addresses. If no global IPv6 address is found, return the first one found + in the ipv6 address list. + + :param iface: network interface on which ipv6 address(es) are expected to + be found. + :param inc_aliases: include alias interfaces in search + :param fatal: if True, raise exception if address not found + :param exc_list: list of addresses to ignore + :param dynamic_only: only recognise dynamic addresses + :return: list of ipv6 addresses + """ + addresses = get_iface_addr(iface=iface, inet_type='AF_INET6', + inc_aliases=inc_aliases, fatal=fatal, + exc_list=exc_list) + + if addresses: + global_addrs = [] + for addr in addresses: + key_scope_link_local = re.compile("^fe80::..(.+)%(.+)") + m = re.match(key_scope_link_local, addr) + if m: + eui_64_mac = m.group(1) + iface = m.group(2) + else: + global_addrs.append(addr) + + if global_addrs: + # Make sure any found global addresses are not temporary + cmd = ['ip', 'addr', 'show', iface] + out = subprocess.check_output( + cmd).decode('UTF-8', errors='replace') + if dynamic_only: + key = re.compile("inet6 (.+)/[0-9]+ scope global.* dynamic.*") + else: + key = re.compile("inet6 (.+)/[0-9]+ scope global.*") + + addrs = [] + for line in out.split('\n'): + line = line.strip() + m = re.match(key, line) + if m and 'temporary' not in line: + # Return the first valid address we find + for addr in global_addrs: + if m.group(1) == addr: + if not dynamic_only or \ + m.group(1).endswith(eui_64_mac): + addrs.append(addr) + + if addrs: + return addrs + + if fatal: + raise Exception("Interface '%s' does not have a scope global " + "non-temporary ipv6 address." 
% iface) + + return [] + + +def get_bridges(vnic_dir='/sys/devices/virtual/net'): + """Return a list of bridges on the system.""" + b_regex = "%s/*/bridge" % vnic_dir + return [x.replace(vnic_dir, '').split('/')[1] for x in glob.glob(b_regex)] + + +def get_bridge_nics(bridge, vnic_dir='/sys/devices/virtual/net'): + """Return a list of nics comprising a given bridge on the system.""" + brif_regex = "%s/%s/brif/*" % (vnic_dir, bridge) + return [x.split('/')[-1] for x in glob.glob(brif_regex)] + + +def is_bridge_member(nic): + """Check if a given nic is a member of a bridge.""" + for bridge in get_bridges(): + if nic in get_bridge_nics(bridge): + return True + + return False + + +def is_ip(address): + """ + Returns True if address is a valid IP address. + """ + try: + # Test to see if already an IPv4/IPv6 address + address = netaddr.IPAddress(address) + return True + except (netaddr.AddrFormatError, ValueError): + return False + + +def ns_query(address): + try: + import dns.resolver + except ImportError: + apt_install('python3-dnspython', fatal=True) + import dns.resolver + + if isinstance(address, dns.name.Name): + rtype = 'PTR' + elif isinstance(address, str): + rtype = 'A' + else: + return None + + try: + answers = dns.resolver.query(address, rtype) + except (dns.resolver.NXDOMAIN, dns.resolver.NoNameservers): + return None + + if answers: + return str(answers[0]) + return None + + +def get_host_ip(hostname, fallback=None): + """ + Resolves the IP for a given hostname, or returns + the input if it is already an IP. + """ + if is_ip(hostname): + return hostname + + ip_addr = ns_query(hostname) + if not ip_addr: + try: + ip_addr = socket.gethostbyname(hostname) + except Exception: + log("Failed to resolve hostname '%s'" % (hostname), + level=WARNING) + return fallback + return ip_addr + + +def get_hostname(address, fqdn=True): + """ + Resolves hostname for given IP, or returns the input + if it is already a hostname. + """ + if is_ip(address): + try: + import dns.reversename + except ImportError: + apt_install("python3-dnspython", fatal=True) + import dns.reversename + + rev = dns.reversename.from_address(address) + result = ns_query(rev) + + if not result: + try: + result = socket.gethostbyaddr(address)[0] + except Exception: + return None + else: + result = address + + if fqdn: + # strip trailing . + if result.endswith('.'): + return result[:-1] + else: + return result + else: + return result.split('.')[0] + + +class SSLPortCheckInfo(object): + + def __init__(self, key, cert, ca_cert, check_hostname=False): + self.key = key + self.cert = cert + self.ca_cert = ca_cert + # NOTE: by default we do not check hostname since the port check is + # typically performed using 0.0.0.0 which will not match the + # certificate. Hence the default for this is False. + self.check_hostname = check_hostname + + @property + def ssl_context(self): + context = ssl.create_default_context() + context.check_hostname = self.check_hostname + context.load_cert_chain(self.cert, self.key) + context.load_verify_locations(self.ca_cert) + return context + + +def port_has_listener(address, port, sslinfo=None): + """ + Returns True if the address:port is open and being listened to, + else False. By default uses netcat to check ports but if sslinfo is + provided will use an SSL connection instead. + + @param address: an IP address or hostname + @param port: integer port + @param sslinfo: optional SSLPortCheckInfo object. + If provided, the check is performed using an ssl + connection. 
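+
+    Example (illustrative)::
+
+        port_has_listener('127.0.0.1', 8080)  # True if something is listening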
+
+    Note: calls 'nc' (netcat) via a subprocess when no sslinfo is given.
+    """
+    if not sslinfo:
+        cmd = ['nc', '-z', address, str(port)]
+        result = subprocess.call(cmd)
+        return not (bool(result))
+
+    try:
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0) as sock:
+            ssock = sslinfo.ssl_context.wrap_socket(sock,
+                                                    server_hostname=address)
+            ssock.connect((address, port))
+            # this bit is crucial to ensure tls close_notify is sent
+            ssock.unwrap()
+
+            return True
+    except ConnectionRefusedError:
+        return False
+
+
+def assert_charm_supports_ipv6():
+    """Check whether the charm is able to support IPv6."""
+    release = lsb_release()['DISTRIB_CODENAME'].lower()
+    if CompareHostReleases(release) < "trusty":
+        raise Exception("IPv6 is not supported in the charms for Ubuntu "
+                        "versions less than Trusty 14.04")
+
+
+def get_relation_ip(interface, cidr_network=None):
+    """Return this unit's IP for the given interface.
+
+    Allow for an arbitrary interface to use with network-get to select an IP.
+    Handle all address selection options including passed cidr network and
+    IPv6.
+
+    Usage: get_relation_ip('amqp', cidr_network='10.0.0.0/8')
+
+    @param interface: string name of the relation.
+    @param cidr_network: string CIDR Network to select an address from.
+    @raises Exception if prefer-ipv6 is configured but IPv6 unsupported.
+    @returns IPv6 or IPv4 address
+    """
+    # Select the interface address first
+    # For possible use as a fallback below with get_address_in_network
+    try:
+        # Get the interface specific IP
+        address = network_get_primary_address(interface)
+    except NotImplementedError:
+        # If network-get is not available
+        address = get_host_ip(unit_get('private-address'))
+    except NoNetworkBinding:
+        log("No network binding for {}".format(interface), WARNING)
+        address = get_host_ip(unit_get('private-address'))
+
+    if config('prefer-ipv6'):
+        # Currently IPv6 has priority, eventually we want IPv6 to just be
+        # another network space.
+        assert_charm_supports_ipv6()
+        return get_ipv6_addr()[0]
+    elif cidr_network:
+        # If a specific CIDR network is passed get the address from that
+        # network.
+        return get_address_in_network(cidr_network, address)
+
+    # Return the interface address
+    return address
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/__init__.py b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/__init__.py
new file mode 100644
index 00000000..d7567b86
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/alternatives.py b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/alternatives.py
new file mode 100644
index 00000000..547de09c
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/alternatives.py
@@ -0,0 +1,44 @@
+# Copyright 2014-2015 Canonical Limited.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +''' Helper for managing alternatives for file conflict resolution ''' + +import subprocess +import shutil +import os + + +def install_alternative(name, target, source, priority=50): + ''' Install alternative configuration ''' + if (os.path.exists(target) and not os.path.islink(target)): + # Move existing file/directory away before installing + shutil.move(target, '{}.bak'.format(target)) + cmd = [ + 'update-alternatives', '--force', '--install', + target, name, source, str(priority) + ] + subprocess.check_call(cmd) + + +def remove_alternative(name, source): + """Remove an installed alternative configuration file + + :param name: string name of the alternative to remove + :param source: string full path to alternative to remove + """ + cmd = [ + 'update-alternatives', '--remove', + name, source + ] + subprocess.check_call(cmd) diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/audits/__init__.py b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/audits/__init__.py new file mode 100644 index 00000000..7f7e5f79 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/audits/__init__.py @@ -0,0 +1,212 @@ +# Copyright 2019 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""OpenStack Security Audit code""" + +import collections +from enum import Enum +import traceback + +from charmhelpers.core.host import cmp_pkgrevno +import charmhelpers.contrib.openstack.utils as openstack_utils +import charmhelpers.core.hookenv as hookenv + + +class AuditType(Enum): + OpenStackSecurityGuide = 1 + + +_audits = {} + +Audit = collections.namedtuple('Audit', 'func filters') + + +def audit(*args): + """Decorator to register an audit. + + These are used to generate audits that can be run on a + deployed system that matches the given configuration + + :param args: List of functions to filter tests against + :type args: List[Callable[Dict]] + """ + def wrapper(f): + test_name = f.__name__ + if _audits.get(test_name): + raise RuntimeError( + "Test name '{}' used more than once" + .format(test_name)) + non_callables = [fn for fn in args if not callable(fn)] + if non_callables: + raise RuntimeError( + "Configuration includes non-callable filters: {}" + .format(non_callables)) + _audits[test_name] = Audit(func=f, filters=args) + return f + return wrapper + + +def is_audit_type(*args): + """This audit is included in the specified kinds of audits. 
+
+    :param *args: List of AuditTypes to include this audit in
+    :type args: List[AuditType]
+    :rtype: Callable[Dict]
+    """
+    def _is_audit_type(audit_options):
+        if audit_options.get('audit_type') in args:
+            return True
+        else:
+            return False
+    return _is_audit_type
+
+
+def since_package(pkg, pkg_version):
+    """This audit should be run after the specified package version (incl).
+
+    :param pkg: Package name to compare
+    :type pkg: str
+    :param pkg_version: The package version
+    :type pkg_version: str
+    :rtype: Callable[Dict]
+    """
+    def _since_package(audit_options=None):
+        return cmp_pkgrevno(pkg, pkg_version) >= 0
+
+    return _since_package
+
+
+def before_package(pkg, pkg_version):
+    """This audit should be run before the specified package version (excl).
+
+    :param pkg: Package name to compare
+    :type pkg: str
+    :param pkg_version: The package version
+    :type pkg_version: str
+    :rtype: Callable[Dict]
+    """
+    def _before_package(audit_options=None):
+        return not since_package(pkg, pkg_version)()
+
+    return _before_package
+
+
+def since_openstack_release(pkg, release):
+    """This audit should run after the specified OpenStack version (incl).
+
+    :param pkg: Package name to compare
+    :type pkg: str
+    :param release: The OpenStack release codename
+    :type release: str
+    :rtype: Callable[Dict]
+    """
+    def _since_openstack_release(audit_options=None):
+        _release = openstack_utils.get_os_codename_package(pkg)
+        return openstack_utils.CompareOpenStackReleases(_release) >= release
+
+    return _since_openstack_release
+
+
+def before_openstack_release(pkg, release):
+    """This audit should run before the specified OpenStack version (excl).
+
+    :param pkg: Package name to compare
+    :type pkg: str
+    :param release: The OpenStack release codename
+    :type release: str
+    :rtype: Callable[Dict]
+    """
+    def _before_openstack_release(audit_options=None):
+        return not since_openstack_release(pkg, release)()
+
+    return _before_openstack_release
+
+
+def it_has_config(config_key):
+    """This audit should be run based on specified config keys.
+
+    :param config_key: Config key to look for
+    :type config_key: str
+    :rtype: Callable[Dict]
+    """
+    def _it_has_config(audit_options):
+        return audit_options.get(config_key) is not None
+
+    return _it_has_config
+
+
+def run(audit_options):
+    """Run the configured audits with the specified audit_options.
+
+    :param audit_options: Configuration for the audit
+    :type audit_options: Config
+
+    :rtype: Dict[str, str]
+    """
+    errors = {}
+    results = {}
+    for name, audit in sorted(_audits.items()):
+        result_name = name.replace('_', '-')
+        if result_name in audit_options.get('excludes', []):
+            print(
+                "Skipping {} because it is "
+                "excluded in audit config"
+                .format(result_name))
+            continue
+        if all(p(audit_options) for p in audit.filters):
+            try:
+                audit.func(audit_options)
+                print("{}: PASS".format(name))
+                results[result_name] = {
+                    'success': True,
+                }
+            except AssertionError as e:
+                print("{}: FAIL ({})".format(name, e))
+                results[result_name] = {
+                    'success': False,
+                    'message': e,
+                }
+            except Exception as e:
+                print("{}: ERROR ({})".format(name, e))
+                errors[name] = e
+                results[result_name] = {
+                    'success': False,
+                    'message': e,
+                }
+    for name, error in errors.items():
+        print("=" * 20)
+        print("Error in {}: ".format(name))
+        traceback.print_tb(error.__traceback__)
+        print()
+    return results
+
+
+def action_parse_results(result):
+    """Parse the result of `run` in the context of an action.
+
+    :param result: The result of running the security-checklist
+                   action on a unit
+    :type result: Dict[str, Dict[str, str]]
+    :rtype: int
+    """
+    passed = True
+    for test, outcome in result.items():
+        if outcome['success']:
+            hookenv.action_set({test: 'PASS'})
+        else:
+            hookenv.action_set({test: 'FAIL - {}'.format(outcome['message'])})
+            passed = False
+    if not passed:
+        hookenv.action_fail("One or more tests failed")
+    return 0 if passed else 1
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/audits/openstack_security_guide.py b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/audits/openstack_security_guide.py
new file mode 100644
index 00000000..79740ed0
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/audits/openstack_security_guide.py
@@ -0,0 +1,270 @@
+# Copyright 2019 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import collections
+import configparser
+import glob
+import os.path
+import subprocess
+
+from charmhelpers.contrib.openstack.audits import (
+    audit,
+    AuditType,
+    # filters
+    is_audit_type,
+    it_has_config,
+)
+
+from charmhelpers.core.hookenv import (
+    cached,
+)
+
+"""
+The Security Guide calls for specific files inside a service's config
+directory to be mode 640. Rather than tracking each file, we ensure the
+containing directory is 750, so that only the owner can write and only
+the group can read the files within it.
+
+Restricting access at the directory level also ensures that nothing
+leaks accidentally if a new file is added to the service without being
+added to the security guide, and to this check.
+""" +FILE_ASSERTIONS = { + 'barbican': { + '/etc/barbican': {'group': 'barbican', 'mode': '750'}, + }, + 'ceph-mon': { + '/var/lib/charm/ceph-mon/ceph.conf': + {'owner': 'root', 'group': 'root', 'mode': '644'}, + '/etc/ceph/ceph.client.admin.keyring': + {'owner': 'ceph', 'group': 'ceph'}, + '/etc/ceph/rbdmap': {'mode': '644'}, + '/var/lib/ceph': {'owner': 'ceph', 'group': 'ceph', 'mode': '750'}, + '/var/lib/ceph/bootstrap-*/ceph.keyring': + {'owner': 'ceph', 'group': 'ceph', 'mode': '600'} + }, + 'ceph-osd': { + '/var/lib/charm/ceph-osd/ceph.conf': + {'owner': 'ceph', 'group': 'ceph', 'mode': '644'}, + '/var/lib/ceph': {'owner': 'ceph', 'group': 'ceph', 'mode': '750'}, + '/var/lib/ceph/*': {'owner': 'ceph', 'group': 'ceph', 'mode': '755'}, + '/var/lib/ceph/bootstrap-*/ceph.keyring': + {'owner': 'ceph', 'group': 'ceph', 'mode': '600'}, + '/var/lib/ceph/radosgw': + {'owner': 'ceph', 'group': 'ceph', 'mode': '755'}, + }, + 'cinder': { + '/etc/cinder': {'group': 'cinder', 'mode': '750'}, + }, + 'glance': { + '/etc/glance': {'group': 'glance', 'mode': '750'}, + }, + 'keystone': { + '/etc/keystone': + {'owner': 'keystone', 'group': 'keystone', 'mode': '750'}, + }, + 'manilla': { + '/etc/manila': {'group': 'manilla', 'mode': '750'}, + }, + 'neutron-gateway': { + '/etc/neutron': {'group': 'neutron', 'mode': '750'}, + }, + 'neutron-api': { + '/etc/neutron/': {'group': 'neutron', 'mode': '750'}, + }, + 'nova-cloud-controller': { + '/etc/nova': {'group': 'nova', 'mode': '750'}, + }, + 'nova-compute': { + '/etc/nova/': {'group': 'nova', 'mode': '750'}, + }, + 'openstack-dashboard': { + # From security guide + '/etc/openstack-dashboard/local_settings.py': + {'group': 'horizon', 'mode': '640'}, + }, +} + +Ownership = collections.namedtuple('Ownership', 'owner group mode') + + +@cached +def _stat(file): + """ + Get the Ownership information from a file. + + :param file: The path to a file to stat + :type file: str + :returns: owner, group, and mode of the specified file + :rtype: Ownership + :raises subprocess.CalledProcessError: If the underlying stat fails + """ + out = subprocess.check_output( + ['stat', '-c', '%U %G %a', file]).decode('utf-8') + return Ownership(*out.strip().split(' ')) + + +@cached +def _config_ini(path): + """ + Parse an ini file + + :param path: The path to a file to parse + :type file: str + :returns: Configuration contained in path + :rtype: Dict + """ + # When strict is enabled, duplicate options are not allowed in the + # parsed INI; however, Oslo allows duplicate values. This change + # causes us to ignore the duplicate values which is acceptable as + # long as we don't validate any multi-value options + conf = configparser.ConfigParser(strict=False) + conf.read(path) + return dict(conf) + + +def _validate_file_ownership(owner, group, file_name, optional=False): + """ + Validate that a specified file is owned by `owner:group`. 
+ + :param owner: Name of the owner + :type owner: str + :param group: Name of the group + :type group: str + :param file_name: Path to the file to verify + :type file_name: str + :param optional: Is this file optional, + ie: Should this test fail when it's missing + :type optional: bool + """ + try: + ownership = _stat(file_name) + except subprocess.CalledProcessError as e: + print("Error reading file: {}".format(e)) + if not optional: + assert False, "Specified file does not exist: {}".format(file_name) + assert owner == ownership.owner, \ + "{} has an incorrect owner: {} should be {}".format( + file_name, ownership.owner, owner) + assert group == ownership.group, \ + "{} has an incorrect group: {} should be {}".format( + file_name, ownership.group, group) + print("Validate ownership of {}: PASS".format(file_name)) + + +def _validate_file_mode(mode, file_name, optional=False): + """ + Validate that a specified file has the specified permissions. + + :param mode: file mode that is desires + :type owner: str + :param file_name: Path to the file to verify + :type file_name: str + :param optional: Is this file optional, + ie: Should this test fail when it's missing + :type optional: bool + """ + try: + ownership = _stat(file_name) + except subprocess.CalledProcessError as e: + print("Error reading file: {}".format(e)) + if not optional: + assert False, "Specified file does not exist: {}".format(file_name) + assert mode == ownership.mode, \ + "{} has an incorrect mode: {} should be {}".format( + file_name, ownership.mode, mode) + print("Validate mode of {}: PASS".format(file_name)) + + +@cached +def _config_section(config, section): + """Read the configuration file and return a section.""" + path = os.path.join(config.get('config_path'), config.get('config_file')) + conf = _config_ini(path) + return conf.get(section) + + +@audit(is_audit_type(AuditType.OpenStackSecurityGuide), + it_has_config('files')) +def validate_file_ownership(config): + """Verify that configuration files are owned by the correct user/group.""" + files = config.get('files', {}) + for file_name, options in files.items(): + for key in options.keys(): + if key not in ["owner", "group", "mode"]: + raise RuntimeError( + "Invalid ownership configuration: {}".format(key)) + owner = options.get('owner', config.get('owner', 'root')) + group = options.get('group', config.get('group', 'root')) + optional = options.get('optional', config.get('optional', False)) + if '*' in file_name: + for file in glob.glob(file_name): + if file not in files.keys(): + if os.path.isfile(file): + _validate_file_ownership(owner, group, file, optional) + else: + if os.path.isfile(file_name): + _validate_file_ownership(owner, group, file_name, optional) + + +@audit(is_audit_type(AuditType.OpenStackSecurityGuide), + it_has_config('files')) +def validate_file_permissions(config): + """Verify that permissions on configuration files are secure enough.""" + files = config.get('files', {}) + for file_name, options in files.items(): + for key in options.keys(): + if key not in ["owner", "group", "mode"]: + raise RuntimeError( + "Invalid ownership configuration: {}".format(key)) + mode = options.get('mode', config.get('permissions', '600')) + optional = options.get('optional', config.get('optional', False)) + if '*' in file_name: + for file in glob.glob(file_name): + if file not in files.keys(): + if os.path.isfile(file): + _validate_file_mode(mode, file, optional) + else: + if os.path.isfile(file_name): + _validate_file_mode(mode, file_name, optional) + + 
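A minimal usage sketch of the audit machinery above, assuming charm-helpers is importable and no other audits have been registered; the check name and option values are invented for illustration. A check registers itself via the @audit decorator plus filter callables, and run() executes every registered check whose filters all accept the supplied options, reporting AssertionError as FAIL and any other exception as ERROR.

from charmhelpers.contrib.openstack.audits import (
    AuditType,
    audit,
    is_audit_type,
    it_has_config,
    run,
)


@audit(is_audit_type(AuditType.OpenStackSecurityGuide),
       it_has_config('files'))
def keystone_config_dir_is_listed(audit_options):
    # Invented check: a failing assert is reported as FAIL by run().
    assert '/etc/keystone' in audit_options['files'], \
        "/etc/keystone is not covered by the file assertions"


results = run({
    'audit_type': AuditType.OpenStackSecurityGuide,
    'files': {'/etc/keystone': {'group': 'keystone', 'mode': '750'}},
})
# results == {'keystone-config-dir-is-listed': {'success': True}}

Note that run() reports each check under its function name with underscores replaced by hyphens, which is also the form the 'excludes' option matches against.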
+@audit(is_audit_type(AuditType.OpenStackSecurityGuide))
+def validate_uses_keystone(audit_options):
+    """Validate that the service uses Keystone for authentication."""
+    section = (_config_section(audit_options, 'api') or
+               _config_section(audit_options, 'DEFAULT'))
+    assert section is not None, "Missing section 'api / DEFAULT'"
+    assert section.get('auth_strategy') == "keystone", \
+        "Application is not using Keystone"
+
+
+@audit(is_audit_type(AuditType.OpenStackSecurityGuide))
+def validate_uses_tls_for_keystone(audit_options):
+    """Verify that TLS is used to communicate with Keystone."""
+    section = _config_section(audit_options, 'keystone_authtoken')
+    assert section is not None, "Missing section 'keystone_authtoken'"
+    assert not section.get('insecure') and \
+        "https://" in section.get("auth_uri"), \
+        "TLS is not used for Keystone"
+
+
+@audit(is_audit_type(AuditType.OpenStackSecurityGuide))
+def validate_uses_tls_for_glance(audit_options):
+    """Verify that TLS is used to communicate with Glance."""
+    section = _config_section(audit_options, 'glance')
+    assert section is not None, "Missing section 'glance'"
+    assert not section.get('insecure') and \
+        "https://" in section.get("api_servers"), \
+        "TLS is not used for Glance"
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/cert_utils.py b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/cert_utils.py
new file mode 100644
index 00000000..6620f59f
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/cert_utils.py
@@ -0,0 +1,463 @@
+# Copyright 2014-2021 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Common python helper functions used for OpenStack charm certificates.
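Before the certificate helpers below, a quick illustration of the security-guide audits defined in the previous file (a sketch; the directory, file name and endpoint are made up). validate_uses_tls_for_keystone passes only when the parsed config has a keystone_authtoken section whose auth_uri uses https and which does not set insecure.

import os
import tempfile

from charmhelpers.contrib.openstack.audits.openstack_security_guide import (
    validate_uses_tls_for_keystone,
)

# Write a throwaway config file with a compliant keystone_authtoken section.
conf_dir = tempfile.mkdtemp()
with open(os.path.join(conf_dir, 'service.conf'), 'w') as f:
    f.write("[keystone_authtoken]\n"
            "auth_uri = https://keystone.example.com:5000/v3\n")

# _config_section() joins config_path and config_file to locate the file;
# a failing audit would raise AssertionError here.
validate_uses_tls_for_keystone({'config_path': conf_dir,
                                'config_file': 'service.conf'})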
+ +import os +import json +from base64 import b64decode + +from charmhelpers.contrib.network.ip import ( + get_hostname, + resolve_network_cidr, +) +from charmhelpers.core.hookenv import ( + local_unit, + network_get_primary_address, + config, + related_units, + relation_get, + relation_ids, + remote_service_name, + NoNetworkBinding, + log, + WARNING, + INFO, +) +from charmhelpers.contrib.openstack.ip import ( + resolve_address, + get_vip_in_network, + ADDRESS_MAP, + get_default_api_bindings, + local_address, +) +from charmhelpers.contrib.network.ip import ( + get_relation_ip, +) + +from charmhelpers.core.host import ( + ca_cert_absolute_path, + install_ca_cert, + mkdir, + write_file, +) + +from charmhelpers.contrib.hahelpers.apache import ( + CONFIG_CA_CERT_FILE, +) + + +class CertRequest(object): + + """Create a request for certificates to be generated + """ + + def __init__(self, json_encode=True): + self.entries = [] + self.hostname_entry = None + self.json_encode = json_encode + + def add_entry(self, net_type, cn, addresses): + """Add a request to the batch + + :param net_type: str network space name request is for + :param cn: str Canonical Name for certificate + :param addresses: [] List of addresses to be used as SANs + """ + self.entries.append({ + 'cn': cn, + 'addresses': addresses}) + + def add_hostname_cn(self): + """Add a request for the hostname of the machine""" + ip = local_address(unit_get_fallback='private-address') + addresses = [ip] + # If a vip is being used without os-hostname config or + # network spaces then we need to ensure the local units + # cert has the appropriate vip in the SAN list + vip = get_vip_in_network(resolve_network_cidr(ip)) + if vip: + addresses.append(vip) + self.hostname_entry = { + 'cn': get_hostname(ip), + 'addresses': addresses} + + def add_hostname_cn_ip(self, addresses): + """Add an address to the SAN list for the hostname request + + :param addr: [] List of address to be added + """ + for addr in addresses: + if addr not in self.hostname_entry['addresses']: + self.hostname_entry['addresses'].append(addr) + + def get_request(self): + """Generate request from the batched up entries + + """ + if self.hostname_entry: + self.entries.append(self.hostname_entry) + request = {} + for entry in self.entries: + sans = sorted(list(set(entry['addresses']))) + request[entry['cn']] = {'sans': sans} + if self.json_encode: + req = {'cert_requests': json.dumps(request, sort_keys=True)} + else: + req = {'cert_requests': request} + req['unit_name'] = local_unit().replace('/', '_') + return req + + +def get_certificate_request(json_encode=True, bindings=None): + """Generate a certificate requests based on the network configuration + + :param json_encode: Encode request in JSON or not. Used for setting + directly on a relation. + :type json_encode: boolean + :param bindings: List of bindings to check in addition to default api + bindings. + :type bindings: list of strings + :returns: CertRequest request as dictionary or JSON string. 
+ :rtype: Union[dict, json] + """ + if bindings: + # Add default API bindings to bindings list + bindings = list(bindings + get_default_api_bindings()) + else: + # Use default API bindings + bindings = get_default_api_bindings() + req = CertRequest(json_encode=json_encode) + req.add_hostname_cn() + # Add os-hostname entries + _sans = get_certificate_sans(bindings=bindings) + + # Handle specific hostnames per binding + for binding in bindings: + try: + hostname_override = config(ADDRESS_MAP[binding]['override']) + except KeyError: + hostname_override = None + try: + try: + net_addr = resolve_address(endpoint_type=binding) + except KeyError: + net_addr = None + ip = network_get_primary_address(binding) + addresses = [net_addr, ip] + vip = get_vip_in_network(resolve_network_cidr(ip)) + if vip: + addresses.append(vip) + + # Clear any Nones or duplicates + addresses = list(set([i for i in addresses if i])) + # Add hostname certificate request + if hostname_override: + req.add_entry( + binding, + hostname_override, + addresses) + # Remove hostname specific addresses from _sans + for addr in addresses: + try: + _sans.remove(addr) + except (ValueError, KeyError): + pass + + except NoNetworkBinding: + log("Skipping request for certificate for ip in {} space, no " + "local address found".format(binding), WARNING) + # Guarantee all SANs are covered + # These are network addresses with no corresponding hostname. + # Add the ips to the hostname cert to allow for this. + req.add_hostname_cn_ip(_sans) + return req.get_request() + + +def get_certificate_sans(bindings=None): + """Get all possible IP addresses for certificate SANs. + + :param bindings: List of bindings to check in addition to default api + bindings. + :type bindings: list of strings + :returns: List of binding string names + :rtype: List[str] + """ + _sans = [local_address(unit_get_fallback='private-address')] + if bindings: + # Add default API bindings to bindings list + bindings = list(bindings + get_default_api_bindings()) + else: + # Use default API bindings + bindings = get_default_api_bindings() + + for binding in bindings: + # Check for config override + try: + net_config = config(ADDRESS_MAP[binding]['config']) + except KeyError: + # There is no configuration network for this binding name + net_config = None + # Using resolve_address is likely redundant. Keeping it here in + # case there is an edge case it handles. + try: + net_addr = resolve_address(endpoint_type=binding) + except KeyError: + net_addr = None + ip = get_relation_ip(binding, cidr_network=net_config) + _sans = _sans + [net_addr, ip] + vip = get_vip_in_network(resolve_network_cidr(ip)) + if vip: + _sans.append(vip) + # Clear any Nones and duplicates + return list(set([i for i in _sans if i])) + + +def create_ip_cert_links(ssl_dir, custom_hostname_link=None, bindings=None): + """Create symlinks for SAN records + + :param ssl_dir: str Directory to create symlinks in + :param custom_hostname_link: str Additional link to be created + :param bindings: List of bindings to check in addition to default api + bindings. 
+ :type bindings: list of strings + """ + + if bindings: + # Add default API bindings to bindings list + bindings = list(bindings + get_default_api_bindings()) + else: + # Use default API bindings + bindings = get_default_api_bindings() + + # This includes the hostname cert and any specific bindng certs: + # admin, internal, public + req = get_certificate_request(json_encode=False, bindings=bindings)["cert_requests"] + # Specific certs + for cert_req in req.keys(): + requested_cert = os.path.join( + ssl_dir, + 'cert_{}'.format(cert_req)) + requested_key = os.path.join( + ssl_dir, + 'key_{}'.format(cert_req)) + for addr in req[cert_req]['sans']: + cert = os.path.join(ssl_dir, 'cert_{}'.format(addr)) + key = os.path.join(ssl_dir, 'key_{}'.format(addr)) + if os.path.isfile(requested_cert) and not os.path.isfile(cert): + os.symlink(requested_cert, cert) + os.symlink(requested_key, key) + + # Handle custom hostnames + hostname = get_hostname(local_address(unit_get_fallback='private-address')) + hostname_cert = os.path.join( + ssl_dir, + 'cert_{}'.format(hostname)) + hostname_key = os.path.join( + ssl_dir, + 'key_{}'.format(hostname)) + if custom_hostname_link: + custom_cert = os.path.join( + ssl_dir, + 'cert_{}'.format(custom_hostname_link)) + custom_key = os.path.join( + ssl_dir, + 'key_{}'.format(custom_hostname_link)) + if os.path.isfile(hostname_cert) and not os.path.isfile(custom_cert): + os.symlink(hostname_cert, custom_cert) + os.symlink(hostname_key, custom_key) + + +def install_certs(ssl_dir, certs, chain=None, user='root', group='root'): + """Install the certs passed into the ssl dir and append the chain if + provided. + + :param ssl_dir: str Directory to create symlinks in + :param certs: {} {'cn': {'cert': 'CERT', 'key': 'KEY'}} + :param chain: str Chain to be appended to certs + :param user: (Optional) Owner of certificate files. Defaults to 'root' + :type user: str + :param group: (Optional) Group of certificate files. Defaults to 'root' + :type group: str + """ + for cn, bundle in certs.items(): + cert_filename = 'cert_{}'.format(cn) + key_filename = 'key_{}'.format(cn) + cert_data = bundle['cert'] + if chain: + # Append chain file so that clients that trust the root CA will + # trust certs signed by an intermediate in the chain + cert_data = cert_data + os.linesep + chain + write_file( + path=os.path.join(ssl_dir, cert_filename), owner=user, group=group, + content=cert_data, perms=0o640) + write_file( + path=os.path.join(ssl_dir, key_filename), owner=user, group=group, + content=bundle['key'], perms=0o640) + + +def get_cert_relation_ca_name(cert_relation_id=None): + """Determine CA certificate name as provided by relation. + + The filename on disk depends on the name chosen for the application on the + providing end of the certificates relation. + + :param cert_relation_id: (Optional) Relation id providing the certs + :type cert_relation_id: str + :returns: CA certificate filename without path nor extension + :rtype: str + """ + if cert_relation_id is None: + try: + cert_relation_id = relation_ids('certificates')[0] + except IndexError: + return '' + return '{}_juju_ca_cert'.format( + remote_service_name(relid=cert_relation_id)) + + +def _manage_ca_certs(ca, cert_relation_id): + """Manage CA certs. + + :param ca: CA Certificate from certificate relation. 
+ :type ca: str + :param cert_relation_id: Relation id providing the certs + :type cert_relation_id: str + """ + config_ssl_ca = config('ssl_ca') + config_cert_file = ca_cert_absolute_path(CONFIG_CA_CERT_FILE) + if config_ssl_ca: + log("Installing CA certificate from charm ssl_ca config to {}".format( + config_cert_file), INFO) + install_ca_cert( + b64decode(config_ssl_ca).rstrip(), + name=CONFIG_CA_CERT_FILE) + elif os.path.exists(config_cert_file): + log("Removing CA certificate {}".format(config_cert_file), INFO) + os.remove(config_cert_file) + log("Installing CA certificate from certificate relation", INFO) + install_ca_cert( + ca.encode(), + name=get_cert_relation_ca_name(cert_relation_id)) + + +def process_certificates(service_name, relation_id, unit, + custom_hostname_link=None, user='root', group='root', + bindings=None): + """Process the certificates supplied down the relation + + :param service_name: str Name of service the certificates are for. + :param relation_id: str Relation id providing the certs + :param unit: str Unit providing the certs + :param custom_hostname_link: str Name of custom link to create + :param user: (Optional) Owner of certificate files. Defaults to 'root' + :type user: str + :param group: (Optional) Group of certificate files. Defaults to 'root' + :type group: str + :param bindings: List of bindings to check in addition to default api + bindings. + :type bindings: list of strings + :returns: True if certificates processed for local unit or False + :rtype: bool + """ + if bindings: + # Add default API bindings to bindings list + bindings = list(bindings + get_default_api_bindings()) + else: + # Use default API bindings + bindings = get_default_api_bindings() + + data = relation_get(rid=relation_id, unit=unit) + ssl_dir = os.path.join('/etc/apache2/ssl/', service_name) + mkdir(path=ssl_dir) + name = local_unit().replace('/', '_') + certs = data.get('{}.processed_requests'.format(name)) + chain = data.get('chain') + ca = data.get('ca') + if certs: + certs = json.loads(certs) + _manage_ca_certs(ca, relation_id) + install_certs(ssl_dir, certs, chain, user=user, group=group) + create_ip_cert_links( + ssl_dir, + custom_hostname_link=custom_hostname_link, + bindings=bindings) + return True + return False + + +def get_requests_for_local_unit(relation_name=None): + """Extract any certificates data targeted at this unit down relation_name. + + :param relation_name: str Name of relation to check for data. + :returns: List of bundles of certificates. + :rtype: List of dicts + """ + local_name = local_unit().replace('/', '_') + raw_certs_key = '{}.processed_requests'.format(local_name) + relation_name = relation_name or 'certificates' + bundles = [] + for rid in relation_ids(relation_name): + sent = relation_get(rid=rid, unit=local_unit()) + legacy_keys = ['certificate_name', 'common_name'] + is_legacy_request = set(sent).intersection(legacy_keys) + for unit in related_units(rid): + data = relation_get(rid=rid, unit=unit) + # Note: Bug#2028683 - data may not be available if the certificates + # relation hasn't been populated by the providing charm. If no 'ca' + # in the data then don't attempt the bundle at all. 
+ if data.get('ca'): + if data.get(raw_certs_key): + bundles.append({ + 'ca': data['ca'], + 'chain': data.get('chain'), + 'certs': json.loads(data[raw_certs_key]) + }) + elif is_legacy_request: + bundles.append({ + 'ca': data['ca'], + 'chain': data.get('chain'), + 'certs': { + sent['common_name']: { + 'cert': data.get(local_name + '.server.cert'), + 'key': data.get(local_name + '.server.key') + } + } + }) + + return bundles + + +def get_bundle_for_cn(cn, relation_name=None): + """Extract certificates for the given cn. + + :param cn: str Canonical Name on certificate. + :param relation_name: str Relation to check for certificates down. + :returns: Dictionary of certificate data, + :rtype: dict. + """ + entries = get_requests_for_local_unit(relation_name) + cert_bundle = {} + for entry in entries: + for _cn, bundle in entry['certs'].items(): + if _cn == cn: + cert_bundle = { + 'cert': bundle['cert'], + 'key': bundle['key'], + 'chain': entry['chain'], + 'ca': entry['ca']} + break + if cert_bundle: + break + return cert_bundle diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/context.py b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/context.py new file mode 100644 index 00000000..cd70b55c --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/context.py @@ -0,0 +1,3467 @@ +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
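A usage sketch for the relation-data helpers just defined in cert_utils.py (the CN and output path are hypothetical, and this must run inside a charm hook with a populated 'certificates' relation): get_bundle_for_cn() walks the bundles returned by get_requests_for_local_unit() and returns an empty dict until a provider has answered for that CN.

from charmhelpers.contrib.openstack.cert_utils import get_bundle_for_cn

bundle = get_bundle_for_cn('rgw.example.com')  # hypothetical CN
if bundle:
    # The bundle carries 'cert', 'key', 'chain' and 'ca' PEM strings.
    with open('/etc/ssl/certs/rgw.pem', 'w') as f:  # illustrative path
        f.write(bundle['cert'])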
+ +import collections +import copy +import enum +import glob +import hashlib +import json +import math +import os +import re +import socket +import time + +from base64 import b64decode +from distutils.version import LooseVersion +from subprocess import ( + check_call, + check_output, + CalledProcessError) + +import charmhelpers.contrib.storage.linux.ceph as ch_ceph + +from charmhelpers.contrib.openstack.audits.openstack_security_guide import ( + _config_ini as config_ini +) + +from charmhelpers.fetch import ( + apt_install, + filter_installed_packages, + get_installed_version, +) +from charmhelpers.core.hookenv import ( + NoNetworkBinding, + config, + is_relation_made, + local_unit, + log, + relation_get, + relation_ids, + related_units, + relation_set, + unit_private_ip, + charm_name, + DEBUG, + INFO, + ERROR, + status_set, + network_get_primary_address, + WARNING, + service_name, + remote_service_name, +) + +from charmhelpers.core.sysctl import create as sysctl_create +from charmhelpers.core.strutils import bool_from_string +from charmhelpers.contrib.openstack.exceptions import OSContextError + +from charmhelpers.core.host import ( + get_bond_master, + is_phy_iface, + list_nics, + get_nic_hwaddr, + mkdir, + write_file, + pwgen, + lsb_release, + CompareHostReleases, +) +from charmhelpers.contrib.hahelpers.cluster import ( + determine_apache_port, + determine_api_port, + https, + is_clustered, +) +from charmhelpers.contrib.hahelpers.apache import ( + get_cert, + get_ca_cert, + install_ca_cert, +) +from charmhelpers.contrib.openstack.neutron import ( + neutron_plugin_attribute, + parse_data_port_mappings, +) +from charmhelpers.contrib.openstack.ip import ( + resolve_address, + INTERNAL, + ADMIN, + PUBLIC, + ADDRESS_MAP, + local_address, +) +from charmhelpers.contrib.network.ip import ( + get_address_in_network, + get_ipv4_addr, + get_ipv6_addr, + get_netmask_for_address, + format_ipv6_addr, + is_bridge_member, + is_ipv6_disabled, + get_relation_ip, +) +from charmhelpers.contrib.openstack.utils import ( + config_flags_parser, + get_os_codename_install_source, + enable_memcache, + CompareOpenStackReleases, + os_release, +) +from charmhelpers.core.unitdata import kv + +from charmhelpers.contrib.hardware import pci + +try: + import psutil +except ImportError: + apt_install('python3-psutil', fatal=True) + import psutil + +CA_CERT_PATH = '/usr/local/share/ca-certificates/keystone_juju_ca_cert.crt' +ADDRESS_TYPES = ['admin', 'internal', 'public'] +HAPROXY_RUN_DIR = '/var/run/haproxy/' +DEFAULT_OSLO_MESSAGING_DRIVER = "messagingv2" +DEFAULT_HAPROXY_EXPORTER_STATS_PORT = 8404 + + +def ensure_packages(packages): + """Install but do not upgrade required plugin packages.""" + required = filter_installed_packages(packages) + if required: + apt_install(required, fatal=True) + + +def context_complete(ctxt): + _missing = [k for k, v in ctxt.items() if v is None or v == ''] + + if _missing: + log('Missing required data: %s' % ' '.join(_missing), level=INFO) + return False + + return True + + +class OSContextGenerator(object): + """Base class for all context generators.""" + interfaces = [] + related = False + complete = False + missing_data = [] + + def __call__(self): + raise NotImplementedError + + def context_complete(self, ctxt): + """Check for missing data for the required context data. + Set self.missing_data if it exists and return False. + Set self.complete if no missing data and return True. 
+ """ + # Fresh start + self.complete = False + self.missing_data = [] + for k, v in ctxt.items(): + if v is None or v == '': + if k not in self.missing_data: + self.missing_data.append(k) + + if self.missing_data: + self.complete = False + log('Missing required data: %s' % ' '.join(self.missing_data), + level=INFO) + else: + self.complete = True + return self.complete + + def get_related(self): + """Check if any of the context interfaces have relation ids. + Set self.related and return True if one of the interfaces + has relation ids. + """ + # Fresh start + self.related = False + try: + for interface in self.interfaces: + if relation_ids(interface): + self.related = True + return self.related + except AttributeError as e: + log("{} {}" + "".format(self, e), 'INFO') + return self.related + + +class KeystoneAuditMiddleware(OSContextGenerator): + def __init__(self, service: str) -> None: + self.service_name = service + + def __call__(self): + """Return context dictionary containing configuration status of + audit-middleware and the charm service name. + """ + ctxt = { + 'audit_middleware': config('audit-middleware') or False, + 'service_name': self.service_name + } + return ctxt + + +class SharedDBContext(OSContextGenerator): + interfaces = ['shared-db'] + + def __init__(self, database=None, user=None, relation_prefix=None, + ssl_dir=None, relation_id=None): + """Allows inspecting relation for settings prefixed with + relation_prefix. This is useful for parsing access for multiple + databases returned via the shared-db interface (eg, nova_password, + quantum_password) + """ + self.relation_prefix = relation_prefix + self.database = database + self.user = user + self.ssl_dir = ssl_dir + self.rel_name = self.interfaces[0] + self.relation_id = relation_id + + def __call__(self): + self.database = self.database or config('database') + self.user = self.user or config('database-user') + if None in [self.database, self.user]: + log("Could not generate shared_db context. Missing required charm " + "config options. (database name and user)", level=ERROR) + raise OSContextError + + ctxt = {} + + # NOTE(jamespage) if mysql charm provides a network upon which + # access to the database should be made, reconfigure relation + # with the service units local address and defer execution + access_network = relation_get('access-network') + if access_network is not None: + if self.relation_prefix is not None: + hostname_key = "{}_hostname".format(self.relation_prefix) + else: + hostname_key = "hostname" + access_hostname = get_address_in_network( + access_network, + local_address(unit_get_fallback='private-address')) + set_hostname = relation_get(attribute=hostname_key, + unit=local_unit()) + if set_hostname != access_hostname: + relation_set(relation_settings={hostname_key: access_hostname}) + return None # Defer any further hook execution for now.... 
+ + password_setting = 'password' + if self.relation_prefix: + password_setting = self.relation_prefix + '_password' + + if self.relation_id: + rids = [self.relation_id] + else: + rids = relation_ids(self.interfaces[0]) + + rel = (get_os_codename_install_source(config('openstack-origin')) or + 'icehouse') + for rid in rids: + self.related = True + for unit in related_units(rid): + rdata = relation_get(rid=rid, unit=unit) + host = rdata.get('db_host') + host = format_ipv6_addr(host) or host + ctxt = { + 'database_host': host, + 'database': self.database, + 'database_user': self.user, + 'database_password': rdata.get(password_setting), + 'database_type': 'mysql+pymysql' + } + # Port is being introduced with LP Bug #1876188 + # but it not currently required and may not be set in all + # cases, particularly in classic charms. + port = rdata.get('db_port') + if port: + ctxt['database_port'] = port + if CompareOpenStackReleases(rel) < 'queens': + ctxt['database_type'] = 'mysql' + if self.context_complete(ctxt): + db_ssl(rdata, ctxt, self.ssl_dir) + return ctxt + return {} + + +class PostgresqlDBContext(OSContextGenerator): + interfaces = ['pgsql-db'] + + def __init__(self, database=None): + self.database = database + + def __call__(self): + self.database = self.database or config('database') + if self.database is None: + log('Could not generate postgresql_db context. Missing required ' + 'charm config options. (database name)', level=ERROR) + raise OSContextError + + ctxt = {} + for rid in relation_ids(self.interfaces[0]): + self.related = True + for unit in related_units(rid): + rel_host = relation_get('host', rid=rid, unit=unit) + rel_user = relation_get('user', rid=rid, unit=unit) + rel_passwd = relation_get('password', rid=rid, unit=unit) + ctxt = {'database_host': rel_host, + 'database': self.database, + 'database_user': rel_user, + 'database_password': rel_passwd, + 'database_type': 'postgresql'} + if self.context_complete(ctxt): + return ctxt + + return {} + + +def db_ssl(rdata, ctxt, ssl_dir): + if 'ssl_ca' in rdata and ssl_dir: + ca_path = os.path.join(ssl_dir, 'db-client.ca') + with open(ca_path, 'wb') as fh: + fh.write(b64decode(rdata['ssl_ca'])) + + ctxt['database_ssl_ca'] = ca_path + elif 'ssl_ca' in rdata: + log("Charm not setup for ssl support but ssl ca found", level=INFO) + return ctxt + + if 'ssl_cert' in rdata: + cert_path = os.path.join( + ssl_dir, 'db-client.cert') + if not os.path.exists(cert_path): + log("Waiting 1m for ssl client cert validity", level=INFO) + time.sleep(60) + + with open(cert_path, 'wb') as fh: + fh.write(b64decode(rdata['ssl_cert'])) + + ctxt['database_ssl_cert'] = cert_path + key_path = os.path.join(ssl_dir, 'db-client.key') + with open(key_path, 'wb') as fh: + fh.write(b64decode(rdata['ssl_key'])) + + ctxt['database_ssl_key'] = key_path + + return ctxt + + +class IdentityServiceContext(OSContextGenerator): + + _forward_compat_remaps = { + 'admin_user': 'admin-user-name', + 'service_username': 'service-user-name', + 'service_tenant': 'service-project-name', + 'service_tenant_id': 'service-project-id', + 'service_domain': 'service-domain-name', + } + + def __init__(self, + service=None, + service_user=None, + rel_name='identity-service'): + self.service = service + self.service_user = service_user + self.rel_name = rel_name + self.interfaces = [self.rel_name] + + def _setup_pki_cache(self): + if self.service and self.service_user: + # This is required for pki token signing if we don't want /tmp to + # be used. 
+ cachedir = '/var/cache/%s' % (self.service) + if not os.path.isdir(cachedir): + log("Creating service cache dir %s" % (cachedir), level=DEBUG) + mkdir(path=cachedir, owner=self.service_user, + group=self.service_user, perms=0o700) + + return cachedir + return None + + def _get_pkg_name(self, python_name='keystonemiddleware'): + """Get corresponding distro installed package for python + package name. + + :param python_name: nameof the python package + :type: string + """ + pkg_names = map(lambda x: x + python_name, ('python3-', 'python-')) + + for pkg in pkg_names: + if not filter_installed_packages((pkg,)): + return pkg + + return None + + def _get_keystone_authtoken_ctxt(self, ctxt, keystonemiddleware_os_rel): + """Build Jinja2 context for full rendering of [keystone_authtoken] + section with variable names included. Re-constructed from former + template 'section-keystone-auth-mitaka'. + + :param ctxt: Jinja2 context returned from self.__call__() + :type: dict + :param keystonemiddleware_os_rel: OpenStack release name of + keystonemiddleware package installed + """ + c = collections.OrderedDict((('auth_type', 'password'),)) + + # 'www_authenticate_uri' replaced 'auth_uri' since Stein, + # see keystonemiddleware upstream sources for more info + if CompareOpenStackReleases(keystonemiddleware_os_rel) >= 'stein': + if 'public_auth_url' in ctxt: + c.update(( + ('www_authenticate_uri', '{}/v3'.format( + ctxt.get('public_auth_url'))),)) + else: + c.update(( + ('www_authenticate_uri', "{}://{}:{}/v3".format( + ctxt.get('service_protocol', ''), + ctxt.get('service_host', ''), + ctxt.get('service_port', ''))),)) + else: + c.update(( + ('auth_uri', "{}://{}:{}/v3".format( + ctxt.get('service_protocol', ''), + ctxt.get('service_host', ''), + ctxt.get('service_port', ''))),)) + + if 'internal_auth_url' in ctxt: + c.update(( + ('auth_url', ctxt.get('internal_auth_url')),)) + else: + c.update(( + ('auth_url', "{}://{}:{}/v3".format( + ctxt.get('auth_protocol', ''), + ctxt.get('auth_host', ''), + ctxt.get('auth_port', ''))),)) + + c.update(( + ('project_domain_name', ctxt.get('admin_domain_name', '')), + ('user_domain_name', ctxt.get('admin_domain_name', '')), + ('project_name', ctxt.get('admin_tenant_name', '')), + ('username', ctxt.get('admin_user', '')), + ('password', ctxt.get('admin_password', '')), + ('signing_dir', ctxt.get('signing_dir', '')),)) + + if ctxt.get('service_type'): + c.update((('service_type', ctxt.get('service_type')),)) + + return c + + def __call__(self): + log('Generating template context for ' + self.rel_name, level=DEBUG) + ctxt = {} + + keystonemiddleware_os_release = None + if self._get_pkg_name(): + keystonemiddleware_os_release = os_release(self._get_pkg_name()) + + cachedir = self._setup_pki_cache() + if cachedir: + ctxt['signing_dir'] = cachedir + + for rid in relation_ids(self.rel_name): + self.related = True + for unit in related_units(rid): + rdata = {} + # NOTE(jamespage): + # forwards compat with application data + # bag driven approach to relation. 
+ _adata = relation_get(rid=rid, app=remote_service_name(rid)) + adata = {} + # if no app data bag presented - fallback + # to legacy unit based relation data + rdata = relation_get(rid=rid, unit=unit) + if _adata: + # New app data bag uses - instead of _ + # in key names - remap for compat with + # existing relation data keys + for key, value in _adata.items(): + if key == 'api-version': + adata[key.replace('-', '_')] = value.strip('v') + else: + adata[key.replace('-', '_')] = value + # Re-map some keys for backwards compatibility + for target, source in self._forward_compat_remaps.items(): + adata[target] = _adata.get(source) + # Now preferentially get data from the app data bag, but if + # it's not available, get it from the legacy based relation + # data. + + def _resolve(key): + return adata.get(key) or rdata.get(key) + + serv_host = _resolve('service_host') + serv_host = format_ipv6_addr(serv_host) or serv_host + auth_host = _resolve('auth_host') + auth_host = format_ipv6_addr(auth_host) or auth_host + int_host = _resolve('internal_host',) + int_host = format_ipv6_addr(int_host) or int_host + svc_protocol = _resolve('service_protocol') or 'http' + auth_protocol = _resolve('auth_protocol') or 'http' + admin_role = _resolve('admin_role') or 'Admin' + int_protocol = _resolve('internal_protocol') or 'http' + api_version = _resolve('api_version') or '2.0' + ctxt.update({'service_port': _resolve('service_port'), + 'service_host': serv_host, + 'auth_host': auth_host, + 'auth_port': _resolve('auth_port'), + 'internal_host': int_host, + 'internal_port': _resolve('internal_port'), + 'admin_tenant_name': _resolve('service_tenant'), + 'admin_user': _resolve('service_username'), + 'admin_password': _resolve('service_password'), + 'admin_role': admin_role, + 'service_protocol': svc_protocol, + 'auth_protocol': auth_protocol, + 'internal_protocol': int_protocol, + 'api_version': api_version}) + + service_type = _resolve('service_type') + if service_type: + ctxt['service_type'] = service_type + + if float(api_version) > 2: + ctxt.update({ + 'admin_domain_name': _resolve('service_domain'), + 'service_project_id': _resolve('service_tenant_id'), + 'service_domain_id': _resolve('service_domain_id')}) + + # NOTE: + # keystone-k8s operator presents full URLS + # for all three endpoints - public and internal are + # externally addressable for machine based charm + public_auth_url = _resolve('public_auth_url') + # if 'public_auth_url' in rdata: + if public_auth_url: + ctxt.update({ + 'public_auth_url': public_auth_url, + }) + internal_auth_url = _resolve('internal_auth_url') + # if 'internal_auth_url' in rdata: + if internal_auth_url: + ctxt.update({ + 'internal_auth_url': internal_auth_url, + }) + + # we keep all variables in ctxt for compatibility and + # add nested dictionary for keystone_authtoken generic + # templating + if keystonemiddleware_os_release: + ctxt['keystone_authtoken'] = \ + self._get_keystone_authtoken_ctxt( + ctxt, keystonemiddleware_os_release) + + if self.context_complete(ctxt): + # NOTE(jamespage) this is required for >= icehouse + # so a missing value just indicates keystone needs + # upgrading + ctxt['admin_user_id'] = _resolve('service_user_id') + ctxt['admin_tenant_id'] = _resolve('service_tenant_id') + ctxt['admin_domain_id'] = _resolve('service_domain_id') + return ctxt + + return {} + + +class IdentityCredentialsContext(IdentityServiceContext): + '''Context for identity-credentials interface type''' + + def __init__(self, + service=None, + service_user=None, + 
rel_name='identity-credentials'): + super(IdentityCredentialsContext, self).__init__(service, + service_user, + rel_name) + + def __call__(self): + log('Generating template context for ' + self.rel_name, level=DEBUG) + ctxt = {} + + cachedir = self._setup_pki_cache() + if cachedir: + ctxt['signing_dir'] = cachedir + + for rid in relation_ids(self.rel_name): + self.related = True + for unit in related_units(rid): + rdata = relation_get(rid=rid, unit=unit) + credentials_host = rdata.get('credentials_host') + credentials_host = ( + format_ipv6_addr(credentials_host) or credentials_host + ) + auth_host = rdata.get('auth_host') + auth_host = format_ipv6_addr(auth_host) or auth_host + svc_protocol = rdata.get('credentials_protocol') or 'http' + auth_protocol = rdata.get('auth_protocol') or 'http' + api_version = rdata.get('api_version') or '2.0' + ctxt.update({ + 'service_port': rdata.get('credentials_port'), + 'service_host': credentials_host, + 'auth_host': auth_host, + 'auth_port': rdata.get('auth_port'), + 'admin_tenant_name': rdata.get('credentials_project'), + 'admin_tenant_id': rdata.get('credentials_project_id'), + 'admin_user': rdata.get('credentials_username'), + 'admin_password': rdata.get('credentials_password'), + 'service_protocol': svc_protocol, + 'auth_protocol': auth_protocol, + 'api_version': api_version + }) + + if rdata.get('service_type'): + ctxt['service_type'] = rdata.get('service_type') + + if float(api_version) > 2: + ctxt.update({'admin_domain_name': + rdata.get('domain')}) + + if self.context_complete(ctxt): + return ctxt + + return {} + + +class NovaVendorMetadataContext(OSContextGenerator): + """Context used for configuring nova vendor metadata on nova.conf file.""" + + def __init__(self, os_release_pkg, interfaces=None): + """Initialize the NovaVendorMetadataContext object. + + :param os_release_pkg: the package name to extract the OpenStack + release codename from. + :type os_release_pkg: str + :param interfaces: list of string values to be used as the Context's + relation interfaces. + :type interfaces: List[str] + """ + self.os_release_pkg = os_release_pkg + if interfaces is not None: + self.interfaces = interfaces + + def __call__(self): + cmp_os_release = CompareOpenStackReleases( + os_release(self.os_release_pkg)) + ctxt = {'vendor_data': False} + + vdata_providers = [] + vdata = config('vendor-data') + vdata_url = config('vendor-data-url') + + if vdata: + try: + # validate the JSON. If invalid, we do not set anything here + json.loads(vdata) + except (TypeError, ValueError) as e: + log('Error decoding vendor-data. {}'.format(e), level=ERROR) + else: + ctxt['vendor_data'] = True + # Mitaka does not support DynamicJSON + # so vendordata_providers is not needed + if cmp_os_release > 'mitaka': + vdata_providers.append('StaticJSON') + + if vdata_url: + if cmp_os_release > 'mitaka': + ctxt['vendor_data_url'] = vdata_url + vdata_providers.append('DynamicJSON') + else: + log('Dynamic vendor data unsupported' + ' for {}.'.format(cmp_os_release), level=ERROR) + if vdata_providers: + ctxt['vendordata_providers'] = ','.join(vdata_providers) + + return ctxt + + +class NovaVendorMetadataJSONContext(OSContextGenerator): + """Context used for writing nova vendor metadata json file.""" + + def __init__(self, os_release_pkg): + """Initialize the NovaVendorMetadataJSONContext object. + + :param os_release_pkg: the package name to extract the OpenStack + release codename from. 
+ :type os_release_pkg: str + """ + self.os_release_pkg = os_release_pkg + + def __call__(self): + ctxt = {'vendor_data_json': '{}'} + + vdata = config('vendor-data') + if vdata: + try: + # validate the JSON. If invalid, we return empty. + json.loads(vdata) + except (TypeError, ValueError) as e: + log('Error decoding vendor-data. {}'.format(e), level=ERROR) + else: + ctxt['vendor_data_json'] = vdata + + return ctxt + + +class AMQPContext(OSContextGenerator): + + def __init__(self, ssl_dir=None, rel_name='amqp', relation_prefix=None, + relation_id=None): + self.ssl_dir = ssl_dir + self.rel_name = rel_name + self.relation_prefix = relation_prefix + self.interfaces = [rel_name] + self.relation_id = relation_id + + def __call__(self): + log('Generating template context for amqp', level=DEBUG) + conf = config() + if self.relation_prefix: + user_setting = '%s-rabbit-user' % (self.relation_prefix) + vhost_setting = '%s-rabbit-vhost' % (self.relation_prefix) + else: + user_setting = 'rabbit-user' + vhost_setting = 'rabbit-vhost' + + try: + username = conf[user_setting] + vhost = conf[vhost_setting] + except KeyError as e: + log('Could not generate shared_db context. Missing required charm ' + 'config options: %s.' % e, level=ERROR) + raise OSContextError + + ctxt = {} + if self.relation_id: + rids = [self.relation_id] + else: + rids = relation_ids(self.rel_name) + for rid in rids: + ha_vip_only = False + self.related = True + transport_hosts = None + rabbitmq_port = '5672' + for unit in related_units(rid): + if relation_get('clustered', rid=rid, unit=unit): + ctxt['clustered'] = True + vip = relation_get('vip', rid=rid, unit=unit) + vip = format_ipv6_addr(vip) or vip + ctxt['rabbitmq_host'] = vip + transport_hosts = [vip] + else: + host = relation_get('private-address', rid=rid, unit=unit) + host = format_ipv6_addr(host) or host + ctxt['rabbitmq_host'] = host + transport_hosts = [host] + + ctxt.update({ + 'rabbitmq_user': username, + 'rabbitmq_password': relation_get('password', rid=rid, + unit=unit), + 'rabbitmq_virtual_host': vhost, + }) + + ssl_port = relation_get('ssl_port', rid=rid, unit=unit) + if ssl_port: + ctxt['rabbit_ssl_port'] = ssl_port + rabbitmq_port = ssl_port + + ssl_ca = relation_get('ssl_ca', rid=rid, unit=unit) + if ssl_ca: + ctxt['rabbit_ssl_ca'] = ssl_ca + + if relation_get('ha_queues', rid=rid, unit=unit) is not None: + ctxt['rabbitmq_ha_queues'] = True + + ha_vip_only = relation_get('ha-vip-only', + rid=rid, unit=unit) is not None + + if self.context_complete(ctxt): + if 'rabbit_ssl_ca' in ctxt: + if not self.ssl_dir: + log("Charm not setup for ssl support but ssl ca " + "found", level=INFO) + break + + ca_path = os.path.join( + self.ssl_dir, 'rabbit-client-ca.pem') + with open(ca_path, 'wb') as fh: + fh.write(b64decode(ctxt['rabbit_ssl_ca'])) + ctxt['rabbit_ssl_ca'] = ca_path + + # Sufficient information found = break out! 
+ break + + # Used for active/active rabbitmq >= grizzly + if (('clustered' not in ctxt or ha_vip_only) and + len(related_units(rid)) > 1): + rabbitmq_hosts = [] + for unit in related_units(rid): + host = relation_get('private-address', rid=rid, unit=unit) + if not relation_get('password', rid=rid, unit=unit): + log( + ("Skipping {} password not sent which indicates " + "unit is not ready.".format(host)), + level=DEBUG) + continue + host = format_ipv6_addr(host) or host + rabbitmq_hosts.append(host) + + rabbitmq_hosts = sorted(rabbitmq_hosts) + ctxt['rabbitmq_hosts'] = ','.join(rabbitmq_hosts) + transport_hosts = rabbitmq_hosts + + if transport_hosts: + transport_url_hosts = ','.join([ + "{}:{}@{}:{}".format(ctxt['rabbitmq_user'], + ctxt['rabbitmq_password'], + host_, + rabbitmq_port) + for host_ in transport_hosts]) + ctxt['transport_url'] = "rabbit://{}/{}".format( + transport_url_hosts, vhost) + + oslo_messaging_flags = conf.get('oslo-messaging-flags', None) + if oslo_messaging_flags: + ctxt['oslo_messaging_flags'] = config_flags_parser( + oslo_messaging_flags) + + oslo_messaging_driver = conf.get( + 'oslo-messaging-driver', DEFAULT_OSLO_MESSAGING_DRIVER) + if oslo_messaging_driver: + ctxt['oslo_messaging_driver'] = oslo_messaging_driver + + notification_format = conf.get('notification-format', None) + if notification_format: + ctxt['notification_format'] = notification_format + + notification_topics = conf.get('notification-topics', None) + if notification_topics: + ctxt['notification_topics'] = notification_topics + + send_notifications_to_logs = conf.get('send-notifications-to-logs', None) + if send_notifications_to_logs: + ctxt['send_notifications_to_logs'] = send_notifications_to_logs + + if not self.complete: + return {} + + return ctxt + + +class CephContext(OSContextGenerator): + """Generates context for /etc/ceph/ceph.conf templates.""" + interfaces = ['ceph'] + + def __call__(self): + if not relation_ids('ceph'): + return {} + + log('Generating template context for ceph', level=DEBUG) + mon_hosts = [] + ctxt = { + 'use_syslog': str(config('use-syslog')).lower() + } + for rid in relation_ids('ceph'): + for unit in related_units(rid): + if not ctxt.get('auth'): + ctxt['auth'] = relation_get('auth', rid=rid, unit=unit) + if not ctxt.get('key'): + ctxt['key'] = relation_get('key', rid=rid, unit=unit) + if not ctxt.get('rbd_features'): + default_features = relation_get('rbd-features', rid=rid, unit=unit) + if default_features is not None: + ctxt['rbd_features'] = default_features + + ceph_addrs = relation_get('ceph-public-address', rid=rid, + unit=unit) + if ceph_addrs: + for addr in ceph_addrs.split(' '): + mon_hosts.append(format_ipv6_addr(addr) or addr) + else: + priv_addr = relation_get('private-address', rid=rid, + unit=unit) + mon_hosts.append(format_ipv6_addr(priv_addr) or priv_addr) + + ctxt['mon_hosts'] = ' '.join(sorted(mon_hosts)) + + if config('pool-type') and config('pool-type') == 'erasure-coded': + base_pool_name = config('rbd-pool') or config('rbd-pool-name') + if not base_pool_name: + base_pool_name = service_name() + ctxt['rbd_default_data_pool'] = base_pool_name + + if not os.path.isdir('/etc/ceph'): + os.mkdir('/etc/ceph') + + if not self.context_complete(ctxt): + return {} + + ensure_packages(['ceph-common']) + return ctxt + + def context_complete(self, ctxt): + """Overridden here to ensure the context is actually complete. 
+ + We set `key` and `auth` to None here, by default, to ensure + that the context will always evaluate to incomplete until the + Ceph relation has actually sent these details; otherwise, + there is a potential race condition between the relation + appearing and the first unit actually setting this data on the + relation. + + :param ctxt: The current context members + :type ctxt: Dict[str, ANY] + :returns: True if the context is complete + :rtype: bool + """ + if 'auth' not in ctxt or 'key' not in ctxt: + return False + return super(CephContext, self).context_complete(ctxt) + + +class HAProxyContext(OSContextGenerator): + """Provides half a context for the haproxy template, which describes + all peers to be included in the cluster. Each charm needs to include + its own context generator that describes the port mapping. + + :side effect: mkdir is called on HAPROXY_RUN_DIR + """ + interfaces = ['cluster'] + + def __init__(self, singlenode_mode=False, + address_types=None, + exporter_stats_port=DEFAULT_HAPROXY_EXPORTER_STATS_PORT): + if address_types is None: + address_types = ADDRESS_TYPES[:] + + self.address_types = address_types + self.singlenode_mode = singlenode_mode + self.exporter_stats_port = exporter_stats_port + + def __call__(self): + if not os.path.isdir(HAPROXY_RUN_DIR): + mkdir(path=HAPROXY_RUN_DIR) + if not relation_ids('cluster') and not self.singlenode_mode: + return {} + + l_unit = local_unit().replace('/', '-') + cluster_hosts = collections.OrderedDict() + + # NOTE(jamespage): build out map of configured network endpoints + # and associated backends + for addr_type in self.address_types: + cfg_opt = 'os-{}-network'.format(addr_type) + # NOTE(thedac) For some reason the ADDRESS_MAP uses 'int' rather + # than 'internal' + if addr_type == 'internal': + _addr_map_type = INTERNAL + else: + _addr_map_type = addr_type + # Network spaces aware + laddr = get_relation_ip(ADDRESS_MAP[_addr_map_type]['binding'], + config(cfg_opt)) + if laddr: + netmask = get_netmask_for_address(laddr) + cluster_hosts[laddr] = { + 'network': "{}/{}".format(laddr, + netmask), + 'backends': collections.OrderedDict([(l_unit, + laddr)]) + } + for rid in relation_ids('cluster'): + for unit in sorted(related_units(rid)): + # API Charms will need to set {addr_type}-address with + # get_relation_ip(addr_type) + _laddr = relation_get('{}-address'.format(addr_type), + rid=rid, unit=unit) + if _laddr: + _unit = unit.replace('/', '-') + cluster_hosts[laddr]['backends'][_unit] = _laddr + + # NOTE(jamespage) add backend based on get_relation_ip - this + # will either be the only backend or the fallback if no acls + # match in the frontend + # Network spaces aware + addr = get_relation_ip('cluster') + cluster_hosts[addr] = {} + netmask = get_netmask_for_address(addr) + cluster_hosts[addr] = { + 'network': "{}/{}".format(addr, netmask), + 'backends': collections.OrderedDict([(l_unit, + addr)]) + } + for rid in relation_ids('cluster'): + for unit in sorted(related_units(rid)): + # API Charms will need to set their private-address with + # get_relation_ip('cluster') + _laddr = relation_get('private-address', + rid=rid, unit=unit) + if _laddr: + _unit = unit.replace('/', '-') + cluster_hosts[addr]['backends'][_unit] = _laddr + + ctxt = { + 'frontends': cluster_hosts, + 'default_backend': addr + } + + if config('haproxy-server-timeout'): + ctxt['haproxy_server_timeout'] = config('haproxy-server-timeout') + + if config('haproxy-client-timeout'): + ctxt['haproxy_client_timeout'] = config('haproxy-client-timeout') + + if 
config('haproxy-queue-timeout'):
+            ctxt['haproxy_queue_timeout'] = config('haproxy-queue-timeout')
+
+        if config('haproxy-connect-timeout'):
+            ctxt['haproxy_connect_timeout'] = config('haproxy-connect-timeout')
+
+        if config('prefer-ipv6'):
+            ctxt['local_host'] = 'ip6-localhost'
+            ctxt['haproxy_host'] = '::'
+        else:
+            ctxt['local_host'] = '127.0.0.1'
+            ctxt['haproxy_host'] = '0.0.0.0'
+
+        ctxt['ipv6_enabled'] = not is_ipv6_disabled()
+
+        ctxt['stat_port'] = '8888'
+
+        db = kv()
+        ctxt['stat_password'] = db.get('stat-password')
+        if not ctxt['stat_password']:
+            ctxt['stat_password'] = db.set('stat-password', pwgen(32))
+            db.flush()
+
+        # NOTE(rgildein): configure prometheus exporter for haproxy > 2.0.0
+        #                 New bind will be created and a prometheus-exporter
+        #                 will be used for path /metrics. At the same time,
+        #                 prometheus-exporter avoids using auth.
+        haproxy_version = get_installed_version("haproxy")
+        if (haproxy_version and
+                haproxy_version.ver_str >= LooseVersion("2.0.0") and
+                is_relation_made("haproxy-exporter")):
+            ctxt["stats_exporter_host"] = get_relation_ip("haproxy-exporter")
+            ctxt["stats_exporter_port"] = self.exporter_stats_port
+
+        for frontend in cluster_hosts:
+            if (len(cluster_hosts[frontend]['backends']) > 1 or
+                    self.singlenode_mode):
+                # Enable haproxy when we have enough peers.
+                log('Ensuring haproxy enabled in /etc/default/haproxy.',
+                    level=DEBUG)
+                with open('/etc/default/haproxy', 'w') as out:
+                    out.write('ENABLED=1\n')
+
+                return ctxt
+
+        log('HAProxy context is incomplete, this unit has no peers.',
+            level=INFO)
+        return {}
+
+
+class ImageServiceContext(OSContextGenerator):
+    interfaces = ['image-service']
+
+    def __call__(self):
+        """Obtains the glance API server from the image-service relation.
+        Useful in nova and cinder (currently).
+        """
+        log('Generating template context for image-service.', level=DEBUG)
+        rids = relation_ids('image-service')
+        if not rids:
+            return {}
+
+        for rid in rids:
+            for unit in related_units(rid):
+                api_server = relation_get('glance-api-server',
+                                          rid=rid, unit=unit)
+                if api_server:
+                    return {'glance_api_servers': api_server}
+
+        log("ImageService context is incomplete. Missing required relation "
+            "data.", level=INFO)
+        return {}
+
+
+class ApacheSSLContext(OSContextGenerator):
+    """Generates a context for an apache vhost configuration that configures
+    HTTPS reverse proxying for one or many endpoints. Generated context
+    looks something like::
+
+        {
+            'namespace': 'cinder',
+            'private_address': 'iscsi.mycinderhost.com',
+            'endpoints': [(8776, 8766), (8777, 8767)]
+        }
+
+    The endpoints list consists of tuples mapping external ports
+    to internal ports.
+    """
+    interfaces = ['https']
+
+    # charms should inherit this context and set external ports
+    # and service namespace accordingly.
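+    # A minimal, hypothetical subclass sketch (names are illustrative only):
+    #
+    #     class CinderApacheSSLContext(ApacheSSLContext):
+    #         external_ports = [8776]
+    #         service_namespace = 'cinder'
+    #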
+    external_ports = []
+    service_namespace = None
+    user = group = 'root'
+
+    def enable_modules(self):
+        cmd = ['a2enmod', 'ssl', 'proxy', 'proxy_http', 'headers']
+        check_call(cmd)
+
+    def configure_cert(self, cn=None):
+        ssl_dir = os.path.join('/etc/apache2/ssl/', self.service_namespace)
+        mkdir(path=ssl_dir)
+        cert, key = get_cert(cn)
+        if cert and key:
+            if cn:
+                cert_filename = 'cert_{}'.format(cn)
+                key_filename = 'key_{}'.format(cn)
+            else:
+                cert_filename = 'cert'
+                key_filename = 'key'
+
+            write_file(path=os.path.join(ssl_dir, cert_filename),
+                       content=b64decode(cert), owner=self.user,
+                       group=self.group, perms=0o640)
+            write_file(path=os.path.join(ssl_dir, key_filename),
+                       content=b64decode(key), owner=self.user,
+                       group=self.group, perms=0o640)
+
+    def configure_ca(self):
+        ca_cert = get_ca_cert()
+        if ca_cert:
+            install_ca_cert(b64decode(ca_cert))
+
+    def canonical_names(self):
+        """Figure out which canonical names clients will access this service.
+        """
+        cns = []
+        for r_id in relation_ids('identity-service'):
+            for unit in related_units(r_id):
+                rdata = relation_get(rid=r_id, unit=unit)
+                for k in rdata:
+                    if k.startswith('ssl_key_'):
+                        # Strip the 'ssl_key_' prefix; note that str.lstrip()
+                        # would strip a character set, not the prefix.
+                        cns.append(k[len('ssl_key_'):])
+
+        return sorted(list(set(cns)))
+
+    def get_network_addresses(self):
+        """For each network configured, return corresponding address and
+           hostname or vip (if available).
+
+        Returns a list of tuples of the form:
+
+            [(address_in_net_a, hostname_in_net_a),
+             (address_in_net_b, hostname_in_net_b),
+             ...]
+
+        or, if no hostname(s) available:
+
+            [(address_in_net_a, vip_in_net_a),
+             (address_in_net_b, vip_in_net_b),
+             ...]
+
+        or, if no vip(s) available:
+
+            [(address_in_net_a, address_in_net_a),
+             (address_in_net_b, address_in_net_b),
+             ...]
+        """
+        addresses = []
+        for net_type in [INTERNAL, ADMIN, PUBLIC]:
+            net_config = config(ADDRESS_MAP[net_type]['config'])
+            # NOTE(jamespage): Fallback must always be private address
+            #                  as this is used to bind services on the
+            #                  local unit.
+            fallback = local_address(unit_get_fallback="private-address")
+            if net_config:
+                addr = get_address_in_network(net_config,
+                                              fallback)
+            else:
+                try:
+                    addr = network_get_primary_address(
+                        ADDRESS_MAP[net_type]['binding']
+                    )
+                except (NotImplementedError, NoNetworkBinding):
+                    addr = fallback
+
+            endpoint = resolve_address(net_type)
+            addresses.append((addr, endpoint))
+
+        # Log the set of addresses to have a trail log and capture if tuples
+        # change over time in the same unit (LP: #1952414).
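+        # Illustration (hypothetical values): a unit with internal and
+        # public networks configured might log:
+        #     [('10.0.1.5', 'cinder.internal.example.com'),
+        #      ('10.20.1.5', 'cinder.example.com')]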
+ sorted_addresses = sorted(set(addresses)) + log('get_network_addresses: {}'.format(sorted_addresses)) + return sorted_addresses + + def __call__(self): + if isinstance(self.external_ports, str): + self.external_ports = [self.external_ports] + + if not self.external_ports or not https(): + return {} + + use_keystone_ca = True + for rid in relation_ids('certificates'): + if related_units(rid): + use_keystone_ca = False + + if use_keystone_ca: + self.configure_ca() + + self.enable_modules() + + ctxt = {'namespace': self.service_namespace, + 'endpoints': [], + 'ext_ports': []} + + if use_keystone_ca: + cns = self.canonical_names() + if cns: + for cn in cns: + self.configure_cert(cn) + else: + # Expect cert/key provided in config (currently assumed that ca + # uses ip for cn) + for net_type in (INTERNAL, ADMIN, PUBLIC): + cn = resolve_address(endpoint_type=net_type) + self.configure_cert(cn) + + addresses = self.get_network_addresses() + for address, endpoint in addresses: + for api_port in self.external_ports: + ext_port = determine_apache_port(api_port, + singlenode_mode=True) + int_port = determine_api_port(api_port, singlenode_mode=True) + portmap = (address, endpoint, int(ext_port), int(int_port)) + ctxt['endpoints'].append(portmap) + ctxt['ext_ports'].append(int(ext_port)) + + ctxt['ext_ports'] = sorted(list(set(ctxt['ext_ports']))) + return ctxt + + +class NeutronContext(OSContextGenerator): + interfaces = [] + + @property + def plugin(self): + return None + + @property + def network_manager(self): + return None + + @property + def packages(self): + return neutron_plugin_attribute(self.plugin, 'packages', + self.network_manager) + + @property + def neutron_security_groups(self): + return None + + def _ensure_packages(self): + for pkgs in self.packages: + ensure_packages(pkgs) + + def ovs_ctxt(self): + driver = neutron_plugin_attribute(self.plugin, 'driver', + self.network_manager) + config = neutron_plugin_attribute(self.plugin, 'config', + self.network_manager) + ovs_ctxt = {'core_plugin': driver, + 'neutron_plugin': 'ovs', + 'neutron_security_groups': self.neutron_security_groups, + 'local_ip': unit_private_ip(), + 'config': config} + + return ovs_ctxt + + def nuage_ctxt(self): + driver = neutron_plugin_attribute(self.plugin, 'driver', + self.network_manager) + config = neutron_plugin_attribute(self.plugin, 'config', + self.network_manager) + nuage_ctxt = {'core_plugin': driver, + 'neutron_plugin': 'vsp', + 'neutron_security_groups': self.neutron_security_groups, + 'local_ip': unit_private_ip(), + 'config': config} + + return nuage_ctxt + + def nvp_ctxt(self): + driver = neutron_plugin_attribute(self.plugin, 'driver', + self.network_manager) + config = neutron_plugin_attribute(self.plugin, 'config', + self.network_manager) + nvp_ctxt = {'core_plugin': driver, + 'neutron_plugin': 'nvp', + 'neutron_security_groups': self.neutron_security_groups, + 'local_ip': unit_private_ip(), + 'config': config} + + return nvp_ctxt + + def n1kv_ctxt(self): + driver = neutron_plugin_attribute(self.plugin, 'driver', + self.network_manager) + n1kv_config = neutron_plugin_attribute(self.plugin, 'config', + self.network_manager) + n1kv_user_config_flags = config('n1kv-config-flags') + restrict_policy_profiles = config('n1kv-restrict-policy-profiles') + n1kv_ctxt = {'core_plugin': driver, + 'neutron_plugin': 'n1kv', + 'neutron_security_groups': self.neutron_security_groups, + 'local_ip': unit_private_ip(), + 'config': n1kv_config, + 'vsm_ip': config('n1kv-vsm-ip'), + 'vsm_username': 
config('n1kv-vsm-username'), + 'vsm_password': config('n1kv-vsm-password'), + 'restrict_policy_profiles': restrict_policy_profiles} + + if n1kv_user_config_flags: + flags = config_flags_parser(n1kv_user_config_flags) + n1kv_ctxt['user_config_flags'] = flags + + return n1kv_ctxt + + def calico_ctxt(self): + driver = neutron_plugin_attribute(self.plugin, 'driver', + self.network_manager) + config = neutron_plugin_attribute(self.plugin, 'config', + self.network_manager) + calico_ctxt = {'core_plugin': driver, + 'neutron_plugin': 'Calico', + 'neutron_security_groups': self.neutron_security_groups, + 'local_ip': unit_private_ip(), + 'config': config} + + return calico_ctxt + + def neutron_ctxt(self): + if https(): + proto = 'https' + else: + proto = 'http' + + if is_clustered(): + host = config('vip') + else: + host = local_address(unit_get_fallback='private-address') + + ctxt = {'network_manager': self.network_manager, + 'neutron_url': '%s://%s:%s' % (proto, host, '9696')} + return ctxt + + def pg_ctxt(self): + driver = neutron_plugin_attribute(self.plugin, 'driver', + self.network_manager) + config = neutron_plugin_attribute(self.plugin, 'config', + self.network_manager) + ovs_ctxt = {'core_plugin': driver, + 'neutron_plugin': 'plumgrid', + 'neutron_security_groups': self.neutron_security_groups, + 'local_ip': unit_private_ip(), + 'config': config} + return ovs_ctxt + + def midonet_ctxt(self): + driver = neutron_plugin_attribute(self.plugin, 'driver', + self.network_manager) + midonet_config = neutron_plugin_attribute(self.plugin, 'config', + self.network_manager) + mido_ctxt = {'core_plugin': driver, + 'neutron_plugin': 'midonet', + 'neutron_security_groups': self.neutron_security_groups, + 'local_ip': unit_private_ip(), + 'config': midonet_config} + + return mido_ctxt + + def __call__(self): + if self.network_manager not in ['quantum', 'neutron']: + return {} + + if not self.plugin: + return {} + + ctxt = self.neutron_ctxt() + + if self.plugin == 'ovs': + ctxt.update(self.ovs_ctxt()) + elif self.plugin in ['nvp', 'nsx']: + ctxt.update(self.nvp_ctxt()) + elif self.plugin == 'n1kv': + ctxt.update(self.n1kv_ctxt()) + elif self.plugin == 'Calico': + ctxt.update(self.calico_ctxt()) + elif self.plugin == 'vsp': + ctxt.update(self.nuage_ctxt()) + elif self.plugin == 'plumgrid': + ctxt.update(self.pg_ctxt()) + elif self.plugin == 'midonet': + ctxt.update(self.midonet_ctxt()) + + alchemy_flags = config('neutron-alchemy-flags') + if alchemy_flags: + flags = config_flags_parser(alchemy_flags) + ctxt['neutron_alchemy_flags'] = flags + + return ctxt + + +class NeutronPortContext(OSContextGenerator): + + def resolve_ports(self, ports): + """Resolve NICs not yet bound to bridge(s) + + If hwaddress provided then returns resolved hwaddress otherwise NIC. 
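+
+        Example (hypothetical values): given ports of
+        ['eth2', 'aa:bb:cc:dd:ee:ff'], where 'eth2' exists and another NIC
+        with that MAC has no IP address yet, this would resolve to a list
+        of NIC names such as ['eth2', 'eth3'].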
+ """ + if not ports: + return None + + hwaddr_to_nic = {} + hwaddr_to_ip = {} + extant_nics = list_nics() + + for nic in extant_nics: + # Ignore virtual interfaces (bond masters will be identified from + # their slaves) + if not is_phy_iface(nic): + continue + + _nic = get_bond_master(nic) + if _nic: + log("Replacing iface '%s' with bond master '%s'" % (nic, _nic), + level=DEBUG) + nic = _nic + + hwaddr = get_nic_hwaddr(nic) + hwaddr_to_nic[hwaddr] = nic + addresses = get_ipv4_addr(nic, fatal=False) + addresses += get_ipv6_addr(iface=nic, fatal=False) + hwaddr_to_ip[hwaddr] = addresses + + resolved = [] + mac_regex = re.compile(r'([0-9A-F]{2}[:-]){5}([0-9A-F]{2})', re.I) + for entry in ports: + if re.match(mac_regex, entry): + # NIC is in known NICs and does NOT have an IP address + if entry in hwaddr_to_nic and not hwaddr_to_ip[entry]: + # If the nic is part of a bridge then don't use it + if is_bridge_member(hwaddr_to_nic[entry]): + continue + + # Entry is a MAC address for a valid interface that doesn't + # have an IP address assigned yet. + resolved.append(hwaddr_to_nic[entry]) + elif entry in extant_nics: + # If the passed entry is not a MAC address and the interface + # exists, assume it's a valid interface, and that the user put + # it there on purpose (we can trust it to be the real external + # network). + resolved.append(entry) + + # Ensure no duplicates + return list(set(resolved)) + + +class OSConfigFlagContext(OSContextGenerator): + """Provides support for user-defined config flags. + + Users can define a comma-seperated list of key=value pairs + in the charm configuration and apply them at any point in + any file by using a template flag. + + Sometimes users might want config flags inserted within a + specific section so this class allows users to specify the + template flag name, allowing for multiple template flags + (sections) within the same context. + + NOTE: the value of config-flags may be a comma-separated list of + key=value pairs and some Openstack config files support + comma-separated lists as values. + """ + + def __init__(self, charm_flag='config-flags', + template_flag='user_config_flags'): + """ + :param charm_flag: config flags in charm configuration. + :param template_flag: insert point for user-defined flags in template + file. + """ + super(OSConfigFlagContext, self).__init__() + self._charm_flag = charm_flag + self._template_flag = template_flag + + def __call__(self): + config_flags = config(self._charm_flag) + if not config_flags: + return {} + + return {self._template_flag: + config_flags_parser(config_flags)} + + +class LibvirtConfigFlagsContext(OSContextGenerator): + """ + This context provides support for extending + the libvirt section through user-defined flags. + """ + def __call__(self): + ctxt = {} + libvirt_flags = config('libvirt-flags') + if libvirt_flags: + ctxt['libvirt_flags'] = config_flags_parser( + libvirt_flags) + return ctxt + + +class SubordinateConfigContext(OSContextGenerator): + + """ + Responsible for inspecting relations to subordinates that + may be exporting required config via a json blob. + + The subordinate interface allows subordinates to export their + configuration requirements to the principle for multiple config + files and multiple services. 
+    I.e., a subordinate that has interfaces
+    to both glance and nova may export the following YAML blob as JSON::
+
+        glance:
+            /etc/glance/glance-api.conf:
+                sections:
+                    DEFAULT:
+                        - [key1, value1]
+            /etc/glance/glance-registry.conf:
+                MYSECTION:
+                    - [key2, value2]
+        nova:
+            /etc/nova/nova.conf:
+                sections:
+                    DEFAULT:
+                        - [key3, value3]
+
+
+    It is then up to the principal charms to subscribe this context to
+    the service+config file it is interested in. Configuration data will
+    be available in the template context, in glance's case, as::
+
+        ctxt = {
+            ... other context ...
+            'subordinate_configuration': {
+                'DEFAULT': {
+                    'key1': 'value1',
+                },
+                'MYSECTION': {
+                    'key2': 'value2',
+                },
+            }
+        }
+    """
+
+    def __init__(self, service, config_file, interface):
+        """
+        :param service     : Service name key to query in any subordinate
+                             data found
+        :param config_file : Service's config file to query sections
+        :param interface   : Subordinate interface to inspect
+        """
+        self.config_file = config_file
+        if isinstance(service, list):
+            self.services = service
+        else:
+            self.services = [service]
+        if isinstance(interface, list):
+            self.interfaces = interface
+        else:
+            self.interfaces = [interface]
+
+    def __call__(self):
+        ctxt = {'sections': {}}
+        rids = []
+        for interface in self.interfaces:
+            rids.extend(relation_ids(interface))
+        for rid in rids:
+            for unit in related_units(rid):
+                sub_config = relation_get('subordinate_configuration',
+                                          rid=rid, unit=unit)
+                if sub_config and sub_config != '':
+                    try:
+                        sub_config = json.loads(sub_config)
+                    except Exception:
+                        log('Could not parse JSON from '
+                            'subordinate_configuration setting from %s'
+                            % rid, level=ERROR)
+                        continue
+
+                    for service in self.services:
+                        if service not in sub_config:
+                            log('Found subordinate_configuration on %s but it '
+                                'contained nothing for %s service'
+                                % (rid, service), level=INFO)
+                            continue
+
+                        sub_config = sub_config[service]
+                        if self.config_file not in sub_config:
+                            log('Found subordinate_configuration on %s but it '
+                                'contained nothing for %s'
+                                % (rid, self.config_file), level=INFO)
+                            continue
+
+                        sub_config = sub_config[self.config_file]
+                        for k, v in sub_config.items():
+                            if k == 'sections':
+                                for section, config_list in v.items():
+                                    log("adding section '%s'" % (section),
+                                        level=DEBUG)
+                                    if ctxt[k].get(section):
+                                        ctxt[k][section].extend(config_list)
+                                    else:
+                                        ctxt[k][section] = config_list
+                            else:
+                                ctxt[k] = v
+        if self.context_complete(ctxt):
+            log("%d section(s) found" % (len(ctxt['sections'])), level=DEBUG)
+            return ctxt
+        else:
+            return {}
+
+    def context_complete(self, ctxt):
+        """Overridden here to ensure the context is actually complete.
+
+        :param ctxt: The current context members
+        :type ctxt: Dict[str, ANY]
+        :returns: True if the context is complete
+        :rtype: bool
+        """
+        if not ctxt.get('sections'):
+            return False
+        return super(SubordinateConfigContext, self).context_complete(ctxt)
+
+
+class LogLevelContext(OSContextGenerator):
+
+    def __call__(self):
+        ctxt = {}
+        ctxt['debug'] = \
+            False if config('debug') is None else config('debug')
+        ctxt['verbose'] = \
+            False if config('verbose') is None else config('verbose')
+
+        return ctxt
+
+
+class SyslogContext(OSContextGenerator):
+
+    def __call__(self):
+        ctxt = {'use_syslog': config('use-syslog')}
+        return ctxt
+
+
+class BindHostContext(OSContextGenerator):
+
+    def __call__(self):
+        if config('prefer-ipv6'):
+            return {'bind_host': '::'}
+        else:
+            return {'bind_host': '0.0.0.0'}
+
+
+MAX_DEFAULT_WORKERS = 4
+DEFAULT_MULTIPLIER = 2
+
+
+def _calculate_workers():
+    '''
+    Determine the number of worker processes based on the CPU
+    count of the unit containing the application.
+
+    Workers will be limited to MAX_DEFAULT_WORKERS in
+    container environments where no worker-multiplier configuration
+    option has been set.
+
+    @returns int: number of worker processes to use
+    '''
+    multiplier = config('worker-multiplier')
+
+    # distinguish an empty config and an explicit config as 0.0
+    if multiplier is None:
+        multiplier = DEFAULT_MULTIPLIER
+
+    count = int(_num_cpus() * multiplier)
+    if count <= 0:
+        # assign at least one worker
+        count = 1
+
+    if config('worker-multiplier') is None:
+        # NOTE(jamespage): Limit unconfigured worker-multiplier
+        #                  to MAX_DEFAULT_WORKERS to avoid insane
+        #                  worker configuration on large servers
+        # Reference: https://pad.lv/1665270
+        count = min(count, MAX_DEFAULT_WORKERS)
+
+    return count
+
+
+def _num_cpus():
+    '''
+    Compatibility wrapper for calculating the number of CPUs
+    a unit has.
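+    For example, psutil.cpu_count() on a hypothetical 8-core unit
+    returns 8; very old psutil releases expose the same figure as
+    psutil.NUM_CPUS, hence the fallback below.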
+
+    @returns: int: number of CPU cores detected
+    '''
+    try:
+        return psutil.cpu_count()
+    except AttributeError:
+        return psutil.NUM_CPUS
+
+
+class WorkerConfigContext(OSContextGenerator):
+
+    def __call__(self):
+        ctxt = {"workers": _calculate_workers()}
+        return ctxt
+
+
+class WSGIWorkerConfigContext(WorkerConfigContext):
+
+    def __init__(self, name=None, script=None, admin_script=None,
+                 public_script=None, user=None, group=None,
+                 process_weight=1.00,
+                 admin_process_weight=0.25, public_process_weight=0.75):
+        self.service_name = name
+        self.user = user or name
+        self.group = group or name
+        self.script = script
+        self.admin_script = admin_script
+        self.public_script = public_script
+        self.process_weight = process_weight
+        self.admin_process_weight = admin_process_weight
+        self.public_process_weight = public_process_weight
+
+    def __call__(self):
+        total_processes = _calculate_workers()
+        enable_wsgi_socket_rotation = config('wsgi-socket-rotation')
+        if enable_wsgi_socket_rotation is None:
+            enable_wsgi_socket_rotation = True
+        ctxt = {
+            "service_name": self.service_name,
+            "user": self.user,
+            "group": self.group,
+            "script": self.script,
+            "admin_script": self.admin_script,
+            "public_script": self.public_script,
+            "processes": int(math.ceil(self.process_weight * total_processes)),
+            "admin_processes": int(math.ceil(self.admin_process_weight *
+                                             total_processes)),
+            "public_processes": int(math.ceil(self.public_process_weight *
+                                              total_processes)),
+            "threads": 1,
+            "wsgi_socket_rotation": enable_wsgi_socket_rotation,
+        }
+        return ctxt
+
+
+class ZeroMQContext(OSContextGenerator):
+    interfaces = ['zeromq-configuration']
+
+    def __call__(self):
+        ctxt = {}
+        if is_relation_made('zeromq-configuration', 'host'):
+            for rid in relation_ids('zeromq-configuration'):
+                for unit in related_units(rid):
+                    ctxt['zmq_nonce'] = relation_get('nonce', unit, rid)
+                    ctxt['zmq_host'] = relation_get('host', unit, rid)
+                    ctxt['zmq_redis_address'] = relation_get(
+                        'zmq_redis_address', unit, rid)
+
+        return ctxt
+
+
+class NotificationDriverContext(OSContextGenerator):
+
+    def __init__(self, zmq_relation='zeromq-configuration',
+                 amqp_relation='amqp'):
+        """
+        :param zmq_relation: Name of Zeromq relation to check
+        """
+        self.zmq_relation = zmq_relation
+        self.amqp_relation = amqp_relation
+
+    def __call__(self):
+        ctxt = {'notifications': 'False'}
+        if is_relation_made(self.amqp_relation):
+            ctxt['notifications'] = "True"
+
+        return ctxt
+
+
+class SysctlContext(OSContextGenerator):
+    """This context checks whether the 'sysctl' option is set in the charm
+    configuration and, if so, creates a file with its contents."""
+    def __call__(self):
+        sysctl_dict = config('sysctl')
+        if sysctl_dict:
+            sysctl_create(sysctl_dict,
+                          '/etc/sysctl.d/50-{0}.conf'.format(charm_name()))
+        return {'sysctl': sysctl_dict}
+
+
+class NeutronAPIContext(OSContextGenerator):
+    '''
+    Inspects the current neutron-plugin-api relation for neutron settings.
+    Returns defaults if it is not present.
+ ''' + interfaces = ['neutron-plugin-api'] + + def __call__(self): + self.neutron_defaults = { + 'l2_population': { + 'rel_key': 'l2-population', + 'default': False, + }, + 'overlay_network_type': { + 'rel_key': 'overlay-network-type', + 'default': 'gre', + }, + 'neutron_security_groups': { + 'rel_key': 'neutron-security-groups', + 'default': False, + }, + 'network_device_mtu': { + 'rel_key': 'network-device-mtu', + 'default': None, + }, + 'enable_dvr': { + 'rel_key': 'enable-dvr', + 'default': False, + }, + 'enable_l3ha': { + 'rel_key': 'enable-l3ha', + 'default': False, + }, + 'dns_domain': { + 'rel_key': 'dns-domain', + 'default': None, + }, + 'polling_interval': { + 'rel_key': 'polling-interval', + 'default': 2, + }, + 'rpc_response_timeout': { + 'rel_key': 'rpc-response-timeout', + 'default': 60, + }, + 'report_interval': { + 'rel_key': 'report-interval', + 'default': 30, + }, + 'enable_qos': { + 'rel_key': 'enable-qos', + 'default': False, + }, + 'enable_nsg_logging': { + 'rel_key': 'enable-nsg-logging', + 'default': False, + }, + 'enable_nfg_logging': { + 'rel_key': 'enable-nfg-logging', + 'default': False, + }, + 'enable_port_forwarding': { + 'rel_key': 'enable-port-forwarding', + 'default': False, + }, + 'enable_fwaas': { + 'rel_key': 'enable-fwaas', + 'default': False, + }, + 'global_physnet_mtu': { + 'rel_key': 'global-physnet-mtu', + 'default': 1500, + }, + 'physical_network_mtus': { + 'rel_key': 'physical-network-mtus', + 'default': None, + }, + } + ctxt = self.get_neutron_options({}) + for rid in relation_ids('neutron-plugin-api'): + for unit in related_units(rid): + rdata = relation_get(rid=rid, unit=unit) + # The l2-population key is used by the context as a way of + # checking if the api service on the other end is sending data + # in a recent format. + if 'l2-population' in rdata: + ctxt.update(self.get_neutron_options(rdata)) + + extension_drivers = [] + + if ctxt['enable_qos']: + extension_drivers.append('qos') + + if ctxt['enable_nsg_logging']: + extension_drivers.append('log') + + ctxt['extension_drivers'] = ','.join(extension_drivers) + + l3_extension_plugins = [] + + if ctxt['enable_port_forwarding']: + l3_extension_plugins.append('port_forwarding') + + if ctxt['enable_fwaas']: + l3_extension_plugins.append('fwaas_v2') + if ctxt['enable_nfg_logging']: + l3_extension_plugins.append('fwaas_v2_log') + + ctxt['l3_extension_plugins'] = l3_extension_plugins + + return ctxt + + def get_neutron_options(self, rdata): + settings = {} + for nkey in self.neutron_defaults.keys(): + defv = self.neutron_defaults[nkey]['default'] + rkey = self.neutron_defaults[nkey]['rel_key'] + if rkey in rdata.keys(): + if type(defv) is bool: + settings[nkey] = bool_from_string(rdata[rkey]) + else: + settings[nkey] = rdata[rkey] + else: + settings[nkey] = defv + return settings + + +class ExternalPortContext(NeutronPortContext): + + def __call__(self): + ctxt = {} + ports = config('ext-port') + if ports: + ports = [p.strip() for p in ports.split()] + ports = self.resolve_ports(ports) + if ports: + ctxt = {"ext_port": ports[0]} + napi_settings = NeutronAPIContext()() + mtu = napi_settings.get('network_device_mtu') + if mtu: + ctxt['ext_port_mtu'] = mtu + + return ctxt + + +class DataPortContext(NeutronPortContext): + + def __call__(self): + ports = config('data-port') + if ports: + # Map of {bridge:port/mac} + portmap = parse_data_port_mappings(ports) + ports = portmap.keys() + # Resolve provided ports or mac addresses and filter out those + # already attached to a bridge. 
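+            # Illustration (hypothetical values): a data-port setting of
+            #     "br-ex:eth1 br-data:aa:bb:cc:dd:ee:ff"
+            # is written bridge:port, but the parsed map is keyed by
+            # port/mac, i.e. {'eth1': 'br-ex', 'aa:bb:cc:dd:ee:ff': 'br-data'}.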
+ resolved = self.resolve_ports(ports) + # Rebuild port index using resolved and filtered ports. + normalized = {get_nic_hwaddr(port): port for port in resolved + if port not in ports} + normalized.update({port: port for port in resolved + if port in ports}) + if resolved: + return { + normalized[port]: bridge + for port, bridge in portmap.items() + if port in normalized.keys() + } + + return None + + +class PhyNICMTUContext(DataPortContext): + + def __call__(self): + ctxt = {} + mappings = super(PhyNICMTUContext, self).__call__() + if mappings and mappings.keys(): + ports = sorted(mappings.keys()) + napi_settings = NeutronAPIContext()() + mtu = napi_settings.get('network_device_mtu') + all_ports = set() + # If any of ports is a vlan device, its underlying device must have + # mtu applied first. + for port in ports: + for lport in glob.glob("/sys/class/net/%s/lower_*" % port): + lport = os.path.basename(lport) + all_ports.add(lport.split('_')[1]) + + all_ports = list(all_ports) + all_ports.extend(ports) + if mtu: + ctxt["devs"] = '\\n'.join(all_ports) + ctxt['mtu'] = mtu + + return ctxt + + +class NetworkServiceContext(OSContextGenerator): + + def __init__(self, rel_name='quantum-network-service'): + self.rel_name = rel_name + self.interfaces = [rel_name] + + def __call__(self): + for rid in relation_ids(self.rel_name): + for unit in related_units(rid): + rdata = relation_get(rid=rid, unit=unit) + ctxt = { + 'keystone_host': rdata.get('keystone_host'), + 'service_port': rdata.get('service_port'), + 'auth_port': rdata.get('auth_port'), + 'service_tenant': rdata.get('service_tenant'), + 'service_username': rdata.get('service_username'), + 'service_password': rdata.get('service_password'), + 'quantum_host': rdata.get('quantum_host'), + 'quantum_port': rdata.get('quantum_port'), + 'quantum_url': rdata.get('quantum_url'), + 'region': rdata.get('region'), + 'service_protocol': + rdata.get('service_protocol') or 'http', + 'auth_protocol': + rdata.get('auth_protocol') or 'http', + 'api_version': + rdata.get('api_version') or '2.0', + } + if self.context_complete(ctxt): + return ctxt + return {} + + +class InternalEndpointContext(OSContextGenerator): + """Internal endpoint context. + + This context provides the endpoint type used for communication between + services e.g. between Nova and Cinder internally. Openstack uses Public + endpoints by default so this allows admins to optionally use internal + endpoints. + """ + def __call__(self): + return {'use_internal_endpoints': config('use-internal-endpoints')} + + +class VolumeAPIContext(InternalEndpointContext): + """Volume API context. + + This context provides information regarding the volume endpoint to use + when communicating between services. It determines which version of the + API is appropriate for use. + + This value will be determined in the resulting context dictionary + returned from calling the VolumeAPIContext object. Information provided + by this context is as follows: + + volume_api_version: the volume api version to use, currently + 'v2' or 'v3' + volume_catalog_info: the information to use for a cinder client + configuration that consumes API endpoints from the keystone + catalog. This is defined as the type:name:endpoint_type string. + """ + # FIXME(wolsen) This implementation is based on the provider being able + # to specify the package version to check but does not guarantee that the + # volume service api version selected is available. 
+    # In practice, it is quite likely the volume service *is* providing the
+    # v3 volume service. This should be resolved when the service-discovery
+    # spec is implemented.
+    def __init__(self, pkg):
+        """
+        Creates a new VolumeAPIContext for use in determining which version
+        of the Volume API should be used for communication. A package codename
+        should be supplied for determining the currently installed OpenStack
+        version.
+
+        :param pkg: the package codename to use in order to determine the
+            component version (e.g. nova-common). See
+            charmhelpers.contrib.openstack.utils.PACKAGE_CODENAMES for more.
+        """
+        super(VolumeAPIContext, self).__init__()
+        self._ctxt = None
+        if not pkg:
+            raise ValueError('package name must be provided in order to '
+                             'determine current OpenStack version.')
+        self.pkg = pkg
+
+    @property
+    def ctxt(self):
+        if self._ctxt is not None:
+            return self._ctxt
+        self._ctxt = self._determine_ctxt()
+        return self._ctxt
+
+    def _determine_ctxt(self):
+        """Determines the Volume API endpoint information.
+
+        Determines the appropriate version of the API that should be used
+        as well as the catalog_info string that would be supplied. Returns
+        a dict containing the volume_api_version and the volume_catalog_info.
+        """
+        rel = os_release(self.pkg)
+        version = '2'
+        if CompareOpenStackReleases(rel) >= 'pike':
+            version = '3'
+
+        service_type = 'volumev{version}'.format(version=version)
+        service_name = 'cinderv{version}'.format(version=version)
+        endpoint_type = 'publicURL'
+        if config('use-internal-endpoints'):
+            endpoint_type = 'internalURL'
+        catalog_info = '{type}:{name}:{endpoint}'.format(
+            type=service_type, name=service_name, endpoint=endpoint_type)
+
+        return {
+            'volume_api_version': version,
+            'volume_catalog_info': catalog_info,
+        }
+
+    def __call__(self):
+        return self.ctxt
+
+
+class AppArmorContext(OSContextGenerator):
+    """Base class for apparmor contexts."""
+
+    def __init__(self, profile_name=None):
+        self._ctxt = None
+        self.aa_profile = profile_name
+        self.aa_utils_packages = ['apparmor-utils']
+
+    @property
+    def ctxt(self):
+        if self._ctxt is not None:
+            return self._ctxt
+        self._ctxt = self._determine_ctxt()
+        return self._ctxt
+
+    def _determine_ctxt(self):
+        """
+        Validate that the aa-profile-mode setting is disable, enforce, or
+        complain.
+
+        :return ctxt: Dictionary of the apparmor profile or None
+        """
+        if config('aa-profile-mode') in ['disable', 'enforce', 'complain']:
+            ctxt = {'aa_profile_mode': config('aa-profile-mode'),
+                    'ubuntu_release': lsb_release()['DISTRIB_RELEASE']}
+            if self.aa_profile:
+                ctxt['aa_profile'] = self.aa_profile
+        else:
+            ctxt = None
+        return ctxt
+
+    def __call__(self):
+        return self.ctxt
+
+    def install_aa_utils(self):
+        """
+        Install packages required for apparmor configuration.
+        """
+        log("Installing apparmor utils.")
+        ensure_packages(self.aa_utils_packages)
+
+    def manually_disable_aa_profile(self):
+        """
+        Manually disable an apparmor profile.
+
+        If aa-profile-mode is set to disabled (default) this is required as
+        the template has been written but apparmor is not yet aware of the
+        profile, so aa-disable aa-profile fails. Without this the profile
+        would kick into enforce mode on the next service restart.
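+
+        For example (hypothetical profile name), with aa_profile
+        'usr.bin.nova-compute' this creates the symlink
+        /etc/apparmor.d/disable/usr.bin.nova-compute pointing at
+        /etc/apparmor.d/usr.bin.nova-compute.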
+ + """ + profile_path = '/etc/apparmor.d' + disable_path = '/etc/apparmor.d/disable' + if not os.path.lexists(os.path.join(disable_path, self.aa_profile)): + os.symlink(os.path.join(profile_path, self.aa_profile), + os.path.join(disable_path, self.aa_profile)) + + def setup_aa_profile(self): + """ + Setup an apparmor profile. + The ctxt dictionary will contain the apparmor profile mode and + the apparmor profile name. + Makes calls out to aa-disable, aa-complain, or aa-enforce to setup + the apparmor profile. + """ + self() + if not self.ctxt: + log("Not enabling apparmor Profile") + return + self.install_aa_utils() + cmd = ['aa-{}'.format(self.ctxt['aa_profile_mode'])] + cmd.append(self.ctxt['aa_profile']) + log("Setting up the apparmor profile for {} in {} mode." + "".format(self.ctxt['aa_profile'], self.ctxt['aa_profile_mode'])) + try: + check_call(cmd) + except CalledProcessError as e: + # If aa-profile-mode is set to disabled (default) manual + # disabling is required as the template has been written but + # apparmor is yet unaware of the profile and aa-disable aa-profile + # fails. If aa-disable learns to read profile files first this can + # be removed. + if self.ctxt['aa_profile_mode'] == 'disable': + log("Manually disabling the apparmor profile for {}." + "".format(self.ctxt['aa_profile'])) + self.manually_disable_aa_profile() + return + status_set('blocked', "Apparmor profile {} failed to be set to {}." + "".format(self.ctxt['aa_profile'], + self.ctxt['aa_profile_mode'])) + raise e + + +class MemcacheContext(OSContextGenerator): + """Memcache context + + This context provides options for configuring a local memcache client and + server for both IPv4 and IPv6 + """ + + def __init__(self, package=None): + """ + @param package: Package to examine to extrapolate OpenStack release. + Used when charms have no openstack-origin config + option (ie subordinates) + """ + self.package = package + + def __call__(self): + ctxt = {} + ctxt['use_memcache'] = enable_memcache(package=self.package) + if ctxt['use_memcache']: + # Trusty version of memcached does not support ::1 as a listen + # address so use host file entry instead + release = lsb_release()['DISTRIB_CODENAME'].lower() + if is_ipv6_disabled(): + if CompareHostReleases(release) > 'trusty': + ctxt['memcache_server'] = '127.0.0.1' + else: + ctxt['memcache_server'] = 'localhost' + ctxt['memcache_server_formatted'] = '127.0.0.1' + ctxt['memcache_port'] = '11211' + ctxt['memcache_url'] = '{}:{}'.format( + ctxt['memcache_server_formatted'], + ctxt['memcache_port']) + else: + if CompareHostReleases(release) > 'trusty': + ctxt['memcache_server'] = '::1' + else: + ctxt['memcache_server'] = 'ip6-localhost' + ctxt['memcache_server_formatted'] = '[::1]' + ctxt['memcache_port'] = '11211' + ctxt['memcache_url'] = 'inet6:{}:{}'.format( + ctxt['memcache_server_formatted'], + ctxt['memcache_port']) + return ctxt + + +class EnsureDirContext(OSContextGenerator): + ''' + Serves as a generic context to create a directory as a side-effect. + + Useful for software that supports drop-in files (.d) in conjunction + with config option-based templates. Examples include: + * OpenStack oslo.policy drop-in files; + * systemd drop-in config files; + * other software that supports overriding defaults with .d files + + Another use-case is when a subordinate generates a configuration for + primary to render in a separate directory. + + Some software requires a user to create a target directory to be + scanned for drop-in files with a specific format. 
This is why this + context is needed to do that before rendering a template. + ''' + + def __init__(self, dirname, **kwargs): + '''Used merely to ensure that a given directory exists.''' + self.dirname = dirname + self.kwargs = kwargs + + def __call__(self): + mkdir(self.dirname, **self.kwargs) + return {} + + +class VersionsContext(OSContextGenerator): + """Context to return the openstack and operating system versions. + + """ + def __init__(self, pkg='python-keystone'): + """Initialise context. + + :param pkg: Package to extrapolate openstack version from. + :type pkg: str + """ + self.pkg = pkg + + def __call__(self): + ostack = os_release(self.pkg) + osystem = lsb_release()['DISTRIB_CODENAME'].lower() + return { + 'openstack_release': ostack, + 'operating_system_release': osystem} + + +class LogrotateContext(OSContextGenerator): + """Common context generator for logrotate.""" + + def __init__(self, location, interval, count): + """ + :param location: Absolute path for the logrotate config file + :type location: str + :param interval: The interval for the rotations. Valid values are + 'daily', 'weekly', 'monthly', 'yearly' + :type interval: str + :param count: The logrotate count option configures the 'count' times + the log files are being rotated before being + :type count: int + """ + self.location = location + self.interval = interval + self.count = 'rotate {}'.format(count) + + def __call__(self): + ctxt = { + 'logrotate_logs_location': self.location, + 'logrotate_interval': self.interval, + 'logrotate_count': self.count, + } + return ctxt + + +class HostInfoContext(OSContextGenerator): + """Context to provide host information.""" + + def __init__(self, use_fqdn_hint_cb=None): + """Initialize HostInfoContext + + :param use_fqdn_hint_cb: Callback whose return value used to populate + `use_fqdn_hint` + :type use_fqdn_hint_cb: Callable[[], bool] + """ + # Store callback used to get hint for whether FQDN should be used + + # Depending on the workload a charm manages, the use of FQDN vs. + # shortname may be a deploy-time decision, i.e. behaviour can not + # change on charm upgrade or post-deployment configuration change. + + # The hint is passed on as a flag in the context to allow the decision + # to be made in the Jinja2 configuration template. + self.use_fqdn_hint_cb = use_fqdn_hint_cb + + def _get_canonical_name(self, name=None): + """Get the official FQDN of the host + + The implementation of ``socket.getfqdn()`` in the standard Python + library does not exhaust all methods of getting the official name + of a host ref Python issue https://bugs.python.org/issue5004 + + This function mimics the behaviour of a call to ``hostname -f`` to + get the official FQDN but returns an empty string if it is + unsuccessful. + + :param name: Shortname to get FQDN on + :type name: Optional[str] + :returns: The official FQDN for host or empty string ('') + :rtype: str + """ + name = name or socket.gethostname() + fqdn = '' + + try: + addrs = socket.getaddrinfo( + name, None, 0, socket.SOCK_DGRAM, 0, socket.AI_CANONNAME) + except OSError: + pass + else: + for addr in addrs: + if addr[3]: + if '.' 
in addr[3]:
+                    fqdn = addr[3]
+                    break
+        return fqdn
+
+    def __call__(self):
+        name = socket.gethostname()
+        ctxt = {
+            'host_fqdn': self._get_canonical_name(name) or name,
+            'host': name,
+            'use_fqdn_hint': (
+                self.use_fqdn_hint_cb() if self.use_fqdn_hint_cb else False)
+        }
+        return ctxt
+
+
+def validate_ovs_use_veth(*args, **kwargs):
+    """Validate OVS use veth setting for dhcp agents
+
+    The ovs_use_veth setting is considered immutable as it will break existing
+    deployments. Historically, we set ovs_use_veth=True in dhcp_agent.ini. It
+    turns out this is no longer necessary. Ideally, all new deployments would
+    have this set to False.
+
+    This function validates that the config value does not conflict with
+    previously deployed settings in dhcp_agent.ini.
+
+    See LP Bug#1831935 for details.
+
+    :returns: Status state and message
+    :rtype: Union[(None, None), (string, string)]
+    """
+    existing_ovs_use_veth = (
+        DHCPAgentContext.get_existing_ovs_use_veth())
+    config_ovs_use_veth = DHCPAgentContext.parse_ovs_use_veth()
+
+    # Check settings are set and not None
+    if existing_ovs_use_veth is not None and config_ovs_use_veth is not None:
+        # Check for mismatch between existing config ini and juju config
+        if existing_ovs_use_veth != config_ovs_use_veth:
+            # Stop the line to avoid breakage
+            msg = (
+                "The existing setting for dhcp_agent.ini ovs_use_veth, {}, "
+                "does not match the juju config setting, {}. This may lead to "
+                "VMs being unable to receive a DHCP IP. Either change the "
+                "juju config setting or dhcp agents may need to be recreated."
+                .format(existing_ovs_use_veth, config_ovs_use_veth))
+            log(msg, ERROR)
+            return (
+                "blocked",
+                "Mismatched existing and configured ovs-use-veth. See log.")
+
+    # Everything is OK
+    return None, None
+
+
+class DHCPAgentContext(OSContextGenerator):
+
+    def __call__(self):
+        """Return the DHCPAgentContext.
+
+        Return all DHCP agent INI related configuration: settings from the
+        unit the ovs unit is attached to (as a subordinate) and the
+        'dns_domain' from the neutron-plugin-api relation (if one is set).
+
+        :returns: Dictionary context
+        :rtype: Dict
+        """
+
+        ctxt = {}
+        dnsmasq_flags = config('dnsmasq-flags')
+        if dnsmasq_flags:
+            ctxt['dnsmasq_flags'] = config_flags_parser(dnsmasq_flags)
+        ctxt['dns_servers'] = config('dns-servers')
+
+        neutron_api_settings = NeutronAPIContext()()
+
+        ctxt['debug'] = config('debug')
+        ctxt['instance_mtu'] = config('instance-mtu')
+        ctxt['ovs_use_veth'] = self.get_ovs_use_veth()
+
+        ctxt['enable_metadata_network'] = config('enable-metadata-network')
+        ctxt['enable_isolated_metadata'] = config('enable-isolated-metadata')
+
+        if neutron_api_settings.get('dns_domain'):
+            ctxt['dns_domain'] = neutron_api_settings.get('dns_domain')
+
+        # Override user supplied config for these plugins as these settings
+        # are mandatory
+        if config('plugin') in ['nvp', 'nsx', 'n1kv']:
+            ctxt['enable_metadata_network'] = True
+            ctxt['enable_isolated_metadata'] = True
+
+        ctxt['append_ovs_config'] = False
+        cmp_release = CompareOpenStackReleases(
+            os_release('neutron-common', base='icehouse'))
+        if cmp_release >= 'queens' and config('enable-dpdk'):
+            ctxt['append_ovs_config'] = True
+
+        return ctxt
+
+    @staticmethod
+    def get_existing_ovs_use_veth():
+        """Return existing ovs_use_veth setting from dhcp_agent.ini.
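+
+        A commented-out or absent setting is reported as None; a value such
+        as 'True' (hypothetical file content) is converted with
+        bool_from_string.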
+
+        :returns: Boolean value of existing ovs_use_veth setting or None
+        :rtype: Optional[Bool]
+        """
+        DHCP_AGENT_INI = "/etc/neutron/dhcp_agent.ini"
+        existing_ovs_use_veth = None
+        # If there is a dhcp_agent.ini file read the current setting
+        if os.path.isfile(DHCP_AGENT_INI):
+            # config_ini does the right thing and returns None if the setting
+            # is commented.
+            existing_ovs_use_veth = (
+                config_ini(DHCP_AGENT_INI)["DEFAULT"].get("ovs_use_veth"))
+        # Convert to Bool if necessary
+        if isinstance(existing_ovs_use_veth, str):
+            return bool_from_string(existing_ovs_use_veth)
+        return existing_ovs_use_veth
+
+    @staticmethod
+    def parse_ovs_use_veth():
+        """Parse the ovs-use-veth config setting.
+
+        Parse the string config setting for ovs-use-veth and return a boolean
+        or None.
+
+        bool_from_string will raise a ValueError if the string is not falsy or
+        truthy.
+
+        :raises: ValueError for invalid input
+        :returns: Boolean value of ovs-use-veth or None
+        :rtype: Optional[Bool]
+        """
+        _config = config("ovs-use-veth")
+        # An unset parameter returns None. Just in case, we also check for an
+        # empty string: "". Ironically (the problem we are trying to avoid),
+        # the string "False" is truthy while "" is falsy.
+        if _config is None or not _config:
+            # Return None
+            return
+        # bool_from_string handles many variations of true and false strings
+        # as well as upper and lowercases including:
+        # ['y', 'yes', 'true', 't', 'on', 'n', 'no', 'false', 'f', 'off']
+        return bool_from_string(_config)
+
+    def get_ovs_use_veth(self):
+        """Return correct ovs_use_veth setting for use in dhcp_agent.ini.
+
+        Get the right value from config or existing dhcp_agent.ini file.
+        Existing has precedence. Attempt to default to "False" without
+        disrupting existing deployments. Handle existing deployments and
+        upgrades safely. See LP Bug#1831935
+
+        :returns: Value to use for ovs_use_veth setting
+        :rtype: Bool
+        """
+        _existing = self.get_existing_ovs_use_veth()
+        if _existing is not None:
+            return _existing
+
+        _config = self.parse_ovs_use_veth()
+        if _config is None:
+            # New better default
+            return False
+        else:
+            return _config
+
+
+EntityMac = collections.namedtuple('EntityMac', ['entity', 'mac'])
+
+
+def resolve_pci_from_mapping_config(config_key):
+    """Resolve local PCI devices from MAC addresses in mapping config.
+
+    Note that this function keeps record of mac->PCI address lookups
+    in the local unit db as the devices will disappear from the system
+    once bound.
+
+    :param config_key: Configuration option key to parse data from
+    :type config_key: str
+    :returns: PCI device address to Tuple(entity, mac) map
+    :rtype: collections.OrderedDict[str,Tuple[str,str]]
+    """
+    devices = pci.PCINetDevices()
+    resolved_devices = collections.OrderedDict()
+    db = kv()
+    # Note that ``parse_data_port_mappings`` returns Dict regardless of input
+    for mac, entity in parse_data_port_mappings(config(config_key)).items():
+        pcidev = devices.get_device_from_mac(mac)
+        if pcidev:
+            # NOTE: store mac->pci allocation as post binding
+            #       it disappears from PCIDevices.
+            db.set(mac, pcidev.pci_address)
+            db.flush()
+
+        pci_address = db.get(mac)
+        if pci_address:
+            resolved_devices[pci_address] = EntityMac(entity, mac)
+
+    return resolved_devices
+
+
+class DPDKDeviceContext(OSContextGenerator):
+
+    def __init__(self, driver_key=None, bridges_key=None, bonds_key=None):
+        """Initialize DPDKDeviceContext.
+
+        :param driver_key: Key to use when retrieving driver config.
+ :type driver_key: str + :param bridges_key: Key to use when retrieving bridge config. + :type bridges_key: str + :param bonds_key: Key to use when retrieving bonds config. + :type bonds_key: str + """ + self.driver_key = driver_key or 'dpdk-driver' + self.bridges_key = bridges_key or 'data-port' + self.bonds_key = bonds_key or 'dpdk-bond-mappings' + + def __call__(self): + """Populate context. + + :returns: context + :rtype: Dict[str,Union[str,collections.OrderedDict[str,str]]] + """ + driver = config(self.driver_key) + if driver is None: + return {} + # Resolve PCI devices for both directly used devices (_bridges) + # and devices for use in dpdk bonds (_bonds) + pci_devices = resolve_pci_from_mapping_config(self.bridges_key) + pci_devices.update(resolve_pci_from_mapping_config(self.bonds_key)) + return {'devices': pci_devices, + 'driver': driver} + + +class OVSDPDKDeviceContext(OSContextGenerator): + + def __init__(self, bridges_key=None, bonds_key=None): + """Initialize OVSDPDKDeviceContext. + + :param bridges_key: Key to use when retrieving bridge config. + :type bridges_key: str + :param bonds_key: Key to use when retrieving bonds config. + :type bonds_key: str + """ + self.bridges_key = bridges_key or 'data-port' + self.bonds_key = bonds_key or 'dpdk-bond-mappings' + + @staticmethod + def _parse_cpu_list(cpulist): + """Parses a linux cpulist for a numa node + + :returns: list of cores + :rtype: List[int] + """ + cores = [] + if cpulist and re.match(r"^[0-9,\-^]*$", cpulist): + ranges = cpulist.split(',') + for cpu_range in ranges: + if "-" in cpu_range: + cpu_min_max = cpu_range.split('-') + cores += range(int(cpu_min_max[0]), + int(cpu_min_max[1]) + 1) + elif "^" in cpu_range: + cpu_rm = cpu_range.split('^') + cores.remove(int(cpu_rm[1])) + else: + cores.append(int(cpu_range)) + return cores + + def _numa_node_cores(self): + """Get map of numa node -> cpu core + + :returns: map of numa node -> cpu core + :rtype: Dict[str,List[int]] + """ + nodes = {} + node_regex = '/sys/devices/system/node/node*' + for node in glob.glob(node_regex): + index = node.lstrip('/sys/devices/system/node/node') + with open(os.path.join(node, 'cpulist')) as cpulist: + nodes[index] = self._parse_cpu_list(cpulist.read().strip()) + return nodes + + def cpu_mask(self): + """Get hex formatted CPU mask + The mask is based on using the first config:dpdk-socket-cores + cores of each NUMA node in the unit. + :returns: hex formatted CPU mask + :rtype: str + """ + num_cores = config('dpdk-socket-cores') + mask = 0 + for cores in self._numa_node_cores().values(): + for core in cores[:num_cores]: + mask = mask | 1 << core + return format(mask, '#04x') + + @classmethod + def pmd_cpu_mask(cls): + """Get hex formatted pmd CPU mask + + The mask is based on config:pmd-cpu-set. + :returns: hex formatted CPU mask + :rtype: str + """ + mask = 0 + cpu_list = cls._parse_cpu_list(config('pmd-cpu-set')) + if cpu_list: + for core in cpu_list: + mask = mask | 1 << core + return format(mask, '#x') + + def socket_memory(self): + """Formatted list of socket memory configuration per socket. + + :returns: socket memory configuration per socket. + :rtype: str + """ + lscpu_out = check_output( + ['lscpu', '-p=socket']).decode('UTF-8').strip() + sockets = set() + for line in lscpu_out.split('\n'): + try: + sockets.add(int(line)) + except ValueError: + # lscpu output is headed by comments so ignore them. 
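+                # e.g. 'lscpu -p=socket' on a hypothetical 2-socket, 4-CPU
+                # box yields header comments followed by '0','0','1','1';
+                # the set dedups this to {0, 1} and, with a
+                # dpdk-socket-memory of 1024, the function returns
+                # '1024,1024'.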
+ pass + sm_size = config('dpdk-socket-memory') + mem_list = [str(sm_size) for _ in sockets] + if mem_list: + return ','.join(mem_list) + else: + return str(sm_size) + + def devices(self): + """List of PCI devices for use by DPDK + + :returns: List of PCI devices for use by DPDK + :rtype: collections.OrderedDict[str,str] + """ + pci_devices = resolve_pci_from_mapping_config(self.bridges_key) + pci_devices.update(resolve_pci_from_mapping_config(self.bonds_key)) + return pci_devices + + def _formatted_whitelist(self, flag): + """Flag formatted list of devices to whitelist + + :param flag: flag format to use + :type flag: str + :rtype: str + """ + whitelist = [] + for device in self.devices(): + whitelist.append(flag.format(device=device)) + return ' '.join(whitelist) + + def device_whitelist(self): + """Formatted list of devices to whitelist for dpdk + + using the old style '-w' flag + + :returns: devices to whitelist prefixed by '-w ' + :rtype: str + """ + return self._formatted_whitelist('-w {device}') + + def pci_whitelist(self): + """Formatted list of devices to whitelist for dpdk + + using the new style '--pci-whitelist' flag + + :returns: devices to whitelist prefixed by '--pci-whitelist ' + :rtype: str + """ + return self._formatted_whitelist('--pci-whitelist {device}') + + def __call__(self): + """Populate context. + + :returns: context + :rtype: Dict[str,Union[bool,str]] + """ + ctxt = {} + whitelist = self.device_whitelist() + if whitelist: + ctxt['dpdk_enabled'] = config('enable-dpdk') + ctxt['device_whitelist'] = self.device_whitelist() + ctxt['socket_memory'] = self.socket_memory() + ctxt['cpu_mask'] = self.cpu_mask() + ctxt['pmd_cpu_mask'] = self.pmd_cpu_mask() + return ctxt + + +class BridgePortInterfaceMap(object): + """Build a map of bridge ports and interfaces from charm configuration. + + NOTE: the handling of this detail in the charm is pre-deprecated. + + The long term goal is for network connectivity detail to be modelled in + the server provisioning layer (such as MAAS) which in turn will provide + a Netplan YAML description that will be used to drive Open vSwitch. + + Until we get to that reality the charm will need to configure this + detail based on application level configuration options. + + There is a established way of mapping interfaces to ports and bridges + in the ``neutron-openvswitch`` and ``neutron-gateway`` charms and we + will carry that forward. + + The relationship between bridge, port and interface(s). + +--------+ + | bridge | + +--------+ + | + +----------------+ + | port aka. bond | + +----------------+ + | | + +-+ +-+ + |i| |i| + |n| |n| + |t| |t| + |0| |N| + +-+ +-+ + """ + class interface_type(enum.Enum): + """Supported interface types. + + Supported interface types can be found in the ``iface_types`` column + in the ``Open_vSwitch`` table on a running system. + """ + dpdk = 'dpdk' + internal = 'internal' + system = 'system' + + def __str__(self): + """Return string representation of value. + + :returns: string representation of value. + :rtype: str + """ + return self.value + + def __init__(self, bridges_key=None, bonds_key=None, enable_dpdk_key=None, + global_mtu=None): + """Initialize map. 
+ + :param bridges_key: Name of bridge:interface/port map config key + (default: 'data-port') + :type bridges_key: Optional[str] + :param bonds_key: Name of port-name:interface map config key + (default: 'dpdk-bond-mappings') + :type bonds_key: Optional[str] + :param enable_dpdk_key: Name of DPDK toggle config key + (default: 'enable-dpdk') + :type enable_dpdk_key: Optional[str] + :param global_mtu: Set a MTU on all interfaces at map initialization. + + The default is to have Open vSwitch get this from the underlying + interface as set up by bare metal provisioning. + + Note that you can augment the MTU on an individual interface basis + like this: + + ifdatamap = bpi.get_ifdatamap(bridge, port) + ifdatamap = { + port: { + **ifdata, + **{'mtu-request': my_individual_mtu_map[port]}, + } + for port, ifdata in ifdatamap.items() + } + :type global_mtu: Optional[int] + """ + bridges_key = bridges_key or 'data-port' + bonds_key = bonds_key or 'dpdk-bond-mappings' + enable_dpdk_key = enable_dpdk_key or 'enable-dpdk' + self._map = collections.defaultdict( + lambda: collections.defaultdict(dict)) + self._ifname_mac_map = collections.defaultdict(list) + self._mac_ifname_map = {} + self._mac_pci_address_map = {} + + # First we iterate over the list of physical interfaces visible to the + # system and update interface name to mac and mac to interface name map + for ifname in list_nics(): + if not is_phy_iface(ifname): + continue + mac = get_nic_hwaddr(ifname) + self._ifname_mac_map[ifname] = [mac] + self._mac_ifname_map[mac] = ifname + + # check if interface is part of a linux bond + _bond_name = get_bond_master(ifname) + if _bond_name and _bond_name != ifname: + log('Add linux bond "{}" to map for physical interface "{}" ' + 'with mac "{}".'.format(_bond_name, ifname, mac), + level=DEBUG) + # for bonds we want to be able to get a list of the mac + # addresses for the physical interfaces the bond is made up of. + if self._ifname_mac_map.get(_bond_name): + self._ifname_mac_map[_bond_name].append(mac) + else: + self._ifname_mac_map[_bond_name] = [mac] + + # In light of the pre-deprecation notice in the docstring of this + # class we will expose the ability to configure OVS bonds as a + # DPDK-only feature, but generally use the data structures internally. + if config(enable_dpdk_key): + # resolve PCI address of interfaces listed in the bridges and bonds + # charm configuration options. Note that for already bound + # interfaces the helper will retrieve MAC address from the unit + # KV store as the information is no longer available in sysfs. 
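+            # For illustration (hypothetical address): a NIC at PCI address
+            # 0000:81:00.0 that is already bound to DPDK has no sysfs entry,
+            # so the code below derives a stable interface name of the form
+            # 'dpdk-' + sha1(pci_address)[:7] (digest shown symbolically,
+            # not computed) from data kept in the unit KV store.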
+ _pci_bridge_mac = resolve_pci_from_mapping_config( + bridges_key) + _pci_bond_mac = resolve_pci_from_mapping_config( + bonds_key) + + for pci_address, bridge_mac in _pci_bridge_mac.items(): + if bridge_mac.mac in self._mac_ifname_map: + # if we already have the interface name in our map it is + # visible to the system and therefore not bound to DPDK + continue + ifname = 'dpdk-{}'.format( + hashlib.sha1( + pci_address.encode('UTF-8')).hexdigest()[:7]) + self._ifname_mac_map[ifname] = [bridge_mac.mac] + self._mac_ifname_map[bridge_mac.mac] = ifname + self._mac_pci_address_map[bridge_mac.mac] = pci_address + + for pci_address, bond_mac in _pci_bond_mac.items(): + # for bonds we want to be able to get a list of macs from + # the bond name and also get at the interface name made up + # of the hash of the PCI address + ifname = 'dpdk-{}'.format( + hashlib.sha1( + pci_address.encode('UTF-8')).hexdigest()[:7]) + self._ifname_mac_map[bond_mac.entity].append(bond_mac.mac) + self._mac_ifname_map[bond_mac.mac] = ifname + self._mac_pci_address_map[bond_mac.mac] = pci_address + + config_bridges = config(bridges_key) or '' + for bridge, ifname_or_mac in ( + pair.split(':', 1) + for pair in config_bridges.split()): + if ':' in ifname_or_mac: + try: + ifname = self.ifname_from_mac(ifname_or_mac) + except KeyError: + # The interface is destined for a different unit in the + # deployment. + continue + macs = [ifname_or_mac] + else: + ifname = ifname_or_mac + macs = self.macs_from_ifname(ifname_or_mac) + + portname = ifname + for mac in macs: + try: + pci_address = self.pci_address_from_mac(mac) + iftype = self.interface_type.dpdk + ifname = self.ifname_from_mac(mac) + except KeyError: + pci_address = None + iftype = self.interface_type.system + + self.add_interface( + bridge, portname, ifname, iftype, pci_address, global_mtu) + + if not macs: + # We have not mapped the interface and it is probably some sort + # of virtual interface. Our user have put it in the config with + # a purpose so let's carry out their wish. LP: #1884743 + log('Add unmapped interface from config: name "{}" bridge "{}"' + .format(ifname, bridge), + level=DEBUG) + self.add_interface( + bridge, ifname, ifname, self.interface_type.system, None, + global_mtu) + + def __getitem__(self, key): + """Provide a Dict-like interface, get value of item. + + :param key: Key to look up value from. + :type key: any + :returns: Value + :rtype: any + """ + return self._map.__getitem__(key) + + def __iter__(self): + """Provide a Dict-like interface, iterate over keys. + + :returns: Iterator + :rtype: Iterator[any] + """ + return self._map.__iter__() + + def __len__(self): + """Provide a Dict-like interface, measure the length of internal map. + + :returns: Length + :rtype: int + """ + return len(self._map) + + def items(self): + """Provide a Dict-like interface, iterate over items. + + :returns: Key Value pairs + :rtype: Iterator[any, any] + """ + return self._map.items() + + def keys(self): + """Provide a Dict-like interface, iterate over keys. 
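+
+        The keys are bridge names; each bridge maps to a
+        port -> interface -> interface-data structure, mirroring the
+        bridge/port/interface relationship in the class docstring.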
+ + :returns: Iterator + :rtype: Iterator[any] + """ + return self._map.keys() + + def ifname_from_mac(self, mac): + """ + :returns: Name of interface + :rtype: str + :raises: KeyError + """ + return (get_bond_master(self._mac_ifname_map[mac]) or + self._mac_ifname_map[mac]) + + def macs_from_ifname(self, ifname): + """ + :returns: List of hardware address (MAC) of interface + :rtype: List[str] + :raises: KeyError + """ + return self._ifname_mac_map[ifname] + + def pci_address_from_mac(self, mac): + """ + :param mac: Hardware address (MAC) of interface + :type mac: str + :returns: PCI address of device associated with mac + :rtype: str + :raises: KeyError + """ + return self._mac_pci_address_map[mac] + + def add_interface(self, bridge, port, ifname, iftype, + pci_address, mtu_request): + """Add an interface to the map. + + :param bridge: Name of bridge on which the bond will be added + :type bridge: str + :param port: Name of port which will represent the bond on bridge + :type port: str + :param ifname: Name of interface that will make up the bonded port + :type ifname: str + :param iftype: Type of interface + :type iftype: BridgeBondMap.interface_type + :param pci_address: PCI address of interface + :type pci_address: Optional[str] + :param mtu_request: MTU to request for interface + :type mtu_request: Optional[int] + """ + self._map[bridge][port][ifname] = { + 'type': str(iftype), + } + if pci_address: + self._map[bridge][port][ifname].update({ + 'pci-address': pci_address, + }) + if mtu_request is not None: + self._map[bridge][port][ifname].update({ + 'mtu-request': str(mtu_request) + }) + + def get_ifdatamap(self, bridge, port): + """Get structure suitable for charmhelpers.contrib.network.ovs helpers. + + :param bridge: Name of bridge on which the port will be added + :type bridge: str + :param port: Name of port which will represent one or more interfaces + :type port: str + """ + for _bridge, _ports in self.items(): + for _port, _interfaces in _ports.items(): + if _bridge == bridge and _port == port: + ifdatamap = {} + for name, data in _interfaces.items(): + ifdatamap.update({ + name: { + 'type': data['type'], + }, + }) + if data.get('mtu-request') is not None: + ifdatamap[name].update({ + 'mtu_request': data['mtu-request'], + }) + if data.get('pci-address'): + ifdatamap[name].update({ + 'options': { + 'dpdk-devargs': data['pci-address'], + }, + }) + return ifdatamap + + +class BondConfig(object): + """Container and helpers for bond configuration options. + + Data is put into a dictionary and a convenient config get interface is + provided. + """ + + DEFAULT_LACP_CONFIG = { + 'mode': 'balance-tcp', + 'lacp': 'active', + 'lacp-time': 'fast' + } + ALL_BONDS = 'ALL_BONDS' + + BOND_MODES = ['active-backup', 'balance-slb', 'balance-tcp'] + BOND_LACP = ['active', 'passive', 'off'] + BOND_LACP_TIME = ['fast', 'slow'] + + def __init__(self, config_key=None): + """Parse specified configuration option. 
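+
+        For illustration (hypothetical value): ``dpdk-bond-config`` set to
+        ``":balance-slb:off dpdk-bond0:active-backup"`` configures every
+        bond (empty bond name) with mode ``balance-slb`` and LACP ``off``,
+        while ``dpdk-bond0`` gets ``active-backup`` plus the LACP defaults
+        (``active``/``fast``).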
+
+        :param config_key: Configuration key to retrieve data from
+            (default: ``dpdk-bond-config``)
+        :type config_key: Optional[str]
+        """
+        self.config_key = config_key or 'dpdk-bond-config'
+
+        self.lacp_config = {
+            self.ALL_BONDS: copy.deepcopy(self.DEFAULT_LACP_CONFIG)
+        }
+
+        lacp_config = config(self.config_key)
+        if lacp_config:
+            lacp_config_map = lacp_config.split()
+            for entry in lacp_config_map:
+                bond, entry = entry.partition(':')[0:3:2]
+                if not bond:
+                    bond = self.ALL_BONDS
+
+                mode, entry = entry.partition(':')[0:3:2]
+                if not mode:
+                    mode = self.DEFAULT_LACP_CONFIG['mode']
+                assert mode in self.BOND_MODES, \
+                    "Bond mode {} is invalid".format(mode)
+
+                lacp, entry = entry.partition(':')[0:3:2]
+                if not lacp:
+                    lacp = self.DEFAULT_LACP_CONFIG['lacp']
+                assert lacp in self.BOND_LACP, \
+                    "Bond lacp {} is invalid".format(lacp)
+
+                lacp_time, entry = entry.partition(':')[0:3:2]
+                if not lacp_time:
+                    lacp_time = self.DEFAULT_LACP_CONFIG['lacp-time']
+                assert lacp_time in self.BOND_LACP_TIME, \
+                    "Bond lacp-time {} is invalid".format(lacp_time)
+
+                self.lacp_config[bond] = {
+                    'mode': mode,
+                    'lacp': lacp,
+                    'lacp-time': lacp_time
+                }
+
+    def get_bond_config(self, bond):
+        """Get the LACP configuration for a bond
+
+        :param bond: the bond name
+        :return: a dictionary with the configuration of the bond
+        :rtype: Dict[str,Dict[str,str]]
+        """
+        return self.lacp_config.get(bond, self.lacp_config[self.ALL_BONDS])
+
+    def get_ovs_portdata(self, bond):
+        """Get structure suitable for charmhelpers.contrib.network.ovs helpers.
+
+        :param bond: the bond name
+        :return: a dictionary with the configuration of the bond
+        :rtype: Dict[str,Union[str,Dict[str,str]]]
+        """
+        bond_config = self.get_bond_config(bond)
+        return {
+            'bond_mode': bond_config['mode'],
+            'lacp': bond_config['lacp'],
+            'other_config': {
+                'lacp-time': bond_config['lacp-time'],
+            },
+        }
+
+
+class SRIOVContext(OSContextGenerator):
+    """Provide context for configuring SR-IOV devices."""
+
+    class sriov_config_mode(enum.Enum):
+        """Mode in which SR-IOV is configured.
+
+        The configuration option identified by the ``numvfs_key`` parameter
+        is overloaded and defines in which mode the charm should interpret
+        the other SR-IOV-related configuration options.
+        """
+        auto = 'auto'
+        blanket = 'blanket'
+        explicit = 'explicit'
+
+    PCIDeviceNumVFs = collections.namedtuple(
+        'PCIDeviceNumVFs', ['device', 'numvfs'])
+
+    def _determine_numvfs(self, device, sriov_numvfs):
+        """Determine number of Virtual Functions (VFs) configured for device.
+
+        :param device: Object describing a PCI Network interface card (NIC).
+        :type device: contrib.hardware.pci.PCINetDevice
+        :param sriov_numvfs: Number of VFs requested for blanket configuration.
+        :type sriov_numvfs: int
+        :returns: Number of VFs to configure for device
+        :rtype: Optional[int]
+        """
+
+        def _get_capped_numvfs(requested):
+            """Get a number of VFs that does not exceed individual card limits.
+
+            Depending on the make and model of the NIC, the number of VFs
+            supported varies. Requesting more VFs than a card supports would
+            be a fatal error, so cap the requested number at the total number
+            of VFs each individual card supports.
+
+            :param requested: Number of VFs requested
+            :type requested: int
+            :returns: Number of VFs allowed
+            :rtype: int
+            """
+            actual = min(int(requested), int(device.sriov_totalvfs))
+            if actual < int(requested):
+                log('Requested VFs ({}) too high for device {}. 
Falling back ' + 'to value supported by device: {}' + .format(requested, device.interface_name, + device.sriov_totalvfs), + level=WARNING) + return actual + + if self._sriov_config_mode == self.sriov_config_mode.auto: + # auto-mode + # + # If device mapping configuration is present, return information + # on cards with mapping. + # + # If no device mapping configuration is present, return information + # for all cards. + # + # The maximum number of VFs supported by card will be used. + if (self._sriov_mapped_devices and + device.interface_name not in self._sriov_mapped_devices): + log('SR-IOV configured in auto mode: No device mapping for {}' + .format(device.interface_name), + level=DEBUG) + return + return _get_capped_numvfs(device.sriov_totalvfs) + elif self._sriov_config_mode == self.sriov_config_mode.blanket: + # blanket-mode + # + # User has specified a number of VFs that should apply to all + # cards with support for VFs. + return _get_capped_numvfs(sriov_numvfs) + elif self._sriov_config_mode == self.sriov_config_mode.explicit: + # explicit-mode + # + # User has given a list of interface names and associated number of + # VFs + if device.interface_name not in self._sriov_config_devices: + log('SR-IOV configured in explicit mode: No device:numvfs ' + 'pair for device {}, skipping.' + .format(device.interface_name), + level=DEBUG) + return + return _get_capped_numvfs( + self._sriov_config_devices[device.interface_name]) + else: + raise RuntimeError('This should not be reached') + + def __init__(self, numvfs_key=None, device_mappings_key=None): + """Initialize map from PCI devices and configuration options. + + :param numvfs_key: Config key for numvfs (default: 'sriov-numvfs') + :type numvfs_key: Optional[str] + :param device_mappings_key: Config key for device mappings + (default: 'sriov-device-mappings') + :type device_mappings_key: Optional[str] + :raises: RuntimeError + """ + numvfs_key = numvfs_key or 'sriov-numvfs' + device_mappings_key = device_mappings_key or 'sriov-device-mappings' + + devices = pci.PCINetDevices() + charm_config = config() + sriov_numvfs = charm_config.get(numvfs_key) or '' + sriov_device_mappings = charm_config.get(device_mappings_key) or '' + + # create list of devices from sriov_device_mappings config option + self._sriov_mapped_devices = [ + pair.split(':', 1)[1] + for pair in sriov_device_mappings.split() + ] + + # create map of device:numvfs from sriov_numvfs config option + self._sriov_config_devices = { + ifname: numvfs for ifname, numvfs in ( + pair.split(':', 1) for pair in sriov_numvfs.split() + if ':' in sriov_numvfs) + } + + # determine configuration mode from contents of sriov_numvfs + if sriov_numvfs == 'auto': + self._sriov_config_mode = self.sriov_config_mode.auto + elif sriov_numvfs.isdigit(): + self._sriov_config_mode = self.sriov_config_mode.blanket + elif ':' in sriov_numvfs: + self._sriov_config_mode = self.sriov_config_mode.explicit + else: + raise RuntimeError('Unable to determine mode of SR-IOV ' + 'configuration.') + + self._map = { + device.pci_address: self.PCIDeviceNumVFs( + device, self._determine_numvfs(device, sriov_numvfs)) + for device in devices.pci_devices + if device.sriov and + self._determine_numvfs(device, sriov_numvfs) is not None + } + + def __call__(self): + """Provide backward compatible SR-IOV context. + + :returns: Map interface name: min(configured, max) virtual functions. 
+ Example: + { + 'eth0': 16, + 'eth1': 32, + 'eth2': 64, + } + :rtype: Dict[str,int] + """ + return { + pcidnvfs.device.interface_name: pcidnvfs.numvfs + for _, pcidnvfs in self._map.items() + } + + @property + def get_map(self): + """Provide map of configured SR-IOV capable PCI devices. + + :returns: Map PCI-address: (PCIDevice, min(configured, max) VFs. + Example: + { + '0000:81:00.0': self.PCIDeviceNumVFs(, 32), + '0000:81:00.1': self.PCIDeviceNumVFs(, 32), + } + :rtype: Dict[str, self.PCIDeviceNumVFs] + """ + return self._map + + +class CephBlueStoreCompressionContext(OSContextGenerator): + """Ceph BlueStore compression options.""" + + # Tuple with Tuples that map configuration option name to CephBrokerRq op + # property name + options = ( + ('bluestore-compression-algorithm', + 'compression-algorithm'), + ('bluestore-compression-mode', + 'compression-mode'), + ('bluestore-compression-required-ratio', + 'compression-required-ratio'), + ('bluestore-compression-min-blob-size', + 'compression-min-blob-size'), + ('bluestore-compression-min-blob-size-hdd', + 'compression-min-blob-size-hdd'), + ('bluestore-compression-min-blob-size-ssd', + 'compression-min-blob-size-ssd'), + ('bluestore-compression-max-blob-size', + 'compression-max-blob-size'), + ('bluestore-compression-max-blob-size-hdd', + 'compression-max-blob-size-hdd'), + ('bluestore-compression-max-blob-size-ssd', + 'compression-max-blob-size-ssd'), + ) + + def __init__(self): + """Initialize context by loading values from charm config. + + We keep two maps, one suitable for use with CephBrokerRq's and one + suitable for template generation. + """ + charm_config = config() + + # CephBrokerRq op map + self.op = {} + # Context exposed for template generation + self.ctxt = {} + for config_key, op_key in self.options: + value = charm_config.get(config_key) + self.ctxt.update({config_key.replace('-', '_'): value}) + self.op.update({op_key: value}) + + def __call__(self): + """Get context. + + :returns: Context + :rtype: Dict[str,any] + """ + return self.ctxt + + def get_op(self): + """Get values for use in CephBrokerRq op. + + :returns: Context values with CephBrokerRq op property name as key. + :rtype: Dict[str,any] + """ + return self.op + + def get_kwargs(self): + """Get values for use as keyword arguments. + + :returns: Context values with key suitable for use as kwargs to + CephBrokerRq add_op_create_*_pool methods. + :rtype: Dict[str,any] + """ + return { + k.replace('-', '_'): v + for k, v in self.op.items() + } + + def validate(self): + """Validate options. + + :raises: AssertionError + """ + # We slip in a dummy name on class instantiation to allow validation of + # the other options. It will not affect further use. + # + # NOTE: once we retire Python 3.5 we can fold this into a in-line + # dictionary comprehension in the call to the initializer. + dummy_op = {'name': 'dummy-name'} + dummy_op.update(self.op) + pool = ch_ceph.BasePool('dummy-service', op=dummy_op) + pool.validate() diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/deferred_events.py b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/deferred_events.py new file mode 100644 index 00000000..4c46e41a --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/deferred_events.py @@ -0,0 +1,418 @@ +# Copyright 2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module for managing deferred service events. + +This module is used to manage deferred service events from both charm actions +and package actions. +""" + +import datetime +import glob +import yaml +import os +import time +import uuid + +import charmhelpers.contrib.openstack.policy_rcd as policy_rcd +import charmhelpers.core.hookenv as hookenv +import charmhelpers.core.host as host +import charmhelpers.core.unitdata as unitdata + +import subprocess + + +# Deferred events generated from the charm are stored along side those +# generated from packaging. +DEFERRED_EVENTS_DIR = policy_rcd.POLICY_DEFERRED_EVENTS_DIR + + +class ServiceEvent(): + + def __init__(self, timestamp, service, reason, action, + policy_requestor_name=None, policy_requestor_type=None): + self.timestamp = timestamp + self.service = service + self.reason = reason + self.action = action + if policy_requestor_name: + self.policy_requestor_name = policy_requestor_name + else: + self.policy_requestor_name = hookenv.service_name() + if policy_requestor_type: + self.policy_requestor_type = policy_requestor_type + else: + self.policy_requestor_type = 'charm' + + def __eq__(self, other): + for attr in vars(self): + if getattr(self, attr) != getattr(other, attr): + return False + return True + + def matching_request(self, other): + for attr in ['service', 'action', 'reason']: + if getattr(self, attr) != getattr(other, attr): + return False + return True + + @classmethod + def from_dict(cls, data): + return cls( + data['timestamp'], + data['service'], + data['reason'], + data['action'], + data.get('policy_requestor_name'), + data.get('policy_requestor_type')) + + +def deferred_events_files(): + """Deferred event files + + Deferred event files that were generated by service_name() policy. + + :returns: Deferred event files + :rtype: List[str] + """ + return glob.glob('{}/*.deferred'.format(DEFERRED_EVENTS_DIR)) + + +def read_event_file(file_name): + """Read a file and return the corresponding objects. + + :param file_name: Name of file to read. + :type file_name: str + :returns: ServiceEvent from file. + :rtype: ServiceEvent + """ + with open(file_name, 'r') as f: + contents = yaml.safe_load(f) + event = ServiceEvent( + contents['timestamp'], + contents['service'], + contents['reason'], + contents['action'], + policy_requestor_name=contents.get('policy_requestor_name'), + policy_requestor_type=contents.get('policy_requestor_type')) + return event + + +def deferred_events(): + """Get list of deferred events. + + List of deferred events. Events are represented by dicts of the form: + + { + action: restart, + policy_requestor_name: neutron-openvswitch, + policy_requestor_type: charm, + reason: 'Pkg update', + service: openvswitch-switch, + time: 1614328743} + + :returns: List of deferred events. 
+ :rtype: List[ServiceEvent] + """ + events = [] + for defer_file in deferred_events_files(): + event = read_event_file(defer_file) + if event.policy_requestor_name == hookenv.service_name(): + events.append((defer_file, event)) + return events + + +def duplicate_event_files(event): + """Get list of event files that have equivalent deferred events. + + :param event: Event to compare + :type event: ServiceEvent + :returns: List of event files + :rtype: List[str] + """ + duplicates = [] + for event_file, existing_event in deferred_events(): + if event.matching_request(existing_event): + duplicates.append(event_file) + return duplicates + + +def get_event_record_file(policy_requestor_type, policy_requestor_name): + """Generate filename for storing a new event. + + :param policy_requestor_type: System that blocked event + :type policy_requestor_type: str + :param policy_requestor_name: Name of application that blocked event + :type policy_requestor_name: str + :returns: File name + :rtype: str + """ + file_name = '{}/{}-{}-{}.deferred'.format( + DEFERRED_EVENTS_DIR, + policy_requestor_type, + policy_requestor_name, + uuid.uuid1()) + return file_name + + +def save_event(event): + """Write deferred events to backend. + + :param event: Event to save + :type event: ServiceEvent + """ + requestor_name = hookenv.service_name() + requestor_type = 'charm' + init_policy_log_dir() + if duplicate_event_files(event): + hookenv.log( + "Not writing new event, existing event found. {} {} {}".format( + event.service, + event.action, + event.reason), + level="DEBUG") + else: + record_file = get_event_record_file( + policy_requestor_type=requestor_type, + policy_requestor_name=requestor_name) + + with open(record_file, 'w') as f: + data = { + 'timestamp': event.timestamp, + 'service': event.service, + 'action': event.action, + 'reason': event.reason, + 'policy_requestor_type': requestor_type, + 'policy_requestor_name': requestor_name} + yaml.dump(data, f) + + +def clear_deferred_events(svcs, action): + """Remove any outstanding deferred events. + + Remove a deferred event if its service is in the services list and its + action matches. + + :param svcs: List of services to remove. + :type svcs: List[str] + :param action: Action to remove + :type action: str + """ + # XXX This function is not currently processing the action. It needs to + # match the action and also take account of try-restart and the + # equivalnce of stop-start and restart. + for defer_file in deferred_events_files(): + deferred_event = read_event_file(defer_file) + if deferred_event.service in svcs: + os.remove(defer_file) + + +def init_policy_log_dir(): + """Ensure directory to store events exists.""" + if not os.path.exists(DEFERRED_EVENTS_DIR): + os.mkdir(DEFERRED_EVENTS_DIR) + + +def get_deferred_events(): + """Return a list of deferred events requested by the charm and packages. + + :returns: List of deferred events + :rtype: List[ServiceEvent] + """ + events = [] + for _, event in deferred_events(): + events.append(event) + return events + + +def get_deferred_restarts(): + """List of deferred restart events requested by the charm and packages. + + :returns: List of deferred restarts + :rtype: List[ServiceEvent] + """ + return [e for e in get_deferred_events() if e.action == 'restart'] + + +def clear_deferred_restarts(services): + """Clear deferred restart events targeted at `services`. + + :param services: Services with deferred actions to clear. 
+ :type services: List[str] + """ + clear_deferred_events(services, 'restart') + + +def process_svc_restart(service): + """Respond to a service restart having occurred. + + :param service: Services that the action was performed against. + :type service: str + """ + clear_deferred_restarts([service]) + + +def is_restart_permitted(): + """Check whether restarts are permitted. + + :returns: Whether restarts are permitted + :rtype: bool + """ + if hookenv.config('enable-auto-restarts') is None: + return True + return hookenv.config('enable-auto-restarts') + + +def check_and_record_restart_request(service, changed_files): + """Check if restarts are permitted, if they are not log the request. + + :param service: Service to be restarted + :type service: str + :param changed_files: Files that have changed to trigger restarts. + :type changed_files: List[str] + :returns: Whether restarts are permitted + :rtype: bool + """ + changed_files = sorted(list(set(changed_files))) + permitted = is_restart_permitted() + if not permitted: + save_event(ServiceEvent( + timestamp=round(time.time()), + service=service, + reason='File(s) changed: {}'.format( + ', '.join(changed_files)), + action='restart')) + return permitted + + +def deferrable_svc_restart(service, reason=None): + """Restarts service if permitted, if not defer it. + + :param service: Service to be restarted + :type service: str + :param reason: Reason for restart + :type reason: Union[str, None] + """ + if is_restart_permitted(): + host.service_restart(service) + else: + save_event(ServiceEvent( + timestamp=round(time.time()), + service=service, + reason=reason, + action='restart')) + + +def configure_deferred_restarts(services): + """Setup deferred restarts. + + :param services: Services to block restarts of. + :type services: List[str] + """ + policy_rcd.install_policy_rcd() + if is_restart_permitted(): + policy_rcd.remove_policy_file() + else: + blocked_actions = ['stop', 'restart', 'try-restart'] + for svc in services: + policy_rcd.add_policy_block(svc, blocked_actions) + + +def get_service_start_time(service): + """Find point in time when the systemd unit transitioned to active state. + + :param service: Services to check timetsamp of. + :type service: str + """ + start_time = None + out = subprocess.check_output( + [ + 'systemctl', + 'show', + service, + '--property=ActiveEnterTimestamp']) + str_time = out.decode().rstrip().replace('ActiveEnterTimestamp=', '') + if str_time: + start_time = datetime.datetime.strptime( + str_time, + '%a %Y-%m-%d %H:%M:%S %Z') + return start_time + + +def check_restart_timestamps(): + """Check deferred restarts against systemd units start time. + + Check if a service has a deferred event and clear it if it has been + subsequently restarted. + """ + for event in get_deferred_restarts(): + start_time = get_service_start_time(event.service) + deferred_restart_time = datetime.datetime.fromtimestamp( + event.timestamp) + if start_time and start_time < deferred_restart_time: + hookenv.log( + ("Restart still required, {} was started at {}, restart was " + "requested after that at {}").format( + event.service, + start_time, + deferred_restart_time), + level='DEBUG') + else: + clear_deferred_restarts([event.service]) + + +def set_deferred_hook(hookname): + """Record that a hook has been deferred. + + :param hookname: Name of hook that was deferred. 
+    :type hookname: str
+    """
+    with unitdata.HookData()() as t:
+        kv = t[0]
+        deferred_hooks = kv.get('deferred-hooks', [])
+        if hookname not in deferred_hooks:
+            deferred_hooks.append(hookname)
+            kv.set('deferred-hooks', sorted(list(set(deferred_hooks))))
+
+
+def get_deferred_hooks():
+    """Get a list of deferred hooks.
+
+    :returns: List of hook names.
+    :rtype: List[str]
+    """
+    with unitdata.HookData()() as t:
+        kv = t[0]
+        return kv.get('deferred-hooks', [])
+
+
+def clear_deferred_hooks():
+    """Clear any deferred hooks."""
+    with unitdata.HookData()() as t:
+        kv = t[0]
+        kv.set('deferred-hooks', [])
+
+
+def clear_deferred_hook(hookname):
+    """Clear a specific deferred hook.
+
+    :param hookname: Name of hook to remove.
+    :type hookname: str
+    """
+    with unitdata.HookData()() as t:
+        kv = t[0]
+        deferred_hooks = kv.get('deferred-hooks', [])
+        if hookname in deferred_hooks:
+            deferred_hooks.remove(hookname)
+            kv.set('deferred-hooks', deferred_hooks)
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/exceptions.py b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/exceptions.py
new file mode 100644
index 00000000..b2330637
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/exceptions.py
@@ -0,0 +1,26 @@
+# Copyright 2016 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+class OSContextError(Exception):
+    """Raised when an error occurs during context generation.
+
+    This exception is principally used in contrib.openstack.context
+    """
+    pass
+
+
+class ServiceActionError(Exception):
+    """Raised when a service action (stop/start etc.) failed."""
+    pass
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/files/__init__.py b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/files/__init__.py
new file mode 100644
index 00000000..9df5f746
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/files/__init__.py
@@ -0,0 +1,16 @@
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# dummy __init__.py to fool syncer into thinking this is a syncable python
+# module
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/files/check_deferred_restarts.py b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/files/check_deferred_restarts.py
new file mode 100755
index 00000000..5f392b3c
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/files/check_deferred_restarts.py
@@ -0,0 +1,128 @@
+#!/usr/bin/python3

+# Copyright 2014-2022 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Checks for services with deferred restarts.
+
+This Nagios check will parse /var/lib/policy-rc.d/
+to find any restarts that are currently deferred.
+"""
+
+import argparse
+import glob
+import sys
+import yaml
+
+
+DEFERRED_EVENTS_DIR = '/var/lib/policy-rc.d'
+
+
+def get_deferred_events():
+    """Return a list of deferred events dicts from policy-rc.d files.
+
+    Events are read from DEFERRED_EVENTS_DIR and are of the form:
+    {
+        action: restart,
+        policy_requestor_name: rabbitmq-server,
+        policy_requestor_type: charm,
+        reason: 'Pkg update',
+        service: rabbitmq-server,
+        time: 1614328743
+    }
+
+    :raises OSError: Raised in case of a system error while reading a policy file
+    :raises yaml.YAMLError: Raised if parsing a policy file fails
+
+    :returns: List of deferred event dictionaries
+    :rtype: list
+    """
+    deferred_events_files = glob.glob(
+        '{}/*.deferred'.format(DEFERRED_EVENTS_DIR))
+
+    deferred_events = []
+    for event_file in deferred_events_files:
+        with open(event_file, 'r') as f:
+            event = yaml.safe_load(f)
+            deferred_events.append(event)
+
+    return deferred_events
+
+
+def get_deferred_restart_services(application=None):
+    """Returns a list of services with deferred restarts.
+
+    :param str application: Name of the application that blocked the service restart.
+                            If application is None, all services with deferred restarts
+                            are returned. Services which are blocked by a non-charm
+                            requestor are always returned.
+
+    :raises OSError: Raised in case of a system error while reading a policy file
+    :raises yaml.YAMLError: Raised if parsing a policy file fails
+
+    :returns: List of services with deferred restarts belonging to application.
+    :rtype: list
+    """
+
+    deferred_restart_events = filter(
+        lambda e: e['action'] == 'restart', get_deferred_events())
+
+    deferred_restart_services = set()
+    for restart_event in deferred_restart_events:
+        if application:
+            if (
+                restart_event['policy_requestor_type'] != 'charm' or
+                restart_event['policy_requestor_type'] == 'charm' and
+                restart_event['policy_requestor_name'] == application
+            ):
+                deferred_restart_services.add(restart_event['service'])
+        else:
+            deferred_restart_services.add(restart_event['service'])
+
+    return list(deferred_restart_services)
+
+
+def main():
+    """Check for services with deferred restarts."""
+    parser = argparse.ArgumentParser(
+        description='Check for services with deferred restarts')
+    parser.add_argument(
+        '--application', help='Check services belonging to this application only')
+
+    args = parser.parse_args()
+
+    services = set(get_deferred_restart_services(args.application))
+
+    if len(services) == 0:
+        print('OK: No deferred service restarts.')
+        sys.exit(0)
+    else:
+        print(
+            'CRITICAL: Restarts are deferred for services: {}.'.format(', '.join(services)))
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    try:
+        main()
+    except OSError as e:
+        print('CRITICAL: A system error occurred: {} ({})'.format(e.errno, e.strerror))
+        sys.exit(1)
+    except yaml.YAMLError as e:
+        print('CRITICAL: Failed to parse a policy file: {}'.format(str(e)))
+        sys.exit(1)
+    except Exception as e:
+        print('CRITICAL: An unknown error occurred: {}'.format(str(e)))
+        sys.exit(1)
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/files/check_haproxy.sh b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/files/check_haproxy.sh
new file mode 100755
index 00000000..1df55db4
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/files/check_haproxy.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+#--------------------------------------------
+# This file is managed by Juju
+#--------------------------------------------
+#
+# Copyright 2009,2012 Canonical Ltd.
+# Author: Tom Haddon
+
+CRITICAL=0
+NOTACTIVE=''
+LOGFILE=/var/log/nagios/check_haproxy.log
+AUTH=$(grep -r "stats auth" /etc/haproxy/haproxy.cfg | awk 'NR==1{print $3}')
+
+typeset -i N_INSTANCES=0
+for appserver in $(awk '/^\s+server/{print $2}' /etc/haproxy/haproxy.cfg)
+do
+    N_INSTANCES=N_INSTANCES+1
+    output=$(/usr/lib/nagios/plugins/check_http -a ${AUTH} -I 127.0.0.1 -p 8888 -u '/;csv' --regex=",${appserver},.*,UP.*" -e ' 200 OK')
+    if [ $? != 0 ]; then
+        date >> $LOGFILE
+        echo $output >> $LOGFILE
+        /usr/lib/nagios/plugins/check_http -a ${AUTH} -I 127.0.0.1 -p 8888 -u '/;csv' -v | grep ",${appserver}," >> $LOGFILE 2>&1
+        CRITICAL=1
+        NOTACTIVE="${NOTACTIVE} $appserver"
+    fi
+done
+
+if [ $CRITICAL = 1 ]; then
+    echo "CRITICAL:${NOTACTIVE}"
+    exit 2
+fi
+
+echo "OK: All haproxy instances ($N_INSTANCES) looking good"
+exit 0
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/files/check_haproxy_queue_depth.sh b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/files/check_haproxy_queue_depth.sh
new file mode 100755
index 00000000..91ce0246
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/files/check_haproxy_queue_depth.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+#--------------------------------------------
+# This file is managed by Juju
+#--------------------------------------------
+#
+# Copyright 2009,2012 Canonical Ltd.
+# Author: Tom Haddon
+
+# These should be config options at some stage
+CURRQthrsh=0
+MAXQthrsh=100
+
+AUTH=$(grep -r "stats auth" /etc/haproxy/haproxy.cfg | awk 'NR==1{print $3}')
+
+HAPROXYSTATS=$(/usr/lib/nagios/plugins/check_http -a ${AUTH} -I 127.0.0.1 -p 8888 -u '/;csv' -v)
+
+for BACKEND in $(echo $HAPROXYSTATS| xargs -n1 | grep BACKEND | awk -F , '{print $1}')
+do
+    CURRQ=$(echo "$HAPROXYSTATS" | grep $BACKEND | grep BACKEND | cut -d , -f 3)
+    MAXQ=$(echo "$HAPROXYSTATS" | grep $BACKEND | grep BACKEND | cut -d , -f 4)
+
+    if [[ $CURRQ -gt $CURRQthrsh || $MAXQ -gt $MAXQthrsh ]] ; then
+        echo "CRITICAL: queue depth for $BACKEND - CURRENT:$CURRQ MAX:$MAXQ"
+        exit 2
+    fi
+done
+
+echo "OK: All haproxy queue depths looking good"
+exit 0
+
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/files/policy_rc_d_script.py b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/files/policy_rc_d_script.py
new file mode 100755
index 00000000..431e972b
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/files/policy_rc_d_script.py
@@ -0,0 +1,196 @@
+#!/usr/bin/env python3

+"""This script is an implementation of policy-rc.d
+
+For further information on policy-rc.d see *1
+
+*1 https://people.debian.org/~hmh/invokerc.d-policyrc.d-specification.txt
+"""
+import collections
+import glob
+import os
+import logging
+import sys
+import time
+import uuid
+import yaml
+
+
+SystemPolicy = collections.namedtuple(
+    'SystemPolicy',
+    [
+        'policy_requestor_name',
+        'policy_requestor_type',
+        'service',
+        'blocked_actions'])
+
+DEFAULT_POLICY_CONFIG_DIR = '/etc/policy-rc.d'
+DEFAULT_POLICY_LOG_DIR = '/var/lib/policy-rc.d'
+
+
+def read_policy_file(policy_file):
+    """Return system policies from given file.
+
+    :param policy_file: Name of file to read.
+    :type policy_file: str
+    :returns: Policy
+    :rtype: List[SystemPolicy]
+    """
+    policies = []
+    if os.path.exists(policy_file):
+        with open(policy_file, 'r') as f:
+            policy = yaml.safe_load(f)
+            for service, actions in policy['blocked_actions'].items():
+                service = service.replace('.service', '')
+                policies.append(SystemPolicy(
+                    policy_requestor_name=policy['policy_requestor_name'],
+                    policy_requestor_type=policy['policy_requestor_type'],
+                    service=service,
+                    blocked_actions=actions))
+    return policies
+
+
+def get_policies(policy_config_dir):
+    """Return all system policies in policy_config_dir.
+
+    :param policy_config_dir: Directory to read policy files from.
+    :type policy_config_dir: str
+    :returns: Policy
+    :rtype: List[SystemPolicy]
+    """
+    _policy = []
+    for f in glob.glob('{}/*.policy'.format(policy_config_dir)):
+        _policy.extend(read_policy_file(f))
+    return _policy
+
+
+def record_blocked_action(service, action, blocking_policies, policy_log_dir):
+    """Record that an action was requested but denied.
+
+    :param service: Service that was blocked
+    :type service: str
+    :param action: Action that was blocked.
+    :type action: str
+    :param blocking_policies: Policies that blocked the action on the service.
+    :type blocking_policies: List[SystemPolicy]
+    :param policy_log_dir: Directory to place the blocking action record.
+    :type policy_log_dir: str
+    """
+    if not os.path.exists(policy_log_dir):
+        os.mkdir(policy_log_dir)
+    seconds = round(time.time())
+    for policy in blocking_policies:
+        file_name = '{}/{}-{}-{}.deferred'.format(
+            policy_log_dir,
+            policy.policy_requestor_type,
+            policy.policy_requestor_name,
+            uuid.uuid1())
+        with open(file_name, 'w') as f:
+            data = {
+                'timestamp': seconds,
+                'service': service,
+                'action': action,
+                'reason': 'Package update',
+                'policy_requestor_type': policy.policy_requestor_type,
+                'policy_requestor_name': policy.policy_requestor_name}
+            yaml.dump(data, f)
+
+
+def get_blocking_policies(service, action, policy_config_dir):
+    """Find policies that block the requested action on the service.
+
+    :param service: Service that action is requested against.
+    :type service: str
+    :param action: Action that is requested.
+    :type action: str
+    :param policy_config_dir: Directory that stores policy files.
+    :type policy_config_dir: str
+    :returns: Policies
+    :rtype: List[SystemPolicy]
+    """
+    service = service.replace('.service', '')
+    blocking_policies = [
+        policy
+        for policy in get_policies(policy_config_dir)
+        if policy.service == service and action in policy.blocked_actions]
+    return blocking_policies
+
+
+def process_action_request(service, action, policy_config_dir, policy_log_dir):
+    """Check whether the requested action against service is permitted.
+
+    :param service: Service that action is requested against.
+    :type service: str
+    :param action: Action that is requested.
+    :type action: str
+    :param policy_config_dir: Directory that stores policy files.
+    :type policy_config_dir: str
+    :param policy_log_dir: Directory that stores deferred event records.
+    :type policy_log_dir: str
+    :returns: Tuple of whether the action is permitted and explanation.
+    :rtype: (boolean, str)
+    """
+    blocking_policies = get_blocking_policies(
+        service,
+        action,
+        policy_config_dir)
+    if blocking_policies:
+        policy_msg = [
+            '{} {}'.format(p.policy_requestor_type, p.policy_requestor_name)
+            for p in sorted(blocking_policies)]
+        message = '{} of {} blocked by {}'.format(
+            action,
+            service,
+            ', '.join(policy_msg))
+        record_blocked_action(
+            service,
+            action,
+            blocking_policies,
+            policy_log_dir)
+        action_permitted = False
+    else:
+        message = "Permitting {} {}".format(service, action)
+        action_permitted = True
+    return action_permitted, message
+
+
+def main():
+    logging.basicConfig(
+        filename='/var/log/policy-rc.d.log',
+        level=logging.DEBUG,
+        format='%(asctime)s %(message)s')
+
+    service = sys.argv[1]
+    action = sys.argv[2]
+
+    permitted, message = process_action_request(
+        service,
+        action,
+        DEFAULT_POLICY_CONFIG_DIR,
+        DEFAULT_POLICY_LOG_DIR)
+    logging.info(message)
+
+    # https://people.debian.org/~hmh/invokerc.d-policyrc.d-specification.txt
+    # Exit status codes:
+    #    0 - action allowed
+    #    1 - unknown action (therefore, undefined policy)
+    #  100 - unknown initscript id
+    #  101 - action forbidden by policy
+    #  102 - subsystem error
+    #  103 - syntax error
+    #  104 - [reserved]
+    #  105 - behaviour uncertain, policy undefined.
+    #  106 - action not allowed. Use the returned fallback actions
+    #        (which are implied to be "allowed") instead.
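+    # Illustrative flow (service name is an example): when a package
+    # upgrade invokes `policy-rc.d rabbitmq-server restart` and a policy
+    # file in /etc/policy-rc.d blocks 'restart' for that service, the
+    # request is recorded under /var/lib/policy-rc.d and 101 is returned;
+    # otherwise the action is permitted and 0 is returned.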
+ + if permitted: + return 0 + else: + return 101 + + +if __name__ == "__main__": + rc = main() + sys.exit(rc) diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/ha/__init__.py b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/ha/__init__.py new file mode 100644 index 00000000..9b088de8 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/ha/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/ha/utils.py b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/ha/utils.py new file mode 100644 index 00000000..b4912c42 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/ha/utils.py @@ -0,0 +1,377 @@ +# Copyright 2014-2016 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# Copyright 2016 Canonical Ltd. +# +# Authors: +# Openstack Charmers < +# + +""" +Helpers for high availability. +""" + +import hashlib +import json +import os + +import re + +from charmhelpers.core.hookenv import ( + expected_related_units, + log, + relation_set, + charm_name, + config, + status_set, + DEBUG, + application_name, +) + +from charmhelpers.core.host import ( + lsb_release +) + +from charmhelpers.contrib.openstack.ip import ( + resolve_address, + is_ipv6, +) + +from charmhelpers.contrib.network.ip import ( + get_iface_for_address, + get_netmask_for_address, +) + +from charmhelpers.contrib.hahelpers.cluster import ( + get_hacluster_config +) + +JSON_ENCODE_OPTIONS = dict( + sort_keys=True, + allow_nan=False, + indent=None, + separators=(',', ':'), +) + +VIP_GROUP_NAME = 'grp_{service}_vips' +DNSHA_GROUP_NAME = 'grp_{service}_hostnames' +HAPROXY_DASHBOARD_RESOURCE = "haproxy-dashboard" + + +class DNSHAException(Exception): + """Raised when an error occurs setting up DNS HA + """ + + pass + + +def update_dns_ha_resource_params(resources, resource_params, + relation_id=None, + crm_ocf='ocf:maas:dns'): + """ Configure DNS-HA resources based on provided configuration and + update resource dictionaries for the HA relation. + + @param resources: Pointer to dictionary of resources. + Usually instantiated in ha_joined(). + @param resource_params: Pointer to dictionary of resource parameters. 
+ Usually instantiated in ha_joined() + @param relation_id: Relation ID of the ha relation + @param crm_ocf: Corosync Open Cluster Framework resource agent to use for + DNS HA + """ + _relation_data = {'resources': {}, 'resource_params': {}} + update_hacluster_dns_ha(charm_name(), + _relation_data, + crm_ocf) + resources.update(_relation_data['resources']) + resource_params.update(_relation_data['resource_params']) + relation_set(relation_id=relation_id, groups=_relation_data['groups']) + + +def assert_charm_supports_dns_ha(): + """Validate prerequisites for DNS HA + The MAAS client is only available on Xenial or greater + + :raises DNSHAException: if release is < 16.04 + """ + if lsb_release().get('DISTRIB_RELEASE') < '16.04': + msg = ('DNS HA is only supported on 16.04 and greater ' + 'versions of Ubuntu.') + status_set('blocked', msg) + raise DNSHAException(msg) + return True + + +def expect_ha(): + """ Determine if the unit expects to be in HA + + Check juju goal-state if ha relation is expected, check for VIP or dns-ha + settings which indicate the unit should expect to be related to hacluster. + + @returns boolean + """ + ha_related_units = [] + try: + ha_related_units = list(expected_related_units(reltype='ha')) + except (NotImplementedError, KeyError): + pass + return len(ha_related_units) > 0 or config('vip') or config('dns-ha') + + +def generate_ha_relation_data(service, + extra_settings=None, + haproxy_enabled=True): + """ Generate relation data for ha relation + + Based on configuration options and unit interfaces, generate a json + encoded dict of relation data items for the hacluster relation, + providing configuration for DNS HA or VIP's + haproxy clone sets. + + Example of supplying additional settings:: + + COLO_CONSOLEAUTH = 'inf: res_nova_consoleauth grp_nova_vips' + AGENT_CONSOLEAUTH = 'ocf:openstack:nova-consoleauth' + AGENT_CA_PARAMS = 'op monitor interval="5s"' + + ha_console_settings = { + 'colocations': {'vip_consoleauth': COLO_CONSOLEAUTH}, + 'init_services': {'res_nova_consoleauth': 'nova-consoleauth'}, + 'resources': {'res_nova_consoleauth': AGENT_CONSOLEAUTH}, + 'resource_params': {'res_nova_consoleauth': AGENT_CA_PARAMS}) + generate_ha_relation_data('nova', extra_settings=ha_console_settings) + + + @param service: Name of the service being configured + @param extra_settings: Dict of additional resource data + @returns dict: json encoded data for use with relation_set + """ + _relation_data = {'resources': {}, 'resource_params': {}} + + if haproxy_enabled: + _meta = 'meta migration-threshold="INFINITY" failure-timeout="5s"' + _haproxy_res = 'res_{}_haproxy'.format(service) + _relation_data['resources'] = {_haproxy_res: 'lsb:haproxy'} + _relation_data['resource_params'] = { + _haproxy_res: '{} op monitor interval="5s"'.format(_meta) + } + _relation_data['init_services'] = {_haproxy_res: 'haproxy'} + _relation_data['clones'] = { + 'cl_{}_haproxy'.format(service): _haproxy_res + } + + if extra_settings: + for k, v in extra_settings.items(): + if _relation_data.get(k): + _relation_data[k].update(v) + else: + _relation_data[k] = v + + if config('dns-ha'): + update_hacluster_dns_ha(service, _relation_data) + else: + update_hacluster_vip(service, _relation_data) + + return { + 'json_{}'.format(k): json.dumps(v, **JSON_ENCODE_OPTIONS) + for k, v in _relation_data.items() if v + } + + +def update_hacluster_dns_ha(service, relation_data, + crm_ocf='ocf:maas:dns'): + """ Configure DNS-HA resources based on provided configuration + + @param service: Name of the service 
being configured + @param relation_data: Pointer to dictionary of relation data. + @param crm_ocf: Corosync Open Cluster Framework resource agent to use for + DNS HA + """ + # Validate the charm environment for DNS HA + assert_charm_supports_dns_ha() + + settings = ['os-admin-hostname', 'os-internal-hostname', + 'os-public-hostname', 'os-access-hostname'] + + # Check which DNS settings are set and update dictionaries + hostname_group = [] + for setting in settings: + hostname = config(setting) + if hostname is None: + log('DNS HA: Hostname setting {} is None. Ignoring.' + ''.format(setting), + DEBUG) + continue + m = re.search('os-(.+?)-hostname', setting) + if m: + endpoint_type = m.group(1) + # resolve_address's ADDRESS_MAP uses 'int' not 'internal' + if endpoint_type == 'internal': + endpoint_type = 'int' + else: + msg = ('Unexpected DNS hostname setting: {}. ' + 'Cannot determine endpoint_type name' + ''.format(setting)) + status_set('blocked', msg) + raise DNSHAException(msg) + + hostname_key = 'res_{}_{}_hostname'.format(service, endpoint_type) + if hostname_key in hostname_group: + log('DNS HA: Resource {}: {} already exists in ' + 'hostname group - skipping'.format(hostname_key, hostname), + DEBUG) + continue + + hostname_group.append(hostname_key) + relation_data['resources'][hostname_key] = crm_ocf + relation_data['resource_params'][hostname_key] = ( + 'params fqdn="{}" ip_address="{}"' + .format(hostname, resolve_address(endpoint_type=endpoint_type, + override=False))) + + if len(hostname_group) >= 1: + log('DNS HA: Hostname group is set with {} as members. ' + 'Informing the ha relation'.format(' '.join(hostname_group)), + DEBUG) + relation_data['groups'] = { + DNSHA_GROUP_NAME.format(service=service): ' '.join(hostname_group) + } + else: + msg = 'DNS HA: Hostname group has no members.' + status_set('blocked', msg) + raise DNSHAException(msg) + + +def get_vip_settings(vip): + """Calculate which nic is on the correct network for the given vip. + + If nic or netmask discovery fail then fallback to using charm supplied + config. If fallback is used this is indicated via the fallback variable. + + @param vip: VIP to lookup nic and cidr for. + @returns (str, str, bool): eg (iface, netmask, fallback) + """ + iface = get_iface_for_address(vip) + netmask = get_netmask_for_address(vip) + fallback = False + if iface is None: + iface = config('vip_iface') + fallback = True + if netmask is None: + netmask = config('vip_cidr') + fallback = True + return iface, netmask, fallback + + +def update_hacluster_vip(service, relation_data): + """ Configure VIP resources based on provided configuration + + @param service: Name of the service being configured + @param relation_data: Pointer to dictionary of relation data. 
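+
+    An illustrative (made up) outcome for a service ``ks`` with a single
+    IPv4 VIP where interface detection succeeded::
+
+        relation_data['resources'] = {
+            'res_ks_<sha1prefix>_vip': 'ocf:heartbeat:IPaddr2'}
+        relation_data['groups'] = {
+            'grp_ks_vips': 'res_ks_<sha1prefix>_vip'}
+
+    where ``<sha1prefix>`` is the first seven hex digits of the SHA-1 of
+    the VIP (shown symbolically, not computed).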
+ """ + cluster_config = get_hacluster_config() + vip_group = [] + vips_to_delete = [] + for vip in cluster_config['vip'].split(): + if is_ipv6(vip): + res_vip = 'ocf:heartbeat:IPv6addr' + vip_params = 'ipv6addr' + else: + res_vip = 'ocf:heartbeat:IPaddr2' + vip_params = 'ip' + + iface, netmask, fallback = get_vip_settings(vip) + + vip_monitoring = 'op monitor timeout="20s" interval="10s" depth="0"' + if iface is not None: + # NOTE(jamespage): Delete old VIP resources + # Old style naming encoding iface in name + # does not work well in environments where + # interface/subnet wiring is not consistent + vip_key = 'res_{}_{}_vip'.format(service, iface) + if vip_key in vips_to_delete: + vip_key = '{}_{}'.format(vip_key, vip_params) + vips_to_delete.append(vip_key) + + vip_key = 'res_{}_{}_vip'.format( + service, + hashlib.sha1(vip.encode('UTF-8')).hexdigest()[:7]) + + relation_data['resources'][vip_key] = res_vip + # NOTE(jamespage): + # Use option provided vip params if these where used + # instead of auto-detected values + if fallback: + relation_data['resource_params'][vip_key] = ( + 'params {ip}="{vip}" cidr_netmask="{netmask}" ' + 'nic="{iface}" {vip_monitoring}'.format( + ip=vip_params, + vip=vip, + iface=iface, + netmask=netmask, + vip_monitoring=vip_monitoring)) + else: + # NOTE(jamespage): + # let heartbeat figure out which interface and + # netmask to configure, which works nicely + # when network interface naming is not + # consistent across units. + relation_data['resource_params'][vip_key] = ( + 'params {ip}="{vip}" {vip_monitoring}'.format( + ip=vip_params, + vip=vip, + vip_monitoring=vip_monitoring)) + + vip_group.append(vip_key) + + if vips_to_delete: + try: + relation_data['delete_resources'].extend(vips_to_delete) + except KeyError: + relation_data['delete_resources'] = vips_to_delete + + if len(vip_group) >= 1: + key = VIP_GROUP_NAME.format(service=service) + try: + relation_data['groups'][key] = ' '.join(vip_group) + except KeyError: + relation_data['groups'] = { + key: ' '.join(vip_group) + } + + +def render_grafana_dashboard(prometheus_app_name, haproxy_dashboard): + """Load grafana dashboard json model and insert prometheus datasource. + + :param prometheus_app_name: name of the 'prometheus' application that will + be used as datasource in grafana dashboard + :type prometheus_app_name: str + :param haproxy_dashboard: path to haproxy dashboard + :type haproxy_dashboard: str + :return: Grafana dashboard json model as a str. + :rtype: str + """ + from charmhelpers.contrib.templating import jinja + + dashboard_template = os.path.basename(haproxy_dashboard) + dashboard_template_dir = os.path.dirname(haproxy_dashboard) + app_name = application_name() + datasource = "{} - Juju generated source".format(prometheus_app_name) + return jinja.render(dashboard_template, + {"datasource": datasource, + "app_name": app_name, + "prometheus_app_name": prometheus_app_name}, + template_dir=dashboard_template_dir, + jinja_env_args={"variable_start_string": "<< ", + "variable_end_string": " >>"}) diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/ip.py b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/ip.py new file mode 100644 index 00000000..2afad369 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/ip.py @@ -0,0 +1,260 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from charmhelpers.core.hookenv import (
+    NoNetworkBinding,
+    config,
+    unit_get,
+    service_name,
+    network_get_primary_address,
+)
+from charmhelpers.contrib.network.ip import (
+    get_address_in_network,
+    is_address_in_network,
+    is_ipv6,
+    get_ipv6_addr,
+    resolve_network_cidr,
+    get_iface_for_address
+)
+from charmhelpers.contrib.hahelpers.cluster import is_clustered
+
+PUBLIC = 'public'
+INTERNAL = 'int'
+ADMIN = 'admin'
+ACCESS = 'access'
+
+# TODO: reconcile 'int' vs 'internal' binding names
+ADDRESS_MAP = {
+    PUBLIC: {
+        'binding': 'public',
+        'config': 'os-public-network',
+        'fallback': 'public-address',
+        'override': 'os-public-hostname',
+    },
+    INTERNAL: {
+        'binding': 'internal',
+        'config': 'os-internal-network',
+        'fallback': 'private-address',
+        'override': 'os-internal-hostname',
+    },
+    ADMIN: {
+        'binding': 'admin',
+        'config': 'os-admin-network',
+        'fallback': 'private-address',
+        'override': 'os-admin-hostname',
+    },
+    ACCESS: {
+        'binding': 'access',
+        'config': 'access-network',
+        'fallback': 'private-address',
+        'override': 'os-access-hostname',
+    },
+    # Note (thedac) bridge to begin the reconciliation between 'int' vs
+    # 'internal' binding names
+    'internal': {
+        'binding': 'internal',
+        'config': 'os-internal-network',
+        'fallback': 'private-address',
+        'override': 'os-internal-hostname',
+    },
+}
+
+
+def canonical_url(configs, endpoint_type=PUBLIC):
+    """Returns the correct HTTP URL to this host given the state of HTTPS
+    configuration, hacluster and charm configuration.
+
+    :param configs: OSTemplateRenderer config templating object to inspect
+                    for a complete https context.
+    :param endpoint_type: str endpoint type to resolve.
+    :returns: str base URL for services on the current service unit.
+    """
+    scheme = _get_scheme(configs)
+
+    address = resolve_address(endpoint_type)
+    if is_ipv6(address):
+        address = "[{}]".format(address)
+
+    return '%s://%s' % (scheme, address)
+
+
+def _get_scheme(configs):
+    """Returns the scheme to use for the url (either http or https)
+    depending upon whether https is in the configs value.
+
+    :param configs: OSTemplateRenderer config templating object to inspect
+                    for a complete https context.
+    :returns: either 'http' or 'https' depending on whether https is
+              configured within the configs context.
+    """
+    scheme = 'http'
+    if configs and 'https' in configs.complete_contexts():
+        scheme = 'https'
+    return scheme
+
+
+def _get_address_override(endpoint_type=PUBLIC):
+    """Returns any address overrides that the user has defined based on the
+    endpoint type.
+
+    Note: this function allows for the service name to be inserted into the
+    address if the user specifies {service_name}.somehost.org.
+
+    :param endpoint_type: the type of endpoint to retrieve the override
+                          value for.
+    :returns: any endpoint address or hostname that the user has overridden
+              or None if an override is not present.
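+
+    Example (config value and service name are illustrative): with
+    'os-public-hostname' set to '{service_name}.example.com' for a service
+    named 'rgw', _get_address_override(PUBLIC) returns 'rgw.example.com'.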
+ """ + override_key = ADDRESS_MAP[endpoint_type]['override'] + addr_override = config(override_key) + if not addr_override: + return None + else: + return addr_override.format(service_name=service_name()) + + +def local_address(unit_get_fallback='public-address'): + """Return a network address for this unit. + + Attempt to retrieve a 'default' IP address for this unit + from network-get. If this is running with an old version of Juju then + fallback to unit_get. + + Note on juju < 2.9 the binding to juju-info may not exist, so fall back to + the unit-get. + + :param unit_get_fallback: Either 'public-address' or 'private-address'. + Only used with old versions of Juju. + :type unit_get_fallback: str + :returns: IP Address + :rtype: str + """ + try: + return network_get_primary_address('juju-info') + except (NotImplementedError, NoNetworkBinding): + return unit_get(unit_get_fallback) + + +def get_invalid_vips(): + """Check if any of the provided vips are invalid. + A vip is invalid if it doesn't belong to the subnet in any interface. + If all vips are valid, this returns an empty list. + + :returns: A list of strings, where each string is an invalid vip address. + :rtype: list + """ + + clustered = is_clustered() + vips = config('vip') + if vips: + vips = vips.split() + invalid_vips = [] + + if clustered and vips: + for vip in vips: + iface_for_vip = get_iface_for_address(vip) + if iface_for_vip is None: + invalid_vips.append(vip) + + return invalid_vips + + +def resolve_address(endpoint_type=PUBLIC, override=True): + """Return unit address depending on net config. + + If unit is clustered with vip(s) and has net splits defined, return vip on + correct network. If clustered with no nets defined, return primary vip. + + If not clustered, return unit address ensuring address is on configured net + split if one is configured, or a Juju 2.0 extra-binding has been used. 
+
+    :param endpoint_type: Network endpoint type
+    :param override: Accept hostname overrides or not
+    """
+    resolved_address = None
+    if override:
+        resolved_address = _get_address_override(endpoint_type)
+        if resolved_address:
+            return resolved_address
+
+    vips = config('vip')
+    if vips:
+        vips = vips.split()
+
+    net_type = ADDRESS_MAP[endpoint_type]['config']
+    net_addr = config(net_type)
+    net_fallback = ADDRESS_MAP[endpoint_type]['fallback']
+    binding = ADDRESS_MAP[endpoint_type]['binding']
+    clustered = is_clustered()
+
+    if clustered and vips:
+        if net_addr:
+            for vip in vips:
+                if is_address_in_network(net_addr, vip):
+                    resolved_address = vip
+                    break
+        else:
+            # NOTE: endeavour to check vips against network space
+            #       bindings
+            try:
+                bound_cidr = resolve_network_cidr(
+                    network_get_primary_address(binding)
+                )
+                for vip in vips:
+                    if is_address_in_network(bound_cidr, vip):
+                        resolved_address = vip
+                        break
+            except (NotImplementedError, NoNetworkBinding):
+                # NOTE: no net-splits configured and no support for extra
+                #       bindings/network spaces, so expect a single vip
+                resolved_address = vips[0]
+    else:
+        if config('prefer-ipv6'):
+            fallback_addr = get_ipv6_addr(exc_list=vips)[0]
+        else:
+            fallback_addr = local_address(unit_get_fallback=net_fallback)
+
+        if net_addr:
+            resolved_address = get_address_in_network(net_addr, fallback_addr)
+        else:
+            # NOTE: only try to use extra bindings if legacy network
+            #       configuration is not in use
+            try:
+                resolved_address = network_get_primary_address(binding)
+            except (NotImplementedError, NoNetworkBinding):
+                resolved_address = fallback_addr
+
+    if resolved_address is None:
+        raise ValueError("Unable to resolve a suitable IP address based on "
+                         "charm state and configuration. (net_type=%s, "
+                         "clustered=%s)" % (net_type, clustered))
+
+    return resolved_address
+
+
+def get_vip_in_network(network):
+    """Return the first configured VIP that falls within the given network.
+
+    :param network: CIDR of the network to match VIPs against.
+    :returns: matching VIP, or None if no configured VIP is in the network.
+    """
+    matching_vip = None
+    vips = config('vip')
+    if vips:
+        for vip in vips.split():
+            if is_address_in_network(network, vip):
+                matching_vip = vip
+    return matching_vip
+
+
+def get_default_api_bindings():
+    """Return the list of default API network binding names."""
+    _default_bindings = []
+    for binding in [INTERNAL, ADMIN, PUBLIC]:
+        _default_bindings.append(ADDRESS_MAP[binding]['binding'])
+    return _default_bindings
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/keystone.py b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/keystone.py
new file mode 100644
index 00000000..5775aa44
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/keystone.py
@@ -0,0 +1,170 @@
+#
+# Copyright 2017 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from charmhelpers.fetch import apt_install
+from charmhelpers.contrib.openstack.context import IdentityServiceContext
+from charmhelpers.core.hookenv import (
+    log,
+    ERROR,
+)
+
+
+def get_api_suffix(api_version):
+    """Return the formatted api suffix for the given version
+    @param api_version: version of the keystone endpoint
+    @returns the api suffix formatted according to the given api
+             version
+    """
+    return 'v2.0' if api_version in (2, "2", "2.0") else 'v3'
+
+
+def format_endpoint(schema, addr, port, api_version):
+    """Return a formatted keystone endpoint
+    @param schema: http or https
+    @param addr: ipv4/ipv6 host of the keystone service
+    @param port: port of the keystone service
+    @param api_version: 2 or 3
+    @returns a fully formatted keystone endpoint
+    """
+    return '{}://{}:{}/{}/'.format(schema, addr, port,
+                                   get_api_suffix(api_version))
+
+
+def get_keystone_manager(endpoint, api_version, **kwargs):
+    """Return a KeystoneManager for the correct API version
+
+    @param endpoint: the keystone endpoint to point client at
+    @param api_version: version of the keystone api the client should use
+    @param kwargs: token or username/tenant/password information
+    @returns KeystoneManager class used for interrogating keystone
+    """
+    if api_version == 2:
+        return KeystoneManager2(endpoint, **kwargs)
+    if api_version == 3:
+        return KeystoneManager3(endpoint, **kwargs)
+    raise ValueError('No manager found for api version {}'.format(api_version))
+
+
+def get_keystone_manager_from_identity_service_context():
+    """Return a KeystoneManager generated from an instance of
+    charmhelpers.contrib.openstack.context.IdentityServiceContext
+    @returns KeystoneManager instance
+    """
+    context = IdentityServiceContext()()
+    if not context:
+        msg = "Identity service context cannot be generated"
+        log(msg, level=ERROR)
+        raise ValueError(msg)
+
+    endpoint = format_endpoint(context['service_protocol'],
+                               context['service_host'],
+                               context['service_port'],
+                               context['api_version'])
+
+    if context['api_version'] in (2, "2.0"):
+        api_version = 2
+    else:
+        api_version = 3
+
+    return get_keystone_manager(endpoint, api_version,
+                                username=context['admin_user'],
+                                password=context['admin_password'],
+                                tenant_name=context['admin_tenant_name'])
+
+
+class KeystoneManager(object):
+
+    def resolve_service_id(self, service_name=None, service_type=None):
+        """Find the service_id of a given service"""
+        services = [s._info for s in self.api.services.list()]
+
+        service_name = service_name.lower()
+        for s in services:
+            name = s['name'].lower()
+            if service_type and service_name:
+                if (service_name == name and service_type == s['type']):
+                    return s['id']
+            elif service_name and service_name == name:
+                return s['id']
+            elif service_type and service_type == s['type']:
+                return s['id']
+        return None
+
+    def service_exists(self, service_name=None, service_type=None):
+        """Determine if the given service exists on the service list"""
+        return self.resolve_service_id(service_name, service_type) is not None
+
+
+class KeystoneManager2(KeystoneManager):
+
+    def __init__(self, endpoint, **kwargs):
+        try:
+            from keystoneclient.v2_0 import client
+            from keystoneclient.auth.identity import v2
+            from keystoneclient import session
+        except ImportError:
+            apt_install(["python3-keystoneclient"], fatal=True)
+
+            from keystoneclient.v2_0 import client
+            from keystoneclient.auth.identity import v2
+            from keystoneclient import session
+
+        self.api_version = 2
+
+        token = kwargs.get("token", None)
+        if token:
+            api = client.Client(endpoint=endpoint, token=token)
+        else:
+            auth = v2.Password(username=kwargs.get("username"),
+                               password=kwargs.get("password"),
+                               tenant_name=kwargs.get("tenant_name"),
+                               auth_url=endpoint)
+            sess = session.Session(auth=auth)
+            api = client.Client(session=sess)
+
+        self.api = api
+
+
+class KeystoneManager3(KeystoneManager):
+
+    def __init__(self, endpoint, **kwargs):
+        try:
+            from keystoneclient.v3 import client
+            from keystoneclient.auth import token_endpoint
+            from keystoneclient import session
+            from keystoneclient.auth.identity import v3
+        except ImportError:
+            apt_install(["python3-keystoneclient"], fatal=True)
+
+            from keystoneclient.v3 import client
+            from keystoneclient.auth import token_endpoint
+            from keystoneclient import session
+            from keystoneclient.auth.identity import v3
+
+        self.api_version = 3
+
+        token = kwargs.get("token", None)
+        if token:
+            auth = token_endpoint.Token(endpoint=endpoint,
+                                        token=token)
+            sess = session.Session(auth=auth)
+        else:
+            auth = v3.Password(auth_url=endpoint,
+                               user_id=kwargs.get("username"),
+                               password=kwargs.get("password"),
+                               project_id=kwargs.get("tenant_name"))
+            sess = session.Session(auth=auth)
+
+        self.api = client.Client(session=sess)
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/neutron.py b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/neutron.py
new file mode 100644
index 00000000..47772467
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/neutron.py
@@ -0,0 +1,351 @@
+# Copyright 2014-2021 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Various utilities for dealing with Neutron and the renaming from Quantum.
+
+from subprocess import check_output
+
+from charmhelpers.core.hookenv import (
+    config,
+    log,
+    ERROR,
+)
+
+from charmhelpers.contrib.openstack.utils import (
+    os_release,
+    CompareOpenStackReleases,
+)
+
+
+def headers_package():
+    """Return the linux-headers package for the running kernel, which is
+    needed for building the DKMS package."""
+    kver = check_output(['uname', '-r']).decode('UTF-8').strip()
+    return 'linux-headers-%s' % kver
+
+
+QUANTUM_CONF_DIR = '/etc/quantum'
+
+
+def kernel_version():
+    """Retrieve the current major kernel version as a tuple e.g. (3, 13)"""
+    kver = check_output(['uname', '-r']).decode('UTF-8').strip()
+    kver = kver.split('.')
+    return (int(kver[0]), int(kver[1]))
+
+
+def determine_dkms_package():
+    """Determine which DKMS package should be used based on kernel version"""
+    # NOTE: 3.13 kernels have support for GRE and VXLAN native
+    if kernel_version() >= (3, 13):
+        return []
+    else:
+        return [headers_package(), 'openvswitch-datapath-dkms']
+
+
+# legacy
+
+
+def quantum_plugins():
+    return {
+        'ovs': {
+            'config': '/etc/quantum/plugins/openvswitch/'
+                      'ovs_quantum_plugin.ini',
+            'driver': 'quantum.plugins.openvswitch.ovs_quantum_plugin.'
+                      'OVSQuantumPluginV2',
+            'contexts': [],
+            'services': ['quantum-plugin-openvswitch-agent'],
+            'packages': [determine_dkms_package(),
+                         ['quantum-plugin-openvswitch-agent']],
+            'server_packages': ['quantum-server',
+                                'quantum-plugin-openvswitch'],
+            'server_services': ['quantum-server']
+        },
+        'nvp': {
+            'config': '/etc/quantum/plugins/nicira/nvp.ini',
+            'driver': 'quantum.plugins.nicira.nicira_nvp_plugin.'
+                      'QuantumPlugin.NvpPluginV2',
+            'contexts': [],
+            'services': [],
+            'packages': [],
+            'server_packages': ['quantum-server',
+                                'quantum-plugin-nicira'],
+            'server_services': ['quantum-server']
+        }
+    }
+
+
+NEUTRON_CONF_DIR = '/etc/neutron'
+
+
+def neutron_plugins():
+    release = os_release('nova-common')
+    plugins = {
+        'ovs': {
+            'config': '/etc/neutron/plugins/openvswitch/'
+                      'ovs_neutron_plugin.ini',
+            'driver': 'neutron.plugins.openvswitch.ovs_neutron_plugin.'
+                      'OVSNeutronPluginV2',
+            'contexts': [],
+            'services': ['neutron-plugin-openvswitch-agent'],
+            'packages': [determine_dkms_package(),
+                         ['neutron-plugin-openvswitch-agent']],
+            'server_packages': ['neutron-server',
+                                'neutron-plugin-openvswitch'],
+            'server_services': ['neutron-server']
+        },
+        'nvp': {
+            'config': '/etc/neutron/plugins/nicira/nvp.ini',
+            'driver': 'neutron.plugins.nicira.nicira_nvp_plugin.'
+                      'NeutronPlugin.NvpPluginV2',
+            'contexts': [],
+            'services': [],
+            'packages': [],
+            'server_packages': ['neutron-server',
+                                'neutron-plugin-nicira'],
+            'server_services': ['neutron-server']
+        },
+        'nsx': {
+            'config': '/etc/neutron/plugins/vmware/nsx.ini',
+            'driver': 'vmware',
+            'contexts': [],
+            'services': [],
+            'packages': [],
+            'server_packages': ['neutron-server',
+                                'neutron-plugin-vmware'],
+            'server_services': ['neutron-server']
+        },
+        'n1kv': {
+            'config': '/etc/neutron/plugins/cisco/cisco_plugins.ini',
+            'driver': 'neutron.plugins.cisco.network_plugin.PluginV2',
+            'contexts': [],
+            'services': [],
+            'packages': [determine_dkms_package(),
+                         ['neutron-plugin-cisco']],
+            'server_packages': ['neutron-server',
+                                'neutron-plugin-cisco'],
+            'server_services': ['neutron-server']
+        },
+        'Calico': {
+            'config': '/etc/neutron/plugins/ml2/ml2_conf.ini',
+            'driver': 'neutron.plugins.ml2.plugin.Ml2Plugin',
+            'contexts': [],
+            'services': ['calico-felix',
+                         'bird',
+                         'neutron-dhcp-agent',
+                         'nova-api-metadata',
+                         'etcd'],
+            'packages': [determine_dkms_package(),
+                         ['calico-compute',
+                          'bird',
+                          'neutron-dhcp-agent',
+                          'nova-api-metadata',
+                          'etcd']],
+            'server_packages': ['neutron-server', 'calico-control', 'etcd'],
+            'server_services': ['neutron-server', 'etcd']
+        },
+        'vsp': {
+            'config': '/etc/neutron/plugins/nuage/nuage_plugin.ini',
+            'driver': 'neutron.plugins.nuage.plugin.NuagePlugin',
+            'contexts': [],
+            'services': [],
+            'packages': [],
+            'server_packages': ['neutron-server', 'neutron-plugin-nuage'],
+            'server_services': ['neutron-server']
+        },
+        'plumgrid': {
+            'config': '/etc/neutron/plugins/plumgrid/plumgrid.ini',
+            'driver': ('neutron.plugins.plumgrid.plumgrid_plugin'
+                       '.plumgrid_plugin.NeutronPluginPLUMgridV2'),
+            'contexts': [],
+            'services': [],
+            'packages': ['plumgrid-lxc',
+                         'iovisor-dkms'],
+            'server_packages': ['neutron-server',
+                                'neutron-plugin-plumgrid'],
+            'server_services': ['neutron-server']
+        },
+        'midonet': {
+            'config': '/etc/neutron/plugins/midonet/midonet.ini',
+            'driver': 'midonet.neutron.plugin.MidonetPluginV2',
+            'contexts': [],
+            'services': [],
+            'packages': [determine_dkms_package()],
+            'server_packages': ['neutron-server',
+                                'python-neutron-plugin-midonet'],
+            'server_services': ['neutron-server']
+        }
+    }
+    if CompareOpenStackReleases(release) >= 'icehouse':
+        # NOTE: patch in ml2 plugin for icehouse onwards
+        plugins['ovs']['config'] = '/etc/neutron/plugins/ml2/ml2_conf.ini'
+        plugins['ovs']['driver'] = 'neutron.plugins.ml2.plugin.Ml2Plugin'
+        plugins['ovs']['server_packages'] = ['neutron-server',
+                                             'neutron-plugin-ml2']
+        # NOTE: patch in vmware renames nvp->nsx for icehouse onwards
+        plugins['nvp'] = plugins['nsx']
+    if CompareOpenStackReleases(release) >= 'kilo':
+        plugins['midonet']['driver'] = (
+            'neutron.plugins.midonet.plugin.MidonetPluginV2')
+    if CompareOpenStackReleases(release) >= 'liberty':
+        plugins['midonet']['driver'] = (
+            'midonet.neutron.plugin_v1.MidonetPluginV2')
+        plugins['midonet']['server_packages'].remove(
+            'python-neutron-plugin-midonet')
+        plugins['midonet']['server_packages'].append(
+            'python-networking-midonet')
+        plugins['plumgrid']['driver'] = (
+            'networking_plumgrid.neutron.plugins'
+            '.plugin.NeutronPluginPLUMgridV2')
+        plugins['plumgrid']['server_packages'].remove(
+            'neutron-plugin-plumgrid')
+    if CompareOpenStackReleases(release) >= 'mitaka':
+        plugins['nsx']['server_packages'].remove('neutron-plugin-vmware')
+        plugins['nsx']['server_packages'].append('python-vmware-nsx')
+        plugins['nsx']['config'] = '/etc/neutron/nsx.ini'
+        plugins['vsp']['driver'] = (
+            'nuage_neutron.plugins.nuage.plugin.NuagePlugin')
+    if CompareOpenStackReleases(release) >= 'newton':
+        plugins['vsp']['config'] = '/etc/neutron/plugins/ml2/ml2_conf.ini'
+        plugins['vsp']['driver'] = 'neutron.plugins.ml2.plugin.Ml2Plugin'
+        plugins['vsp']['server_packages'] = ['neutron-server',
+                                             'neutron-plugin-ml2']
+    return plugins
+
+
+def neutron_plugin_attribute(plugin, attr, net_manager=None):
+    manager = net_manager or network_manager()
+    if manager == 'quantum':
+        plugins = quantum_plugins()
+    elif manager == 'neutron':
+        plugins = neutron_plugins()
+    else:
+        msg = "Network manager '%s' does not support plugins." % (manager)
+        log(msg, level=ERROR)
+        raise Exception(msg)
+
+    try:
+        _plugin = plugins[plugin]
+    except KeyError:
+        msg = 'Unrecognised plugin for %s: %s' % (manager, plugin)
+        log(msg, level=ERROR)
+        raise Exception(msg)
+
+    try:
+        return _plugin[attr]
+    except KeyError:
+        return None
+
+
+def network_manager():
+    '''
+    Deals with the renaming of Quantum to Neutron in H and any situations
+    that require compatibility (e.g. deploying H with network-manager=quantum,
+    upgrading from G).
+    '''
+    release = os_release('nova-common')
+    manager = config('network-manager').lower()
+
+    if manager not in ['quantum', 'neutron']:
+        return manager
+
+    if release in ['essex']:
+        # E does not support neutron
+        msg = 'Neutron networking not supported in Essex.'
+        log(msg, level=ERROR)
+        raise Exception(msg)
+    elif release in ['folsom', 'grizzly']:
+        # neutron is named quantum in F and G
+        return 'quantum'
+    else:
+        # ensure accurate naming for all releases post-H
+        return 'neutron'
+
+
+def parse_mappings(mappings, key_rvalue=False):
+    """By default mappings are lvalue keyed.
+
+    If key_rvalue is True, the mapping will be reversed to allow multiple
+    configs for the same lvalue.
+    """
+    parsed = {}
+    if mappings:
+        mappings = mappings.split()
+        for m in mappings:
+            p = m.partition(':')
+
+            if key_rvalue:
+                key_index = 2
+                val_index = 0
+                # if there is no rvalue skip to next
+                if not p[1]:
+                    continue
+            else:
+                key_index = 0
+                val_index = 2
+
+            key = p[key_index].strip()
+            parsed[key] = p[val_index].strip()
+
+    return parsed
+
+
+def parse_bridge_mappings(mappings):
+    """Parse bridge mappings.
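+
+    For example (values illustrative), 'physnet1:br-ex physnet2:br-data'
+    parses to {'physnet1': 'br-ex', 'physnet2': 'br-data'}.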
+
+    Mappings must be a space-delimited list of provider:bridge mappings.
+
+    Returns dict of the form {provider:bridge}.
+    """
+    return parse_mappings(mappings)
+
+
+def parse_data_port_mappings(mappings, default_bridge='br-data'):
+    """Parse data port mappings.
+
+    Mappings must be a space-delimited list of bridge:port.
+
+    Returns dict of the form {port:bridge} where ports may be mac addresses
+    or interface names.
+    """
+
+    # NOTE(dosaboy): we use the rvalue as the key here since it may be a MAC
+    # address, which will differ across units; keying on the rvalue allows
+    # multiple values to be proposed for the same port so that the
+    # first-known-good one can be chosen.
+    _mappings = parse_mappings(mappings, key_rvalue=True)
+    if not _mappings or list(_mappings.values()) == ['']:
+        if not mappings:
+            return {}
+
+        # For backwards-compatibility we need to support port-only provided
+        # in config.
+        _mappings = {mappings.split()[0]: default_bridge}
+
+    ports = _mappings.keys()
+    if len(set(ports)) != len(ports):
+        raise Exception("It is not allowed to have the same port configured "
+                        "on more than one bridge")
+
+    return _mappings
+
+
+def parse_vlan_range_mappings(mappings):
+    """Parse vlan range mappings.
+
+    Mappings must be a space-delimited list of provider:start:end mappings.
+
+    The start:end range is optional and may be omitted.
+
+    Returns dict of the form {provider: (start, end)}.
+    """
+    _mappings = parse_mappings(mappings)
+    return {p: tuple(r.split(':')) for p, r in _mappings.items()}
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/policy_rcd.py b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/policy_rcd.py
new file mode 100644
index 00000000..ecffbc68
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/policy_rcd.py
@@ -0,0 +1,173 @@
+# Copyright 2021 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Module for managing policy-rc.d script and associated files.
+
+This module manages the installation of /usr/sbin/policy-rc.d, the
+policy files and the event files. When a package update occurs the
+packaging system calls:
+
+policy-rc.d [options] <initscript ID> <actions>
+
+The return code of the script determines if the packaging system
+will perform that action on the given service. The policy-rc.d
+implementation installed by this module checks if an action is
+permitted by checking policy files placed in /etc/policy-rc.d.
+If a policy file exists which denies the requested action then
+this is recorded in an event file which is placed in
+/var/lib/policy-rc.d.
+"""
+
+import os
+import shutil
+import tempfile
+import yaml
+
+import charmhelpers.contrib.openstack.files as os_files
+import charmhelpers.contrib.openstack.alternatives as alternatives
+import charmhelpers.core.hookenv as hookenv
+import charmhelpers.core.host as host
+
+POLICY_HEADER = """# Managed by juju\n"""
+POLICY_DEFERRED_EVENTS_DIR = '/var/lib/policy-rc.d'
+POLICY_CONFIG_DIR = '/etc/policy-rc.d'
+
+
+def get_policy_file_name():
+    """Get the name of the policy file for this application.
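+
+    For an application named 'keystone' (name illustrative), this returns
+    '/etc/policy-rc.d/charm-keystone.policy'.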
+
+    :returns: Policy file name
+    :rtype: str
+    """
+    application_name = hookenv.service_name()
+    return '{}/charm-{}.policy'.format(POLICY_CONFIG_DIR, application_name)
+
+
+def read_default_policy_file():
+    """Return the policy file.
+
+    A policy is in the form:
+        blocked_actions:
+            neutron-dhcp-agent: [restart, stop, try-restart]
+            neutron-l3-agent: [restart, stop, try-restart]
+            neutron-metadata-agent: [restart, stop, try-restart]
+            neutron-openvswitch-agent: [restart, stop, try-restart]
+            openvswitch-switch: [restart, stop, try-restart]
+            ovs-vswitchd: [restart, stop, try-restart]
+            ovs-vswitchd-dpdk: [restart, stop, try-restart]
+            ovsdb-server: [restart, stop, try-restart]
+        policy_requestor_name: neutron-openvswitch
+        policy_requestor_type: charm
+
+    :returns: Policy
+    :rtype: Dict[str, Union[str, Dict[str, List[str]]]]
+    """
+    policy = {}
+    policy_file = get_policy_file_name()
+    if os.path.exists(policy_file):
+        with open(policy_file, 'r') as f:
+            policy = yaml.safe_load(f)
+    return policy
+
+
+def write_policy_file(policy_file, policy):
+    """Write policy to disk.
+
+    :param policy_file: Name of policy file
+    :type policy_file: str
+    :param policy: Policy
+    :type policy: Dict[str, Union[str, Dict[str, List[str]]]]
+    """
+    with tempfile.NamedTemporaryFile('w', delete=False) as f:
+        f.write(POLICY_HEADER)
+        yaml.dump(policy, f)
+        tmp_file_name = f.name
+    shutil.move(tmp_file_name, policy_file)
+
+
+def remove_policy_file():
+    """Remove policy file."""
+    try:
+        os.remove(get_policy_file_name())
+    except FileNotFoundError:
+        pass
+
+
+def install_policy_rcd():
+    """Install policy-rc.d components."""
+    source_file_dir = os.path.dirname(os.path.abspath(os_files.__file__))
+    policy_rcd_exec = "/var/lib/charm/{}/policy-rc.d".format(
+        hookenv.service_name())
+    host.mkdir(os.path.dirname(policy_rcd_exec))
+    shutil.copy2(
+        '{}/policy_rc_d_script.py'.format(source_file_dir),
+        policy_rcd_exec)
+    # policy-rc.d must be installed via the alternatives system:
+    # https://people.debian.org/~hmh/invokerc.d-policyrc.d-specification.txt
+    if not os.path.exists('/usr/sbin/policy-rc.d'):
+        alternatives.install_alternative(
+            'policy-rc.d',
+            '/usr/sbin/policy-rc.d',
+            policy_rcd_exec)
+    host.mkdir(POLICY_CONFIG_DIR)
+
+
+def get_default_policy():
+    """Return the default policy structure.
+
+    :returns: Policy
+    :rtype: Dict[str, Union[str, Dict[str, List[str]]]]
+    """
+    policy = {
+        'policy_requestor_name': hookenv.service_name(),
+        'policy_requestor_type': 'charm',
+        'blocked_actions': {}}
+    return policy
+
+
+def add_policy_block(service, blocked_actions):
+    """Update a policy file with a new list of actions.
+
+    :param service: Service name
+    :type service: str
+    :param blocked_actions: Actions to block
+    :type blocked_actions: List[str]
+    """
+    policy = read_default_policy_file() or get_default_policy()
+    policy_file = get_policy_file_name()
+    if policy['blocked_actions'].get(service):
+        policy['blocked_actions'][service].extend(blocked_actions)
+    else:
+        policy['blocked_actions'][service] = blocked_actions
+    policy['blocked_actions'][service] = sorted(
+        list(set(policy['blocked_actions'][service])))
+    write_policy_file(policy_file, policy)
+
+
+def remove_policy_block(service, unblocked_actions):
+    """Remove a list of actions from the policy file.
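+
+    A usage sketch (service and action names are illustrative)::
+
+        # stop blocking restarts of neutron-dhcp-agent
+        remove_policy_block('neutron-dhcp-agent', ['restart'])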
+
+    :param service: Service name
+    :type service: str
+    :param unblocked_actions: Actions to unblock
+    :type unblocked_actions: List[str]
+    """
+    policy_file = get_policy_file_name()
+    policy = read_default_policy_file()
+    for action in unblocked_actions:
+        try:
+            policy['blocked_actions'][service].remove(action)
+        except (KeyError, ValueError):
+            continue
+    write_policy_file(policy_file, policy)
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/policyd.py b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/policyd.py
new file mode 100644
index 00000000..767943c2
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/policyd.py
@@ -0,0 +1,763 @@
+# Copyright 2019-2021 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import collections
+import contextlib
+import os
+import shutil
+import yaml
+import zipfile
+
+import charmhelpers
+import charmhelpers.core.hookenv as hookenv
+import charmhelpers.core.host as ch_host
+
+# Features provided by this module:
+
+"""
+Policy.d helper functions
+=========================
+
+The functions in this module are designed, as a set, to provide an
+easy-to-use set of hooks for classic charms to add in
+/etc/<service-name>/policy.d/ directory override YAML files.
+
+(For charms.openstack charms, a mixin class is provided for this
+functionality).
+
+In order to "hook" this functionality into a (classic) charm, two functions
+are provided:
+
+    maybe_do_policyd_overrides(openstack_release,
+                               service,
+                               blacklist_paths=None,
+                               blacklist_keys=None,
+                               template_function=None,
+                               restart_handler=None)
+
+    maybe_do_policyd_overrides_on_config_changed(openstack_release,
+                                                 service,
+                                                 blacklist_paths=None,
+                                                 blacklist_keys=None,
+                                                 template_function=None,
+                                                 restart_handler=None)
+
+(See the docstrings for details on the parameters)
+
+The functions should be called from the install and upgrade hooks in the
+charm. The `maybe_do_policyd_overrides_on_config_changed` function is
+designed to be called on the config-changed hook, in that it does an
+additional check to ensure that an already overridden policy.d in an upgrade
+or install hook isn't repeated.
+
+In order to *enable* this functionality, the charm's install, config_changed,
+and upgrade_charm hooks need to be modified, and a new config option (see
+below) needs to be added. The README for the charm should also be updated.
+
+Examples from the keystone charm are:
+
+@hooks.hook('install.real')
+@harden()
+def install():
+    ...
+    # call the policy overrides handler which will install any policy overrides
+    maybe_do_policyd_overrides(os_release('keystone'), 'keystone')
+
+
+@hooks.hook('config-changed')
+@restart_on_change(restart_map(), restart_functions=restart_function_map())
+@harden()
+def config_changed():
+    ...
+    # call the policy overrides handler which will install any policy overrides
+    maybe_do_policyd_overrides_on_config_changed(os_release('keystone'),
+                                                 'keystone')
+
+@hooks.hook('upgrade-charm')
+@restart_on_change(restart_map(), stopstart=True)
+@harden()
+def upgrade_charm():
+    ...
+    # call the policy overrides handler which will install any policy overrides
+    maybe_do_policyd_overrides(os_release('keystone'), 'keystone')
+
+Status Line
+===========
+
+The workload status code in charm-helpers has been modified to detect if
+policy.d override code has been incorporated into the charm by checking for
+the new config variable (in the config.yaml). If it has been, then the
+workload status line will automatically show "PO:" at the beginning of the
+workload status for that unit/service if the config option is set. If the
+policy override is broken, then "PO (broken):" will be shown. No changes to
+the charm (apart from those already mentioned) are needed to enable this
+functionality. (charms.openstack charms also get this functionality, but
+please see that library for further details).
+"""
+
+# The config.yaml for the charm should contain the following for the config
+# option:
+
+"""
+  use-policyd-override:
+    type: boolean
+    default: False
+    description: |
+      If True then use the resource file named 'policyd-override' to install
+      override YAML files in the service's policy.d directory. The resource
+      file should be a ZIP file containing at least one yaml file with a
+      .yaml or .yml extension. If False then remove the overrides.
+"""
+
+# The metadata.yaml for the charm should contain the following:
+"""
+resources:
+  policyd-override:
+    type: file
+    filename: policyd-override.zip
+    description: The policy.d overrides file
+"""
+
+# The README for the charm should contain the following:
+"""
+Policy Overrides
+----------------
+
+This feature allows for policy overrides using the `policy.d` directory.
+This is an **advanced** feature and the policies that the OpenStack service
+supports should be clearly and unambiguously understood before trying to
+override, or add to, the default policies that the service uses. The charm
+also has some policy defaults. They should also be understood before being
+overridden.
+
+> **Caution**: It is possible to break the system (for tenants and other
+  services) if policies are incorrectly applied to the service.
+
+Policy overrides are YAML files that contain rules that will add to, or
+override, existing policy rules in the service. The `policy.d` directory is
+a place to put the YAML override files. This charm owns the
+`/etc/keystone/policy.d` directory, and as such, any manual changes to it
+will be overwritten on charm upgrades.
+
+Overrides are provided to the charm using a Juju resource called
+`policyd-override`. The resource is a ZIP file. This file, say
+`overrides.zip`, is attached to the charm by:
+
+    juju attach-resource <charm-name> policyd-override=overrides.zip
+
+The policy override is enabled in the charm using:
+
+    juju config <charm-name> use-policyd-override=true
+
+When `use-policyd-override` is `True` the status line of the charm will be
+prefixed with `PO:` indicating that policies have been overridden. If the
+installation of the policy override YAML files failed for any reason then
+the status line will be prefixed with `PO (broken):`. The log file for the
+charm will indicate the reason. No policy override files are installed if
+the `PO (broken):` is shown.
The status line indicates that the
+overrides are broken, not that the policy for the service has failed. The
+policy will be the defaults for the charm and service.
+
+Policy overrides on one service may affect the functionality of another
+service. Therefore, it may be necessary to provide policy overrides for
+multiple service charms to achieve a consistent set of policies across the
+OpenStack system. The charms for the other services that may need overrides
+should be checked to ensure that they support overrides before proceeding.
+"""
+
+POLICYD_VALID_EXTS = ['.yaml', '.yml', '.j2', '.tmpl', '.tpl']
+POLICYD_TEMPLATE_EXTS = ['.j2', '.tmpl', '.tpl']
+POLICYD_RESOURCE_NAME = "policyd-override"
+POLICYD_CONFIG_NAME = "use-policyd-override"
+POLICYD_SUCCESS_FILENAME = "policyd-override-success"
+POLICYD_LOG_LEVEL_DEFAULT = hookenv.INFO
+POLICYD_ALWAYS_BLACKLISTED_KEYS = ("admin_required", "cloud_admin")
+
+
+class BadPolicyZipFile(Exception):
+
+    def __init__(self, log_message):
+        self.log_message = log_message
+
+    def __str__(self):
+        return self.log_message
+
+
+class BadPolicyYamlFile(Exception):
+
+    def __init__(self, log_message):
+        self.log_message = log_message
+
+    def __str__(self):
+        return self.log_message
+
+
+def is_policyd_override_valid_on_this_release(openstack_release):
+    """Check that the charm is running on at least Ubuntu Xenial, and at
+    least the queens release.
+
+    :param openstack_release: the release codename that is installed.
+    :type openstack_release: str
+    :returns: True if okay
+    :rtype: bool
+    """
+    # NOTE(ajkavanagh) circular import! This is because the status message
+    # generation code in utils has to call into this module, but this function
+    # needs the CompareOpenStackReleases() function. The only way to solve
+    # this is either to put ALL of this module into utils, or refactor one or
+    # other of the CompareOpenStackReleases or status message generation code
+    # into a 3rd module.
+    import charmhelpers.contrib.openstack.utils as ch_utils
+    return ch_utils.CompareOpenStackReleases(openstack_release) >= 'queens'
+
+
+def maybe_do_policyd_overrides(openstack_release,
+                               service,
+                               blacklist_paths=None,
+                               blacklist_keys=None,
+                               template_function=None,
+                               restart_handler=None,
+                               user=None,
+                               group=None,
+                               config_changed=False):
+    """If the config option is set, get the resource file and process it to
+    enable the policy.d overrides for the service passed.
+
+    The param `openstack_release` is required as the policyd overrides
+    feature is only supported on openstack_release "queens" or later, and on
+    ubuntu "xenial" or later. Prior to these versions, this feature is a NOP.
+
+    The optional template_function is a function that accepts a string and
+    has an opportunity to modify the loaded file prior to it being read by
+    yaml.safe_load(). This allows the charm to perform "templating" using
+    charm derived data.
+
+    The param blacklist_paths are paths (within the service's policy.d
+    directory) that should not be touched.
+
+    The param blacklist_keys are keys that must not appear in the yaml file.
+    If they do, then the whole policy.d file fails.
+
+    The yaml file extracted from the resource_file (which is a zipped file)
+    has its file path reconstructed. This, also, must not match any path in
+    the black list.
+
+    The param restart_handler is an optional Callable that is called to
+    perform the service restart if the policy.d file is changed. This should
+    normally be None as oslo.policy automatically picks up changes in the
+    policy.d directory.
However, for any services where this is buggy, a
+    restart_handler can be used to force the policy.d files to be read.
+
+    If the config_changed param is True, then the handling is slightly
+    different: it will only perform the policyd overrides if the config is
+    True and the success file doesn't exist. Otherwise, it does nothing as
+    the resource file has already been processed.
+
+    :param openstack_release: The openstack release that is installed.
+    :type openstack_release: str
+    :param service: the service name to construct the policy.d directory for.
+    :type service: str
+    :param blacklist_paths: optional list of paths to leave alone
+    :type blacklist_paths: Union[None, List[str]]
+    :param blacklist_keys: optional list of keys that mustn't appear in the
+                           yaml file
+    :type blacklist_keys: Union[None, List[str]]
+    :param template_function: Optional function that can modify the string
+                              prior to being processed as a Yaml document.
+    :type template_function: Union[None, Callable[[str], str]]
+    :param restart_handler: The function to call if the service should be
+                            restarted.
+    :type restart_handler: Union[None, Callable[[], None]]
+    :param user: The user to create/write files/directories as
+    :type user: Union[None, str]
+    :param group: the group to create/write files/directories as
+    :type group: Union[None, str]
+    :param config_changed: Set to True for the config_changed hook.
+    :type config_changed: bool
+    """
+    _user = service if user is None else user
+    _group = service if group is None else group
+    if not is_policyd_override_valid_on_this_release(openstack_release):
+        return
+    hookenv.log("Running maybe_do_policyd_overrides",
+                level=POLICYD_LOG_LEVEL_DEFAULT)
+    config = hookenv.config()
+    try:
+        if not config.get(POLICYD_CONFIG_NAME, False):
+            clean_policyd_dir_for(service,
+                                  blacklist_paths,
+                                  user=_user,
+                                  group=_group)
+            if (os.path.isfile(_policy_success_file()) and
+                    restart_handler is not None and
+                    callable(restart_handler)):
+                restart_handler()
+            remove_policy_success_file()
+            return
+    except Exception as e:
+        hookenv.log("... ERROR: Exception is: {}".format(str(e)),
+                    level=POLICYD_LOG_LEVEL_DEFAULT)
+        import traceback
+        hookenv.log(traceback.format_exc(), level=POLICYD_LOG_LEVEL_DEFAULT)
+        return
+    # if the policyd overrides have been performed when doing config_changed
+    # just return
+    if config_changed and is_policy_success_file_set():
+        hookenv.log("... already setup, so skipping.",
+                    level=POLICYD_LOG_LEVEL_DEFAULT)
+        return
+    # from now on it should succeed; if it doesn't then status line will show
+    # broken.
+    resource_filename = get_policy_resource_filename()
+    restart = process_policy_resource_file(
+        resource_filename, service, blacklist_paths, blacklist_keys,
+        template_function)
+    if restart and restart_handler is not None and callable(restart_handler):
+        restart_handler()
+
+
+@charmhelpers.deprecate("Use maybe_do_policyd_overrides instead")
+def maybe_do_policyd_overrides_on_config_changed(*args, **kwargs):
+    """This function is designed to be called from the config changed hook.
+
+    DEPRECATED: please use maybe_do_policyd_overrides() with the param
+    `config_changed` as `True`.
+
+    See maybe_do_policyd_overrides() for more details on the params.
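+
+    A sketch of the equivalent non-deprecated call (release/service names as
+    in the module examples above)::
+
+        maybe_do_policyd_overrides(os_release('keystone'), 'keystone',
+                                   config_changed=True)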
+ """ + if 'config_changed' not in kwargs.keys(): + kwargs['config_changed'] = True + return maybe_do_policyd_overrides(*args, **kwargs) + + +def get_policy_resource_filename(): + """Function to extract the policy resource filename + + :returns: The filename of the resource, if set, otherwise, if an error + occurs, then None is returned. + :rtype: Union[str, None] + """ + try: + return hookenv.resource_get(POLICYD_RESOURCE_NAME) + except Exception: + return None + + +@contextlib.contextmanager +def open_and_filter_yaml_files(filepath, has_subdirs=False): + """Validate that the filepath provided is a zip file and contains at least + one (.yaml|.yml) file, and that the files are not duplicated when the zip + file is flattened. Note that the yaml files are not checked. This is the + first stage in validating the policy zipfile; individual yaml files are not + checked for validity or black listed keys. + + If the has_subdirs param is True, then the files are flattened to the first + directory, and the files in the root are ignored. + + An example of use is: + + with open_and_filter_yaml_files(some_path) as zfp, g: + for zipinfo in g: + # do something with zipinfo ... + + :param filepath: a filepath object that can be opened by zipfile + :type filepath: Union[AnyStr, os.PathLike[AntStr]] + :param has_subdirs: Keep first level of subdirectories in yaml file. + :type has_subdirs: bool + :returns: (zfp handle, + a generator of the (name, filename, ZipInfo object) tuples) as a + tuple. + :rtype: ContextManager[(zipfile.ZipFile, + Generator[(name, str, str, zipfile.ZipInfo)])] + :raises: zipfile.BadZipFile + :raises: BadPolicyZipFile if duplicated yaml or missing + :raises: IOError if the filepath is not found + """ + with zipfile.ZipFile(filepath, 'r') as zfp: + # first pass through; check for duplicates and at least one yaml file. + names = collections.defaultdict(int) + yamlfiles = _yamlfiles(zfp, has_subdirs) + for name, _, _, _ in yamlfiles: + names[name] += 1 + # There must be at least 1 yaml file. + if len(names.keys()) == 0: + raise BadPolicyZipFile("contains no yaml files with {} extensions." + .format(", ".join(POLICYD_VALID_EXTS))) + # There must be no duplicates + duplicates = [n for n, c in names.items() if c > 1] + if duplicates: + raise BadPolicyZipFile("{} have duplicates in the zip file." + .format(", ".join(duplicates))) + # Finally, let's yield the generator + yield (zfp, yamlfiles) + + +def _yamlfiles(zipfile, has_subdirs=False): + """Helper to get a yaml file (according to POLICYD_VALID_EXTS extensions) + and the infolist item from a zipfile. + + If the `has_subdirs` param is True, the the only yaml files that have a + directory component are read, and then first part of the directory + component is kept, along with the filename in the name. e.g. an entry with + a filename of: + + compute/someotherdir/override.yaml + + is returned as: + + compute/override, yaml, override.yaml, + + This is to help with the special, additional, processing that the dashboard + charm requires. + + :param zipfile: the zipfile to read zipinfo items from + :type zipfile: zipfile.ZipFile + :param has_subdirs: Keep first level of subdirectories in yaml file. + :type has_subdirs: bool + :returns: generator of (name, ext, filename, info item) for each + self-identified yaml file. 
+    :rtype: List[(str, str, str, zipfile.ZipInfo)]
+    """
+    files = []
+    for infolist_item in zipfile.infolist():
+        try:
+            if infolist_item.is_dir():
+                continue
+        except AttributeError:
+            # fallback to "old" way to determine dir entry for pre-py36
+            if infolist_item.filename.endswith('/'):
+                continue
+        _dir, name_ext = os.path.split(infolist_item.filename)
+        name, ext = os.path.splitext(name_ext)
+        if has_subdirs and _dir != "":
+            name = os.path.join(_dir.split(os.path.sep)[0], name)
+        ext = ext.lower()
+        if ext and ext in POLICYD_VALID_EXTS:
+            files.append((name, ext, name_ext, infolist_item))
+    return files
+
+
+def read_and_validate_yaml(stream_or_doc, blacklist_keys=None):
+    """Read, validate and return the (first) yaml document from the stream.
+
+    The doc is read, and checked for a yaml file. The top-level keys are
+    checked against the blacklist_keys provided. If there are problems then
+    an Exception is raised. Otherwise the yaml document is returned as a
+    Python object that can be dumped back as a yaml file on the system.
+
+    The yaml file must only consist of a str:str mapping, and if not then
+    the yaml file is rejected.
+
+    :param stream_or_doc: the file object to read the yaml from
+    :type stream_or_doc: Union[AnyStr, IO[AnyStr]]
+    :param blacklist_keys: Any keys, which if in the yaml file, should cause
+                           an error.
+    :type blacklist_keys: Union[None, List[str]]
+    :returns: the yaml file as a python document
+    :rtype: Dict[str, str]
+    :raises: yaml.YAMLError if there is a problem with the document
+    :raises: BadPolicyYamlFile if file doesn't look right or there are
+             blacklisted keys in the file.
+    """
+    blacklist_keys = blacklist_keys or []
+    blacklist_keys.extend(POLICYD_ALWAYS_BLACKLISTED_KEYS)
+    doc = yaml.safe_load(stream_or_doc)
+    if not isinstance(doc, dict):
+        raise BadPolicyYamlFile("doesn't look like a policy file?")
+    keys = set(doc.keys())
+    blacklisted_keys_present = keys.intersection(blacklist_keys)
+    if blacklisted_keys_present:
+        raise BadPolicyYamlFile("blacklisted keys {} present."
+                                .format(", ".join(blacklisted_keys_present)))
+    if not all(isinstance(k, str) for k in keys):
+        raise BadPolicyYamlFile("keys in yaml aren't all strings?")
+    # check that the dictionary looks like a mapping of str to str
+    if not all(isinstance(v, str) for v in doc.values()):
+        raise BadPolicyYamlFile("values in yaml aren't all strings?")
+    return doc
+
+
+def policyd_dir_for(service):
+    """Return the policy directory for the named service.
+
+    :param service: str
+    :returns: the policy.d override directory.
+    :rtype: os.PathLike[str]
+    """
+    return os.path.join("/", "etc", service, "policy.d")
+
+
+def clean_policyd_dir_for(service, keep_paths=None, user=None, group=None):
+    """Clean out the policyd directory except for items that should be kept.
+
+    The keep_paths, if used, should be set to the full path of the files
+    that should be kept in the policyd directory for the service. Note that
+    the service name is passed in, and then the policyd_dir_for() function
+    is used. This is so that a coding error doesn't result in a sudden
+    deletion of the charm (say).
+
+    :param service: the service name to use to construct the policy.d dir.
+    :type service: str
+    :param keep_paths: optional list of paths to not delete.
+    :type keep_paths: Union[None, List[str]]
+    :param user: The user to create/write files/directories as
+    :type user: Union[None, str]
+    :param group: the group to create/write files/directories as
+    :type group: Union[None, str]
+    """
+    _user = service if user is None else user
+    _group = service if group is None else group
+    keep_paths = keep_paths or []
+    path = policyd_dir_for(service)
+    hookenv.log("Cleaning path: {}".format(path), level=hookenv.DEBUG)
+    if not os.path.exists(path):
+        ch_host.mkdir(path, owner=_user, group=_group, perms=0o775)
+    for direntry in os.scandir(path):
+        # see if the path should be kept.
+        if direntry.path in keep_paths:
+            continue
+        # we remove any directories; it's ours and there shouldn't be any
+        if direntry.is_dir():
+            shutil.rmtree(direntry.path)
+        else:
+            os.remove(direntry.path)
+
+
+def maybe_create_directory_for(path, user, group):
+    """For the filename 'path', ensure that the directory for that path
+    exists.
+
+    Note that if the directory already exists then the permissions are NOT
+    changed.
+
+    :param path: the filename including the path to it.
+    :type path: str
+    :param user: the user to create the directory as
+    :param group: the group to create the directory as
+    """
+    _dir, _ = os.path.split(path)
+    if not os.path.exists(_dir):
+        ch_host.mkdir(_dir, owner=user, group=group, perms=0o775)
+
+
+def path_for_policy_file(service, name):
+    """Return the full path for a policy.d file that will be written to the
+    service's policy.d directory.
+
+    It is constructed using policyd_dir_for(), the name and the ".yaml"
+    extension.
+
+    For horizon, for example, it's a bit more complicated. The name param is
+    actually "override_service_dir/a_name", where override_service_dir needs
+    to be one of the allowed horizon override services. This translation and
+    check is done in the _yamlfiles() function.
+
+    :param service: the service name
+    :type service: str
+    :param name: the name for the policy override
+    :type name: str
+    :returns: the full path name for the file
+    :rtype: os.PathLike[str]
+    """
+    return os.path.join(policyd_dir_for(service), name + ".yaml")
+
+
+def _policy_success_file():
+    """Return the file name for a successful drop of policy.d overrides
+
+    :returns: the path name for the file.
+    :rtype: str
+    """
+    return os.path.join(hookenv.charm_dir(), POLICYD_SUCCESS_FILENAME)
+
+
+def remove_policy_success_file():
+    """Remove the file that indicates successful policyd override."""
+    try:
+        os.remove(_policy_success_file())
+    except Exception:
+        pass
+
+
+def set_policy_success_file():
+    """Set the file that indicates successful policyd override."""
+    open(_policy_success_file(), "w").close()
+
+
+def is_policy_success_file_set():
+    """Returns True if the policy success file has been set.
+
+    This indicates that policies are overridden and working properly.
+
+    :returns: True if the policy file is set
+    :rtype: bool
+    """
+    return os.path.isfile(_policy_success_file())
+
+
+def policyd_status_message_prefix():
+    """Return the prefix str for the status line.
+
+    "PO:" indicating that the policy overrides are in place, or
+    "PO (broken):" if the policy is supposed to be working but there is no
+    success file.
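+
+    A usage sketch (status message text is illustrative)::
+
+        hookenv.status_set(
+            'active',
+            '{} Unit is ready'.format(policyd_status_message_prefix()))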
+
+    :returns: the prefix
+    :rtype: str
+    """
+    if is_policy_success_file_set():
+        return "PO:"
+    return "PO (broken):"
+
+
+def process_policy_resource_file(resource_file,
+                                 service,
+                                 blacklist_paths=None,
+                                 blacklist_keys=None,
+                                 template_function=None,
+                                 preserve_topdir=False,
+                                 preprocess_filename=None,
+                                 user=None,
+                                 group=None):
+    """Process the resource file (which should contain at least one yaml
+    file) and write those files to the service's policy.d directory.
+
+    The optional template_function is a function that accepts a python
+    string and has an opportunity to modify the document
+    prior to it being read by the yaml.safe_load() function and written to
+    disk. Note that this function does *not* say how the templating is done -
+    this is up to the charm to implement its chosen method.
+
+    The param blacklist_paths are paths (within the service's policy.d
+    directory) that should not be touched.
+
+    The param blacklist_keys are keys that must not appear in the yaml file.
+    If they do, then the whole policy.d file fails.
+
+    The yaml file extracted from the resource_file (which is a zipped file)
+    has its file path reconstructed. This, also, must not match any path in
+    the black list.
+
+    The yaml filename can be modified in two ways. If the `preserve_topdir`
+    param is True, then files will be flattened to the top dir. This allows
+    for creating sets of files that can be grouped into a single level tree
+    structure.
+
+    Secondly, if the `preprocess_filename` param is not None and callable()
+    then the name is passed to that function for preprocessing before being
+    converted to the end location. This is to allow munging of the filename
+    prior to being tested for a blacklist path.
+
+    If any error occurs, then the policy.d directory is cleared, the error
+    is written to the log, and the status line will eventually show as
+    failed.
+
+    :param resource_file: The zipped file to open and extract yaml files
+                          from.
+    :type resource_file: Union[AnyStr, os.PathLike[AnyStr]]
+    :param service: the service name to construct the policy.d directory for.
+    :type service: str
+    :param blacklist_paths: optional list of paths to leave alone
+    :type blacklist_paths: Union[None, List[str]]
+    :param blacklist_keys: optional list of keys that mustn't appear in the
+                           yaml file
+    :type blacklist_keys: Union[None, List[str]]
+    :param template_function: Optional function that can modify the yaml
+                              document.
+    :type template_function: Union[None, Callable[[AnyStr], AnyStr]]
+    :param preserve_topdir: Keep the toplevel subdir
+    :type preserve_topdir: bool
+    :param preprocess_filename: Optional function to use to process filenames
+                                extracted from the resource file.
+    :type preprocess_filename: Union[None, Callable[[AnyStr], AnyStr]]
+    :param user: The user to create/write files/directories as
+    :type user: Union[None, str]
+    :param group: the group to create/write files/directories as
+    :type group: Union[None, str]
+    :returns: True if the processing was successful, False if not.
+    :rtype: boolean
+    """
+    hookenv.log("Running process_policy_resource_file", level=hookenv.DEBUG)
+    blacklist_paths = blacklist_paths or []
+    completed = False
+    _preprocess = None
+    if preprocess_filename is not None and callable(preprocess_filename):
+        _preprocess = preprocess_filename
+    _user = service if user is None else user
+    _group = service if group is None else group
+    try:
+        with open_and_filter_yaml_files(
+                resource_file, preserve_topdir) as (zfp, gen):
+            # first clear out the policy.d directory and clear success
+            remove_policy_success_file()
+            clean_policyd_dir_for(service,
+                                  blacklist_paths,
+                                  user=_user,
+                                  group=_group)
+            for name, ext, filename, zipinfo in gen:
+                # See if the name should be preprocessed.
+                if _preprocess is not None:
+                    name = _preprocess(name)
+                # construct a name for the output file.
+                yaml_filename = path_for_policy_file(service, name)
+                if yaml_filename in blacklist_paths:
+                    raise BadPolicyZipFile("policy.d name {} is blacklisted"
+                                           .format(yaml_filename))
+                with zfp.open(zipinfo) as fp:
+                    doc = fp.read()
+                    # if template_function is not None, then offer the document
+                    # to the template function
+                    if ext in POLICYD_TEMPLATE_EXTS:
+                        if (template_function is None or not
+                                callable(template_function)):
+                            raise BadPolicyZipFile(
+                                "Template {} but no template_function is "
+                                "available".format(filename))
+                        doc = template_function(doc)
+                yaml_doc = read_and_validate_yaml(doc, blacklist_keys)
+                # we may have to create the directory
+                maybe_create_directory_for(yaml_filename, _user, _group)
+                ch_host.write_file(yaml_filename,
+                                   yaml.dump(yaml_doc).encode('utf-8'),
+                                   _user,
+                                   _group)
+        # Everything worked, so mark it up as a success.
+        completed = True
+    except (zipfile.BadZipFile, BadPolicyZipFile, BadPolicyYamlFile) as e:
+        hookenv.log("Processing {} failed: {}".format(resource_file, str(e)),
+                    level=POLICYD_LOG_LEVEL_DEFAULT)
+    except IOError as e:
+        # technically this shouldn't happen; it would be a programming error
+        # as the filename comes from Juju and thus, should exist.
+        hookenv.log(
+            "File {} failed with IOError. This really shouldn't happen"
+            " -- error: {}".format(resource_file, str(e)),
+            level=POLICYD_LOG_LEVEL_DEFAULT)
+    except Exception as e:
+        import traceback
+        hookenv.log("General Exception({}) during policyd processing"
+                    .format(str(e)),
+                    level=POLICYD_LOG_LEVEL_DEFAULT)
+        hookenv.log(traceback.format_exc())
+    finally:
+        if not completed:
+            hookenv.log("Processing {} failed: cleaning policy.d directory"
+                        .format(resource_file),
+                        level=POLICYD_LOG_LEVEL_DEFAULT)
+            clean_policyd_dir_for(service,
+                                  blacklist_paths,
+                                  user=_user,
+                                  group=_group)
+        else:
+            # touch the success filename
+            hookenv.log("policy.d overrides installed.",
+                        level=POLICYD_LOG_LEVEL_DEFAULT)
+            set_policy_success_file()
+    return completed
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/ssh_migrations.py b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/ssh_migrations.py
new file mode 100644
index 00000000..0512e3a5
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/ssh_migrations.py
@@ -0,0 +1,412 @@
+# Copyright 2018 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import subprocess + +from charmhelpers.core.hookenv import ( + ERROR, + log, + relation_get, +) +from charmhelpers.contrib.network.ip import ( + is_ipv6, + ns_query, +) +from charmhelpers.contrib.openstack.utils import ( + get_hostname, + get_host_ip, + is_ip, +) + +NOVA_SSH_DIR = '/etc/nova/compute_ssh/' + + +def ssh_directory_for_unit(application_name, user=None): + """Return the directory used to store ssh assets for the application. + + :param application_name: Name of application eg nova-compute-something + :type application_name: str + :param user: The user that the ssh asserts are for. + :type user: str + :returns: Fully qualified directory path. + :rtype: str + """ + if user: + application_name = "{}_{}".format(application_name, user) + _dir = os.path.join(NOVA_SSH_DIR, application_name) + for d in [NOVA_SSH_DIR, _dir]: + if not os.path.isdir(d): + os.mkdir(d) + for f in ['authorized_keys', 'known_hosts']: + f = os.path.join(_dir, f) + if not os.path.isfile(f): + open(f, 'w').close() + return _dir + + +def known_hosts(application_name, user=None): + """Return the known hosts file for the application. + + :param application_name: Name of application eg nova-compute-something + :type application_name: str + :param user: The user that the ssh asserts are for. + :type user: str + :returns: Fully qualified path to file. + :rtype: str + """ + return os.path.join( + ssh_directory_for_unit(application_name, user), + 'known_hosts') + + +def authorized_keys(application_name, user=None): + """Return the authorized keys file for the application. + + :param application_name: Name of application eg nova-compute-something + :type application_name: str + :param user: The user that the ssh asserts are for. + :type user: str + :returns: Fully qualified path to file. + :rtype: str + """ + return os.path.join( + ssh_directory_for_unit(application_name, user), + 'authorized_keys') + + +def ssh_known_host_key(host, application_name, user=None): + """Return the first entry in known_hosts for host. + + :param host: hostname to lookup in file. + :type host: str + :param application_name: Name of application eg nova-compute-something + :type application_name: str + :param user: The user that the ssh asserts are for. + :type user: str + :returns: Host key + :rtype: str or None + """ + cmd = [ + 'ssh-keygen', + '-f', known_hosts(application_name, user), + '-H', + '-F', + host] + try: + # The first line of output is like '# Host xx found: line 1 type RSA', + # which should be excluded. + output = subprocess.check_output(cmd) + except subprocess.CalledProcessError as e: + # RC of 1 seems to be legitimate for most ssh-keygen -F calls. + if e.returncode == 1: + output = e.output + else: + raise + output = output.strip() + + if output: + # Bug #1500589 cmd has 0 rc on precise if entry not present + lines = output.split('\n') + if len(lines) >= 1: + return lines[0] + + return None + + +def remove_known_host(host, application_name, user=None): + """Remove the entry in known_hosts for host. + + :param host: hostname to lookup in file. 
+    :type host: str
+    :param application_name: Name of application eg nova-compute-something
+    :type application_name: str
+    :param user: The user that the ssh asserts are for.
+    :type user: str
+    """
+    log('Removing SSH known host entry for compute host at %s' % host)
+    cmd = ['ssh-keygen', '-f', known_hosts(application_name, user), '-R', host]
+    subprocess.check_call(cmd)
+
+
+def is_same_key(key_1, key_2):
+    """Extract the key from two host entries and compare them.
+
+    :param key_1: Host key
+    :type key_1: str
+    :param key_2: Host key
+    :type key_2: str
+    :returns: True if the key portions of both entries match.
+    :rtype: boolean
+    """
+    # The key format we get will be like '|1|2rUumCavEXWVaVyB5uMl6m85pZo=|Cp'
+    # 'EL6l7VTY37T/fg/ihhNb/GPgs= ssh-rsa AAAAB', we only need to compare
+    # the part starting with 'ssh-rsa' after '= ', because the hash
+    # value at the beginning will change each time.
+    k_1 = key_1.split('= ')[1]
+    k_2 = key_2.split('= ')[1]
+    return k_1 == k_2
+
+
+def add_known_host(host, application_name, user=None):
+    """Add the given host key to the known hosts file.
+
+    :param host: host name
+    :type host: str
+    :param application_name: Name of application eg nova-compute-something
+    :type application_name: str
+    :param user: The user that the ssh asserts are for.
+    :type user: str
+    """
+    cmd = ['ssh-keyscan', '-H', '-t', 'rsa', host]
+    try:
+        remote_key = subprocess.check_output(cmd).strip()
+    except Exception as e:
+        log('Could not obtain SSH host key from %s' % host, level=ERROR)
+        raise e
+
+    current_key = ssh_known_host_key(host, application_name, user)
+    if current_key and remote_key:
+        if is_same_key(remote_key, current_key):
+            log('Known host key for compute host %s up to date.' % host)
+            return
+        else:
+            remove_known_host(host, application_name, user)
+
+    log('Adding SSH host key to known hosts for compute node at %s.' % host)
+    with open(known_hosts(application_name, user), 'a') as out:
+        out.write("{}\n".format(remote_key))
+
+
+def ssh_authorized_key_exists(public_key, application_name, user=None):
+    """Check if given key is in the authorized_key file.
+
+    :param public_key: Public key.
+    :type public_key: str
+    :param application_name: Name of application eg nova-compute-something
+    :type application_name: str
+    :param user: The user that the ssh asserts are for.
+    :type user: str
+    :returns: Whether given key is in the authorized_key file.
+    :rtype: boolean
+    """
+    with open(authorized_keys(application_name, user)) as keys:
+        return ('%s' % public_key) in keys.read()
+
+
+def add_authorized_key(public_key, application_name, user=None):
+    """Add given key to the authorized_key file.
+
+    :param public_key: Public key.
+    :type public_key: str
+    :param application_name: Name of application eg nova-compute-something
+    :type application_name: str
+    :param user: The user that the ssh asserts are for.
+    :type user: str
+    """
+    with open(authorized_keys(application_name, user), 'a') as keys:
+        keys.write("{}\n".format(public_key))
+
+
+def ssh_compute_add_host_and_key(public_key, hostname, private_address,
+                                 application_name, user=None):
+    """Add a compute node's ssh details to the local cache.
+
+    Collect various hostname variations and add the corresponding host keys to
+    the local known hosts file. Finally, add the supplied public key to the
+    authorized_key file.
+
+    :param public_key: Public key.
+    :type public_key: str
+    :param hostname: Hostname to collect host keys from.
+    :type hostname: str
+    :param private_address: Corresponding private address for hostname
+    :type private_address: str
+    :param application_name: Name of application eg nova-compute-something
+    :type application_name: str
+    :param user: The user that the ssh asserts are for.
+    :type user: str
+    """
+    # If remote compute node hands us a hostname, ensure we have a
+    # known hosts entry for its IP, hostname and FQDN.
+    hosts = [private_address]
+
+    if not is_ipv6(private_address):
+        if hostname:
+            hosts.append(hostname)
+
+        if is_ip(private_address):
+            hn = get_hostname(private_address)
+            if hn:
+                hosts.append(hn)
+                short = hn.split('.')[0]
+                if ns_query(short):
+                    hosts.append(short)
+        else:
+            hosts.append(get_host_ip(private_address))
+            short = private_address.split('.')[0]
+            if ns_query(short):
+                hosts.append(short)
+
+    for host in list(set(hosts)):
+        add_known_host(host, application_name, user)
+
+    if not ssh_authorized_key_exists(public_key, application_name, user):
+        log('Saving SSH authorized key for compute host at %s.' %
+            private_address)
+        add_authorized_key(public_key, application_name, user)
+
+
+def ssh_compute_add(public_key, application_name, rid=None, unit=None,
+                    user=None):
+    """Add a compute node's ssh details to the local cache.
+
+    Collect various hostname variations and add the corresponding host keys to
+    the local known hosts file. Finally, add the supplied public key to the
+    authorized_key file.
+
+    :param public_key: Public key.
+    :type public_key: str
+    :param application_name: Name of application eg nova-compute-something
+    :type application_name: str
+    :param rid: Relation id of the relation between this charm and the app. If
+                none is supplied it is assumed it's the relation relating to
+                the current hook context.
+    :type rid: str
+    :param unit: Unit to add ssh asserts for. If none is supplied it is
+                 assumed it's the unit relating to the current hook context.
+    :type unit: str
+    :param user: The user that the ssh asserts are for.
+    :type user: str
+    """
+    relation_data = relation_get(rid=rid, unit=unit)
+    ssh_compute_add_host_and_key(
+        public_key,
+        relation_data.get('hostname'),
+        relation_data.get('private-address'),
+        application_name,
+        user=user)
+
+
+def ssh_known_hosts_lines(application_name, user=None):
+    """Return contents of known_hosts file for given application.
+
+    :param application_name: Name of application eg nova-compute-something
+    :type application_name: str
+    :param user: The user that the ssh asserts are for.
+    :type user: str
+    """
+    known_hosts_list = []
+    with open(known_hosts(application_name, user)) as hosts:
+        for hosts_line in hosts:
+            if hosts_line.rstrip():
+                known_hosts_list.append(hosts_line.rstrip())
+    return known_hosts_list
+
+
+def ssh_authorized_keys_lines(application_name, user=None):
+    """Return contents of authorized_keys file for given application.
+
+    :param application_name: Name of application eg nova-compute-something
+    :type application_name: str
+    :param user: The user that the ssh asserts are for.
+    :type user: str
+    """
+    authorized_keys_list = []
+
+    with open(authorized_keys(application_name, user)) as keys:
+        for authkey_line in keys:
+            if authkey_line.rstrip():
+                authorized_keys_list.append(authkey_line.rstrip())
+    return authorized_keys_list
+
+
+def ssh_compute_remove(public_key, application_name, user=None):
+    """Remove given public key from authorized_keys file.
+
+    :param public_key: Public key.
+    :type public_key: str
+    :param application_name: Name of application eg nova-compute-something
+    :type application_name: str
+    :param user: The user that the ssh asserts are for.
+    :type user: str
+    """
+    if not (os.path.isfile(authorized_keys(application_name, user)) or
+            os.path.isfile(known_hosts(application_name, user))):
+        return
+
+    keys = ssh_authorized_keys_lines(application_name, user=user)
+    keys = [k.strip() for k in keys]
+
+    if public_key not in keys:
+        return
+
+    # Filter out the key to be removed rather than mutating the list
+    # while iterating over it.
+    keys = [k for k in keys if k != public_key]
+
+    with open(authorized_keys(application_name, user), 'w') as _keys:
+        keys = '\n'.join(keys)
+        if not keys.endswith('\n'):
+            keys += '\n'
+        _keys.write(keys)
+
+
+def get_ssh_settings(application_name, user=None):
+    """Retrieve the known host entries and public keys for application
+
+    Retrieve the known host entries and public keys for application for all
+    units of the given application related to this application for the
+    app + user combination.
+
+    :param application_name: Name of application eg nova-compute-something
+    :type application_name: str
+    :param user: The user that the ssh asserts are for.
+    :type user: str
+    :returns: Public keys + host keys for all units for app + user combination.
+    :rtype: dict
+    """
+    settings = {}
+    keys = {}
+    prefix = ''
+    if user:
+        prefix = '{}_'.format(user)
+
+    for i, line in enumerate(ssh_known_hosts_lines(
+            application_name=application_name, user=user)):
+        settings['{}known_hosts_{}'.format(prefix, i)] = line
+    if settings:
+        settings['{}known_hosts_max_index'.format(prefix)] = len(
+            settings.keys())
+
+    for i, line in enumerate(ssh_authorized_keys_lines(
+            application_name=application_name, user=user)):
+        keys['{}authorized_keys_{}'.format(prefix, i)] = line
+    if keys:
+        keys['{}authorized_keys_max_index'.format(prefix)] = len(keys.keys())
+    settings.update(keys)
+    return settings
+
+
+def get_all_user_ssh_settings(application_name):
+    """Retrieve the known host entries and public keys for application
+
+    Retrieve the known host entries and public keys for application for all
+    units of the given application related to this application for root user
+    and nova user.
+
+    :param application_name: Name of application eg nova-compute-something
+    :type application_name: str
+    :returns: Public keys + host keys for all units for app + user combination.
+    :rtype: dict
+    """
+    settings = get_ssh_settings(application_name)
+    settings.update(get_ssh_settings(application_name, user='nova'))
+    return settings
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/__init__.py b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/__init__.py
new file mode 100644
index 00000000..9df5f746
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/__init__.py
@@ -0,0 +1,16 @@
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
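As an illustrative aside (not part of the synced helpers): a sketch of how a provider charm might publish the aggregated ssh data returned by get_all_user_ssh_settings() above onto a relation; the 'cloud-compute' relation name is an assumption.

from charmhelpers.core.hookenv import relation_ids, relation_set
from charmhelpers.contrib.openstack.ssh_migrations import (
    get_all_user_ssh_settings,
)

def publish_ssh_settings(application_name):
    # keys look like 'known_hosts_0', 'nova_authorized_keys_0', plus the
    # '*_max_index' markers added by get_ssh_settings()
    settings = get_all_user_ssh_settings(application_name)
    for rid in relation_ids('cloud-compute'):
        relation_set(relation_id=rid, **settings)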
+ +# dummy __init__.py to fool syncer into thinking this is a syncable python +# module diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/ceph.conf b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/ceph.conf new file mode 100644 index 00000000..c0f22360 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/ceph.conf @@ -0,0 +1,28 @@ +############################################################################### +# [ WARNING ] +# ceph configuration file maintained by Juju +# local changes may be overwritten. +############################################################################### +[global] +{% if auth -%} +auth_supported = {{ auth }} +keyring = /etc/ceph/$cluster.$name.keyring +mon host = {{ mon_hosts }} +{% endif -%} +log to syslog = {{ use_syslog }} +err to syslog = {{ use_syslog }} +clog to syslog = {{ use_syslog }} +{% if rbd_features %} +rbd default features = {{ rbd_features }} +{% endif %} + +[client] +{% if rbd_client_cache_settings -%} +{% for key, value in rbd_client_cache_settings.items() -%} +{{ key }} = {{ value }} +{% endfor -%} +{%- endif %} + +{% if rbd_default_data_pool -%} +rbd default data pool = {{ rbd_default_data_pool }} +{% endif %} diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/git.upstart b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/git.upstart new file mode 100644 index 00000000..4bed404b --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/git.upstart @@ -0,0 +1,17 @@ +description "{{ service_description }}" +author "Juju {{ service_name }} Charm " + +start on runlevel [2345] +stop on runlevel [!2345] + +respawn + +exec start-stop-daemon --start --chuid {{ user_name }} \ + --chdir {{ start_dir }} --name {{ process_name }} \ + --exec {{ executable_name }} -- \ + {% for config_file in config_files -%} + --config-file={{ config_file }} \ + {% endfor -%} + {% if log_file -%} + --log-file={{ log_file }} + {% endif -%} diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/haproxy.cfg b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/haproxy.cfg new file mode 100644 index 00000000..da2522f6 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/haproxy.cfg @@ -0,0 +1,98 @@ +global + # NOTE: on startup haproxy chroot's to /var/lib/haproxy. + # + # Unfortunately the program will open some files prior to the call to + # chroot never to reopen them, and some after. So looking at the on-disk + # layout of haproxy resources you will find some resources relative to / + # such as the admin socket, and some relative to /var/lib/haproxy such as + # the log socket. + # + # The logging socket is (re-)opened after the chroot and must be relative + # to /var/lib/haproxy. + log /dev/log local0 + log /dev/log local1 notice + maxconn 20000 + user haproxy + group haproxy + spread-checks 0 + # The admin socket is opened prior to the chroot never to be reopened, so + # it lives outside the chroot directory in the filesystem. 
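As an illustrative aside (not part of the synced helpers): because the admin socket declared just below lives outside the chroot, it can be queried directly from the host; a hedged Python sketch, assuming the socket path from this template:

import socket

def haproxy_show_stat(path='/var/run/haproxy/admin.sock'):
    # connect to haproxy's admin socket and ask for the stats table
    s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    s.connect(path)
    s.sendall(b'show stat\n')
    chunks = []
    while True:
        data = s.recv(4096)
        if not data:
            break
        chunks.append(data)
    s.close()
    return b''.join(chunks).decode()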
+ stats socket /var/run/haproxy/admin.sock mode 600 level admin + stats timeout 2m + +defaults + log global + mode tcp + option tcplog + option dontlognull + retries 3 +{%- if haproxy_queue_timeout %} + timeout queue {{ haproxy_queue_timeout }} +{%- else %} + timeout queue 9000 +{%- endif %} +{%- if haproxy_connect_timeout %} + timeout connect {{ haproxy_connect_timeout }} +{%- else %} + timeout connect 9000 +{%- endif %} +{%- if haproxy_client_timeout %} + timeout client {{ haproxy_client_timeout }} +{%- else %} + timeout client 90000 +{%- endif %} +{%- if haproxy_server_timeout %} + timeout server {{ haproxy_server_timeout }} +{%- else %} + timeout server 90000 +{%- endif %} + +listen stats + bind {{ local_host }}:{{ stat_port }} +{%- if stats_exporter_host and stats_exporter_port %} + bind {{ stats_exporter_host }}:{{ stats_exporter_port }} + option http-use-htx + http-request use-service prometheus-exporter if { path /metrics } +{%- endif %} + mode http + stats enable + stats hide-version + stats realm Haproxy\ Statistics + stats uri / + stats auth admin:{{ stat_password }} + +{% if frontends -%} +{% for service, ports in service_ports.items() -%} +frontend tcp-in_{{ service }} + bind *:{{ ports[0] }} + {% if ipv6_enabled -%} + bind :::{{ ports[0] }} + {% endif -%} + {% for frontend in frontends -%} + acl net_{{ frontend }} dst {{ frontends[frontend]['network'] }} + use_backend {{ service }}_{{ frontend }} if net_{{ frontend }} + {% endfor -%} + default_backend {{ service }}_{{ default_backend }} + +{% for frontend in frontends -%} +backend {{ service }}_{{ frontend }} + balance leastconn + {% if backend_options -%} + {% if backend_options[service] -%} + {% for option in backend_options[service] -%} + {% for key, value in option.items() -%} + {{ key }} {{ value }} + {% endfor -%} + {% endfor -%} + {% endif -%} + {% endif -%} + {% for unit, address in frontends[frontend]['backends'].items() -%} + {% if https -%} + server {{ unit }} {{ address }}:{{ ports[1] }} check check-ssl verify none + {% else -%} + server {{ unit }} {{ address }}:{{ ports[1] }} check + {% endif -%} + {% endfor %} +{% endfor -%} +{% endfor -%} +{% endif -%} diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/logrotate b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/logrotate new file mode 100644 index 00000000..b2900d09 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/logrotate @@ -0,0 +1,9 @@ +/var/log/{{ logrotate_logs_location }}/*.log { + {{ logrotate_interval }} + {{ logrotate_count }} + compress + delaycompress + missingok + notifempty + copytruncate +} diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/memcached.conf b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/memcached.conf new file mode 100644 index 00000000..26cb037c --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/memcached.conf @@ -0,0 +1,53 @@ +############################################################################### +# [ WARNING ] +# memcached configuration file maintained by Juju +# local changes may be overwritten. +############################################################################### + +# memcached default config file +# 2003 - Jay Bonci +# This configuration file is read by the start-memcached script provided as +# part of the Debian GNU/Linux distribution. + +# Run memcached as a daemon. This command is implied, and is not needed for the +# daemon to run. 
See the README.Debian that comes with this package for more
+# information.
+-d
+
+# Log memcached's output to /var/log/memcached
+logfile /var/log/memcached.log
+
+# Be verbose
+# -v
+
+# Be even more verbose (print client commands as well)
+# -vv
+
+# Start with a cap of 64 megs of memory. It's reasonable, and the daemon default
+# Note that the daemon will grow to this size, but does not start out holding this much
+# memory
+-m 64
+
+# Default connection port is 11211
+-p {{ memcache_port }}
+
+# Run the daemon as root. The start-memcached will default to running as root if no
+# -u command is present in this config file
+-u memcache
+
+# Specify which IP address to listen on. The default is to listen on all IP addresses
+# This parameter is one of the only security measures that memcached has, so make sure
+# it's listening on a firewalled interface.
+-l {{ memcache_server }}
+
+# Limit the number of simultaneous incoming connections. The daemon default is 1024
+# -c 1024
+
+# Lock down all paged memory. Consult with the README and homepage before you do this
+# -k
+
+# Return error when memory is exhausted (rather than removing items)
+# -M
+
+# Maximize core file limit
+# -r
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/openstack_https_frontend b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/openstack_https_frontend
new file mode 100644
index 00000000..6ed869a5
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/openstack_https_frontend
@@ -0,0 +1,37 @@
+{% if endpoints -%}
+{% for ext_port in ext_ports -%}
+Listen {{ ext_port }}
+{% endfor -%}
+{% for address, endpoint, ext, int in endpoints -%}
+<VirtualHost {{ address }}:{{ ext }}>
+    ServerName {{ endpoint }}
+    SSLEngine on
+
+    # This section is based on Mozilla's recommendation
+    # as the "intermediate" profile as of July 7th, 2020.
+    # https://wiki.mozilla.org/Security/Server_Side_TLS
+    SSLProtocol all -SSLv3 -TLSv1 -TLSv1.1
+    SSLCipherSuite ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384
+    SSLHonorCipherOrder off
+
+    SSLCertificateFile /etc/apache2/ssl/{{ namespace }}/cert_{{ endpoint }}
+    # See LP 1484489 - this is to support <= 2.4.7 and >= 2.4.8
+    SSLCertificateChainFile /etc/apache2/ssl/{{ namespace }}/cert_{{ endpoint }}
+    SSLCertificateKeyFile /etc/apache2/ssl/{{ namespace }}/key_{{ endpoint }}
+    ProxyPass / http://localhost:{{ int }}/
+    ProxyPassReverse / http://localhost:{{ int }}/
+    ProxyPreserveHost on
+    RequestHeader set X-Forwarded-Proto "https"
+    KeepAliveTimeout 75
+    MaxKeepAliveRequests 1000
+</VirtualHost>
+{% endfor -%}
+<Proxy *>
+    Order deny,allow
+    Allow from all
+</Proxy>
+<Location />
+    Order allow,deny
+    Allow from all
+</Location>
+{% endif -%}
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/openstack_https_frontend.conf b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/openstack_https_frontend.conf
new file mode 100644
index 00000000..6ed869a5
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/openstack_https_frontend.conf
@@ -0,0 +1,37 @@
+{% if endpoints -%}
+{% for ext_port in ext_ports -%}
+Listen {{ ext_port }}
+{% endfor -%}
+{% for address, endpoint, ext, int in endpoints -%}
+<VirtualHost {{ address }}:{{ ext }}>
+    ServerName {{ endpoint }}
+    SSLEngine on
+
+    # This section is based on Mozilla's recommendation
+    # as the "intermediate" profile as of July 7th, 2020.
+    # https://wiki.mozilla.org/Security/Server_Side_TLS
+    SSLProtocol all -SSLv3 -TLSv1 -TLSv1.1
+    SSLCipherSuite ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384
+    SSLHonorCipherOrder off
+
+    SSLCertificateFile /etc/apache2/ssl/{{ namespace }}/cert_{{ endpoint }}
+    # See LP 1484489 - this is to support <= 2.4.7 and >= 2.4.8
+    SSLCertificateChainFile /etc/apache2/ssl/{{ namespace }}/cert_{{ endpoint }}
+    SSLCertificateKeyFile /etc/apache2/ssl/{{ namespace }}/key_{{ endpoint }}
+    ProxyPass / http://localhost:{{ int }}/
+    ProxyPassReverse / http://localhost:{{ int }}/
+    ProxyPreserveHost on
+    RequestHeader set X-Forwarded-Proto "https"
+    KeepAliveTimeout 75
+    MaxKeepAliveRequests 1000
+</VirtualHost>
+{% endfor -%}
+<Proxy *>
+    Order deny,allow
+    Allow from all
+</Proxy>
+<Location />
+    Order allow,deny
+    Allow from all
+</Location>
+{% endif -%}
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-audit-middleware-notifications b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-audit-middleware-notifications
new file mode 100644
index 00000000..1f88014f
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-audit-middleware-notifications
@@ -0,0 +1,4 @@
+{% if audit_middleware -%}
+[audit_middleware_notifications]
+driver = log
+{% endif -%}
\ No newline at end of file
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-ceph-bluestore-compression b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-ceph-bluestore-compression
new file mode 100644
index 00000000..a6430100
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-ceph-bluestore-compression
@@ -0,0 +1,28 @@
+{# section header omitted as options can belong to multiple sections #}
+{% if bluestore_compression_algorithm -%}
+bluestore compression algorithm = {{ bluestore_compression_algorithm }}
+{% endif -%}
+{% if bluestore_compression_mode -%}
+bluestore compression mode = {{ bluestore_compression_mode }}
+{% endif -%}
+{% if bluestore_compression_required_ratio -%}
+bluestore compression required ratio = {{ bluestore_compression_required_ratio }}
+{% endif -%}
+{% if bluestore_compression_min_blob_size -%}
+bluestore compression min blob size = {{ bluestore_compression_min_blob_size }}
+{% endif -%}
+{% if bluestore_compression_min_blob_size_hdd -%}
+bluestore compression min blob size hdd = {{ bluestore_compression_min_blob_size_hdd }}
+{% endif -%}
+{% if bluestore_compression_min_blob_size_ssd -%}
+bluestore compression min blob size ssd = {{ bluestore_compression_min_blob_size_ssd }}
+{% endif -%}
+{% if bluestore_compression_max_blob_size -%}
+bluestore compression max blob size = {{ bluestore_compression_max_blob_size }}
+{% endif -%}
+{% if bluestore_compression_max_blob_size_hdd -%}
+bluestore compression max blob size hdd = {{ bluestore_compression_max_blob_size_hdd }}
+{% endif -%}
+{% if bluestore_compression_max_blob_size_ssd -%}
+bluestore compression max blob size ssd = {{ bluestore_compression_max_blob_size_ssd }}
+{% endif -%}
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-filter-audit b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-filter-audit
new file mode 100644
index 00000000..11512aee
--- /dev/null
+++ 
b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-filter-audit @@ -0,0 +1,6 @@ +{% if audit_middleware and service_name -%} +[filter:audit] +paste.filter_factory = keystonemiddleware.audit:filter_factory +audit_map_file = /etc/{{ service_name }}/api_audit_map.conf +service_name = {{ service_name }} +{% endif -%} \ No newline at end of file diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-keystone-authtoken b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-keystone-authtoken new file mode 100644 index 00000000..aef5edd8 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-keystone-authtoken @@ -0,0 +1,19 @@ +{% if auth_host -%} +[keystone_authtoken] +auth_uri = {{ service_protocol }}://{{ service_host }}:{{ service_port }} +auth_url = {{ auth_protocol }}://{{ auth_host }}:{{ auth_port }} +auth_plugin = password +project_domain_id = default +user_domain_id = default +project_name = {{ admin_tenant_name }} +username = {{ admin_user }} +password = {{ admin_password }} +signing_dir = {{ signing_dir }} +{% if service_type -%} +service_type = {{ service_type }} +{% endif -%} +{% if admin_role -%} +service_token_roles = {{ admin_role }} +service_token_roles_required = True +{% endif -%} +{% endif -%} diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-keystone-authtoken-legacy b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-keystone-authtoken-legacy new file mode 100644 index 00000000..9356b2be --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-keystone-authtoken-legacy @@ -0,0 +1,10 @@ +{% if auth_host -%} +[keystone_authtoken] +# Juno specific config (Bug #1557223) +auth_uri = {{ service_protocol }}://{{ service_host }}:{{ service_port }}/{{ service_admin_prefix }} +identity_uri = {{ auth_protocol }}://{{ auth_host }}:{{ auth_port }} +admin_tenant_name = {{ admin_tenant_name }} +admin_user = {{ admin_user }} +admin_password = {{ admin_password }} +signing_dir = {{ signing_dir }} +{% endif -%} diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-keystone-authtoken-mitaka b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-keystone-authtoken-mitaka new file mode 100644 index 00000000..31c21b4a --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-keystone-authtoken-mitaka @@ -0,0 +1,29 @@ +{% if auth_host -%} +[keystone_authtoken] +auth_type = password +{% if api_version == "3" -%} +auth_uri = {{ service_protocol }}://{{ service_host }}:{{ service_port }}/v3 +auth_url = {{ auth_protocol }}://{{ auth_host }}:{{ auth_port }}/v3 +project_domain_name = {{ admin_domain_name }} +user_domain_name = {{ admin_domain_name }} +{% if service_type -%} +service_type = {{ service_type }} +{% endif -%} +{% else -%} +auth_uri = {{ service_protocol }}://{{ service_host }}:{{ service_port }} +auth_url = {{ auth_protocol }}://{{ auth_host }}:{{ auth_port }} +project_domain_name = default +user_domain_name = default +{% endif -%} +project_name = {{ admin_tenant_name }} +username = {{ admin_user }} +password = {{ admin_password }} +signing_dir = {{ signing_dir }} +{% if use_memcache == true %} +memcached_servers = {{ memcache_url }} +{% endif -%} +{% if admin_role -%} +service_token_roles = {{ admin_role }} +service_token_roles_required = True +{% endif -%} +{% endif -%} diff --git 
a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-keystone-authtoken-v3only b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-keystone-authtoken-v3only new file mode 100644 index 00000000..d26a91fe --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-keystone-authtoken-v3only @@ -0,0 +1,9 @@ +{% if auth_host -%} +[keystone_authtoken] +{% for option_name, option_value in keystone_authtoken.items() -%} +{{ option_name }} = {{ option_value }} +{% endfor -%} +{% if use_memcache == true %} +memcached_servers = {{ memcache_url }} +{% endif -%} +{% endif -%} diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-oslo-cache b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-oslo-cache new file mode 100644 index 00000000..e056a32a --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-oslo-cache @@ -0,0 +1,6 @@ +[cache] +{% if memcache_url %} +enabled = true +backend = oslo_cache.memcache_pool +memcache_servers = {{ memcache_url }} +{% endif %} diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-oslo-messaging-rabbit b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-oslo-messaging-rabbit new file mode 100644 index 00000000..bed2216a --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-oslo-messaging-rabbit @@ -0,0 +1,10 @@ +[oslo_messaging_rabbit] +{% if rabbitmq_ha_queues -%} +rabbit_ha_queues = True +{% endif -%} +{% if rabbit_ssl_port -%} +ssl = True +{% endif -%} +{% if rabbit_ssl_ca -%} +ssl_ca_file = {{ rabbit_ssl_ca }} +{% endif -%} diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-oslo-messaging-rabbit-ocata b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-oslo-messaging-rabbit-ocata new file mode 100644 index 00000000..365f4375 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-oslo-messaging-rabbit-ocata @@ -0,0 +1,10 @@ +[oslo_messaging_rabbit] +{% if rabbitmq_ha_queues -%} +rabbit_ha_queues = True +{% endif -%} +{% if rabbit_ssl_port -%} +rabbit_use_ssl = True +{% endif -%} +{% if rabbit_ssl_ca -%} +ssl_ca_file = {{ rabbit_ssl_ca }} +{% endif -%} diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-oslo-middleware b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-oslo-middleware new file mode 100644 index 00000000..dd73230a --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-oslo-middleware @@ -0,0 +1,5 @@ +[oslo_middleware] + +# Bug #1758675 +enable_proxy_headers_parsing = true + diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-oslo-notifications b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-oslo-notifications new file mode 100644 index 00000000..71c7eb06 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-oslo-notifications @@ -0,0 +1,15 @@ +{% if transport_url -%} +[oslo_messaging_notifications] +driver = {{ oslo_messaging_driver }} +transport_url = {{ transport_url }} +{% if send_notifications_to_logs %} +driver = log +{% endif %} +{% if notification_topics -%} +topics = {{ notification_topics }} +{% endif -%} +{% if notification_format -%} +[notifications] +notification_format = {{ notification_format }} +{% endif -%} +{% endif -%} diff --git 
a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-placement b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-placement new file mode 100644 index 00000000..8c224ec9 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-placement @@ -0,0 +1,20 @@ +[placement] +{% if auth_host -%} +auth_url = {{ auth_protocol }}://{{ auth_host }}:{{ auth_port }} +auth_type = password +{% if api_version == "3" -%} +project_domain_name = {{ admin_domain_name }} +user_domain_name = {{ admin_domain_name }} +{% else -%} +project_domain_name = default +user_domain_name = default +{% endif -%} +project_name = {{ admin_tenant_name }} +username = {{ admin_user }} +password = {{ admin_password }} +{% endif -%} +{% if region -%} +os_region_name = {{ region }} +region_name = {{ region }} +{% endif -%} +randomize_allocation_candidates = true diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-rabbitmq-oslo b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-rabbitmq-oslo new file mode 100644 index 00000000..b444c9c9 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-rabbitmq-oslo @@ -0,0 +1,22 @@ +{% if rabbitmq_host or rabbitmq_hosts -%} +[oslo_messaging_rabbit] +rabbit_userid = {{ rabbitmq_user }} +rabbit_virtual_host = {{ rabbitmq_virtual_host }} +rabbit_password = {{ rabbitmq_password }} +{% if rabbitmq_hosts -%} +rabbit_hosts = {{ rabbitmq_hosts }} +{% if rabbitmq_ha_queues -%} +rabbit_ha_queues = True +rabbit_durable_queues = False +{% endif -%} +{% else -%} +rabbit_host = {{ rabbitmq_host }} +{% endif -%} +{% if rabbit_ssl_port -%} +rabbit_use_ssl = True +rabbit_port = {{ rabbit_ssl_port }} +{% if rabbit_ssl_ca -%} +kombu_ssl_ca_certs = {{ rabbit_ssl_ca }} +{% endif -%} +{% endif -%} +{% endif -%} diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-service-user b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-service-user new file mode 100644 index 00000000..ff454086 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-service-user @@ -0,0 +1,11 @@ +{% if auth_host -%} +[service_user] +send_service_user_token = true +auth_type = password +auth_url = {{ auth_protocol }}://{{ auth_host }}:{{ auth_port }} +project_domain_name = service_domain +user_domain_name = service_domain +project_name = {{ admin_tenant_name }} +username = {{ admin_user }} +password = {{ admin_password }} +{% endif -%} diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-zeromq b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-zeromq new file mode 100644 index 00000000..95f1a76c --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/section-zeromq @@ -0,0 +1,14 @@ +{% if zmq_host -%} +# ZeroMQ configuration (restart-nonce: {{ zmq_nonce }}) +rpc_backend = zmq +rpc_zmq_host = {{ zmq_host }} +{% if zmq_redis_address -%} +rpc_zmq_matchmaker = redis +matchmaker_heartbeat_freq = 15 +matchmaker_heartbeat_ttl = 30 +[matchmaker_redis] +host = {{ zmq_redis_address }} +{% else -%} +rpc_zmq_matchmaker = ring +{% endif -%} +{% endif -%} diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/vendor_data.json b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/vendor_data.json new file mode 100644 index 00000000..904f612a --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/vendor_data.json @@ 
-0,0 +1 @@
+{{ vendor_data_json }}
\ No newline at end of file
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/wsgi-openstack-api.conf b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/wsgi-openstack-api.conf
new file mode 100644
index 00000000..de5f603f
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/wsgi-openstack-api.conf
@@ -0,0 +1,103 @@
+# Configuration file maintained by Juju. Local changes may be overwritten.
+
+{% if port -%}
+Listen {{ port }}
+{% endif -%}
+
+{% if admin_port -%}
+Listen {{ admin_port }}
+{% endif -%}
+
+{% if public_port -%}
+Listen {{ public_port }}
+{% endif -%}
+
+{% if wsgi_socket_rotation -%}
+WSGISocketRotation On
+{% else -%}
+WSGISocketRotation Off
+{% endif -%}
+
+{% if port -%}
+<VirtualHost *:{{ port }}>
+    WSGIDaemonProcess {{ service_name }} processes={{ processes }} threads={{ threads }} user={{ user }} group={{ group }} \
+                      display-name=%{GROUP} lang=C.UTF-8 locale=C.UTF-8
+    WSGIProcessGroup {{ service_name }}
+    WSGIScriptAlias / {{ script }}
+    WSGIApplicationGroup %{GLOBAL}
+    WSGIPassAuthorization On
+    KeepAliveTimeout 75
+    MaxKeepAliveRequests 1000
+    <IfVersion >= 2.4>
+      ErrorLogFormat "%{cu}t %M"
+    </IfVersion>
+    ErrorLog /var/log/apache2/{{ service_name }}_error.log
+    CustomLog /var/log/apache2/{{ service_name }}_access.log combined
+
+    <Directory /usr/bin>
+        <IfVersion >= 2.4>
+            Require all granted
+        </IfVersion>
+        <IfVersion < 2.4>
+            Order allow,deny
+            Allow from all
+        </IfVersion>
+    </Directory>
+</VirtualHost>
+{% endif -%}
+
+{% if admin_port -%}
+<VirtualHost *:{{ admin_port }}>
+    WSGIDaemonProcess {{ service_name }}-admin processes={{ admin_processes }} threads={{ threads }} user={{ user }} group={{ group }} \
+                      display-name=%{GROUP} lang=C.UTF-8 locale=C.UTF-8
+    WSGIProcessGroup {{ service_name }}-admin
+    WSGIScriptAlias / {{ admin_script }}
+    WSGIApplicationGroup %{GLOBAL}
+    WSGIPassAuthorization On
+    KeepAliveTimeout 75
+    MaxKeepAliveRequests 1000
+    <IfVersion >= 2.4>
+      ErrorLogFormat "%{cu}t %M"
+    </IfVersion>
+    ErrorLog /var/log/apache2/{{ service_name }}_error.log
+    CustomLog /var/log/apache2/{{ service_name }}_access.log combined
+
+    <Directory /usr/bin>
+        <IfVersion >= 2.4>
+            Require all granted
+        </IfVersion>
+        <IfVersion < 2.4>
+            Order allow,deny
+            Allow from all
+        </IfVersion>
+    </Directory>
+</VirtualHost>
+{% endif -%}
+
+{% if public_port -%}
+<VirtualHost *:{{ public_port }}>
+    WSGIDaemonProcess {{ service_name }}-public processes={{ public_processes }} threads={{ threads }} user={{ user }} group={{ group }} \
+                      display-name=%{GROUP} lang=C.UTF-8 locale=C.UTF-8
+    WSGIProcessGroup {{ service_name }}-public
+    WSGIScriptAlias / {{ public_script }}
+    WSGIApplicationGroup %{GLOBAL}
+    WSGIPassAuthorization On
+    KeepAliveTimeout 75
+    MaxKeepAliveRequests 1000
+    <IfVersion >= 2.4>
+      ErrorLogFormat "%{cu}t %M"
+    </IfVersion>
+    ErrorLog /var/log/apache2/{{ service_name }}_error.log
+    CustomLog /var/log/apache2/{{ service_name }}_access.log combined
+
+    <Directory /usr/bin>
+        <IfVersion >= 2.4>
+            Require all granted
+        </IfVersion>
+        <IfVersion < 2.4>
+            Order allow,deny
+            Allow from all
+        </IfVersion>
+    </Directory>
+</VirtualHost>
+{% endif -%}
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/wsgi-openstack-metadata.conf b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/wsgi-openstack-metadata.conf
new file mode 100644
index 00000000..de5f603f
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templates/wsgi-openstack-metadata.conf
@@ -0,0 +1,103 @@
+# Configuration file maintained by Juju. Local changes may be overwritten.
+
+{% if port -%}
+Listen {{ port }}
+{% endif -%}
+
+{% if admin_port -%}
+Listen {{ admin_port }}
+{% endif -%}
+
+{% if public_port -%}
+Listen {{ public_port }}
+{% endif -%}
+
+{% if wsgi_socket_rotation -%}
+WSGISocketRotation On
+{% else -%}
+WSGISocketRotation Off
+{% endif -%}
+
+{% if port -%}
+<VirtualHost *:{{ port }}>
+    WSGIDaemonProcess {{ service_name }} processes={{ processes }} threads={{ threads }} user={{ user }} group={{ group }} \
+                      display-name=%{GROUP} lang=C.UTF-8 locale=C.UTF-8
+    WSGIProcessGroup {{ service_name }}
+    WSGIScriptAlias / {{ script }}
+    WSGIApplicationGroup %{GLOBAL}
+    WSGIPassAuthorization On
+    KeepAliveTimeout 75
+    MaxKeepAliveRequests 1000
+    <IfVersion >= 2.4>
+      ErrorLogFormat "%{cu}t %M"
+    </IfVersion>
+    ErrorLog /var/log/apache2/{{ service_name }}_error.log
+    CustomLog /var/log/apache2/{{ service_name }}_access.log combined
+
+    <Directory /usr/bin>
+        <IfVersion >= 2.4>
+            Require all granted
+        </IfVersion>
+        <IfVersion < 2.4>
+            Order allow,deny
+            Allow from all
+        </IfVersion>
+    </Directory>
+</VirtualHost>
+{% endif -%}
+
+{% if admin_port -%}
+<VirtualHost *:{{ admin_port }}>
+    WSGIDaemonProcess {{ service_name }}-admin processes={{ admin_processes }} threads={{ threads }} user={{ user }} group={{ group }} \
+                      display-name=%{GROUP} lang=C.UTF-8 locale=C.UTF-8
+    WSGIProcessGroup {{ service_name }}-admin
+    WSGIScriptAlias / {{ admin_script }}
+    WSGIApplicationGroup %{GLOBAL}
+    WSGIPassAuthorization On
+    KeepAliveTimeout 75
+    MaxKeepAliveRequests 1000
+    <IfVersion >= 2.4>
+      ErrorLogFormat "%{cu}t %M"
+    </IfVersion>
+    ErrorLog /var/log/apache2/{{ service_name }}_error.log
+    CustomLog /var/log/apache2/{{ service_name }}_access.log combined
+
+    <Directory /usr/bin>
+        <IfVersion >= 2.4>
+            Require all granted
+        </IfVersion>
+        <IfVersion < 2.4>
+            Order allow,deny
+            Allow from all
+        </IfVersion>
+    </Directory>
+</VirtualHost>
+{% endif -%}
+
+{% if public_port -%}
+<VirtualHost *:{{ public_port }}>
+    WSGIDaemonProcess {{ service_name }}-public processes={{ public_processes }} threads={{ threads }} user={{ user }} group={{ group }} \
+                      display-name=%{GROUP} lang=C.UTF-8 locale=C.UTF-8
+    WSGIProcessGroup {{ service_name }}-public
+    WSGIScriptAlias / {{ public_script }}
+    WSGIApplicationGroup %{GLOBAL}
+    WSGIPassAuthorization On
+    KeepAliveTimeout 75
+    MaxKeepAliveRequests 1000
+    <IfVersion >= 2.4>
+      ErrorLogFormat "%{cu}t %M"
+    </IfVersion>
+    ErrorLog /var/log/apache2/{{ service_name }}_error.log
+    CustomLog /var/log/apache2/{{ service_name }}_access.log combined
+
+    <Directory /usr/bin>
+        <IfVersion >= 2.4>
+            Require all granted
+        </IfVersion>
+        <IfVersion < 2.4>
+            Order allow,deny
+            Allow from all
+        </IfVersion>
+    </Directory>
+</VirtualHost>
+{% endif -%}
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templating.py b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templating.py
new file mode 100644
index 00000000..3b7c6a9f
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/templating.py
@@ -0,0 +1,370 @@
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
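As an illustrative aside (not part of the synced helpers): templating.py below builds a release-stacked jinja2 ChoiceLoader; this sketch shows the resolution order it produces, with the directory names assumed for the example.

from jinja2 import ChoiceLoader, Environment, FileSystemLoader

loader = ChoiceLoader([
    FileSystemLoader('templates/mitaka'),  # newest matching release first
    FileSystemLoader('templates/liberty'),
    FileSystemLoader('templates'),         # base dir as the final fallback
])
env = Environment(loader=loader)
# env.get_template('ceph.conf') returns templates/mitaka/ceph.conf if it
# exists, otherwise it falls back down the list in order.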
+
+import os
+
+from charmhelpers.fetch import apt_install, apt_update
+from charmhelpers.core.hookenv import (
+    log,
+    ERROR,
+    INFO,
+    TRACE
+)
+from charmhelpers.contrib.openstack.utils import OPENSTACK_CODENAMES
+
+try:
+    from jinja2 import FileSystemLoader, ChoiceLoader, Environment, exceptions
+except ImportError:
+    apt_update(fatal=True)
+    apt_install('python3-jinja2', fatal=True)
+    from jinja2 import FileSystemLoader, ChoiceLoader, Environment, exceptions
+
+
+class OSConfigException(Exception):
+    pass
+
+
+def get_loader(templates_dir, os_release):
+    """
+    Create a jinja2.ChoiceLoader containing template dirs up to
+    and including os_release. If a release's template directory
+    is missing at templates_dir, it will be omitted from the loader.
+    templates_dir is added to the bottom of the search list as a base
+    loading dir.
+
+    A charm may also ship a templates dir with this module
+    and it will be appended to the bottom of the search list, eg::
+
+        hooks/charmhelpers/contrib/openstack/templates
+
+    :param templates_dir (str): Base template directory containing release
+        sub-directories.
+    :param os_release (str): OpenStack release codename to construct template
+        loader.
+    :returns: jinja2.ChoiceLoader constructed with a list of
+        jinja2.FilesystemLoaders, ordered in descending
+        order by OpenStack release.
+    """
+    tmpl_dirs = [(rel, os.path.join(templates_dir, rel))
+                 for rel in OPENSTACK_CODENAMES.values()]
+
+    if not os.path.isdir(templates_dir):
+        log('Templates directory not found @ %s.' % templates_dir,
+            level=ERROR)
+        raise OSConfigException
+
+    # the bottom contains templates_dir and possibly a common templates dir
+    # shipped with the helper.
+    loaders = [FileSystemLoader(templates_dir)]
+    helper_templates = os.path.join(os.path.dirname(__file__), 'templates')
+    if os.path.isdir(helper_templates):
+        loaders.append(FileSystemLoader(helper_templates))
+
+    for rel, tmpl_dir in tmpl_dirs:
+        if os.path.isdir(tmpl_dir):
+            loaders.insert(0, FileSystemLoader(tmpl_dir))
+        if rel == os_release:
+            break
+    # demote this log to the lowest level; we don't really need to see these
+    # logs in production even when debugging.
+    log('Creating choice loader with dirs: %s' %
+        [l.searchpath for l in loaders], level=TRACE)
+    return ChoiceLoader(loaders)
+
+
+class OSConfigTemplate(object):
+    """
+    Associates a config file template with a list of context generators.
+    Responsible for constructing a template context based on those generators.
+    """
+
+    def __init__(self, config_file, contexts, config_template=None):
+        self.config_file = config_file
+
+        if hasattr(contexts, '__call__'):
+            self.contexts = [contexts]
+        else:
+            self.contexts = contexts
+
+        self._complete_contexts = []
+
+        self.config_template = config_template
+
+    def context(self):
+        ctxt = {}
+        for context in self.contexts:
+            _ctxt = context()
+            if _ctxt:
+                ctxt.update(_ctxt)
+                # track interfaces for every complete context.
+                [self._complete_contexts.append(interface)
+                 for interface in context.interfaces
+                 if interface not in self._complete_contexts]
+        return ctxt
+
+    def complete_contexts(self):
+        '''
+        Return a list of interfaces that have satisfied contexts.
+        '''
+        if self._complete_contexts:
+            return self._complete_contexts
+        self.context()
+        return self._complete_contexts
+
+    @property
+    def is_string_template(self):
+        """:returns: Boolean if this instance is a template initialised with a string"""
+        return self.config_template is not None
+
+
+class OSConfigRenderer(object):
+    """
+    This class provides a common templating system to be used by OpenStack
+    charms. It is intended to help charms share common code and templates,
+    and ease the burden of managing config templates across multiple OpenStack
+    releases.
+
+    Basic usage::
+
+        # import some common context generators from charmhelpers
+        from charmhelpers.contrib.openstack import context
+
+        # Create a renderer object for a specific OS release.
+        configs = OSConfigRenderer(templates_dir='/tmp/templates',
+                                   openstack_release='folsom')
+        # register some config files with context generators.
+        configs.register(config_file='/etc/nova/nova.conf',
+                         contexts=[context.SharedDBContext(),
+                                   context.AMQPContext()])
+        configs.register(config_file='/etc/nova/api-paste.ini',
+                         contexts=[context.IdentityServiceContext()])
+        configs.register(config_file='/etc/haproxy/haproxy.conf',
+                         contexts=[context.HAProxyContext()])
+        configs.register(config_file='/etc/keystone/policy.d/extra.cfg',
+                         contexts=[context.ExtraPolicyContext(),
+                                   context.KeystoneContext()],
+                         config_template=hookenv.config('extra-policy'))
+        # write out a single config
+        configs.write('/etc/nova/nova.conf')
+        # write out all registered configs
+        configs.write_all()
+
+    **OpenStack Releases and template loading**
+
+    When the object is instantiated, it is associated with a specific OS
+    release. This dictates how the template loader will be constructed.
+
+    The constructed loader attempts to load the template from several places
+    in the following order:
+    - from the most recent OS release-specific template dir (if one exists)
+    - the base templates_dir
+    - a template directory shipped in the charm with this helper file.
+
+    For the example above, '/tmp/templates' contains the following structure::
+
+        /tmp/templates/nova.conf
+        /tmp/templates/api-paste.ini
+        /tmp/templates/grizzly/api-paste.ini
+        /tmp/templates/havana/api-paste.ini
+
+    Since it was registered with the grizzly release, it first searches
+    the grizzly directory for nova.conf, then the templates dir.
+
+    When writing api-paste.ini, it will find the template in the grizzly
+    directory.
+
+    If the object were created with folsom, it would fall back to the
+    base templates dir for its api-paste.ini template.
+
+    This system should help manage changes in config files through
+    openstack releases, allowing charms to fall back to the most recently
+    updated config template for a given release.
+
+    The haproxy.conf, since it is not shipped in the templates dir, will
+    be loaded from the module directory's template directory, eg
+    $CHARM/hooks/charmhelpers/contrib/openstack/templates. This allows
+    us to ship common templates (haproxy, apache) with the helpers.
+
+    **Context generators**
+
+    Context generators are used to generate template contexts during hook
+    execution. Doing so may require inspecting service relations, charm
+    config, etc. When registered, a config file is associated with a list
+    of generators. When a template is rendered and written, all context
+    generators are called in a chain to generate the context dictionary
+    passed to the jinja2 template. See context.py for more info.
+ """ + def __init__(self, templates_dir, openstack_release): + if not os.path.isdir(templates_dir): + log('Could not locate templates dir %s' % templates_dir, + level=ERROR) + raise OSConfigException + + self.templates_dir = templates_dir + self.openstack_release = openstack_release + self.templates = {} + self._tmpl_env = None + + if None in [Environment, ChoiceLoader, FileSystemLoader]: + # if this code is running, the object is created pre-install hook. + # jinja2 shouldn't get touched until the module is reloaded on next + # hook execution, with proper jinja2 bits successfully imported. + apt_install('python3-jinja2') + + def register(self, config_file, contexts, config_template=None): + """ + Register a config file with a list of context generators to be called + during rendering. + config_template can be used to load a template from a string instead of + using template loaders and template files. + :param config_file (str): a path where a config file will be rendered + :param contexts (list): a list of context dictionaries with kv pairs + :param config_template (str): an optional template string to use + """ + self.templates[config_file] = OSConfigTemplate( + config_file=config_file, + contexts=contexts, + config_template=config_template + ) + log('Registered config file: {}'.format(config_file), + level=INFO) + + def _get_tmpl_env(self): + if not self._tmpl_env: + loader = get_loader(self.templates_dir, self.openstack_release) + self._tmpl_env = Environment(loader=loader) + + def _get_template(self, template): + self._get_tmpl_env() + template = self._tmpl_env.get_template(template) + log('Loaded template from {}'.format(template.filename), + level=INFO) + return template + + def _get_template_from_string(self, ostmpl): + ''' + Get a jinja2 template object from a string. + :param ostmpl: OSConfigTemplate to use as a data source. + ''' + self._get_tmpl_env() + template = self._tmpl_env.from_string(ostmpl.config_template) + log('Loaded a template from a string for {}'.format( + ostmpl.config_file), + level=INFO) + return template + + def render(self, config_file): + if config_file not in self.templates: + log('Config not registered: {}'.format(config_file), level=ERROR) + raise OSConfigException + + ostmpl = self.templates[config_file] + ctxt = ostmpl.context() + + if ostmpl.is_string_template: + template = self._get_template_from_string(ostmpl) + log('Rendering from a string template: ' + '{}'.format(config_file), + level=INFO) + else: + _tmpl = os.path.basename(config_file) + try: + template = self._get_template(_tmpl) + except exceptions.TemplateNotFound: + # if no template is found with basename, try looking + # for it using a munged full path, eg: + # /etc/apache2/apache2.conf -> etc_apache2_apache2.conf + _tmpl = '_'.join(config_file.split('/')[1:]) + try: + template = self._get_template(_tmpl) + except exceptions.TemplateNotFound as e: + log('Could not load template from {} by {} or {}.' + ''.format( + self.templates_dir, + os.path.basename(config_file), + _tmpl + ), + level=ERROR) + raise e + + log('Rendering from template: {}'.format(config_file), + level=INFO) + return template.render(ctxt) + + def write(self, config_file): + """ + Write a single config file, raises if config file is not registered. + """ + if config_file not in self.templates: + log('Config not registered: %s' % config_file, level=ERROR) + raise OSConfigException + + _out = self.render(config_file).encode('UTF-8') + + with open(config_file, 'wb') as out: + out.write(_out) + + log('Wrote template %s.' 
% config_file, level=INFO)
+
+    def write_all(self):
+        """
+        Write out all registered config files.
+        """
+        for k in self.templates.keys():
+            self.write(k)
+
+    def set_release(self, openstack_release):
+        """
+        Resets the template environment and generates a new template loader
+        based on the new openstack release.
+        """
+        self._tmpl_env = None
+        self.openstack_release = openstack_release
+        self._get_tmpl_env()
+
+    def complete_contexts(self):
+        '''
+        Returns a list of context interfaces that yield a complete context.
+        '''
+        interfaces = []
+        for i in self.templates.values():
+            interfaces.extend(i.complete_contexts())
+        return interfaces
+
+    def get_incomplete_context_data(self, interfaces):
+        '''
+        Return dictionary of relation status of interfaces and any missing
+        required context data. Example:
+            {'amqp': {'missing_data': ['rabbitmq_password'], 'related': True},
+             'zeromq-configuration': {'related': False}}
+        '''
+        incomplete_context_data = {}
+
+        for i in self.templates.values():
+            for context in i.contexts:
+                for interface in interfaces:
+                    related = False
+                    if interface in context.interfaces:
+                        related = context.get_related()
+                        missing_data = context.missing_data
+                        if missing_data:
+                            incomplete_context_data[interface] = {'missing_data': missing_data}
+                    if related:
+                        if incomplete_context_data.get(interface):
+                            incomplete_context_data[interface].update({'related': True})
+                        else:
+                            incomplete_context_data[interface] = {'related': True}
+                    else:
+                        incomplete_context_data[interface] = {'related': False}
+        return incomplete_context_data
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/utils.py b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/utils.py
new file mode 100644
index 00000000..82c28d8e
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/utils.py
@@ -0,0 +1,2695 @@
+# Copyright 2014-2021 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Common python helper functions used for OpenStack charms.
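As an illustrative aside (not part of the synced helpers): the OPENSTACK_CODENAMES mapping below is an OrderedDict keyed by version string, which supports lookups in both directions; a minimal sketch with a trimmed copy of the map:

from collections import OrderedDict

codenames = OrderedDict([('2023.1', 'antelope'), ('2023.2', 'bobcat')])

def codename_from_version(version):
    return codenames.get(version)  # '2023.1' -> 'antelope'

def version_from_codename(codename):
    for version, name in codenames.items():
        if name == codename:
            return version  # 'bobcat' -> '2023.2'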
+from collections import OrderedDict, namedtuple +from functools import partial, wraps + +import subprocess +import json +import operator +import os +import sys +import re +import itertools +import functools + +import traceback +import uuid +import yaml + +from charmhelpers import deprecate + +from charmhelpers.contrib.network import ip + +from charmhelpers.core import decorators, unitdata + +import charmhelpers.contrib.openstack.deferred_events as deferred_events + +from charmhelpers.core.hookenv import ( + WORKLOAD_STATES, + action_fail, + action_get, + action_set, + config, + expected_peer_units, + expected_related_units, + log as juju_log, + charm_dir, + INFO, + ERROR, + metadata, + related_units, + relation_get, + relation_id, + relation_ids, + relation_set, + service_name as ch_service_name, + status_set, + hook_name, + application_version_set, + cached, + leader_set, + leader_get, + local_unit, +) + +from charmhelpers.core.strutils import ( + BasicStringComparator, + bool_from_string, +) + +from charmhelpers.contrib.storage.linux.lvm import ( + deactivate_lvm_volume_group, + is_lvm_physical_volume, + remove_lvm_physical_volume, +) + +from charmhelpers.contrib.network.ip import ( + get_ipv6_addr, + is_ipv6, + port_has_listener, +) + +from charmhelpers.core.host import ( + lsb_release, + mounts, + umount, + service_running, + service_pause, + service_resume, + service_stop, + service_start, + restart_on_change_helper, +) + +from charmhelpers.fetch import ( + apt_cache, + apt_install, + import_key as fetch_import_key, + add_source as fetch_add_source, + SourceConfigError, + GPGKeyError, + get_upstream_version, + filter_installed_packages, + filter_missing_packages, + ubuntu_apt_pkg as apt, + OPENSTACK_RELEASES, + UBUNTU_OPENSTACK_RELEASE, +) + +from charmhelpers.fetch.snap import ( + snap_install, + snap_refresh, + valid_snap_channel, +) + +from charmhelpers.contrib.storage.linux.utils import is_block_device, zap_disk +from charmhelpers.contrib.storage.linux.loopback import ensure_loopback_device +from charmhelpers.contrib.openstack.exceptions import OSContextError, ServiceActionError +from charmhelpers.contrib.openstack.policyd import ( + policyd_status_message_prefix, + POLICYD_CONFIG_NAME, +) + +from charmhelpers.contrib.openstack.ha.utils import ( + expect_ha, +) + +CLOUD_ARCHIVE_URL = "http://ubuntu-cloud.archive.canonical.com/ubuntu" +CLOUD_ARCHIVE_KEY_ID = '5EDB1B62EC4926EA' + +DISTRO_PROPOSED = ('deb http://archive.ubuntu.com/ubuntu/ %s-proposed ' + 'restricted main multiverse universe') + +OPENSTACK_CODENAMES = OrderedDict([ + # NOTE(lourot): 'yyyy.i' isn't actually mapping with any real version + # number. This just means the i-th version of the year yyyy. 
+ ('2011.2', 'diablo'), + ('2012.1', 'essex'), + ('2012.2', 'folsom'), + ('2013.1', 'grizzly'), + ('2013.2', 'havana'), + ('2014.1', 'icehouse'), + ('2014.2', 'juno'), + ('2015.1', 'kilo'), + ('2015.2', 'liberty'), + ('2016.1', 'mitaka'), + ('2016.2', 'newton'), + ('2017.1', 'ocata'), + ('2017.2', 'pike'), + ('2018.1', 'queens'), + ('2018.2', 'rocky'), + ('2019.1', 'stein'), + ('2019.2', 'train'), + ('2020.1', 'ussuri'), + ('2020.2', 'victoria'), + ('2021.1', 'wallaby'), + ('2021.2', 'xena'), + ('2022.1', 'yoga'), + ('2022.2', 'zed'), + ('2023.1', 'antelope'), + ('2023.2', 'bobcat'), + ('2024.1', 'caracal'), +]) + +# The ugly duckling - must list releases oldest to newest +SWIFT_CODENAMES = OrderedDict([ + ('diablo', + ['1.4.3']), + ('essex', + ['1.4.8']), + ('folsom', + ['1.7.4']), + ('grizzly', + ['1.7.6', '1.7.7', '1.8.0']), + ('havana', + ['1.9.0', '1.9.1', '1.10.0']), + ('icehouse', + ['1.11.0', '1.12.0', '1.13.0', '1.13.1']), + ('juno', + ['2.0.0', '2.1.0', '2.2.0']), + ('kilo', + ['2.2.1', '2.2.2']), + ('liberty', + ['2.3.0', '2.4.0', '2.5.0']), + ('mitaka', + ['2.5.0', '2.6.0', '2.7.0']), + ('newton', + ['2.8.0', '2.9.0', '2.10.0']), + ('ocata', + ['2.11.0', '2.12.0', '2.13.0']), + ('pike', + ['2.13.0', '2.15.0']), + ('queens', + ['2.16.0', '2.17.0']), + ('rocky', + ['2.18.0', '2.19.0']), + ('stein', + ['2.20.0', '2.21.0']), + ('train', + ['2.22.0', '2.23.0']), + ('ussuri', + ['2.24.0', '2.25.0']), + ('victoria', + ['2.25.0', '2.26.0']), +]) + +# >= Liberty version->codename mapping +PACKAGE_CODENAMES = { + 'nova-common': OrderedDict([ + ('12', 'liberty'), + ('13', 'mitaka'), + ('14', 'newton'), + ('15', 'ocata'), + ('16', 'pike'), + ('17', 'queens'), + ('18', 'rocky'), + ('19', 'stein'), + ('20', 'train'), + ('21', 'ussuri'), + ('22', 'victoria'), + ]), + 'neutron-common': OrderedDict([ + ('7', 'liberty'), + ('8', 'mitaka'), + ('9', 'newton'), + ('10', 'ocata'), + ('11', 'pike'), + ('12', 'queens'), + ('13', 'rocky'), + ('14', 'stein'), + ('15', 'train'), + ('16', 'ussuri'), + ('17', 'victoria'), + ]), + 'cinder-common': OrderedDict([ + ('7', 'liberty'), + ('8', 'mitaka'), + ('9', 'newton'), + ('10', 'ocata'), + ('11', 'pike'), + ('12', 'queens'), + ('13', 'rocky'), + ('14', 'stein'), + ('15', 'train'), + ('16', 'ussuri'), + ('17', 'victoria'), + ]), + 'keystone': OrderedDict([ + ('8', 'liberty'), + ('9', 'mitaka'), + ('10', 'newton'), + ('11', 'ocata'), + ('12', 'pike'), + ('13', 'queens'), + ('14', 'rocky'), + ('15', 'stein'), + ('16', 'train'), + ('17', 'ussuri'), + ('18', 'victoria'), + ]), + 'horizon-common': OrderedDict([ + ('8', 'liberty'), + ('9', 'mitaka'), + ('10', 'newton'), + ('11', 'ocata'), + ('12', 'pike'), + ('13', 'queens'), + ('14', 'rocky'), + ('15', 'stein'), + ('16', 'train'), + ('18', 'ussuri'), # Note this was actually 17.0 - 18.3 + ('19', 'victoria'), # Note this is really 18.6 + ]), + 'ceilometer-common': OrderedDict([ + ('5', 'liberty'), + ('6', 'mitaka'), + ('7', 'newton'), + ('8', 'ocata'), + ('9', 'pike'), + ('10', 'queens'), + ('11', 'rocky'), + ('12', 'stein'), + ('13', 'train'), + ('14', 'ussuri'), + ('15', 'victoria'), + ]), + 'heat-common': OrderedDict([ + ('5', 'liberty'), + ('6', 'mitaka'), + ('7', 'newton'), + ('8', 'ocata'), + ('9', 'pike'), + ('10', 'queens'), + ('11', 'rocky'), + ('12', 'stein'), + ('13', 'train'), + ('14', 'ussuri'), + ('15', 'victoria'), + ]), + 'glance-common': OrderedDict([ + ('11', 'liberty'), + ('12', 'mitaka'), + ('13', 'newton'), + ('14', 'ocata'), + ('15', 'pike'), + ('16', 'queens'), + ('17', 'rocky'), + ('18', 
'stein'), + ('19', 'train'), + ('20', 'ussuri'), + ('21', 'victoria'), + ]), + 'openstack-dashboard': OrderedDict([ + ('8', 'liberty'), + ('9', 'mitaka'), + ('10', 'newton'), + ('11', 'ocata'), + ('12', 'pike'), + ('13', 'queens'), + ('14', 'rocky'), + ('15', 'stein'), + ('16', 'train'), + ('18', 'ussuri'), + ('19', 'victoria'), + ]), +} + +DEFAULT_LOOPBACK_SIZE = '5G' + +DB_SERIES_UPGRADING_KEY = 'cluster-series-upgrading' + +DB_MAINTENANCE_KEYS = [DB_SERIES_UPGRADING_KEY] + + +class CompareOpenStackReleases(BasicStringComparator): + """Provide comparisons of OpenStack releases. + + Use in the form of + + if CompareOpenStackReleases(release) > 'mitaka': + # do something with mitaka + """ + _list = OPENSTACK_RELEASES + + +def error_out(msg): + juju_log("FATAL ERROR: %s" % msg, level='ERROR') + sys.exit(1) + + +def get_installed_semantic_versioned_packages(): + '''Get a list of installed packages which have OpenStack semantic versioning + + :returns List of installed packages + :rtype: [pkg1, pkg2, ...] + ''' + return filter_missing_packages(PACKAGE_CODENAMES.keys()) + + +def get_os_codename_install_source(src): + '''Derive OpenStack release codename from a given installation source.''' + ubuntu_rel = lsb_release()['DISTRIB_CODENAME'] + rel = '' + if src is None: + return rel + if src in OPENSTACK_RELEASES: + return src + if src in ['distro', 'distro-proposed', 'proposed']: + try: + rel = UBUNTU_OPENSTACK_RELEASE[ubuntu_rel] + except KeyError: + e = 'Could not derive openstack release for '\ + 'this Ubuntu release: %s' % ubuntu_rel + error_out(e) + return rel + + if src.startswith('cloud:'): + ca_rel = src.split(':')[1] + ca_rel = ca_rel.split('-')[1].split('/')[0] + return ca_rel + + # Best guess match based on deb string provided + if (src.startswith('deb') or + src.startswith('ppa') or + src.startswith('snap')): + for v in OPENSTACK_CODENAMES.values(): + if v in src: + return v + + +def get_os_version_install_source(src): + codename = get_os_codename_install_source(src) + return get_os_version_codename(codename) + + +def get_os_codename_version(vers): + '''Determine OpenStack codename from version number.''' + try: + return OPENSTACK_CODENAMES[vers] + except KeyError: + e = 'Could not determine OpenStack codename for version %s' % vers + error_out(e) + + +def get_os_version_codename(codename, version_map=OPENSTACK_CODENAMES, + raise_exception=False): + '''Determine OpenStack version number from codename.''' + for k, v in version_map.items(): + if v == codename: + return k + e = 'Could not derive OpenStack version for '\ + 'codename: %s' % codename + if raise_exception: + raise ValueError(str(e)) + error_out(e) + + +def get_swift_codename(version): + '''Determine OpenStack codename that corresponds to swift version.''' + codenames = [k for k, v in SWIFT_CODENAMES.items() if version in v] + + if len(codenames) > 1: + # If more than one release codename contains this version we determine + # the actual codename based on the highest available install source. 
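To make the install-source parsing in get_os_codename_install_source() concrete, here is a small sketch of its 'cloud:' branch, plus an emulation of the ordering that CompareOpenStackReleases provides (the release list here is abbreviated for illustration; the real class uses OPENSTACK_RELEASES):

# 'cloud:focal-victoria' -> 'victoria', as in the function above.
src = 'cloud:focal-victoria'
ca_rel = src.split(':')[1].split('-')[1].split('/')[0]
assert ca_rel == 'victoria'

# CompareOpenStackReleases orders codenames by list position.
releases = ['mitaka', 'newton', 'ocata', 'pike']  # abbreviated

def is_newer(a, b):
    return releases.index(a) > releases.index(b)

assert is_newer('pike', 'mitaka')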
+ for codename in reversed(codenames): + releases = UBUNTU_OPENSTACK_RELEASE + release = [k for k, v in releases.items() if codename in v] + ret = (subprocess + .check_output(['apt-cache', 'policy', 'swift']) + .decode('UTF-8')) + if codename in ret or release[0] in ret: + return codename + elif len(codenames) == 1: + return codenames[0] + + # NOTE: fallback - attempt to match with just major.minor version + match = re.match(r'^(\d+)\.(\d+)', version) + if match: + major_minor_version = match.group(0) + for codename, versions in SWIFT_CODENAMES.items(): + for release_version in versions: + if release_version.startswith(major_minor_version): + return codename + + return None + + +def get_os_codename_package(package, fatal=True): + """Derive OpenStack release codename from an installed package. + + Initially, see if the openstack-release pkg is available (by trying to + install it) and use it instead. + + If it isn't then it falls back to the existing method of checking the + version of the package passed and then resolving the version from that + using lookup tables. + + Note: if possible, charms should use get_installed_os_version() to + determine the version of the "openstack-release" pkg. + + :param package: the package to test for version information. + :type package: str + :param fatal: If True (default), then die via error_out() + :type fatal: bool + :returns: the OpenStack release codename (e.g. ussuri) + :rtype: str + """ + + codename = get_installed_os_version() + if codename: + return codename + + if snap_install_requested(): + cmd = ['snap', 'list', package] + try: + out = subprocess.check_output(cmd).decode('UTF-8') + except subprocess.CalledProcessError: + return None + lines = out.split('\n') + for line in lines: + if package in line: + # Second item in list is Version + return line.split()[1] + + cache = apt_cache() + + try: + pkg = cache[package] + except Exception: + if not fatal: + return None + # the package is unknown to the current apt cache. + e = 'Could not determine version of package with no installation '\ + 'candidate: %s' % package + error_out(e) + + if not pkg.current_ver: + if not fatal: + return None + # package is known, but no version is currently installed. 
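The major.minor fallback at the end of get_swift_codename() above can also be sketched standalone (version table abbreviated; the real function iterates SWIFT_CODENAMES):

import re

SWIFT = {'ussuri': ['2.24.0', '2.25.0'],
         'victoria': ['2.25.0', '2.26.0']}

def codename_by_major_minor(version):
    # Match just '2.26' from '2.26.1' and find a codename that ships
    # any 2.26.x release.
    match = re.match(r'^(\d+)\.(\d+)', version)
    if not match:
        return None
    prefix = match.group(0)
    for codename, versions in SWIFT.items():
        if any(v.startswith(prefix) for v in versions):
            return codename
    return None

assert codename_by_major_minor('2.26.1') == 'victoria'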
+        e = 'Could not determine version of uninstalled package: %s' % package
+        error_out(e)
+
+    vers = apt.upstream_version(pkg.current_ver.ver_str)
+    if 'swift' in pkg.name:
+        # Fully x.y.z match for swift versions
+        match = re.match(r'^(\d+)\.(\d+)\.(\d+)', vers)
+    else:
+        # x.y match only for 20XX.X
+        # and ignore patch level for other packages
+        match = re.match(r'^(\d+)\.(\d+)', vers)
+
+    if match:
+        vers = match.group(0)
+
+    # Generate a major version number for newer semantic
+    # versions of openstack projects
+    major_vers = vers.split('.')[0]
+    # >= Liberty independent project versions
+    if (package in PACKAGE_CODENAMES and
+            major_vers in PACKAGE_CODENAMES[package]):
+        return PACKAGE_CODENAMES[package][major_vers]
+    else:
+        # < Liberty co-ordinated project versions
+        try:
+            if 'swift' in pkg.name:
+                return get_swift_codename(vers)
+            else:
+                return OPENSTACK_CODENAMES[vers]
+        except KeyError:
+            if not fatal:
+                return None
+            e = 'Could not determine OpenStack codename for version %s' % vers
+            error_out(e)
+
+
+def get_os_version_package(pkg, fatal=True):
+    '''Derive OpenStack version number from an installed package.'''
+    codename = get_os_codename_package(pkg, fatal=fatal)
+
+    if not codename:
+        return None
+
+    if 'swift' in pkg:
+        vers_map = SWIFT_CODENAMES
+        for cname, version in vers_map.items():
+            if cname == codename:
+                return version[-1]
+    else:
+        vers_map = OPENSTACK_CODENAMES
+        for version, cname in vers_map.items():
+            if cname == codename:
+                return version
+
+
+def get_installed_os_version():
+    """Determine the OpenStack release code name from openstack-release pkg.
+
+    This uses the "openstack-release" pkg (if it exists) to return the
+    OpenStack release codename (e.g. ussuri, mitaka, ocata, etc.)
+
+    Note, it caches the result so that it is only done once per hook.
+
+    :returns: the OpenStack release codename, if available
+    :rtype: Optional[str]
+    """
+    @cached
+    def _do_install():
+        apt_install(filter_installed_packages(['openstack-release']),
+                    fatal=False, quiet=True)
+
+    _do_install()
+    return openstack_release().get('OPENSTACK_CODENAME')
+
+
+def openstack_release():
+    """Return the contents of /etc/openstack-release as a dict."""
+    d = {}
+    try:
+        with open('/etc/openstack-release', 'r') as lsb:
+            for l in lsb:
+                s = l.split('=')
+                if len(s) != 2:
+                    continue
+                d[s[0].strip()] = s[1].strip()
+    except FileNotFoundError:
+        pass
+    return d
+
+
+# Module local cache variable for the os_release.
+_os_rel = None
+
+
+def reset_os_release():
+    '''Unset the cached os_release version'''
+    global _os_rel
+    _os_rel = None
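The /etc/openstack-release parsing in openstack_release() above is a plain KEY=value reader; a standalone sketch of the same loop:

def parse_release_lines(lines):
    # Mirrors openstack_release(): keep only well-formed KEY=value
    # lines, stripping whitespace from key and value.
    d = {}
    for line in lines:
        parts = line.split('=')
        if len(parts) != 2:
            continue
        d[parts[0].strip()] = parts[1].strip()
    return d

assert parse_release_lines(['OPENSTACK_CODENAME=ussuri\n', '# comment\n']) == \
    {'OPENSTACK_CODENAME': 'ussuri'}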
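Similarly, get_source_and_pgp_key() (defined a little further below) splits an optional '|keyid' suffix off a source spec; a sketch of the same behaviour under a local name:

def split_source_and_key(source_and_key):
    # 'cloud:focal-victoria|ABCD1234' -> ('cloud:focal-victoria', 'ABCD1234')
    # 'distro'                        -> ('distro', None)
    try:
        source, key = source_and_key.split('|', 2)
        return source, key or None
    except ValueError:
        return source_and_key, None

assert split_source_and_key('cloud:focal-victoria|ABCD1234') == \
    ('cloud:focal-victoria', 'ABCD1234')
assert split_source_and_key('distro') == ('distro', None)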
+
+
+def os_release(package, base=None, reset_cache=False, source_key=None):
+    """Returns OpenStack release codename from a cached global.
+
+    If reset_cache then unset the cached os_release version and return the
+    freshly determined version.
+
+    If the codename cannot be determined from either an installed package or
+    the installation source, the earliest release supported by the charm
+    should be returned.
+
+    :param package: Name of package to determine release from
+    :type package: str
+    :param base: Fallback codename if endeavours to determine from package
+                 fail
+    :type base: Optional[str]
+    :param reset_cache: Reset any cached codename value
+    :type reset_cache: bool
+    :param source_key: Name of source configuration option
+                       (default: 'openstack-origin')
+    :type source_key: Optional[str]
+    :returns: OpenStack release codename
+    :rtype: str
+    """
+    source_key = source_key or 'openstack-origin'
+    if not base:
+        base = UBUNTU_OPENSTACK_RELEASE[lsb_release()['DISTRIB_CODENAME']]
+    global _os_rel
+    if reset_cache:
+        reset_os_release()
+    if _os_rel:
+        return _os_rel
+    _os_rel = (
+        get_os_codename_package(package, fatal=False) or
+        get_os_codename_install_source(config(source_key)) or
+        base)
+    return _os_rel
+
+
+@deprecate("moved to charmhelpers.fetch.import_key()", "2017-07", log=juju_log)
+def import_key(keyid):
+    """Import a key, either ASCII-armored or a GPG key id.
+
+    @param keyid: the key in ASCII armor format, or a GPG key id.
+    @raises SystemExit() via sys.exit() on failure.
+    """
+    try:
+        return fetch_import_key(keyid)
+    except GPGKeyError as e:
+        error_out("Could not import key: {}".format(str(e)))
+
+
+def get_source_and_pgp_key(source_and_key):
+    """Look for a pgp key ID or ascii-armor key in the given input.
+
+    :param source_and_key: String, "source_spec|keyid" where '|keyid' is
+        optional.
+    :returns (source_spec, key_id OR None) as a tuple. Returns None for
+        key_id if there was no '|' in the source_and_key string.
+    """
+    try:
+        source, key = source_and_key.split('|', 2)
+        return source, key or None
+    except ValueError:
+        return source_and_key, None
+
+
+@deprecate("use charmhelpers.fetch.add_source() instead.",
+           "2017-07", log=juju_log)
+def configure_installation_source(source_plus_key):
+    """Configure an installation source.
+
+    The functionality is provided by charmhelpers.fetch.add_source().
+    The difference between the two functions is that add_source() requires
+    the key to be passed directly, whereas this function accepts an optional
+    key appended to the source specification with a '|' separator.
+
+    Another difference from add_source() is that this function calls
+    sys.exit(1) if the configuration fails, whereas add_source() raises
+    SourceConfigurationError(). Finally, add_source() silently fails (with a
+    juju_log message) if there is no matching source to configure, whereas
+    this function fails with a sys.exit(1).
+
+    :param source_plus_key: string of the form 'source_spec|keyid' -- see
+        get_source_and_pgp_key() above for details.
+
+    Note that the behaviour on error is to log the error to the juju log and
+    then call sys.exit(1).
+    """
+    if source_plus_key.startswith('snap'):
+        # Do nothing for snap installs
+        return
+    # extract the key if there is one, denoted by a '|' in the rel
+    source, key = get_source_and_pgp_key(source_plus_key)
+
+    # handle the ordinary sources via add_source
+    try:
+        fetch_add_source(source, key, fail_invalid=True)
+    except SourceConfigError as se:
+        error_out(str(se))
+
+
+def config_value_changed(option):
+    """
+    Determine if config value changed since last call to this function.
+    """
+    hook_data = unitdata.HookData()
+    with hook_data():
+        db = unitdata.kv()
+        current = config(option)
+        saved = db.get(option)
+        db.set(option, current)
+        if saved is None:
+            return False
+        return current != saved
+
+
+def get_endpoint_key(service_name, relation_id, unit_name):
+    """Return the key used to refer to an ep changed notification from a unit.
+ + :param service_name: Service name eg nova, neutron, placement etc + :type service_name: str + :param relation_id: The id of the relation the unit is on. + :type relation_id: str + :param unit_name: The name of the unit publishing the notification. + :type unit_name: str + :returns: The key used to refer to an ep changed notification from a unit + :rtype: str + """ + return '{}-{}-{}'.format( + service_name, + relation_id.replace(':', '_'), + unit_name.replace('/', '_')) + + +def get_endpoint_notifications(service_names, rel_name='identity-service'): + """Return all notifications for the given services. + + :param service_names: List of service name. + :type service_name: List + :param rel_name: Name of the relation to query + :type rel_name: str + :returns: A dict containing the source of the notification and its nonce. + :rtype: Dict[str, str] + """ + notifications = {} + for rid in relation_ids(rel_name): + for unit in related_units(relid=rid): + ep_changed_json = relation_get( + rid=rid, + unit=unit, + attribute='ep_changed') + if ep_changed_json: + ep_changed = json.loads(ep_changed_json) + for service in service_names: + if ep_changed.get(service): + key = get_endpoint_key(service, rid, unit) + notifications[key] = ep_changed[service] + return notifications + + +def endpoint_changed(service_name, rel_name='identity-service'): + """Whether a new notification has been received for an endpoint. + + :param service_name: Service name eg nova, neutron, placement etc + :type service_name: str + :param rel_name: Name of the relation to query + :type rel_name: str + :returns: Whether endpoint has changed + :rtype: bool + """ + changed = False + with unitdata.HookData()() as t: + db = t[0] + notifications = get_endpoint_notifications( + [service_name], + rel_name=rel_name) + for key, nonce in notifications.items(): + if db.get(key) != nonce: + juju_log(('New endpoint change notification found: ' + '{}={}').format(key, nonce), + 'INFO') + changed = True + break + return changed + + +def save_endpoint_changed_triggers(service_names, rel_name='identity-service'): + """Save the endpoint triggers in db so it can be tracked if they changed. + + :param service_names: List of service name. + :type service_name: List + :param rel_name: Name of the relation to query + :type rel_name: str + """ + with unitdata.HookData()() as t: + db = t[0] + notifications = get_endpoint_notifications( + service_names, + rel_name=rel_name) + for key, nonce in notifications.items(): + db.set(key, nonce) + + +def save_script_rc(script_path="scripts/scriptrc", **env_vars): + """ + Write an rc file in the charm-delivered directory containing + exported environment variables provided by env_vars. Any charm scripts run + outside the juju hook environment can source this scriptrc to obtain + updated config information necessary to perform health checks or + service changes. + """ + juju_rc_path = "%s/%s" % (charm_dir(), script_path) + if not os.path.exists(os.path.dirname(juju_rc_path)): + os.mkdir(os.path.dirname(juju_rc_path)) + with open(juju_rc_path, 'wt') as rc_script: + rc_script.write("#!/bin/bash\n") + for u, p in env_vars.items(): + if u != "script_path": + rc_script.write('export %s=%s\n' % (u, p)) + + +def openstack_upgrade_available(package): + """ + Determines if an OpenStack upgrade is available from installation + source, based on version of installed package. + + :param package: str: Name of installed package. 
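The key format produced by get_endpoint_key() above flattens Juju identifiers into names that are safe for the local kv store; for example:

def endpoint_key(service_name, relation_id, unit_name):
    # ':' and '/' are mapped to '_' so the result is a single flat token.
    return '{}-{}-{}'.format(service_name,
                             relation_id.replace(':', '_'),
                             unit_name.replace('/', '_'))

assert endpoint_key('placement', 'identity-service:4', 'keystone/0') == \
    'placement-identity-service_4-keystone_0'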
+ + :returns: bool: : Returns True if configured installation source offers + a newer version of package. + """ + + src = config('openstack-origin') + cur_vers = get_os_version_package(package) + if not cur_vers: + # The package has not been installed yet do not attempt upgrade + return False + try: + avail_vers = get_os_version_install_source(src) + except Exception: + avail_vers = cur_vers + apt.init() + return apt.version_compare(avail_vers, cur_vers) >= 1 + + +def ensure_block_device(block_device): + ''' + Confirm block_device, create as loopback if necessary. + + :param block_device: str: Full path of block device to ensure. + + :returns: str: Full path of ensured block device. + ''' + _none = ['None', 'none', None] + if (block_device in _none): + error_out('prepare_storage(): Missing required input: block_device=%s.' + % block_device) + + if block_device.startswith('/dev/'): + bdev = block_device + elif block_device.startswith('/'): + _bd = block_device.split('|') + if len(_bd) == 2: + bdev, size = _bd + else: + bdev = block_device + size = DEFAULT_LOOPBACK_SIZE + bdev = ensure_loopback_device(bdev, size) + else: + bdev = '/dev/%s' % block_device + + if not is_block_device(bdev): + error_out('Failed to locate valid block device at %s' % bdev) + + return bdev + + +def clean_storage(block_device): + ''' + Ensures a block device is clean. That is: + - unmounted + - any lvm volume groups are deactivated + - any lvm physical device signatures removed + - partition table wiped + + :param block_device: str: Full path to block device to clean. + ''' + for mp, d in mounts(): + if d == block_device: + juju_log('clean_storage(): %s is mounted @ %s, unmounting.' % + (d, mp), level=INFO) + umount(mp, persist=True) + + if is_lvm_physical_volume(block_device): + deactivate_lvm_volume_group(block_device) + remove_lvm_physical_volume(block_device) + else: + zap_disk(block_device) + + +is_ip = ip.is_ip +ns_query = ip.ns_query +get_host_ip = ip.get_host_ip +get_hostname = ip.get_hostname + + +def get_matchmaker_map(mm_file='/etc/oslo/matchmaker_ring.json'): + mm_map = {} + if os.path.isfile(mm_file): + with open(mm_file, 'r') as f: + mm_map = json.load(f) + return mm_map + + +def sync_db_with_multi_ipv6_addresses(database, database_user, + relation_prefix=None): + hosts = get_ipv6_addr(dynamic_only=False) + + if config('vip'): + vips = config('vip').split() + for vip in vips: + if vip and is_ipv6(vip): + hosts.append(vip) + + kwargs = {'database': database, + 'username': database_user, + 'hostname': json.dumps(hosts)} + + if relation_prefix: + for key in list(kwargs.keys()): + kwargs["%s_%s" % (relation_prefix, key)] = kwargs[key] + del kwargs[key] + + for rid in relation_ids('shared-db'): + relation_set(relation_id=rid, **kwargs) + + +def os_requires_version(ostack_release, pkg): + """ + Decorator for hook to specify minimum supported release + """ + def wrap(f): + @wraps(f) + def wrapped_f(*args): + if CompareOpenStackReleases(os_release(pkg)) < ostack_release: + raise Exception("This hook is not supported on releases" + " before %s" % ostack_release) + f(*args) + return wrapped_f + return wrap + + +def os_workload_status(configs, required_interfaces, charm_func=None): + """ + Decorator to set workload status based on complete contexts + """ + def wrap(f): + @wraps(f) + def wrapped_f(*args, **kwargs): + # Run the original function first + f(*args, **kwargs) + # Set workload status now that contexts have been + # acted on + set_os_workload_status(configs, required_interfaces, charm_func) + return 
wrapped_f + return wrap + + +def set_os_workload_status(configs, required_interfaces, charm_func=None, + services=None, ports=None): + """Set the state of the workload status for the charm. + + This calls _determine_os_workload_status() to get the new state, message + and sets the status using status_set() + + @param configs: a templating.OSConfigRenderer() object + @param required_interfaces: {generic: [specific, specific2, ...]} + @param charm_func: a callable function that returns state, message. The + signature is charm_func(configs) -> (state, message) + @param services: list of strings OR dictionary specifying services/ports + @param ports: OPTIONAL list of port numbers. + @returns state, message: the new workload status, user message + """ + state, message = _determine_os_workload_status( + configs, required_interfaces, charm_func, services, ports) + status_set(state, message) + + +def _determine_os_workload_status( + configs, required_interfaces, charm_func=None, + services=None, ports=None): + """Determine the state of the workload status for the charm. + + This function returns the new workload status for the charm based + on the state of the interfaces, the paused state and whether the + services are actually running and any specified ports are open. + + This checks: + + 1. if the unit should be paused, that it is actually paused. If so the + state is 'maintenance' + message, else 'broken'. + 2. that the interfaces/relations are complete. If they are not then + it sets the state to either 'broken' or 'waiting' and an appropriate + message. + 3. If all the relation data is set, then it checks that the actual + services really are running. If not it sets the state to 'broken'. + + If everything is okay then the state returns 'active'. + + @param configs: a templating.OSConfigRenderer() object + @param required_interfaces: {generic: [specific, specific2, ...]} + @param charm_func: a callable function that returns state, message. The + signature is charm_func(configs) -> (state, message) + @param services: list of strings OR dictionary specifying services/ports + @param ports: OPTIONAL list of port numbers. + @returns state, message: the new workload status, user message + """ + state, message = _ows_check_if_paused(services, ports) + + if state is None: + state, message = _ows_check_generic_interfaces( + configs, required_interfaces) + + if state != 'maintenance' and charm_func: + # _ows_check_charm_func() may modify the state, message + state, message = _ows_check_charm_func( + state, message, lambda: charm_func(configs)) + + if state is None: + state, message = ows_check_services_running(services, ports) + + if state is None: + state = 'active' + message = "Unit is ready" + juju_log(message, 'INFO') + + try: + if config(POLICYD_CONFIG_NAME): + message = "{} {}".format(policyd_status_message_prefix(), message) + # Get deferred restarts events that have been triggered by a policy + # written by this charm. + deferred_restarts = list(set( + [e.service + for e in deferred_events.get_deferred_restarts() + if e.policy_requestor_name == ch_service_name()])) + if deferred_restarts: + svc_msg = "Services queued for restart: {}".format( + ', '.join(sorted(deferred_restarts))) + message = "{}. {}".format(message, svc_msg) + deferred_hooks = deferred_events.get_deferred_hooks() + if deferred_hooks: + svc_msg = "Hooks skipped due to disabled auto restarts: {}".format( + ', '.join(sorted(deferred_hooks))) + message = "{}. 
{}".format(message, svc_msg) + + except Exception: + pass + + return state, message + + +def _ows_check_if_paused(services=None, ports=None): + """Check if the unit is supposed to be paused, and if so check that the + services/ports (if passed) are actually stopped/not being listened to. + + If the unit isn't supposed to be paused, just return None, None + + If the unit is performing a series upgrade, return a message indicating + this. + + @param services: OPTIONAL services spec or list of service names. + @param ports: OPTIONAL list of port numbers. + @returns state, message or None, None + """ + if is_unit_upgrading_set(): + state, message = check_actually_paused(services=services, + ports=ports) + if state is None: + # we're paused okay, so set maintenance and return + state = "blocked" + message = ("Ready for do-release-upgrade and reboot. " + "Set complete when finished.") + return state, message + + if is_unit_paused_set(): + state, message = check_actually_paused(services=services, + ports=ports) + if state is None: + # we're paused okay, so set maintenance and return + state = "maintenance" + message = "Paused. Use 'resume' action to resume normal service." + return state, message + return None, None + + +def _ows_check_generic_interfaces(configs, required_interfaces): + """Check the complete contexts to determine the workload status. + + - Checks for missing or incomplete contexts + - juju log details of missing required data. + - determines the correct workload status + - creates an appropriate message for status_set(...) + + if there are no problems then the function returns None, None + + @param configs: a templating.OSConfigRenderer() object + @params required_interfaces: {generic_interface: [specific_interface], } + @returns state, message or None, None + """ + incomplete_rel_data = incomplete_relation_data(configs, + required_interfaces) + state = None + message = None + missing_relations = set() + incomplete_relations = set() + + for generic_interface, relations_states in incomplete_rel_data.items(): + related_interface = None + missing_data = {} + # Related or not? + for interface, relation_state in relations_states.items(): + if relation_state.get('related'): + related_interface = interface + missing_data = relation_state.get('missing_data') + break + # No relation ID for the generic_interface? + if not related_interface: + juju_log("{} relation is missing and must be related for " + "functionality. ".format(generic_interface), 'WARN') + state = 'blocked' + missing_relations.add(generic_interface) + else: + # Relation ID eists but no related unit + if not missing_data: + # Edge case - relation ID exists but departings + _hook_name = hook_name() + if (('departed' in _hook_name or 'broken' in _hook_name) and + related_interface in _hook_name): + state = 'blocked' + missing_relations.add(generic_interface) + juju_log("{} relation's interface, {}, " + "relationship is departed or broken " + "and is required for functionality." + "".format(generic_interface, related_interface), + "WARN") + # Normal case relation ID exists but no related unit + # (joining) + else: + juju_log("{} relations's interface, {}, is related but has" + " no units in the relation." + "".format(generic_interface, related_interface), + "INFO") + # Related unit exists and data missing on the relation + else: + juju_log("{} relation's interface, {}, is related awaiting " + "the following data from the relationship: {}. 
" + "".format(generic_interface, related_interface, + ", ".join(missing_data)), "INFO") + if state != 'blocked': + state = 'waiting' + if generic_interface not in missing_relations: + incomplete_relations.add(generic_interface) + + if missing_relations: + message = "Missing relations: {}".format(", ".join(missing_relations)) + if incomplete_relations: + message += "; incomplete relations: {}" \ + "".format(", ".join(incomplete_relations)) + state = 'blocked' + elif incomplete_relations: + message = "Incomplete relations: {}" \ + "".format(", ".join(incomplete_relations)) + state = 'waiting' + + return state, message + + +def _ows_check_charm_func(state, message, charm_func_with_configs): + """Run a custom check function for the charm to see if it wants to + change the state. This is only run if not in 'maintenance' and + tests to see if the new state is more important that the previous + one determined by the interfaces/relations check. + + @param state: the previously determined state so far. + @param message: the user orientated message so far. + @param charm_func: a callable function that returns state, message + @returns state, message strings. + """ + if charm_func_with_configs: + charm_state, charm_message = charm_func_with_configs() + if (charm_state != 'active' and + charm_state != 'unknown' and + charm_state is not None): + state = workload_state_compare(state, charm_state) + if message: + charm_message = charm_message.replace("Incomplete relations: ", + "") + message = "{}, {}".format(message, charm_message) + else: + message = charm_message + return state, message + + +@deprecate("use ows_check_services_running() instead", "2022-05", log=juju_log) +def _ows_check_services_running(services, ports): + return ows_check_services_running(services, ports) + + +def ows_check_services_running(services, ports, ssl_check_info=None): + """Check that the services that should be running are actually running + and that any ports specified are being listened to. + + @param services: list of strings OR dictionary specifying services/ports + @param ports: list of ports + @param ssl_check_info: SSLPortCheckInfo object. If provided, port checks + will be done using an SSL connection. + @returns state, message: strings or None, None + """ + messages = [] + state = None + if services is not None: + services = _extract_services_list_helper(services) + services_running, running = _check_running_services(services) + if not all(running): + messages.append( + "Services not running that should be: {}" + .format(", ".join(_filter_tuples(services_running, False)))) + state = 'blocked' + # also verify that the ports that should be open are open + # NB, that ServiceManager objects only OPTIONALLY have ports + map_not_open, ports_open = ( + _check_listening_on_services_ports(services, ssl_check_info)) + if not all(ports_open): + # find which service has missing ports. They are in service + # order which makes it a bit easier. 
+ message_parts = {service: ", ".join([str(v) for v in open_ports]) + for service, open_ports in map_not_open.items()} + message = ", ".join( + ["{}: [{}]".format(s, sp) for s, sp in message_parts.items()]) + messages.append( + "Services with ports not open that should be: {}" + .format(message)) + state = 'blocked' + + if ports is not None: + # and we can also check ports which we don't know the service for + ports_open, ports_open_bools = \ + _check_listening_on_ports_list(ports, ssl_check_info) + if not all(ports_open_bools): + messages.append( + "Ports which should be open, but are not: {}" + .format(", ".join([str(p) for p, v in ports_open + if not v]))) + state = 'blocked' + + if state is not None: + message = "; ".join(messages) + return state, message + + return None, None + + +def _extract_services_list_helper(services): + """Extract a OrderedDict of {service: [ports]} of the supplied services + for use by the other functions. + + The services object can either be: + - None : no services were passed (an empty dict is returned) + - a list of strings + - A dictionary (optionally OrderedDict) {service_name: {'service': ..}} + - An array of [{'service': service_name, ...}, ...] + + @param services: see above + @returns OrderedDict(service: [ports], ...) + """ + if services is None: + return {} + if isinstance(services, dict): + services = services.values() + # either extract the list of services from the dictionary, or if + # it is a simple string, use that. i.e. works with mixed lists. + _s = OrderedDict() + for s in services: + if isinstance(s, dict) and 'service' in s: + _s[s['service']] = s.get('ports', []) + if isinstance(s, str): + _s[s] = [] + return _s + + +def _check_running_services(services): + """Check that the services dict provided is actually running and provide + a list of (service, boolean) tuples for each service. + + Returns both a zipped list of (service, boolean) and a list of booleans + in the same order as the services. + + @param services: OrderedDict of strings: [ports], one for each service to + check. + @returns [(service, boolean), ...], : results for checks + [boolean] : just the result of the service checks + """ + services_running = [service_running(s) for s in services] + return list(zip(services, services_running)), services_running + + +def _check_listening_on_services_ports(services, test=False, + ssl_check_info=None): + """Check that the unit is actually listening (has the port open) on the + ports that the service specifies are open. If test is True then the + function returns the services with ports that are open rather than + closed. + + Returns an OrderedDict of service: ports and a list of booleans + + @param services: OrderedDict(service: [port, ...], ...) + @param test: default=False, if False, test for closed, otherwise open. + @param ssl_check_info: SSLPortCheckInfo object. If provided, port checks + will be done using an SSL connection. 
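Since _extract_services_list_helper() above accepts four input shapes, a couple of worked examples help; this standalone sketch mirrors its normalisation:

from collections import OrderedDict

def extract_services(services):
    # None -> empty; dict -> use its values; then accept either plain
    # strings or {'service': name, 'ports': [...]} entries.
    if services is None:
        return OrderedDict()
    if isinstance(services, dict):
        services = services.values()
    out = OrderedDict()
    for s in services:
        if isinstance(s, dict) and 'service' in s:
            out[s['service']] = s.get('ports', [])
        if isinstance(s, str):
            out[s] = []
    return out

assert extract_services(['apache2']) == OrderedDict([('apache2', [])])
assert extract_services([{'service': 'haproxy', 'ports': [80, 443]}]) == \
    OrderedDict([('haproxy', [80, 443])])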
+ @returns OrderedDict(service: [port-not-open, ...]...), [boolean] + """ + test = not (not (test)) # ensure test is True or False + all_ports = list(itertools.chain(*services.values())) + ports_states = [port_has_listener('0.0.0.0', p, ssl_check_info) + for p in all_ports] + map_ports = OrderedDict() + matched_ports = [p for p, opened in zip(all_ports, ports_states) + if opened == test] # essentially opened xor test + for service, ports in services.items(): + set_ports = set(ports).intersection(matched_ports) + if set_ports: + map_ports[service] = set_ports + return map_ports, ports_states + + +def _check_listening_on_ports_list(ports, ssl_check_info=None): + """Check that the ports list given are being listened to + + Returns a list of ports being listened to and a list of the + booleans. + + @param ssl_check_info: SSLPortCheckInfo object. If provided, port checks + will be done using an SSL connection. + @param ports: LIST of port numbers. + @returns [(port_num, boolean), ...], [boolean] + """ + ports_open = [port_has_listener('0.0.0.0', p, ssl_check_info) + for p in ports] + return zip(ports, ports_open), ports_open + + +def _filter_tuples(services_states, state): + """Return a simple list from a list of tuples according to the condition + + @param services_states: LIST of (string, boolean): service and running + state. + @param state: Boolean to match the tuple against. + @returns [LIST of strings] that matched the tuple RHS. + """ + return [s for s, b in services_states if b == state] + + +def workload_state_compare(current_workload_state, workload_state): + """ Return highest priority of two states""" + hierarchy = {'unknown': -1, + 'active': 0, + 'maintenance': 1, + 'waiting': 2, + 'blocked': 3, + } + + if hierarchy.get(workload_state) is None: + workload_state = 'unknown' + if hierarchy.get(current_workload_state) is None: + current_workload_state = 'unknown' + + # Set workload_state based on hierarchy of statuses + if hierarchy.get(current_workload_state) > hierarchy.get(workload_state): + return current_workload_state + else: + return workload_state + + +def incomplete_relation_data(configs, required_interfaces): + """Check complete contexts against required_interfaces + Return dictionary of incomplete relation data. + + configs is an OSConfigRenderer object with configs registered + + required_interfaces is a dictionary of required general interfaces + with dictionary values of possible specific interfaces. + Example: + required_interfaces = {'database': ['shared-db', 'pgsql-db']} + + The interface is said to be satisfied if anyone of the interfaces in the + list has a complete context. + + Return dictionary of incomplete or missing required contexts with relation + status of interfaces and any missing data points. Example: + {'message': + {'amqp': {'missing_data': ['rabbitmq_password'], 'related': True}, + 'zeromq-configuration': {'related': False}}, + 'identity': + {'identity-service': {'related': False}}, + 'database': + {'pgsql-db': {'related': False}, + 'shared-db': {'related': True}}} + """ + complete_ctxts = configs.complete_contexts() + incomplete_relations = [ + svc_type + for svc_type, interfaces in required_interfaces.items() + if not set(interfaces).intersection(complete_ctxts)] + return { + i: configs.get_incomplete_context_data(required_interfaces[i]) + for i in incomplete_relations} + + +def do_action_openstack_upgrade(package, upgrade_callback, configs): + """Perform action-managed OpenStack upgrade. 
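workload_state_compare() above resolves two candidate states by severity; the hierarchy can be exercised standalone:

HIERARCHY = {'unknown': -1, 'active': 0, 'maintenance': 1,
             'waiting': 2, 'blocked': 3}

def worst_state(current, proposed):
    # Unrecognised states are demoted to 'unknown', as in the original.
    current = current if current in HIERARCHY else 'unknown'
    proposed = proposed if proposed in HIERARCHY else 'unknown'
    return current if HIERARCHY[current] > HIERARCHY[proposed] else proposed

assert worst_state('waiting', 'blocked') == 'blocked'
assert worst_state('active', 'nonsense') == 'active'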
+ + Upgrades packages to the configured openstack-origin version and sets + the corresponding action status as a result. + + For backwards compatibility a config flag (action-managed-upgrade) must + be set for this code to run, otherwise a full service level upgrade will + fire on config-changed. + + @param package: package name for determining if openstack upgrade available + @param upgrade_callback: function callback to charm's upgrade function + @param configs: templating object derived from OSConfigRenderer class + + @return: True if upgrade successful; False if upgrade failed or skipped + """ + ret = False + + if openstack_upgrade_available(package): + if config('action-managed-upgrade'): + juju_log('Upgrading OpenStack release') + + try: + upgrade_callback(configs=configs) + action_set({'outcome': 'success, upgrade completed'}) + ret = True + except Exception: + action_set({'outcome': 'upgrade failed, see traceback'}) + action_set({'traceback': traceback.format_exc()}) + action_fail('upgrade callback resulted in an ' + 'unexpected error') + else: + action_set({'outcome': 'action-managed-upgrade config is ' + 'False, skipped upgrade'}) + else: + action_set({'outcome': 'no upgrade available'}) + + return ret + + +def do_action_package_upgrade(package, upgrade_callback, configs): + """Perform package upgrade within the current OpenStack release. + + Upgrades packages only if there is not an openstack upgrade available, + and sets the corresponding action status as a result. + + @param package: package name for determining if openstack upgrade available + @param upgrade_callback: function callback to charm's upgrade function + @param configs: templating object derived from OSConfigRenderer class + + @return: True if upgrade successful; False if upgrade failed or skipped + """ + ret = False + + if not openstack_upgrade_available(package): + juju_log('Upgrading packages') + + try: + upgrade_callback(configs=configs) + action_set({'outcome': 'success, upgrade completed'}) + ret = True + except Exception: + action_set({'outcome': 'upgrade failed, see traceback'}) + action_set({'traceback': traceback.format_exc()}) + action_fail('upgrade callback resulted in an ' + 'unexpected error') + else: + action_set({'outcome': 'upgrade skipped because an openstack upgrade ' + 'is available'}) + + return ret + + +def remote_restart(rel_name, remote_service=None): + trigger = { + 'restart-trigger': str(uuid.uuid4()), + } + if remote_service: + trigger['remote-service'] = remote_service + for rid in relation_ids(rel_name): + # This subordinate can be related to two separate services using + # different subordinate relations so only issue the restart if + # the principle is connected down the relation we think it is + if related_units(relid=rid): + relation_set(relation_id=rid, + relation_settings=trigger, + ) + + +def check_actually_paused(services=None, ports=None): + """Check that services listed in the services object and ports + are actually closed (not listened to), to verify that the unit is + properly paused. 
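The restart trigger published by remote_restart() above is just a fresh nonce plus an optional service hint; the relation data it sets looks like this (the service name below is hypothetical):

import uuid

trigger = {'restart-trigger': str(uuid.uuid4())}
remote_service = 'nova-compute'  # hypothetical remote service
if remote_service:
    trigger['remote-service'] = remote_service
assert set(trigger) == {'restart-trigger', 'remote-service'}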
+ + @param services: See _extract_services_list_helper + @returns status, : string for status (None if okay) + message : string for problem for status_set + """ + state = None + message = None + messages = [] + if services is not None: + services = _extract_services_list_helper(services) + services_running, services_states = _check_running_services(services) + if any(services_states): + # there shouldn't be any running so this is a problem + messages.append("these services running: {}" + .format(", ".join( + _filter_tuples(services_running, True)))) + state = "blocked" + ports_open, ports_open_bools = ( + _check_listening_on_services_ports(services, True)) + if any(ports_open_bools): + message_parts = {service: ", ".join([str(v) for v in open_ports]) + for service, open_ports in ports_open.items()} + message = ", ".join( + ["{}: [{}]".format(s, sp) for s, sp in message_parts.items()]) + messages.append( + "these service:ports are open: {}".format(message)) + state = 'blocked' + if ports is not None: + ports_open, bools = _check_listening_on_ports_list(ports) + if any(bools): + messages.append( + "these ports which should be closed, but are open: {}" + .format(", ".join([str(p) for p, v in ports_open if v]))) + state = 'blocked' + if messages: + message = ("Services should be paused but {}" + .format(", ".join(messages))) + return state, message + + +def set_unit_paused(): + """Set the unit to a paused state in the local kv() store. + This does NOT actually pause the unit + """ + with unitdata.HookData()() as t: + kv = t[0] + kv.set('unit-paused', True) + + +def clear_unit_paused(): + """Clear the unit from a paused state in the local kv() store + This does NOT actually restart any services - it only clears the + local state. + """ + with unitdata.HookData()() as t: + kv = t[0] + kv.set('unit-paused', False) + + +def is_unit_paused_set(): + """Return the state of the kv().get('unit-paused'). + This does NOT verify that the unit really is paused. + + To help with units that don't have HookData() (testing) + if it excepts, return False + """ + try: + with unitdata.HookData()() as t: + kv = t[0] + # transform something truth-y into a Boolean. + return not (not (kv.get('unit-paused'))) + except Exception: + return False + + +def is_hook_allowed(hookname, check_deferred_restarts=True): + """Check if hook can run. + + :param hookname: Name of hook to check.. + :type hookname: str + :param check_deferred_restarts: Whether to check deferred restarts. + :type check_deferred_restarts: bool + """ + permitted = True + reasons = [] + if is_unit_paused_set(): + reasons.append( + "Unit is pause or upgrading. Skipping {}".format(hookname)) + permitted = False + + if check_deferred_restarts: + if deferred_events.is_restart_permitted(): + permitted = True + deferred_events.clear_deferred_hook(hookname) + else: + if not config().changed('enable-auto-restarts'): + deferred_events.set_deferred_hook(hookname) + reasons.append("auto restarts are disabled") + permitted = False + return permitted, " and ".join(reasons) + + +def manage_payload_services(action, services=None, charm_func=None): + """Run an action against all services. + + An optional charm_func() can be called. It should raise an Exception to + indicate that the function failed. If it was successful it should return + None or an optional message. + + The signature for charm_func is: + charm_func() -> message: str + + charm_func() is executed after any services are stopped, if supplied. 
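The 'unit-paused' flag handled by the three helpers above is a plain kv boolean; this sketch swaps the Juju-backed kv() for an in-memory dict to show the semantics, including the double-negation coercion:

kv = {}

def set_unit_paused():
    kv['unit-paused'] = True

def clear_unit_paused():
    kv['unit-paused'] = False

def is_unit_paused_set():
    # not (not x) coerces any stored truthy value to a strict bool;
    # a missing key reads as False.
    return not (not kv.get('unit-paused'))

set_unit_paused()
assert is_unit_paused_set() is True
clear_unit_paused()
assert is_unit_paused_set() is False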
+ + The services object can either be: + - None : no services were passed (an empty dict is returned) + - a list of strings + - A dictionary (optionally OrderedDict) {service_name: {'service': ..}} + - An array of [{'service': service_name, ...}, ...] + + :param action: Action to run: pause, resume, start or stop. + :type action: str + :param services: See above + :type services: See above + :param charm_func: function to run for custom charm pausing. + :type charm_func: f() + :returns: Status boolean and list of messages + :rtype: (bool, []) + :raises: RuntimeError + """ + actions = { + 'pause': service_pause, + 'resume': service_resume, + 'start': service_start, + 'stop': service_stop} + action = action.lower() + if action not in actions.keys(): + raise RuntimeError( + "action: {} must be one of: {}".format(action, + ', '.join(actions.keys()))) + services = _extract_services_list_helper(services) + messages = [] + success = True + if services: + for service in services.keys(): + rc = actions[action](service) + if not rc: + success = False + messages.append("{} didn't {} cleanly.".format(service, + action)) + if charm_func: + try: + message = charm_func() + if message: + messages.append(message) + except Exception as e: + success = False + messages.append(str(e)) + return success, messages + + +def make_wait_for_ports_barrier(ports, retry_count=5): + """Make a function to wait for port shutdowns. + + Create a function which closes over the provided ports. The function will + retry probing ports until they are closed or the retry count has been reached. + + """ + @decorators.retry_on_predicate(retry_count, operator.not_, base_delay=0.1) + def retry_port_check(): + _, ports_states = _check_listening_on_ports_list(ports) + juju_log("Probe ports {}, result: {}".format(ports, ports_states), level="DEBUG") + return any(ports_states) + return retry_port_check + + +def pause_unit(assess_status_func, services=None, ports=None, + charm_func=None): + """Pause a unit by stopping the services and setting 'unit-paused' + in the local kv() store. + + Also checks that the services have stopped and ports are no longer + being listened to. + + An optional charm_func() can be called that can either raise an + Exception or return non None, None to indicate that the unit + didn't pause cleanly. + + The signature for charm_func is: + charm_func() -> message: string + + charm_func() is executed after any services are stopped, if supplied. + + The services object can either be: + - None : no services were passed (an empty dict is returned) + - a list of strings + - A dictionary (optionally OrderedDict) {service_name: {'service': ..}} + - An array of [{'service': service_name, ...}, ...] + + @param assess_status_func: (f() -> message: string | None) or None + @param services: OPTIONAL see above + @param ports: OPTIONAL list of port + @param charm_func: function to run for custom charm pausing. + @returns None + @raises Exception(message) on an error for action_fail(). + """ + _, messages = manage_payload_services( + 'pause', + services=services, + charm_func=charm_func) + set_unit_paused() + + if assess_status_func: + message = assess_status_func() + if message: + messages.append(message) + if messages and not is_unit_upgrading_set(): + raise Exception("Couldn't pause: {}".format("; ".join(messages))) + + +def resume_unit(assess_status_func, services=None, ports=None, + charm_func=None): + """Resume a unit by starting the services and clearning 'unit-paused' + in the local kv() store. 
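manage_payload_services() above dispatches one of four verbs to every service and collects failures rather than raising; a reduced sketch with injectable handlers standing in for the service_* helpers:

def manage(action, services, handlers):
    # handlers maps verb -> callable(service) -> bool, standing in for
    # service_pause/service_resume/service_start/service_stop.
    if action not in handlers:
        raise RuntimeError("action: {} must be one of: {}".format(
            action, ', '.join(handlers)))
    messages = ["{} didn't {} cleanly.".format(s, action)
                for s in services if not handlers[action](s)]
    return not messages, messages

ok, msgs = manage('stop', ['apache2', 'haproxy'],
                  {'stop': lambda s: s != 'haproxy'})
assert not ok and msgs == ["haproxy didn't stop cleanly."]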
+ + Also checks that the services have started and ports are being listened to. + + An optional charm_func() can be called that can either raise an + Exception or return non None to indicate that the unit + didn't resume cleanly. + + The signature for charm_func is: + charm_func() -> message: string + + charm_func() is executed after any services are started, if supplied. + + The services object can either be: + - None : no services were passed (an empty dict is returned) + - a list of strings + - A dictionary (optionally OrderedDict) {service_name: {'service': ..}} + - An array of [{'service': service_name, ...}, ...] + + @param assess_status_func: (f() -> message: string | None) or None + @param services: OPTIONAL see above + @param ports: OPTIONAL list of port + @param charm_func: function to run for custom charm resuming. + @returns None + @raises Exception(message) on an error for action_fail(). + """ + _, messages = manage_payload_services( + 'resume', + services=services, + charm_func=charm_func) + clear_unit_paused() + if assess_status_func: + message = assess_status_func() + if message: + messages.append(message) + if messages: + raise Exception("Couldn't resume: {}".format("; ".join(messages))) + + +def restart_services_action(services=None, when_all_stopped_func=None, + deferred_only=None): + """Manage a service restart request via charm action. + + :param services: Services to be restarted + :type model_name: List[str] + :param when_all_stopped_func: Function to call when all services are + stopped. + :type when_all_stopped_func: Callable[] + :param model_name: Only restart services which have a deferred restart + event. + :type model_name: bool + """ + if services and deferred_only: + raise ValueError( + "services and deferred_only are mutually exclusive") + if deferred_only: + services = list(set( + [a.service for a in deferred_events.get_deferred_restarts()])) + _, messages = manage_payload_services( + 'stop', + services=services, + charm_func=when_all_stopped_func) + if messages: + raise ServiceActionError( + "Error processing service stop request: {}".format( + "; ".join(messages))) + _, messages = manage_payload_services( + 'start', + services=services) + if messages: + raise ServiceActionError( + "Error processing service start request: {}".format( + "; ".join(messages))) + deferred_events.clear_deferred_restarts(services) + + +def make_assess_status_func(*args, **kwargs): + """Creates an assess_status_func() suitable for handing to pause_unit() + and resume_unit(). + + This uses the _determine_os_workload_status(...) function to determine + what the workload_status should be for the unit. If the unit is + not in maintenance or active states, then the message is returned to + the caller. This is so an action that doesn't result in either a + complete pause or complete resume can signal failure with an action_fail() + """ + def _assess_status_func(): + state, message = _determine_os_workload_status(*args, **kwargs) + status_set(state, message) + if state not in ['maintenance', 'active']: + return message + return None + + return _assess_status_func + + +def pausable_restart_on_change(restart_map, stopstart=False, + restart_functions=None, + can_restart_now_f=None, + post_svc_restart_f=None, + pre_restarts_wait_f=None): + """A restart_on_change decorator that checks to see if the unit is + paused. If it is paused then the decorated function doesn't fire. + + This is provided as a helper, as the @restart_on_change(...) 
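restart_services_action() above treats 'services' and 'deferred_only' as mutually exclusive and de-duplicates the deferred service names; its argument handling in isolation (sorted here for a stable result, which the original does not guarantee):

def pick_services(services, deferred_only, deferred_restarts):
    # deferred_restarts stands in for the service names from
    # deferred_events.get_deferred_restarts().
    if services and deferred_only:
        raise ValueError("services and deferred_only are mutually exclusive")
    if deferred_only:
        return sorted(set(deferred_restarts))
    return services

assert pick_services(None, True, ['apache2', 'apache2', 'haproxy']) == \
    ['apache2', 'haproxy']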
decorator
+    is in core.host, yet the openstack specific helpers are in this file
+    (contrib.openstack.utils). Thus, this needs to be an optional feature
+    for openstack charms (or charms that wish to use the openstack
+    pause/resume type features).
+
+    It is used as follows:
+
+        from contrib.openstack.utils import (
+            pausable_restart_on_change as restart_on_change)
+
+        @restart_on_change(restart_map, stopstart=<boolean>)
+        def some_hook(...):
+            pass
+
+    see core.utils.restart_on_change() for more details.
+
+    Note restart_map can be a callable, in which case, restart_map is only
+    evaluated at runtime. This means that it is lazy and the underlying
+    function won't be called if the decorated function is never called. Note,
+    retains backwards compatibility for passing a non-callable dictionary.
+
+    :param f: function to decorate.
+    :type f: Callable
+    :param restart_map: Optionally callable, which then returns the
+        restart_map, or the restart map {conf_file: [services]}
+    :type restart_map: Union[Callable[[], Dict[str, List[str]]],
+                             Dict[str, List[str]]]
+    :param stopstart: whether to stop, start or restart a service
+    :type stopstart: boolean
+    :param restart_functions: nonstandard functions to use to restart services
+                              {svc: func, ...}
+    :type restart_functions: Dict[str, Callable[[str], None]]
+    :param can_restart_now_f: A function used to check if the restart is
+        permitted.
+    :type can_restart_now_f: Callable[[str, List[str]], boolean]
+    :param post_svc_restart_f: A function run after a service has
+        restarted.
+    :type post_svc_restart_f: Callable[[str], None]
+    :param pre_restarts_wait_f: A function called before any restarts.
+    :type pre_restarts_wait_f: Callable[None, None]
+    :returns: decorator to use a restart_on_change with pausability
+    :rtype: decorator
+    """
+    def wrap(f):
+        __restart_map_cache = None
+
+        @functools.wraps(f)
+        def wrapped_f(*args, **kwargs):
+            nonlocal __restart_map_cache
+            if is_unit_paused_set():
+                return f(*args, **kwargs)
+            if __restart_map_cache is None:
+                __restart_map_cache = restart_map() \
+                    if callable(restart_map) else restart_map
+            # otherwise, normal restart_on_change functionality
+            return restart_on_change_helper(
+                (lambda: f(*args, **kwargs)),
+                __restart_map_cache,
+                stopstart,
+                restart_functions,
+                can_restart_now_f,
+                post_svc_restart_f,
+                pre_restarts_wait_f)
+        return wrapped_f
+    return wrap
+
+
+def ordered(orderme):
+    """Converts the provided dictionary into a collections.OrderedDict.
+
+    The items in the returned OrderedDict will be inserted based on the
+    natural sort order of the keys. Nested dictionaries will also be sorted
+    in order to ensure fully predictable ordering.
+
+    :param orderme: the dict to order
+    :return: collections.OrderedDict
+    :raises: ValueError: if `orderme` isn't a dict instance.
+    """
+    if not isinstance(orderme, dict):
+        raise ValueError('argument must be a dict type')
+
+    result = OrderedDict()
+    for k, v in sorted(orderme.items(), key=lambda x: x[0]):
+        if isinstance(v, dict):
+            result[k] = ordered(v)
+        else:
+            result[k] = v
+
+    return result
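One subtlety of pausable_restart_on_change() above is the restart-map caching: a callable map is evaluated once, on the first unpaused call. The caching in isolation:

def lazy_restart_map(restart_map):
    # Callables are resolved once and memoised; plain dicts pass through.
    cache = None

    def get():
        nonlocal cache
        if cache is None:
            cache = restart_map() if callable(restart_map) else restart_map
        return cache
    return get

get_map = lazy_restart_map(lambda: {'/etc/apache2/apache2.conf': ['apache2']})
assert get_map() is get_map()  # evaluated exactly once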
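config_flags_parser(), defined just below, accepts three input styles; the simplest, plain 'k1=v1, k2=v2' strings, behaves like this sketch (which deliberately ignores the comma-list and inline-YAML cases the real parser also handles):

def parse_simple_flags(s):
    return dict(pair.strip().split('=', 1) for pair in s.split(','))

assert parse_simple_flags('key1=value1, key2=value2') == \
    {'key1': 'value1', 'key2': 'value2'}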
+def config_flags_parser(config_flags):
+    """Parses config flags string into dict.
+
+    This parsing method supports a few different formats for the config
+    flag values to be parsed:
+
+      1. A string in the simple format of key=value pairs, with the
+         possibility of specifying multiple key value pairs within the same
+         string. For example, a string in the format of
+         'key1=value1, key2=value2' will return a dict of:
+
+             {'key1': 'value1', 'key2': 'value2'}
+
+      2. A string in the above format, but supporting a comma-delimited list
+         of values for the same key. For example, a string in the format of
+         'key1=value1, key2=value2,value3,value4' will return a dict of:
+
+             {'key1': 'value1', 'key2': 'value2,value3,value4'}
+
+      3. A string containing a colon character (:) prior to an equal
+         character (=) will be treated as yaml and parsed as such. This can
+         be used to specify more complex key value pairs. For example,
+         a string in the format of 'key1: subkey1=value1, subkey2=value2'
+         will return a dict of:
+
+             {'key1': 'subkey1=value1, subkey2=value2'}
+
+    The provided config_flags string may be a list of comma-separated values
+    which themselves may be comma-separated list of values.
+    """
+    # If we find a colon before an equals sign then treat it as yaml.
+    # Note: limit it to finding the colon first since this indicates
+    # assignment for inline yaml.
+    colon = config_flags.find(':')
+    equals = config_flags.find('=')
+    if colon > 0:
+        if colon < equals or equals < 0:
+            return ordered(yaml.safe_load(config_flags))
+
+    if config_flags.find('==') >= 0:
+        juju_log("config_flags is not in expected format (key=value)",
+                 level=ERROR)
+        raise OSContextError
+
+    # strip the following from each value.
+    post_strippers = ' ,'
+    # we strip any leading/trailing '=' or ' ' from the string then
+    # split on '='.
+    split = config_flags.strip(' =').split('=')
+    limit = len(split)
+    flags = OrderedDict()
+    for i in range(0, limit - 1):
+        current = split[i]
+        next = split[i + 1]
+        vindex = next.rfind(',')
+        if (i == limit - 2) or (vindex < 0):
+            value = next
+        else:
+            value = next[:vindex]
+
+        if i == 0:
+            key = current
+        else:
+            # if this is not the first entry, expect an embedded key.
+            index = current.rfind(',')
+            if index < 0:
+                juju_log("Invalid config value(s) at index %s" % (i),
+                         level=ERROR)
+                raise OSContextError
+            key = current[index + 1:]
+
+        # Add to collection.
+        flags[key.strip(post_strippers)] = value.rstrip(post_strippers)
+
+    return flags
+
+
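+# Worked examples (illustrative) covering the three accepted formats:
+#
+#     >>> config_flags_parser('key1=value1, key2=value2')
+#     OrderedDict([('key1', 'value1'), ('key2', 'value2')])
+#     >>> config_flags_parser('key1=value1, key2=value2,value3,value4')
+#     OrderedDict([('key1', 'value1'), ('key2', 'value2,value3,value4')])
+#     >>> config_flags_parser('key1: subkey1=value1, subkey2=value2')
+#     OrderedDict([('key1', 'subkey1=value1, subkey2=value2')])
+
+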
+def os_application_version_set(package):
+    '''Set version of application for Juju 2.0 and later'''
+    application_version = get_upstream_version(package)
+    # NOTE(jamespage) if not able to figure out package version, fallback to
+    #                 openstack codename version detection.
+    if not application_version:
+        application_version_set(os_release(package))
+    else:
+        application_version_set(application_version)
+
+
+def os_application_status_set(check_function):
+    """Run the supplied function and set the application status accordingly.
+
+    :param check_function: Function to run to get app states and messages.
+    :type check_function: function
+    """
+    state, message = check_function()
+    status_set(state, message, application=True)
+
+
+def enable_memcache(source=None, release=None, package=None):
+    """Determine if memcache should be enabled on the local unit
+
+    @param source: source string for charm
+    @param release: release of OpenStack currently deployed
+    @param package: package to derive OpenStack version deployed
+    @returns boolean Whether memcache should be enabled
+    """
+    _release = None
+    if release:
+        _release = release
+    else:
+        _release = os_release(package)
+    if not _release:
+        _release = get_os_codename_install_source(source)
+
+    return CompareOpenStackReleases(_release) >= 'mitaka'
+
+
+def token_cache_pkgs(source=None, release=None):
+    """Determine additional packages needed for token caching
+
+    @param source: source string for charm
+    @param release: release of OpenStack currently deployed
+    @returns List of packages to enable token caching
+    """
+    packages = []
+    if enable_memcache(source=source, release=release):
+        packages.extend(['memcached', 'python-memcache'])
+    return packages
+
+
+def update_json_file(filename, items):
+    """Updates the json `filename` with a given dict.
+
+    :param filename: path to json file (e.g. /etc/glance/policy.json)
+    :param items: dict of items to update
+    """
+    if not items:
+        return
+
+    with open(filename) as fd:
+        policy = json.load(fd)
+
+    # Compare before and after and if nothing has changed don't write the file
+    # since that could cause unnecessary service restarts.
+    before = json.dumps(policy, indent=4, sort_keys=True)
+    policy.update(items)
+    after = json.dumps(policy, indent=4, sort_keys=True)
+    if before == after:
+        return
+
+    with open(filename, "w") as fd:
+        fd.write(after)
+
+
+@cached
+def snap_install_requested():
+    """ Determine if installing from snaps
+
+    If openstack-origin is of the form snap:track/channel[/branch]
+    and channel is in SNAPS_CHANNELS return True.
+    """
+    origin = config('openstack-origin') or ""
+    if not origin.startswith('snap:'):
+        return False
+
+    _src = origin[5:]
+    if '/' in _src:
+        channel = _src.split('/')[1]
+    else:
+        # Handle snap:track with no channel
+        channel = 'stable'
+    return valid_snap_channel(channel)
+
+
+def get_snaps_install_info_from_origin(snaps, src, mode='classic'):
+    """Generate a dictionary of snap install information from origin
+
+    @param snaps: List of snaps
+    @param src: String of openstack-origin or source of the form
+        snap:track/channel
+    @param mode: String classic, devmode or jailmode
+    @returns: Dictionary of snaps with channels and modes
+    """
+
+    if not src.startswith('snap:'):
+        juju_log("Snap source is not a snap origin", 'WARN')
+        return {}
+
+    _src = src[5:]
+    channel = '--channel={}'.format(_src)
+
+    return {snap: {'channel': channel, 'mode': mode}
+            for snap in snaps}
+
+
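+# Example (illustrative): an origin of 'snap:latest/edge' maps onto per-snap
+# channel/mode install data consumed by install_os_snaps() below:
+#
+#     >>> get_snaps_install_info_from_origin(
+#     ...     ['openstackclients'], 'snap:latest/edge', mode='classic')
+#     {'openstackclients': {'channel': '--channel=latest/edge',
+#                           'mode': 'classic'}}
+
+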
+def install_os_snaps(snaps, refresh=False):
+    """Install OpenStack snaps from channel and with mode
+
+    @param snaps: Dictionary of snaps with channels and modes of the form:
+        {'snap_name': {'channel': 'snap_channel',
+                       'mode': 'snap_mode'}}
+        Where channel is a snapstore channel and mode is --classic, --devmode
+        or --jailmode.
+    @param refresh: Boolean; if True refresh (update) any already installed
+        snaps instead of installing them.
+    """
+
+    def _ensure_flag(flag):
+        if flag.startswith('--'):
+            return flag
+        return '--{}'.format(flag)
+
+    if refresh:
+        for snap in snaps.keys():
+            snap_refresh(snap,
+                         _ensure_flag(snaps[snap]['channel']),
+                         _ensure_flag(snaps[snap]['mode']))
+    else:
+        for snap in snaps.keys():
+            snap_install(snap,
+                         _ensure_flag(snaps[snap]['channel']),
+                         _ensure_flag(snaps[snap]['mode']))
+
+
+def set_unit_upgrading():
+    """Set the unit to an upgrading state in the local kv() store.
+    """
+    with unitdata.HookData()() as t:
+        kv = t[0]
+        kv.set('unit-upgrading', True)
+
+
+def clear_unit_upgrading():
+    """Clear the unit from an upgrading state in the local kv() store.
+    """
+    with unitdata.HookData()() as t:
+        kv = t[0]
+        kv.set('unit-upgrading', False)
+
+
+def is_unit_upgrading_set():
+    """Return the state of the kv().get('unit-upgrading').
+
+    To help with units that don't have HookData() (testing)
+    if it raises an exception, return False.
+    """
+    try:
+        with unitdata.HookData()() as t:
+            kv = t[0]
+            # transform something truth-y into a Boolean.
+            return not (not (kv.get('unit-upgrading')))
+    except Exception:
+        return False
+
+
+def series_upgrade_prepare(pause_unit_helper=None, configs=None):
+    """ Run common series upgrade prepare tasks.
+
+    :param pause_unit_helper: function: Function to pause unit
+    :param configs: OSConfigRenderer object: Configurations
+    :returns None:
+    """
+    set_unit_upgrading()
+    if pause_unit_helper and configs:
+        if not is_unit_paused_set():
+            pause_unit_helper(configs)
+
+
+def series_upgrade_complete(resume_unit_helper=None, configs=None):
+    """ Run common series upgrade complete tasks.
+
+    :param resume_unit_helper: function: Function to resume unit
+    :param configs: OSConfigRenderer object: Configurations
+    :returns None:
+    """
+    clear_unit_paused()
+    clear_unit_upgrading()
+    if configs:
+        configs.write_all()
+        if resume_unit_helper:
+            resume_unit_helper(configs)
+
+
+def is_db_initialised():
+    """Check leader storage to see if database has been initialised.
+
+    :returns: Whether DB has been initialised
+    :rtype: bool
+    """
+    db_initialised = None
+    if leader_get('db-initialised') is None:
+        juju_log(
+            'db-initialised key missing, assuming db is not initialised',
+            'DEBUG')
+        db_initialised = False
+    else:
+        db_initialised = bool_from_string(leader_get('db-initialised'))
+    juju_log('Database initialised: {}'.format(db_initialised), 'DEBUG')
+    return db_initialised
+
+
+def set_db_initialised():
+    """Add flag to leader storage to indicate database has been initialised.
+    """
+    juju_log('Setting db-initialised to True', 'DEBUG')
+    leader_set({'db-initialised': True})
+
+
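+# Typical leader-gated usage (sketch; is_leader() and migrate_database()
+# stand in for charm-specific code):
+#
+#     if is_leader() and not is_db_initialised():
+#         migrate_database()
+#         set_db_initialised()
+
+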
+def is_db_maintenance_mode(relid=None):
+    """Check relation data from notifications of db in maintenance mode.
+
+    :returns: Whether db has notified it is in maintenance mode.
+    :rtype: bool
+    """
+    juju_log('Checking for maintenance notifications', 'DEBUG')
+    if relid:
+        r_ids = [relid]
+    else:
+        r_ids = relation_ids('shared-db')
+    rids_units = [(r, u) for r in r_ids for u in related_units(r)]
+    notifications = []
+    for r_id, unit in rids_units:
+        settings = relation_get(unit=unit, rid=r_id)
+        for key, value in settings.items():
+            if value and key in DB_MAINTENANCE_KEYS:
+                juju_log(
+                    'Unit: {}, Key: {}, Value: {}'.format(unit, key, value),
+                    'DEBUG')
+                try:
+                    notifications.append(bool_from_string(value))
+                except ValueError:
+                    juju_log(
+                        'Could not discern bool from {}'.format(value),
+                        'WARN')
+    return True in notifications
+
+
+@cached
+def container_scoped_relations():
+    """Get all the container scoped relations
+
+    :returns: List of relation names
+    :rtype: List
+    """
+    md = metadata()
+    relations = []
+    for relation_type in ('provides', 'requires', 'peers'):
+        for relation in md.get(relation_type, []):
+            if md[relation_type][relation].get('scope') == 'container':
+                relations.append(relation)
+    return relations
+
+
+def container_scoped_relation_get(attribute=None):
+    """Get relation data from all container scoped relations.
+
+    :param attribute: Name of attribute to get
+    :type attribute: Optional[str]
+    :returns: Iterator with relation data
+    :rtype: Iterator[Optional[any]]
+    """
+    for endpoint_name in container_scoped_relations():
+        for rid in relation_ids(endpoint_name):
+            for unit in related_units(rid):
+                yield relation_get(
+                    attribute=attribute,
+                    unit=unit,
+                    rid=rid)
+
+
+def is_db_ready(use_current_context=False, rel_name=None):
+    """Check remote database is ready to be used.
+
+    Database relations are expected to provide a list of 'allowed' units to
+    confirm that the database is ready for use by those units.
+
+    If db relation has provided this information and local unit is a member,
+    returns True otherwise False.
+
+    :param use_current_context: Whether to limit checks to current hook
+                                context.
+    :type use_current_context: bool
+    :param rel_name: Name of relation to check
+    :type rel_name: string
+    :returns: Whether remote db is ready.
+    :rtype: bool
+    :raises: Exception
+    """
+    key = 'allowed_units'
+
+    rel_name = rel_name or 'shared-db'
+    this_unit = local_unit()
+
+    if use_current_context:
+        if relation_id() in relation_ids(rel_name):
+            rids_units = [(None, None)]
+        else:
+            raise Exception("use_current_context=True but not in {} "
+                            "rel hook contexts (currently in {})."
+                            .format(rel_name, relation_id()))
+    else:
+        rids_units = [(r_id, u)
+                      for r_id in relation_ids(rel_name)
+                      for u in related_units(r_id)]
+
+    for rid, unit in rids_units:
+        allowed_units = relation_get(rid=rid, unit=unit, attribute=key)
+        if allowed_units and this_unit in allowed_units.split():
+            juju_log("This unit ({}) is in allowed unit list from {}".format(
+                this_unit,
+                unit), 'DEBUG')
+            return True
+
+    juju_log("This unit was not found in any allowed unit list")
+    return False
+
+
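+# Example (illustrative): charms commonly gate DB-dependent setup on the
+# remote database having granted access to this unit:
+#
+#     if is_db_ready(use_current_context=True):
+#         configure_database_users()  # hypothetical charm function
+
+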
+def is_expected_scale(peer_relation_name='cluster'):
+    """Query juju goal-state to determine whether our peer- and dependency-
+    relations are at the expected scale.
+
+    Useful for deferring per unit per relation housekeeping work until we are
+    ready to complete it successfully and without unnecessary repetition.
+
+    Always returns True if version of juju used does not support goal-state.
+
+    :param peer_relation_name: Name of peer relation
+    :type peer_relation_name: string
+    :returns: True or False
+    :rtype: bool
+    """
+    def _get_relation_id(rel_type):
+        return next((rid for rid in relation_ids(reltype=rel_type)), None)
+
+    Relation = namedtuple('Relation', 'rel_type rel_id')
+    peer_rid = _get_relation_id(peer_relation_name)
+    # Units with no peers should still have a peer relation.
+    if not peer_rid:
+        juju_log('Not at expected scale, no peer relation found', 'DEBUG')
+        return False
+    expected_relations = [
+        Relation(rel_type='shared-db', rel_id=_get_relation_id('shared-db'))]
+    if expect_ha():
+        expected_relations.append(
+            Relation(
+                rel_type='ha',
+                rel_id=_get_relation_id('ha')))
+    juju_log(
+        'Checking scale of {} relations'.format(
+            ','.join([r.rel_type for r in expected_relations])),
+        'DEBUG')
+    try:
+        if (len(related_units(relid=peer_rid)) <
+                len(list(expected_peer_units()))):
+            return False
+        for rel in expected_relations:
+            if not rel.rel_id:
+                juju_log(
+                    'Expected to find {} relation, but it is missing'.format(
+                        rel.rel_type),
+                    'DEBUG')
+                return False
+            # Goal state returns every unit even for container scoped
+            # relations but the charm only ever has a relation with
+            # the local unit.
+            if rel.rel_type in container_scoped_relations():
+                expected_count = 1
+            else:
+                expected_count = len(
+                    list(expected_related_units(reltype=rel.rel_type)))
+            if len(related_units(relid=rel.rel_id)) < expected_count:
+                juju_log(
+                    ('Not at expected scale, not enough units on {} '
+                     'relation'.format(rel.rel_type)),
+                    'DEBUG')
+                return False
+    except NotImplementedError:
+        return True
+    juju_log('All checks have passed, unit is at expected scale', 'DEBUG')
+    return True
+
+
+def get_peer_key(unit_name):
+    """Get the peer key for this unit.
+
+    The peer key is the key a unit uses to publish its status down the peer
+    relation.
+
+    :param unit_name: Name of unit
+    :type unit_name: string
+    :returns: Peer key for given unit
+    :rtype: string
+    """
+    return 'unit-state-{}'.format(unit_name.replace('/', '-'))
+
+
+UNIT_READY = 'READY'
+UNIT_NOTREADY = 'NOTREADY'
+UNIT_UNKNOWN = 'UNKNOWN'
+UNIT_STATES = [UNIT_READY, UNIT_NOTREADY, UNIT_UNKNOWN]
+
+
+def inform_peers_unit_state(state, relation_name='cluster'):
+    """Inform peers of the state of this unit.
+
+    :param state: State of unit to publish
+    :type state: string
+    :param relation_name: Name of relation to publish state on
+    :type relation_name: string
+    """
+    if state not in UNIT_STATES:
+        raise ValueError(
+            "Setting invalid state {} for unit".format(state))
+    this_unit = local_unit()
+    for r_id in relation_ids(relation_name):
+        juju_log('Telling peer behind relation {} that {} is {}'.format(
+            r_id, this_unit, state), 'DEBUG')
+        relation_set(relation_id=r_id,
+                     relation_settings={
+                         get_peer_key(this_unit): state})
+
+
+def get_peers_unit_state(relation_name='cluster'):
+    """Get the state of all peers.
+
+    :param relation_name: Name of relation to check peers on.
+    :type relation_name: string
+    :returns: Unit states keyed on unit name.
+    :rtype: dict
+    :raises: ValueError
+    """
+    r_ids = relation_ids(relation_name)
+    rids_units = [(r, u) for r in r_ids for u in related_units(r)]
+    unit_states = {}
+    for r_id, unit in rids_units:
+        settings = relation_get(unit=unit, rid=r_id)
+        unit_states[unit] = settings.get(get_peer_key(unit), UNIT_UNKNOWN)
+        if unit_states[unit] not in UNIT_STATES:
+            raise ValueError(
+                "Unit in unknown state {}".format(unit_states[unit]))
+    return unit_states
+
+
+def are_peers_ready(relation_name='cluster'):
+    """Check if all peers are ready.
+
+    :param relation_name: Name of relation to check peers on.
+    :type relation_name: string
+    :returns: Whether all units are ready.
+    :rtype: bool
+    """
+    unit_states = get_peers_unit_state(relation_name).values()
+    juju_log('{} peers are in the following states: {}'.format(
+        relation_name, unit_states), 'DEBUG')
+    return all(state == UNIT_READY for state in unit_states)
+
+
+def inform_peers_if_ready(check_unit_ready_func, relation_name='cluster'):
+    """Inform peers if this unit is ready.
+
+    The check function should return a tuple (state, message).  A state
+    of 'READY' indicates the unit is READY.
+
+    :param check_unit_ready_func: Function to run to check readiness
+    :type check_unit_ready_func: function
+    :param relation_name: Name of relation to check peers on.
+    :type relation_name: string
+    """
+    unit_ready, msg = check_unit_ready_func()
+    if unit_ready:
+        state = UNIT_READY
+    else:
+        state = UNIT_NOTREADY
+    juju_log('Telling peers this unit is: {}'.format(state), 'DEBUG')
+    inform_peers_unit_state(state, relation_name)
+
+
+def check_api_unit_ready(check_db_ready=True):
+    """Check if this unit is ready.
+
+    :param check_db_ready: Include checks of database readiness.
+    :type check_db_ready: bool
+    :returns: Whether unit state is ready and status message
+    :rtype: (bool, str)
+    """
+    unit_state, msg = get_api_unit_status(check_db_ready=check_db_ready)
+    return unit_state == WORKLOAD_STATES.ACTIVE, msg
+
+
+def get_api_unit_status(check_db_ready=True):
+    """Return a workload status and message for this unit.
+
+    :param check_db_ready: Include checks of database readiness.
+    :type check_db_ready: bool
+    :returns: Workload state and message
+    :rtype: (WORKLOAD_STATES, str)
+    """
+    unit_state = WORKLOAD_STATES.ACTIVE
+    msg = 'Unit is ready'
+    if is_db_maintenance_mode():
+        unit_state = WORKLOAD_STATES.MAINTENANCE
+        msg = 'Database in maintenance mode.'
+    elif is_unit_paused_set():
+        unit_state = WORKLOAD_STATES.BLOCKED
+        msg = 'Unit paused.'
+    elif check_db_ready and not is_db_ready():
+        unit_state = WORKLOAD_STATES.WAITING
+        msg = 'Allowed_units list provided but this unit not present'
+    elif not is_db_initialised():
+        unit_state = WORKLOAD_STATES.WAITING
+        msg = 'Database not initialised'
+    elif not is_expected_scale():
+        unit_state = WORKLOAD_STATES.WAITING
+        msg = 'Charm and its dependencies not yet at expected scale'
+    juju_log(msg, 'DEBUG')
+    return unit_state, msg
+
+
+def check_api_application_ready():
+    """Check if this application is ready.
+
+    :returns: Whether application state is ready and status message
+    :rtype: (bool, str)
+    """
+    app_state, msg = get_api_application_status()
+    return app_state == WORKLOAD_STATES.ACTIVE, msg
+
+
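+# Sketch (illustrative) of how these helpers are typically chained from a
+# charm's status assessment code:
+#
+#     inform_peers_if_ready(check_api_unit_ready)
+#     state, message = get_api_application_status()
+#     status_set(state, message, application=True)
+
+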
+def get_api_application_status():
+    """Return a workload status and message for this application.
+
+    :returns: Workload state and message
+    :rtype: (WORKLOAD_STATES, str)
+    """
+    app_state, msg = get_api_unit_status()
+    if app_state == WORKLOAD_STATES.ACTIVE:
+        if are_peers_ready():
+            msg = 'Application Ready'
+        else:
+            app_state = WORKLOAD_STATES.WAITING
+            msg = 'Some units are not ready'
+    juju_log(msg, 'DEBUG')
+    return app_state, msg
+
+
+def sequence_status_check_functions(*functions):
+    """Sequence the functions passed so that they all get a chance to run as
+    the charm status check functions.
+
+    :param *functions: a list of functions that return (state, message)
+    :type *functions: List[Callable[[OSConfigRenderer], (str, str)]]
+    :returns: the Callable that takes configs and returns (state, message)
+    :rtype: Callable[[OSConfigRenderer], (str, str)]
+    """
+    def _inner_sequenced_functions(configs):
+        state, message = 'unknown', ''
+        for f in functions:
+            new_state, new_message = f(configs)
+            state = workload_state_compare(state, new_state)
+            if message:
+                message = "{}, {}".format(message, new_message)
+            else:
+                message = new_message
+        return state, message
+
+    return _inner_sequenced_functions
+
+
+SubordinatePackages = namedtuple('SubordinatePackages', ['install', 'purge'])
+
+
+def get_subordinate_release_packages(os_release, package_type='deb'):
+    """Iterate over subordinate relations and get package information.
+
+    :param os_release: OpenStack release to look for
+    :type os_release: str
+    :param package_type: Package type (one of 'deb' or 'snap')
+    :type package_type: str
+    :returns: Packages to install and packages to purge or None
+    :rtype: SubordinatePackages[set,set]
+    """
+    install = set()
+    purge = set()
+
+    for rdata in container_scoped_relation_get('releases-packages-map'):
+        rp_map = json.loads(rdata or '{}')
+        # The map provided by subordinate has OpenStack release name as key.
+        # Find package information from subordinate matching requested release
+        # or the most recent release prior to requested release by sorting the
+        # keys in reverse order.  This follows established patterns in our
+        # charms for templates and reactive charm implementations, i.e. as
+        # long as nothing has changed, the definitions for the prior OpenStack
+        # release are still valid.
+        for release in sorted(rp_map.keys(), reverse=True):
+            if (CompareOpenStackReleases(release) <= os_release and
+                    package_type in rp_map[release]):
+                for name, container in (
+                        ('install', install),
+                        ('purge', purge)):
+                    for pkg in rp_map[release][package_type].get(name, []):
+                        container.add(pkg)
+                break
+    return SubordinatePackages(install, purge)
+
+
+def get_subordinate_services():
+    """Iterate over subordinate relations and get service information.
+
+    In a similar fashion as with get_subordinate_release_packages(),
+    principal charms can retrieve a list of services advertised by their
+    subordinate charms.  This is useful to know about subordinate services
+    when pausing, resuming or upgrading a principal unit.
+
+    :returns: Name of all services advertised by all subordinates
+    :rtype: Set[str]
+    """
+    services = set()
+    for rdata in container_scoped_relation_get('services'):
+        services |= set(json.loads(rdata or '[]'))
+    return services
+
+
+os_restart_on_change = partial(
+    pausable_restart_on_change,
+    can_restart_now_f=deferred_events.check_and_record_restart_request,
+    post_svc_restart_f=deferred_events.process_svc_restart)
+
+
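+# Example (illustrative): decorating a hook so that restarts flow through
+# the deferred-events machinery; restart_map and CONFIGS are hypothetical
+# charm-level names:
+#
+#     @os_restart_on_change(restart_map)
+#     def config_changed():
+#         CONFIGS.write_all()
+
+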
+def restart_services_action_helper(all_services):
+    """Helper to run the restart-services action.
+
+    NOTE: all_services is all services that could be restarted but
+          depending on the action arguments it may be a subset of
+          these that are actually restarted.
+
+    :param all_services: All services that could be restarted
+    :type all_services: List[str]
+    """
+    deferred_only = action_get("deferred-only")
+    services = action_get("services")
+    if services:
+        services = services.split()
+    else:
+        services = all_services
+    if deferred_only:
+        restart_services_action(deferred_only=True)
+    else:
+        restart_services_action(services=services)
+
+
+def show_deferred_events_action_helper():
+    """Helper to run the show-deferred-events action."""
+    restarts = []
+    for event in deferred_events.get_deferred_events():
+        restarts.append('{} {} {}'.format(
+            str(event.timestamp),
+            event.service.ljust(40),
+            event.reason))
+    restarts.sort()
+    output = {
+        'restarts': restarts,
+        'hooks': deferred_events.get_deferred_hooks()}
+    action_set({'output': "{}".format(
+        yaml.dump(output, default_flow_style=False))})
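+
+
+# Example (illustrative): a charm's actions.py would typically delegate
+# straight to these helpers; services() is a hypothetical charm function:
+#
+#     def restart_services(args):
+#         restart_services_action_helper(all_services=services())
+#
+#     def show_deferred_events(args):
+#         show_deferred_events_action_helper()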
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/openstack/vaultlocker.py b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/vaultlocker.py
new file mode 100644
index 00000000..002bc579
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/openstack/vaultlocker.py
@@ -0,0 +1,184 @@
+# Copyright 2018-2021 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import os
+
+import charmhelpers.contrib.openstack.alternatives as alternatives
+import charmhelpers.contrib.openstack.context as context
+
+import charmhelpers.core.hookenv as hookenv
+import charmhelpers.core.host as host
+import charmhelpers.core.templating as templating
+import charmhelpers.core.unitdata as unitdata
+
+VAULTLOCKER_BACKEND = 'charm-vaultlocker'
+
+
+class VaultKVContext(context.OSContextGenerator):
+    """Vault KV context for interaction with vault-kv interfaces"""
+    interfaces = ['secrets-storage']
+
+    def __init__(self, secret_backend=None):
+        super(context.OSContextGenerator, self).__init__()
+        self.secret_backend = (
+            secret_backend or 'charm-{}'.format(hookenv.service_name())
+        )
+
+    def __call__(self):
+        try:
+            import hvac
+        except ImportError:
+            # BUG: #1862085 - if the relation is made to vault, but the
+            # 'encrypt' option is not made, then the charm errors with an
+            # import warning.  This catches that, logs a warning, and returns
+            # with an empty context.
+            hookenv.log("VaultKVContext: trying to use the hvac python "
+                        "module but it's not available.  Is the "
+                        "secrets-storage relation made, but the encrypt "
+                        "option not set?",
+                        level=hookenv.WARNING)
+            # return an empty context on hvac import error
+            return {}
+        ctxt = {}
+        # NOTE(hopem): see https://bugs.launchpad.net/charm-helpers/+bug/1849323
+        db = unitdata.kv()
+        # currently known-good secret-id
+        secret_id = db.get('secret-id')
+
+        for relation_id in hookenv.relation_ids(self.interfaces[0]):
+            for unit in hookenv.related_units(relation_id):
+                data = hookenv.relation_get(unit=unit,
+                                            rid=relation_id)
+                vault_url = data.get('vault_url')
+                role_id = data.get('{}_role_id'.format(hookenv.local_unit()))
+                token = data.get('{}_token'.format(hookenv.local_unit()))
+
+                if all([vault_url, role_id, token]):
+                    token = json.loads(token)
+                    vault_url = json.loads(vault_url)
+
+                    # Tokens may change when secret_id's are being
+                    # reissued - if so use token to get new secret_id
+                    token_success = False
+                    try:
+                        secret_id = retrieve_secret_id(
+                            url=vault_url,
+                            token=token
+                        )
+                        token_success = True
+                    except hvac.exceptions.InvalidRequest:
+                        # Try next
+                        pass
+
+                    if token_success:
+                        db.set('secret-id', secret_id)
+                        db.flush()
+
+                        ctxt['vault_url'] = vault_url
+                        ctxt['role_id'] = json.loads(role_id)
+                        ctxt['secret_id'] = secret_id
+                        ctxt['secret_backend'] = self.secret_backend
+                        vault_ca = data.get('vault_ca')
+                        if vault_ca:
+                            ctxt['vault_ca'] = json.loads(vault_ca)
+
+                        self.complete = True
+                        break
+                    else:
+                        if secret_id:
+                            ctxt['vault_url'] = vault_url
+                            ctxt['role_id'] = json.loads(role_id)
+                            ctxt['secret_id'] = secret_id
+                            ctxt['secret_backend'] = self.secret_backend
+                            vault_ca = data.get('vault_ca')
+                            if vault_ca:
+                                ctxt['vault_ca'] = json.loads(vault_ca)
+
+            if self.complete:
+                break
+
+        if ctxt:
+            self.complete = True
+
+        return ctxt
+
+
+def write_vaultlocker_conf(context, priority=100):
+    """Write vaultlocker configuration to disk and install alternative
+
+    :param context: Dict of data from vault-kv relation
+    :ptype: context: dict
+    :param priority: Priority of alternative configuration
+    :ptype: priority: int"""
+    charm_vl_path = "/var/lib/charm/{}/vaultlocker.conf".format(
+        hookenv.service_name()
+    )
+    host.mkdir(os.path.dirname(charm_vl_path), perms=0o700)
+    templating.render(source='vaultlocker.conf.j2',
+                      target=charm_vl_path,
+                      context=context, perms=0o600)
+    alternatives.install_alternative('vaultlocker.conf',
+                                     '/etc/vaultlocker/vaultlocker.conf',
+                                     charm_vl_path, priority)
+
+
+def vault_relation_complete(backend=None):
+    """Determine whether vault relation is complete
+
+    :param backend: Name of secrets backend requested
+    :ptype backend: string
+    :returns: whether the relation to vault is complete
+    :rtype: bool"""
+    try:
+        import hvac
+    except ImportError:
+        return False
+    try:
+        vault_kv = VaultKVContext(secret_backend=backend or VAULTLOCKER_BACKEND)
+        vault_kv()
+        return vault_kv.complete
+    except hvac.exceptions.InvalidRequest:
+        return False
+
+
+# TODO: contribute a high level unwrap method to hvac that works
+def retrieve_secret_id(url, token):
+    """Retrieve a response-wrapped secret_id from Vault
+
+    :param url: URL to Vault Server
+    :ptype url: str
+    :param token: One shot Token to use
+    :ptype token: str
+    :returns: secret_id to use for Vault Access
+    :rtype: str"""
+    import hvac
+    try:
+        # hvac 0.10.1 changed default adapter to JSONAdapter
+        client = hvac.Client(url=url, token=token, adapter=hvac.adapters.Request)
+    except AttributeError:
+        # hvac < 0.6.2 doesn't have adapter but uses the same response interface
+        client = hvac.Client(url=url, token=token)
+    else:
+        # hvac <
0.9.2 assumes adapter is an instance, so doesn't instantiate + if not isinstance(client.adapter, hvac.adapters.Request): + client.adapter = hvac.adapters.Request(base_uri=url, token=token) + try: + # hvac == 1.0.0 has an API to unwrap with the user token + response = client.sys.unwrap() + except AttributeError: + # fallback to hvac < 1.0.0 + response = client._post('/v1/sys/wrapping/unwrap') + if response.status_code == 200: + data = response.json() + return data['data']['secret_id'] diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/python.py b/ceph-radosgw/hooks/charmhelpers/contrib/python.py new file mode 100644 index 00000000..fcded680 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/python.py @@ -0,0 +1,19 @@ +# Copyright 2014-2019 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# deprecated aliases for backwards compatibility +from charmhelpers.fetch.python import debug # noqa +from charmhelpers.fetch.python import packages # noqa +from charmhelpers.fetch.python import rpdb # noqa +from charmhelpers.fetch.python import version # noqa diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/storage/__init__.py b/ceph-radosgw/hooks/charmhelpers/contrib/storage/__init__.py new file mode 100644 index 00000000..d7567b86 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/storage/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/storage/linux/__init__.py b/ceph-radosgw/hooks/charmhelpers/contrib/storage/linux/__init__.py new file mode 100644 index 00000000..d7567b86 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/storage/linux/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/storage/linux/bcache.py b/ceph-radosgw/hooks/charmhelpers/contrib/storage/linux/bcache.py new file mode 100644 index 00000000..605991e1 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/storage/linux/bcache.py @@ -0,0 +1,74 @@ +# Copyright 2017 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import json + +from charmhelpers.core.hookenv import log + +stats_intervals = ['stats_day', 'stats_five_minute', + 'stats_hour', 'stats_total'] + +SYSFS = '/sys' + + +class Bcache(object): + """Bcache behaviour + """ + + def __init__(self, cachepath): + self.cachepath = cachepath + + @classmethod + def fromdevice(cls, devname): + return cls('{}/block/{}/bcache'.format(SYSFS, devname)) + + def __str__(self): + return self.cachepath + + def get_stats(self, interval): + """Get cache stats + """ + intervaldir = 'stats_{}'.format(interval) + path = "{}/{}".format(self.cachepath, intervaldir) + out = dict() + for elem in os.listdir(path): + out[elem] = open('{}/{}'.format(path, elem)).read().strip() + return out + + +def get_bcache_fs(): + """Return all cache sets + """ + cachesetroot = "{}/fs/bcache".format(SYSFS) + try: + dirs = os.listdir(cachesetroot) + except OSError: + log("No bcache fs found") + return [] + cacheset = set([Bcache('{}/{}'.format(cachesetroot, d)) for d in dirs if not d.startswith('register')]) + return cacheset + + +def get_stats_action(cachespec, interval): + """Action for getting bcache statistics for a given cachespec. + Cachespec can either be a device name, eg. 'sdb', which will retrieve + cache stats for the given device, or 'global', which will retrieve stats + for all cachesets + """ + if cachespec == 'global': + caches = get_bcache_fs() + else: + caches = [Bcache.fromdevice(cachespec)] + res = dict((c.cachepath, c.get_stats(interval)) for c in caches) + return json.dumps(res, indent=4, separators=(',', ': ')) diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/storage/linux/ceph.py b/ceph-radosgw/hooks/charmhelpers/contrib/storage/linux/ceph.py new file mode 100644 index 00000000..6ec67cba --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/storage/linux/ceph.py @@ -0,0 +1,2401 @@ +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# This file is sourced from lp:openstack-charm-helpers +# +# Authors: +# James Page +# Adam Gandelman +# + +import collections +import errno +import hashlib +import math + +import os +import shutil +import json +import time + +from subprocess import ( + check_call, + check_output, + CalledProcessError, +) +from charmhelpers import deprecate +from charmhelpers.core.hookenv import ( + application_name, + config, + service_name, + local_unit, + relation_get, + relation_ids, + relation_set, + related_units, + log, + DEBUG, + INFO, + WARNING, + ERROR, +) +from charmhelpers.core.host import ( + mount, + mounts, + service_start, + service_stop, + service_running, + umount, + cmp_pkgrevno, +) +from charmhelpers.fetch import ( + apt_install, +) +from charmhelpers.core.unitdata import kv + +from charmhelpers.core.kernel import modprobe +from charmhelpers.contrib.openstack.utils import config_flags_parser + +KEYRING = '/etc/ceph/ceph.client.{}.keyring' +KEYFILE = '/etc/ceph/ceph.client.{}.key' + +CEPH_CONF = """[global] +auth supported = {auth} +keyring = {keyring} +mon host = {mon_hosts} +log to syslog = {use_syslog} +err to syslog = {use_syslog} +clog to syslog = {use_syslog} +""" + +# The number of placement groups per OSD to target for placement group +# calculations. This number is chosen as 100 due to the ceph PG Calc +# documentation recommending to choose 100 for clusters which are not +# expected to increase in the foreseeable future. Since the majority of the +# calculations are done on deployment, target the case of non-expanding +# clusters as the default. +DEFAULT_PGS_PER_OSD_TARGET = 100 +DEFAULT_POOL_WEIGHT = 10.0 +LEGACY_PG_COUNT = 200 +DEFAULT_MINIMUM_PGS = 2 +AUTOSCALER_DEFAULT_PGS = 32 + + +class OsdPostUpgradeError(Exception): + """Error class for OSD post-upgrade operations.""" + pass + + +class OSDSettingConflict(Exception): + """Error class for conflicting osd setting requests.""" + pass + + +class OSDSettingNotAllowed(Exception): + """Error class for a disallowed setting.""" + pass + + +OSD_SETTING_EXCEPTIONS = (OSDSettingConflict, OSDSettingNotAllowed) + +OSD_SETTING_WHITELIST = [ + 'osd heartbeat grace', + 'osd heartbeat interval', +] + + +def _order_dict_by_key(rdict): + """Convert a dictionary into an OrderedDict sorted by key. + + :param rdict: Dictionary to be ordered. + :type rdict: dict + :returns: Ordered Dictionary. + :rtype: collections.OrderedDict + """ + return collections.OrderedDict(sorted(rdict.items(), key=lambda k: k[0])) + + +def get_osd_settings(relation_name): + """Consolidate requested osd settings from all clients. + + Consolidate requested osd settings from all clients. Check that the + requested setting is on the whitelist and it does not conflict with + any other requested settings. 
+ + :returns: Dictionary of settings + :rtype: dict + + :raises: OSDSettingNotAllowed + :raises: OSDSettingConflict + """ + rel_ids = relation_ids(relation_name) + osd_settings = {} + for relid in rel_ids: + for unit in related_units(relid): + unit_settings = relation_get('osd-settings', unit, relid) or '{}' + unit_settings = json.loads(unit_settings) + for key, value in unit_settings.items(): + if key not in OSD_SETTING_WHITELIST: + msg = 'Illegal settings "{}"'.format(key) + raise OSDSettingNotAllowed(msg) + if key in osd_settings: + if osd_settings[key] != unit_settings[key]: + msg = 'Conflicting settings for "{}"'.format(key) + raise OSDSettingConflict(msg) + else: + osd_settings[key] = value + return _order_dict_by_key(osd_settings) + + +def send_application_name(relid=None, app_name=None): + """Send the application name down the relation. + + :param relid: Relation id to set application name in. + :type relid: str + :param app_name: Application name to send in the relation. + :type app_name: str + """ + if app_name is None: + app_name = application_name() + relation_set( + relation_id=relid, + relation_settings={'application-name': app_name}) + + +def send_osd_settings(): + """Pass on requested OSD settings to osd units.""" + try: + settings = get_osd_settings('client') + except OSD_SETTING_EXCEPTIONS as e: + # There is a problem with the settings, not passing them on. Update + # status will notify the user. + log(e, level=ERROR) + return + data = { + 'osd-settings': json.dumps(settings, sort_keys=True)} + for relid in relation_ids('osd'): + relation_set(relation_id=relid, + relation_settings=data) + + +def validator(value, valid_type, valid_range=None): + """Helper function for type validation. + + Used to validate these: + https://docs.ceph.com/docs/master/rados/operations/pools/#set-pool-values + https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#inline-compression + + Example input: + validator(value=1, + valid_type=int, + valid_range=[0, 2]) + + This says I'm testing value=1. It must be an int inclusive in [0,2] + + :param value: The value to validate. + :type value: any + :param valid_type: The type that value should be. + :type valid_type: any + :param valid_range: A range of values that value can assume. + :type valid_range: Optional[Union[List,Tuple]] + :raises: AssertionError, ValueError + """ + assert isinstance(value, valid_type), ( + "{} is not a {}".format(value, valid_type)) + if valid_range is not None: + assert isinstance( + valid_range, list) or isinstance(valid_range, tuple), ( + "valid_range must be of type List or Tuple, " + "was given {} of type {}" + .format(valid_range, type(valid_range))) + # If we're dealing with strings + if isinstance(value, str): + assert value in valid_range, ( + "{} is not in the list {}".format(value, valid_range)) + # Integer, float should have a min and max + else: + if len(valid_range) != 2: + raise ValueError( + "Invalid valid_range list of {} for {}. " + "List must be [min,max]".format(valid_range, value)) + assert value >= valid_range[0], ( + "{} is less than minimum allowed value of {}" + .format(value, valid_range[0])) + assert value <= valid_range[1], ( + "{} is greater than maximum allowed value of {}" + .format(value, valid_range[1])) + + +class PoolCreationError(Exception): + """A custom exception to inform the caller that a pool creation failed. 
+ + Provides an error message + """ + + def __init__(self, message): + super(PoolCreationError, self).__init__(message) + + +class BasePool(object): + """An object oriented approach to Ceph pool creation. + + This base class is inherited by ReplicatedPool and ErasurePool. Do not call + create() on this base class as it will raise an exception. + + Instantiate a child class and call create(). + """ + # Dictionary that maps pool operation properties to Tuples with valid type + # and valid range + op_validation_map = { + 'compression-algorithm': (str, ('lz4', 'snappy', 'zlib', 'zstd')), + 'compression-mode': (str, ('none', 'passive', 'aggressive', 'force')), + 'compression-required-ratio': (float, None), + 'compression-min-blob-size': (int, None), + 'compression-min-blob-size-hdd': (int, None), + 'compression-min-blob-size-ssd': (int, None), + 'compression-max-blob-size': (int, None), + 'compression-max-blob-size-hdd': (int, None), + 'compression-max-blob-size-ssd': (int, None), + 'rbd-mirroring-mode': (str, ('image', 'pool')) + } + + def __init__(self, service, name=None, percent_data=None, app_name=None, + op=None): + """Initialize BasePool object. + + Pool information is either initialized from individual keyword + arguments or from a individual CephBrokerRq operation Dict. + + :param service: The Ceph user name to run commands under. + :type service: str + :param name: Name of pool to operate on. + :type name: str + :param percent_data: The expected pool size in relation to all + available resources in the Ceph cluster. Will be + used to set the ``target_size_ratio`` pool + property. (default: 10.0) + :type percent_data: Optional[float] + :param app_name: Ceph application name, usually one of: + ('cephfs', 'rbd', 'rgw') (default: 'unknown') + :type app_name: Optional[str] + :param op: Broker request Op to compile pool data from. + :type op: Optional[Dict[str,any]] + :raises: KeyError + """ + # NOTE: Do not perform initialization steps that require live data from + # a running cluster here. The *Pool classes may be used for validation. + self.service = service + self.op = op or {} + + if op: + # When initializing from op the `name` attribute is required and we + # will fail with KeyError if it is not provided. + self.name = op['name'] + self.percent_data = op.get('weight') + self.app_name = op.get('app-name') + else: + self.name = name + self.percent_data = percent_data + self.app_name = app_name + + # Set defaults for these if they are not provided + self.percent_data = self.percent_data or 10.0 + self.app_name = self.app_name or 'unknown' + + def validate(self): + """Check that value of supplied operation parameters are valid. + + :raises: ValueError + """ + for op_key, op_value in self.op.items(): + if op_key in self.op_validation_map and op_value is not None: + valid_type, valid_range = self.op_validation_map[op_key] + try: + validator(op_value, valid_type, valid_range) + except (AssertionError, ValueError) as e: + # Normalize on ValueError, also add information about which + # variable we had an issue with. + raise ValueError("'{}': {}".format(op_key, str(e))) + + def _create(self): + """Perform the pool creation, method MUST be overridden by child class. + """ + raise NotImplementedError + + def _post_create(self): + """Perform common post pool creation tasks. + + Note that pool properties subject to change during the lifetime of a + pool / deployment should go into the ``update`` method. 
+ + Do not add calls for a specific pool type here, those should go into + one of the pool specific classes. + """ + nautilus_or_later = cmp_pkgrevno('ceph-common', '14.2.0') >= 0 + if nautilus_or_later: + # Ensure we set the expected pool ratio + update_pool( + client=self.service, + pool=self.name, + settings={ + 'target_size_ratio': str( + self.percent_data / 100.0), + }) + try: + set_app_name_for_pool(client=self.service, + pool=self.name, + name=self.app_name) + except CalledProcessError: + log('Could not set app name for pool {}' + .format(self.name), + level=WARNING) + if 'pg_autoscaler' in enabled_manager_modules(): + try: + enable_pg_autoscale(self.service, self.name) + except CalledProcessError as e: + log('Could not configure auto scaling for pool {}: {}' + .format(self.name, e), + level=WARNING) + + def create(self): + """Create pool and perform any post pool creation tasks. + + To allow for sharing of common code among pool specific classes the + processing has been broken out into the private methods ``_create`` + and ``_post_create``. + + Do not add any pool type specific handling here, that should go into + one of the pool specific classes. + """ + if not pool_exists(self.service, self.name): + self.validate() + self._create() + self._post_create() + self.update() + + def set_quota(self): + """Set a quota if requested. + + :raises: CalledProcessError + """ + max_bytes = self.op.get('max-bytes') + max_objects = self.op.get('max-objects') + if max_bytes or max_objects: + set_pool_quota(service=self.service, pool_name=self.name, + max_bytes=max_bytes, max_objects=max_objects) + + def set_compression(self): + """Set compression properties if requested. + + :raises: CalledProcessError + """ + compression_properties = { + key.replace('-', '_'): value + for key, value in self.op.items() + if key in ( + 'compression-algorithm', + 'compression-mode', + 'compression-required-ratio', + 'compression-min-blob-size', + 'compression-min-blob-size-hdd', + 'compression-min-blob-size-ssd', + 'compression-max-blob-size', + 'compression-max-blob-size-hdd', + 'compression-max-blob-size-ssd') and value} + if compression_properties: + update_pool(self.service, self.name, compression_properties) + + def update(self): + """Update properties for an already existing pool. + + Do not add calls for a specific pool type here, those should go into + one of the pool specific classes. + """ + self.validate() + self.set_quota() + self.set_compression() + + def add_cache_tier(self, cache_pool, mode): + """Adds a new cache tier to an existing pool. + + :param cache_pool: The cache tier pool name to add. + :type cache_pool: str + :param mode: The caching mode to use for this pool. + valid range = ["readonly", "writeback"] + :type mode: str + """ + # Check the input types and values + validator(value=cache_pool, valid_type=str) + validator( + value=mode, valid_type=str, + valid_range=["readonly", "writeback"]) + + check_call([ + 'ceph', '--id', self.service, + 'osd', 'tier', 'add', self.name, cache_pool, + ]) + check_call([ + 'ceph', '--id', self.service, + 'osd', 'tier', 'cache-mode', cache_pool, mode, + ]) + check_call([ + 'ceph', '--id', self.service, + 'osd', 'tier', 'set-overlay', self.name, cache_pool, + ]) + check_call([ + 'ceph', '--id', self.service, + 'osd', 'pool', 'set', cache_pool, 'hit_set_type', 'bloom', + ]) + + def remove_cache_tier(self, cache_pool): + """Removes a cache tier from Ceph. + + Flushes all dirty objects from writeback pools and waits for that to + complete. 
+ + :param cache_pool: The cache tier pool name to remove. + :type cache_pool: str + """ + # read-only is easy, writeback is much harder + mode = get_cache_mode(self.service, cache_pool) + if mode == 'readonly': + check_call([ + 'ceph', '--id', self.service, + 'osd', 'tier', 'cache-mode', cache_pool, 'none' + ]) + check_call([ + 'ceph', '--id', self.service, + 'osd', 'tier', 'remove', self.name, cache_pool, + ]) + + elif mode == 'writeback': + pool_forward_cmd = ['ceph', '--id', self.service, 'osd', 'tier', + 'cache-mode', cache_pool, 'forward'] + if cmp_pkgrevno('ceph-common', '10.1') >= 0: + # Jewel added a mandatory flag + pool_forward_cmd.append('--yes-i-really-mean-it') + + check_call(pool_forward_cmd) + # Flush the cache and wait for it to return + check_call([ + 'rados', '--id', self.service, + '-p', cache_pool, 'cache-flush-evict-all']) + check_call([ + 'ceph', '--id', self.service, + 'osd', 'tier', 'remove-overlay', self.name]) + check_call([ + 'ceph', '--id', self.service, + 'osd', 'tier', 'remove', self.name, cache_pool]) + + def get_pgs(self, pool_size, percent_data=DEFAULT_POOL_WEIGHT, + device_class=None): + """Return the number of placement groups to use when creating the pool. + + Returns the number of placement groups which should be specified when + creating the pool. This is based upon the calculation guidelines + provided by the Ceph Placement Group Calculator (located online at + http://ceph.com/pgcalc/). + + The number of placement groups are calculated using the following: + + (Target PGs per OSD) * (OSD #) * (%Data) + ---------------------------------------- + (Pool size) + + Per the upstream guidelines, the OSD # should really be considered + based on the number of OSDs which are eligible to be selected by the + pool. Since the pool creation doesn't specify any of CRUSH set rules, + the default rule will be dependent upon the type of pool being + created (replicated or erasure). + + This code makes no attempt to determine the number of OSDs which can be + selected for the specific rule, rather it is left to the user to tune + in the form of 'expected-osd-count' config option. + + :param pool_size: pool_size is either the number of replicas for + replicated pools or the K+M sum for erasure coded pools + :type pool_size: int + :param percent_data: the percentage of data that is expected to + be contained in the pool for the specific OSD set. Default value + is to assume 10% of the data is for this pool, which is a + relatively low % of the data but allows for the pg_num to be + increased. NOTE: the default is primarily to handle the scenario + where related charms requiring pools has not been upgraded to + include an update to indicate their relative usage of the pools. + :type percent_data: float + :param device_class: class of storage to use for basis of pgs + calculation; ceph supports nvme, ssd and hdd by default based + on presence of devices of each type in the deployment. + :type device_class: str + :returns: The number of pgs to use. + :rtype: int + """ + + # Note: This calculation follows the approach that is provided + # by the Ceph PG Calculator located at http://ceph.com/pgcalc/. + validator(value=pool_size, valid_type=int) + + # Ensure that percent data is set to something - even with a default + # it can be set to None, which would wreak havoc below. 
+ if percent_data is None: + percent_data = DEFAULT_POOL_WEIGHT + + # If the expected-osd-count is specified, then use the max between + # the expected-osd-count and the actual osd_count + osd_list = get_osds(self.service, device_class) + expected = config('expected-osd-count') or 0 + + if osd_list: + if device_class: + osd_count = len(osd_list) + else: + osd_count = max(expected, len(osd_list)) + + # Log a message to provide some insight if the calculations claim + # to be off because someone is setting the expected count and + # there are more OSDs in reality. Try to make a proper guess + # based upon the cluster itself. + if not device_class and expected and osd_count != expected: + log("Found more OSDs than provided expected count. " + "Using the actual count instead", INFO) + elif expected: + # Use the expected-osd-count in older ceph versions to allow for + # a more accurate pg calculations + osd_count = expected + else: + # NOTE(james-page): Default to 200 for older ceph versions + # which don't support OSD query from cli + return LEGACY_PG_COUNT + + percent_data /= 100.0 + target_pgs_per_osd = config( + 'pgs-per-osd') or DEFAULT_PGS_PER_OSD_TARGET + num_pg = (target_pgs_per_osd * osd_count * percent_data) // pool_size + + # NOTE: ensure a sane minimum number of PGS otherwise we don't get any + # reasonable data distribution in minimal OSD configurations + if num_pg < DEFAULT_MINIMUM_PGS: + num_pg = DEFAULT_MINIMUM_PGS + + # The CRUSH algorithm has a slight optimization for placement groups + # with powers of 2 so find the nearest power of 2. If the nearest + # power of 2 is more than 25% below the original value, the next + # highest value is used. To do this, find the nearest power of 2 such + # that 2^n <= num_pg, check to see if its within the 25% tolerance. + exponent = math.floor(math.log(num_pg, 2)) + nearest = 2 ** exponent + if (num_pg - nearest) > (num_pg * 0.25): + # Choose the next highest power of 2 since the nearest is more + # than 25% below the original value. + return int(nearest * 2) + else: + return int(nearest) + + +class Pool(BasePool): + """Compatibility shim for any descendents external to this library.""" + + @deprecate( + 'The ``Pool`` baseclass has been replaced by ``BasePool`` class.') + def __init__(self, service, name): + super(Pool, self).__init__(service, name=name) + + def create(self): + pass + + +class ReplicatedPool(BasePool): + def __init__(self, service, name=None, pg_num=None, replicas=None, + percent_data=None, app_name=None, op=None, + profile_name='replicated_rule'): + """Initialize ReplicatedPool object. + + Pool information is either initialized from individual keyword + arguments or from a individual CephBrokerRq operation Dict. + + Please refer to the docstring of the ``BasePool`` class for + documentation of the common parameters. + + :param pg_num: Express wish for number of Placement Groups (this value + is subject to validation against a running cluster prior + to use to avoid creating a pool with too many PGs) + :type pg_num: int + :param replicas: Number of copies there should be of each object added + to this replicated pool. + :type replicas: int + :raises: KeyError + :param profile_name: Crush Profile to use + :type profile_name: Optional[str] + """ + # NOTE: Do not perform initialization steps that require live data from + # a running cluster here. The *Pool classes may be used for validation. 
+ + # The common parameters are handled in our parents initializer + super(ReplicatedPool, self).__init__( + service=service, name=name, percent_data=percent_data, + app_name=app_name, op=op) + + if op: + # When initializing from op `replicas` is a required attribute, and + # we will fail with KeyError if it is not provided. + self.replicas = op['replicas'] + self.pg_num = op.get('pg_num') + self.profile_name = op.get('crush-profile') or profile_name + else: + self.replicas = replicas or 2 + self.pg_num = pg_num + self.profile_name = profile_name or 'replicated_rule' + + def _create(self): + # Validate if crush profile exists + if self.profile_name is None: + msg = ("Failed to discover crush profile named " + "{}".format(self.profile_name)) + log(msg, level=ERROR) + raise PoolCreationError(msg) + + # Do extra validation on pg_num with data from live cluster + if self.pg_num: + # Since the number of placement groups were specified, ensure + # that there aren't too many created. + max_pgs = self.get_pgs(self.replicas, 100.0) + self.pg_num = min(self.pg_num, max_pgs) + else: + self.pg_num = self.get_pgs(self.replicas, self.percent_data) + + nautilus_or_later = cmp_pkgrevno('ceph-common', '14.2.0') >= 0 + # Create it + if nautilus_or_later: + cmd = [ + 'ceph', '--id', self.service, 'osd', 'pool', 'create', + '--pg-num-min={}'.format( + min(AUTOSCALER_DEFAULT_PGS, self.pg_num) + ), + self.name, str(self.pg_num), self.profile_name + ] + else: + cmd = [ + 'ceph', '--id', self.service, 'osd', 'pool', 'create', + self.name, str(self.pg_num), self.profile_name + ] + check_call(cmd) + + def _post_create(self): + # Set the pool replica size + update_pool(client=self.service, + pool=self.name, + settings={'size': str(self.replicas)}) + # Perform other common post pool creation tasks + super(ReplicatedPool, self)._post_create() + + +class ErasurePool(BasePool): + """Default jerasure erasure coded pool.""" + + def __init__(self, service, name=None, erasure_code_profile=None, + percent_data=None, app_name=None, op=None, + allow_ec_overwrites=False): + """Initialize ErasurePool object. + + Pool information is either initialized from individual keyword + arguments or from a individual CephBrokerRq operation Dict. + + Please refer to the docstring of the ``BasePool`` class for + documentation of the common parameters. + + :param erasure_code_profile: EC Profile to use (default: 'default') + :type erasure_code_profile: Optional[str] + """ + # NOTE: Do not perform initialization steps that require live data from + # a running cluster here. The *Pool classes may be used for validation. + + # The common parameters are handled in our parents initializer + super(ErasurePool, self).__init__( + service=service, name=name, percent_data=percent_data, + app_name=app_name, op=op) + + if op: + # Note that the different default when initializing from op stems + # from different handling of this in the `charms.ceph` library. + self.erasure_code_profile = op.get('erasure-profile', + 'default-canonical') + self.allow_ec_overwrites = op.get('allow-ec-overwrites') + else: + # We keep the class default when initialized from keyword arguments + # to not break the API for any other consumers. + self.erasure_code_profile = erasure_code_profile or 'default' + self.allow_ec_overwrites = allow_ec_overwrites + + def _create(self): + # Try to find the erasure profile information in order to properly + # size the number of placement groups. The size of an erasure + # coded placement group is calculated as k+m. 
+        erasure_profile = get_erasure_profile(self.service,
+                                              self.erasure_code_profile)
+
+        # Check for errors
+        if erasure_profile is None:
+            msg = ("Failed to discover erasure profile named "
+                   "{}".format(self.erasure_code_profile))
+            log(msg, level=ERROR)
+            raise PoolCreationError(msg)
+        if 'k' not in erasure_profile or 'm' not in erasure_profile:
+            # Error
+            msg = ("Unable to find k (data chunks) or m (coding chunks) "
+                   "in erasure profile {}".format(erasure_profile))
+            log(msg, level=ERROR)
+            raise PoolCreationError(msg)
+
+        k = int(erasure_profile['k'])
+        m = int(erasure_profile['m'])
+        pgs = self.get_pgs(k + m, self.percent_data)
+        nautilus_or_later = cmp_pkgrevno('ceph-common', '14.2.0') >= 0
+        # Create it
+        if nautilus_or_later:
+            cmd = [
+                'ceph', '--id', self.service, 'osd', 'pool', 'create',
+                '--pg-num-min={}'.format(
+                    min(AUTOSCALER_DEFAULT_PGS, pgs)
+                ),
+                self.name, str(pgs), str(pgs),
+                'erasure', self.erasure_code_profile
+            ]
+        else:
+            cmd = [
+                'ceph', '--id', self.service, 'osd', 'pool', 'create',
+                self.name, str(pgs), str(pgs),
+                'erasure', self.erasure_code_profile
+            ]
+        check_call(cmd)
+
+    def _post_create(self):
+        super(ErasurePool, self)._post_create()
+        if self.allow_ec_overwrites:
+            update_pool(self.service, self.name,
+                        {'allow_ec_overwrites': 'true'})
+
+
+def enabled_manager_modules():
+    """Return a list of enabled manager modules.
+
+    :rtype: List[str]
+    """
+    cmd = ['ceph', 'mgr', 'module', 'ls']
+    quincy_or_later = cmp_pkgrevno('ceph-common', '17.1.0') >= 0
+    if quincy_or_later:
+        cmd.append('--format=json')
+    try:
+        modules = check_output(cmd).decode('utf-8')
+    except CalledProcessError as e:
+        log("Failed to list ceph modules: {}".format(e), WARNING)
+        return []
+    modules = json.loads(modules)
+    return modules['enabled_modules']
+
+
+def enable_pg_autoscale(service, pool_name):
+    """Enable Ceph's PG autoscaler for the specified pool.
+
+    :param service: The Ceph user name to run the command under
+    :type service: str
+    :param pool_name: The name of the pool to enable autoscaling on
+    :type pool_name: str
+    :raises: CalledProcessError if the command fails
+    """
+    check_call([
+        'ceph', '--id', service,
+        'osd', 'pool', 'set', pool_name, 'pg_autoscale_mode', 'on'])
+
+
+def get_mon_map(service):
+    """Return the current monitor map.
+
+    :param service: The Ceph user name to run the command under
+    :type service: str
+    :returns: Dictionary with monitor map data
+    :rtype: Dict[str,any]
+    :raises: ValueError if the monmap fails to parse, CalledProcessError if our
+             ceph command fails.
+    """
+    try:
+        octopus_or_later = cmp_pkgrevno('ceph-common', '15.0.0') >= 0
+        mon_status_cmd = 'quorum_status' if octopus_or_later else 'mon_status'
+        mon_status = (check_output(['ceph', '--id', service, mon_status_cmd,
+                                    '--format=json'])).decode('utf-8')
+        try:
+            return json.loads(mon_status)
+        except ValueError as v:
+            log("Unable to parse mon_status json: {}. Error: {}"
+                .format(mon_status, str(v)))
+            raise
+    except CalledProcessError as e:
+        log("mon_status command failed with message: {}"
+            .format(str(e)))
+        raise
+
+
+def hash_monitor_names(service):
+    """Get a sorted list of monitor hashes in ascending order.
+
+    Uses the get_mon_map() function to get information about the monitor
+    cluster. Hash the name of each monitor.
+
+    :param service: The Ceph user name to run the command under.
+    :type service: str
+    :returns: a sorted list of monitor hashes in ascending order.
+    :rtype: List[str]
+    :raises: CalledProcessError, ValueError
+    """
+    try:
+        hash_list = []
+        monitor_list = get_mon_map(service=service)
+        if monitor_list['monmap']['mons']:
+            for mon in monitor_list['monmap']['mons']:
+                hash_list.append(
+                    hashlib.sha224(mon['name'].encode('utf-8')).hexdigest())
+            return sorted(hash_list)
+        else:
+            return None
+    except (ValueError, CalledProcessError):
+        raise
+
+
+def monitor_key_delete(service, key):
+    """Delete a key and value pair from the monitor cluster.
+
+    :param service: The Ceph user name to run the command under
+    :type service: str
+    :param key: The key to delete.
+    :type key: str
+    :raises: CalledProcessError
+    """
+    try:
+        check_output(
+            ['ceph', '--id', service,
+             'config-key', 'del', str(key)])
+    except CalledProcessError as e:
+        log("Monitor config-key del failed with message: {}"
+            .format(e.output))
+        raise
+
+
+def monitor_key_set(service, key, value):
+    """Set a key value pair on the monitor cluster.
+
+    :param service: The Ceph user name to run the command under.
+    :type service: str
+    :param key: The key to set.
+    :type key: str
+    :param value: The value to set. This will be coerced into a string.
+    :type value: str
+    :raises: CalledProcessError
+    """
+    try:
+        check_output(
+            ['ceph', '--id', service,
+             'config-key', 'put', str(key), str(value)])
+    except CalledProcessError as e:
+        log("Monitor config-key put failed with message: {}"
+            .format(e.output))
+        raise
+
+
+def monitor_key_get(service, key):
+    """Get the value of an existing key in the monitor cluster.
+
+    :param service: The Ceph user name to run the command under
+    :type service: str
+    :param key: The key to search for.
+    :type key: str
+    :return: Returns the value of that key or None if not found.
+    :rtype: Optional[str]
+    """
+    try:
+        output = check_output(
+            ['ceph', '--id', service,
+             'config-key', 'get', str(key)]).decode('UTF-8')
+        return output
+    except CalledProcessError as e:
+        log("Monitor config-key get failed with message: {}"
+            .format(e.output))
+        return None
+
+
+def monitor_key_exists(service, key):
+    """Search for existence of key in the monitor cluster.
+
+    :param service: The Ceph user name to run the command under.
+    :type service: str
+    :param key: The key to search for.
+    :type key: str
+    :return: Returns True if the key exists, False if not.
+    :rtype: bool
+    :raises: CalledProcessError if an unknown error occurs.
+    """
+    try:
+        check_call(
+            ['ceph', '--id', service,
+             'config-key', 'exists', str(key)])
+        # We can return True here regardless because Ceph returns
+        # ENOENT if the key wasn't found
+        return True
+    except CalledProcessError as e:
+        if e.returncode == errno.ENOENT:
+            return False
+        else:
+            log("Unknown error from ceph config-key exists: {} {}"
+                .format(e.returncode, e.output))
+            raise
+
+
+def get_erasure_profile(service, name):
+    """Get an existing erasure code profile if it exists.
+
+    :param service: The Ceph user name to run the command under.
+    :type service: str
+    :param name: Name of profile.
+    :type name: str
+    :returns: Dictionary with profile data.
+    :rtype: Optional[Dict[str, str]]
+    """
+    try:
+        out = check_output(['ceph', '--id', service,
+                            'osd', 'erasure-code-profile', 'get',
+                            name, '--format=json']).decode('utf-8')
+        return json.loads(out)
+    except (CalledProcessError, OSError, ValueError):
+        return None
+
+
+def pool_set(service, pool_name, key, value):
+    """Set a value for a RADOS pool in ceph.
+
+    :param service: The Ceph user name to run the command under.
+ :type service: str + :param pool_name: Name of pool to set property on. + :type pool_name: str + :param key: Property key. + :type key: str + :param value: Value, will be coerced into str and shifted to lowercase. + :type value: str + :raises: CalledProcessError + """ + cmd = [ + 'ceph', '--id', service, + 'osd', 'pool', 'set', pool_name, key, str(value).lower()] + check_call(cmd) + + +def snapshot_pool(service, pool_name, snapshot_name): + """Snapshots a RADOS pool in Ceph. + + :param service: The Ceph user name to run the command under. + :type service: str + :param pool_name: Name of pool to snapshot. + :type pool_name: str + :param snapshot_name: Name of snapshot to create. + :type snapshot_name: str + :raises: CalledProcessError + """ + cmd = [ + 'ceph', '--id', service, + 'osd', 'pool', 'mksnap', pool_name, snapshot_name] + check_call(cmd) + + +def remove_pool_snapshot(service, pool_name, snapshot_name): + """Remove a snapshot from a RADOS pool in Ceph. + + :param service: The Ceph user name to run the command under. + :type service: str + :param pool_name: Name of pool to remove snapshot from. + :type pool_name: str + :param snapshot_name: Name of snapshot to remove. + :type snapshot_name: str + :raises: CalledProcessError + """ + cmd = [ + 'ceph', '--id', service, + 'osd', 'pool', 'rmsnap', pool_name, snapshot_name] + check_call(cmd) + + +def set_pool_quota(service, pool_name, max_bytes=None, max_objects=None): + """Set byte quota on a RADOS pool in Ceph. + + :param service: The Ceph user name to run the command under + :type service: str + :param pool_name: Name of pool + :type pool_name: str + :param max_bytes: Maximum bytes quota to apply + :type max_bytes: int + :param max_objects: Maximum objects quota to apply + :type max_objects: int + :raises: subprocess.CalledProcessError + """ + cmd = [ + 'ceph', '--id', service, + 'osd', 'pool', 'set-quota', pool_name] + if max_bytes: + cmd = cmd + ['max_bytes', str(max_bytes)] + if max_objects: + cmd = cmd + ['max_objects', str(max_objects)] + check_call(cmd) + + +def remove_pool_quota(service, pool_name): + """Remove byte quota on a RADOS pool in Ceph. + + :param service: The Ceph user name to run the command under. + :type service: str + :param pool_name: Name of pool to remove quota from. + :type pool_name: str + :raises: CalledProcessError + """ + cmd = [ + 'ceph', '--id', service, + 'osd', 'pool', 'set-quota', pool_name, 'max_bytes', '0'] + check_call(cmd) + + +def remove_erasure_profile(service, profile_name): + """Remove erasure code profile. + + :param service: The Ceph user name to run the command under + :type service: str + :param profile_name: Name of profile to remove. + :type profile_name: str + :raises: CalledProcessError + """ + cmd = [ + 'ceph', '--id', service, + 'osd', 'erasure-code-profile', 'rm', profile_name] + check_call(cmd) + + +def create_erasure_profile(service, profile_name, + erasure_plugin_name='jerasure', + failure_domain=None, + data_chunks=2, coding_chunks=1, + locality=None, durability_estimator=None, + helper_chunks=None, + scalar_mds=None, + crush_locality=None, + device_class=None, + erasure_plugin_technique=None): + """Create a new erasure code profile if one does not already exist for it. + + Profiles are considered immutable so will not be updated if the named + profile already exists. + + Please refer to [0] for more details. + + 0: http://docs.ceph.com/docs/master/rados/operations/erasure-code-profile/ + + :param service: The Ceph user name to run the command under. 
+    :type service: str
+    :param profile_name: Name of profile.
+    :type profile_name: str
+    :param erasure_plugin_name: Erasure code plugin.
+    :type erasure_plugin_name: str
+    :param failure_domain: Failure domain, one of:
+                           ('chassis', 'datacenter', 'host', 'osd', 'pdu',
+                            'pod', 'rack', 'region', 'room', 'root', 'row').
+    :type failure_domain: str
+    :param data_chunks: Number of data chunks.
+    :type data_chunks: int
+    :param coding_chunks: Number of coding chunks.
+    :type coding_chunks: int
+    :param locality: Locality.
+    :type locality: int
+    :param durability_estimator: Durability estimator.
+    :type durability_estimator: int
+    :param helper_chunks: Number of helper chunks.
+    :type helper_chunks: int
+    :param device_class: Restrict placement to devices of specific class.
+    :type device_class: str
+    :param scalar_mds: one of ['isa', 'jerasure', 'shec']
+    :type scalar_mds: str
+    :param crush_locality: LRC locality failure domain, one of:
+                           ('chassis', 'datacenter', 'host', 'osd', 'pdu', 'pod',
+                            'rack', 'region', 'room', 'root', 'row') or unset.
+    :type crush_locality: str
+    :param erasure_plugin_technique: Coding technique for EC plugin
+    :type erasure_plugin_technique: str
+    :return: None. Can raise CalledProcessError, ValueError or AssertionError
+    """
+    if erasure_profile_exists(service, profile_name):
+        log('EC profile {} exists, skipping update'.format(profile_name),
+            level=WARNING)
+        return
+
+    plugin_techniques = {
+        'jerasure': [
+            'reed_sol_van',
+            'reed_sol_r6_op',
+            'cauchy_orig',
+            'cauchy_good',
+            'liberation',
+            'blaum_roth',
+            'liber8tion'
+        ],
+        'lrc': [],
+        'isa': [
+            'reed_sol_van',
+            'cauchy',
+        ],
+        'shec': [
+            'single',
+            'multiple'
+        ],
+        'clay': [],
+    }
+    failure_domains = [
+        'chassis', 'datacenter',
+        'host', 'osd',
+        'pdu', 'pod',
+        'rack', 'region',
+        'room', 'root',
+        'row',
+    ]
+    device_classes = [
+        'ssd',
+        'hdd',
+        'nvme'
+    ]
+
+    validator(erasure_plugin_name, str, list(plugin_techniques.keys()))
+
+    cmd = [
+        'ceph', '--id', service,
+        'osd', 'erasure-code-profile', 'set', profile_name,
+        'plugin={}'.format(erasure_plugin_name),
+        'k={}'.format(str(data_chunks)),
+        'm={}'.format(str(coding_chunks)),
+    ]
+
+    if erasure_plugin_technique:
+        validator(erasure_plugin_technique, str,
+                  plugin_techniques[erasure_plugin_name])
+        cmd.append('technique={}'.format(erasure_plugin_technique))
+
+    luminous_or_later = cmp_pkgrevno('ceph-common', '12.0.0') >= 0
+
+    # Set failure domain from options if not provided in args
+    if not failure_domain and config('customize-failure-domain'):
+        # Defaults to 'host' so just need to deal with
+        # setting 'rack' if feature is enabled
+        failure_domain = 'rack'
+
+    if failure_domain:
+        validator(failure_domain, str, failure_domains)
+        # failure_domain changed in luminous
+        if luminous_or_later:
+            cmd.append('crush-failure-domain={}'.format(failure_domain))
+        else:
+            cmd.append('ruleset-failure-domain={}'.format(failure_domain))
+
+    # device class new in luminous
+    if luminous_or_later and device_class:
+        validator(device_class, str, device_classes)
+        cmd.append('crush-device-class={}'.format(device_class))
+    else:
+        log('Skipping device class configuration (ceph < 12.0.0)',
+            level=DEBUG)
+
+    # Add plugin specific information
+    if erasure_plugin_name == 'lrc':
+        # LRC mandatory configuration
+        if locality:
+            cmd.append('l={}'.format(str(locality)))
+        else:
+            raise ValueError("locality must be provided for lrc plugin")
+        # LRC optional configuration
+        if crush_locality:
+            validator(crush_locality, str, failure_domains)
+
cmd.append('crush-locality={}'.format(crush_locality)) + + if erasure_plugin_name == 'shec': + # SHEC optional configuration + if durability_estimator: + cmd.append('c={}'.format((durability_estimator))) + + if erasure_plugin_name == 'clay': + # CLAY optional configuration + if helper_chunks: + cmd.append('d={}'.format(str(helper_chunks))) + if scalar_mds: + cmd.append('scalar-mds={}'.format(scalar_mds)) + + check_call(cmd) + + +def rename_pool(service, old_name, new_name): + """Rename a Ceph pool from old_name to new_name. + + :param service: The Ceph user name to run the command under. + :type service: str + :param old_name: Name of pool subject to rename. + :type old_name: str + :param new_name: Name to rename pool to. + :type new_name: str + """ + validator(value=old_name, valid_type=str) + validator(value=new_name, valid_type=str) + + cmd = [ + 'ceph', '--id', service, + 'osd', 'pool', 'rename', old_name, new_name] + check_call(cmd) + + +def erasure_profile_exists(service, name): + """Check to see if an Erasure code profile already exists. + + :param service: The Ceph user name to run the command under + :type service: str + :param name: Name of profile to look for. + :type name: str + :returns: True if it exists, False otherwise. + :rtype: bool + """ + validator(value=name, valid_type=str) + try: + check_call(['ceph', '--id', service, + 'osd', 'erasure-code-profile', 'get', + name]) + return True + except CalledProcessError: + return False + + +def get_cache_mode(service, pool_name): + """Find the current caching mode of the pool_name given. + + :param service: The Ceph user name to run the command under + :type service: str + :param pool_name: Name of pool. + :type pool_name: str + :returns: Current cache mode. + :rtype: Optional[int] + """ + validator(value=service, valid_type=str) + validator(value=pool_name, valid_type=str) + out = check_output(['ceph', '--id', service, + 'osd', 'dump', '--format=json']).decode('utf-8') + try: + osd_json = json.loads(out) + for pool in osd_json['pools']: + if pool['pool_name'] == pool_name: + return pool['cache_mode'] + return None + except ValueError: + raise + + +def pool_exists(service, name): + """Check to see if a RADOS pool already exists.""" + try: + out = check_output( + ['rados', '--id', service, 'lspools']).decode('utf-8') + except CalledProcessError: + return False + + return name in out.split() + + +def get_osds(service, device_class=None): + """Return a list of all Ceph Object Storage Daemons currently in the + cluster (optionally filtered by storage device class). 
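+
+    A usage sketch (hypothetical OSD ids; the real list comes from the
+    running cluster): ``get_osds('admin', device_class='ssd')`` might
+    return ``[0, 2, 4]``.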
+ + :param device_class: Class of storage device for OSD's + :type device_class: str + """ + luminous_or_later = cmp_pkgrevno('ceph-common', '12.0.0') >= 0 + if luminous_or_later and device_class: + out = check_output(['ceph', '--id', service, + 'osd', 'crush', 'class', + 'ls-osd', device_class, + '--format=json']).decode('utf-8') + else: + out = check_output(['ceph', '--id', service, + 'osd', 'ls', + '--format=json']).decode('utf-8') + return json.loads(out) + + +def install(): + """Basic Ceph client installation.""" + ceph_dir = "/etc/ceph" + if not os.path.exists(ceph_dir): + os.mkdir(ceph_dir) + + apt_install('ceph-common', fatal=True) + + +def rbd_exists(service, pool, rbd_img): + """Check to see if a RADOS block device exists.""" + try: + out = check_output(['rbd', 'list', '--id', + service, '--pool', pool]).decode('utf-8') + except CalledProcessError: + return False + + return rbd_img in out + + +def create_rbd_image(service, pool, image, sizemb): + """Create a new RADOS block device.""" + cmd = ['rbd', 'create', image, '--size', str(sizemb), '--id', service, + '--pool', pool] + check_call(cmd) + + +def update_pool(client, pool, settings): + """Update pool properties. + + :param client: Client/User-name to authenticate with. + :type client: str + :param pool: Name of pool to operate on + :type pool: str + :param settings: Dictionary with key/value pairs to set. + :type settings: Dict[str, str] + :raises: CalledProcessError + """ + cmd = ['ceph', '--id', client, 'osd', 'pool', 'set', pool] + for k, v in settings.items(): + check_call(cmd + [k, v]) + + +def set_app_name_for_pool(client, pool, name): + """Calls `osd pool application enable` for the specified pool name + + :param client: Name of the ceph client to use + :type client: str + :param pool: Pool to set app name for + :type pool: str + :param name: app name for the specified pool + :type name: str + + :raises: CalledProcessError if ceph call fails + """ + if cmp_pkgrevno('ceph-common', '12.0.0') >= 0: + cmd = ['ceph', '--id', client, 'osd', 'pool', + 'application', 'enable', pool, name] + check_call(cmd) + + +def create_pool(service, name, replicas=3, pg_num=None): + """Create a new RADOS pool.""" + if pool_exists(service, name): + log("Ceph pool {} already exists, skipping creation".format(name), + level=WARNING) + return + + if not pg_num: + # Calculate the number of placement groups based + # on upstream recommended best practices. + osds = get_osds(service) + if osds: + pg_num = (len(osds) * 100 // replicas) + else: + # NOTE(james-page): Default to 200 for older ceph versions + # which don't support OSD query from cli + pg_num = 200 + + cmd = ['ceph', '--id', service, 'osd', 'pool', 'create', name, str(pg_num)] + check_call(cmd) + + update_pool(service, name, settings={'size': str(replicas)}) + + +def delete_pool(service, name): + """Delete a RADOS pool from ceph.""" + cmd = ['ceph', '--id', service, 'osd', 'pool', 'delete', name, + '--yes-i-really-really-mean-it'] + check_call(cmd) + + +def _keyfile_path(service): + return KEYFILE.format(service) + + +def _keyring_path(service): + return KEYRING.format(service) + + +def add_key(service, key): + """Add a key to a keyring. + + Creates the keyring if it doesn't already exist. + + Logs and returns if the key is already in the keyring. + """ + keyring = _keyring_path(service) + if os.path.exists(keyring): + with open(keyring, 'r') as ring: + if key in ring.read(): + log('Ceph keyring exists at %s and has not changed.' 
% keyring, + level=DEBUG) + return + log('Updating existing keyring %s.' % keyring, level=DEBUG) + + cmd = ['ceph-authtool', keyring, '--create-keyring', + '--name=client.{}'.format(service), '--add-key={}'.format(key)] + check_call(cmd) + log('Created new ceph keyring at %s.' % keyring, level=DEBUG) + + +def create_keyring(service, key): + """Deprecated. Please use the more accurately named 'add_key'""" + return add_key(service, key) + + +def delete_keyring(service): + """Delete an existing Ceph keyring.""" + keyring = _keyring_path(service) + if not os.path.exists(keyring): + log('Keyring does not exist at %s' % keyring, level=WARNING) + return + + os.remove(keyring) + log('Deleted ring at %s.' % keyring, level=INFO) + + +def create_key_file(service, key): + """Create a file containing key.""" + keyfile = _keyfile_path(service) + if os.path.exists(keyfile): + log('Keyfile exists at %s.' % keyfile, level=WARNING) + return + + with open(keyfile, 'w') as fd: + fd.write(key) + + log('Created new keyfile at %s.' % keyfile, level=INFO) + + +def get_ceph_nodes(relation='ceph'): + """Query named relation to determine current nodes.""" + hosts = [] + for r_id in relation_ids(relation): + for unit in related_units(r_id): + hosts.append(relation_get('private-address', unit=unit, rid=r_id)) + + return hosts + + +def configure(service, key, auth, use_syslog): + """Perform basic configuration of Ceph.""" + add_key(service, key) + create_key_file(service, key) + hosts = get_ceph_nodes() + with open('/etc/ceph/ceph.conf', 'w') as ceph_conf: + ceph_conf.write(CEPH_CONF.format(auth=auth, + keyring=_keyring_path(service), + mon_hosts=",".join(map(str, hosts)), + use_syslog=use_syslog)) + modprobe('rbd') + + +def image_mapped(name): + """Determine whether a RADOS block device is mapped locally.""" + try: + out = check_output(['rbd', 'showmapped']).decode('utf-8') + except CalledProcessError: + return False + + return name in out + + +def map_block_storage(service, pool, image): + """Map a RADOS block device for local use.""" + cmd = [ + 'rbd', + 'map', + '{}/{}'.format(pool, image), + '--user', + service, + '--secret', + _keyfile_path(service), + ] + check_call(cmd) + + +def filesystem_mounted(fs): + """Determine whether a filesystem is already mounted.""" + return fs in [f for f, m in mounts()] + + +def make_filesystem(blk_device, fstype='ext4', timeout=10): + """Make a new filesystem on the specified block device.""" + count = 0 + e_noent = errno.ENOENT + while not os.path.exists(blk_device): + if count >= timeout: + log('Gave up waiting on block device %s' % blk_device, + level=ERROR) + raise IOError(e_noent, os.strerror(e_noent), blk_device) + + log('Waiting for block device %s to appear' % blk_device, + level=DEBUG) + count += 1 + time.sleep(1) + else: + log('Formatting block device %s as filesystem %s.' % + (blk_device, fstype), level=INFO) + check_call(['mkfs', '-t', fstype, blk_device]) + + +def place_data_on_block_device(blk_device, data_src_dst): + """Migrate data in data_src_dst to blk_device and then remount.""" + # mount block device into /mnt + mount(blk_device, '/mnt') + # copy data to /mnt + copy_files(data_src_dst, '/mnt') + # umount block device + umount('/mnt') + # Grab user/group ID's from original source + _dir = os.stat(data_src_dst) + uid = _dir.st_uid + gid = _dir.st_gid + # re-mount where the data should originally be + # TODO: persist is currently a NO-OP in core.host + mount(blk_device, data_src_dst, persist=True) + # ensure original ownership of new mount. 
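+    # The uid/gid captured from the original directory above are
+    # reapplied here so ownership survives the migration.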
+ os.chown(data_src_dst, uid, gid) + + +def copy_files(src, dst, symlinks=False, ignore=None): + """Copy files from src to dst.""" + for item in os.listdir(src): + s = os.path.join(src, item) + d = os.path.join(dst, item) + if os.path.isdir(s): + shutil.copytree(s, d, symlinks, ignore) + else: + shutil.copy2(s, d) + + +def ensure_ceph_storage(service, pool, rbd_img, sizemb, mount_point, + blk_device, fstype, system_services=[], + replicas=3): + """NOTE: This function must only be called from a single service unit for + the same rbd_img otherwise data loss will occur. + + Ensures given pool and RBD image exists, is mapped to a block device, + and the device is formatted and mounted at the given mount_point. + + If formatting a device for the first time, data existing at mount_point + will be migrated to the RBD device before being re-mounted. + + All services listed in system_services will be stopped prior to data + migration and restarted when complete. + """ + # Ensure pool, RBD image, RBD mappings are in place. + if not pool_exists(service, pool): + log('Creating new pool {}.'.format(pool), level=INFO) + create_pool(service, pool, replicas=replicas) + + if not rbd_exists(service, pool, rbd_img): + log('Creating RBD image ({}).'.format(rbd_img), level=INFO) + create_rbd_image(service, pool, rbd_img, sizemb) + + if not image_mapped(rbd_img): + log('Mapping RBD Image {} as a Block Device.'.format(rbd_img), + level=INFO) + map_block_storage(service, pool, rbd_img) + + # make file system + # TODO: What happens if for whatever reason this is run again and + # the data is already in the rbd device and/or is mounted?? + # When it is mounted already, it will fail to make the fs + # XXX: This is really sketchy! Need to at least add an fstab entry + # otherwise this hook will blow away existing data if its executed + # after a reboot. + if not filesystem_mounted(mount_point): + make_filesystem(blk_device, fstype) + + for svc in system_services: + if service_running(svc): + log('Stopping services {} prior to migrating data.' + .format(svc), level=DEBUG) + service_stop(svc) + + place_data_on_block_device(blk_device, mount_point) + + for svc in system_services: + log('Starting service {} after migrating data.' + .format(svc), level=DEBUG) + service_start(svc) + + +def ensure_ceph_keyring(service, user=None, group=None, + relation='ceph', key=None): + """Ensures a ceph keyring is created for a named service and optionally + ensures user and group ownership. + + @returns boolean: Flag to indicate whether a key was successfully written + to disk based on either relation data or a supplied key + """ + if not key: + for rid in relation_ids(relation): + for unit in related_units(rid): + key = relation_get('key', rid=rid, unit=unit) + if key: + break + + if not key: + return False + + add_key(service=service, key=key) + keyring = _keyring_path(service) + if user and group: + check_call(['chown', '%s.%s' % (user, group), keyring]) + + return True + + +class CephBrokerRq(object): + """Ceph broker request. + + Multiple operations can be added to a request and sent to the Ceph broker + to be executed. + + Request is json-encoded for sending over the wire. + + The API is versioned and defaults to version 1. + """ + + # The below hash is the result of running + # `hashlib.sha1('[]'.encode()).hexdigest()` + EMPTY_LIST_SHA = '97d170e1550eee4afc0af065b78cda302a97674c' + + def __init__(self, api_version=1, request_id=None, raw_request_data=None): + """Initialize CephBrokerRq object. 
+ + Builds a new empty request or rebuilds a request from on-wire JSON + data. + + :param api_version: API version for request (default: 1). + :type api_version: Optional[int] + :param request_id: Unique identifier for request. The identifier will + be updated as ops are added or removed from the + broker request. This ensures that Ceph will + correctly process requests where operations are + added after the initial request is processed. + (default: sha1 of operations) + :type request_id: Optional[str] + :param raw_request_data: JSON-encoded string to build request from. + :type raw_request_data: Optional[str] + :raises: KeyError + """ + if raw_request_data: + request_data = json.loads(raw_request_data) + self.api_version = request_data['api-version'] + self.set_ops(request_data['ops']) + self.request_id = request_data['request-id'] + else: + self.api_version = api_version + if request_id: + self.request_id = request_id + else: + self.request_id = CephBrokerRq.EMPTY_LIST_SHA + self.ops = [] + + def _hash_ops(self): + """Return the sha1 of the requested Broker ops.""" + return hashlib.sha1(json.dumps(self.ops, sort_keys=True).encode()).hexdigest() + + def add_op(self, op): + """Add an op if it is not already in the list. + + :param op: Operation to add. + :type op: dict + """ + if op not in self.ops: + self.ops.append(op) + self.request_id = self._hash_ops() + + def add_op_request_access_to_group(self, name, namespace=None, + permission=None, key_name=None, + object_prefix_permissions=None): + """ + Adds the requested permissions to the current service's Ceph key, + allowing the key to access only the specified pools or + object prefixes. object_prefix_permissions should be a dictionary + keyed on the permission with the corresponding value being a list + of prefixes to apply that permission to. + { + 'rwx': ['prefix1', 'prefix2'], + 'class-read': ['prefix3']} + """ + self.add_op({ + 'op': 'add-permissions-to-key', 'group': name, + 'namespace': namespace, + 'name': key_name or service_name(), + 'group-permission': permission, + 'object-prefix-permissions': object_prefix_permissions}) + + def add_op_create_pool(self, name, replica_count=3, pg_num=None, + weight=None, group=None, namespace=None, + app_name=None, max_bytes=None, max_objects=None): + """DEPRECATED: Use ``add_op_create_replicated_pool()`` or + ``add_op_create_erasure_pool()`` instead. + """ + return self.add_op_create_replicated_pool( + name, replica_count=replica_count, pg_num=pg_num, weight=weight, + group=group, namespace=namespace, app_name=app_name, + max_bytes=max_bytes, max_objects=max_objects) + + # Use function parameters and docstring to define types in a compatible + # manner. + # + # NOTE: Our caller should always use a kwarg Dict when calling us so + # no need to maintain fixed order/position for parameters. Please keep them + # sorted by name when adding new ones. + def _partial_build_common_op_create(self, + app_name=None, + compression_algorithm=None, + compression_mode=None, + compression_required_ratio=None, + compression_min_blob_size=None, + compression_min_blob_size_hdd=None, + compression_min_blob_size_ssd=None, + compression_max_blob_size=None, + compression_max_blob_size_hdd=None, + compression_max_blob_size_ssd=None, + group=None, + max_bytes=None, + max_objects=None, + namespace=None, + rbd_mirroring_mode='pool', + weight=None): + """Build common part of a create pool operation. + + :param app_name: Tag pool with application name. 
Note that there are
+                         certain protocols emerging upstream with regard to
+                         meaningful application names to use.
+                         Examples are 'rbd' and 'rgw'.
+        :type app_name: Optional[str]
+        :param compression_algorithm: Compressor to use, one of:
+                                      ('lz4', 'snappy', 'zlib', 'zstd')
+        :type compression_algorithm: Optional[str]
+        :param compression_mode: When to compress data, one of:
+                                 ('none', 'passive', 'aggressive', 'force')
+        :type compression_mode: Optional[str]
+        :param compression_required_ratio: Minimum compression ratio for data
+                                           chunk, if the requested ratio is not
+                                           achieved the compressed version will
+                                           be thrown away and the original
+                                           stored.
+        :type compression_required_ratio: Optional[float]
+        :param compression_min_blob_size: Chunks smaller than this are never
+                                          compressed (unit: bytes).
+        :type compression_min_blob_size: Optional[int]
+        :param compression_min_blob_size_hdd: Chunks smaller than this are not
+                                              compressed when destined to
+                                              rotational media (unit: bytes).
+        :type compression_min_blob_size_hdd: Optional[int]
+        :param compression_min_blob_size_ssd: Chunks smaller than this are not
+                                              compressed when destined to flash
+                                              media (unit: bytes).
+        :type compression_min_blob_size_ssd: Optional[int]
+        :param compression_max_blob_size: Chunks larger than this are broken
+                                          into N * compression_max_blob_size
+                                          chunks before being compressed
+                                          (unit: bytes).
+        :type compression_max_blob_size: Optional[int]
+        :param compression_max_blob_size_hdd: Chunks larger than this are
+                                              broken into
+                                              N * compression_max_blob_size_hdd
+                                              chunks before being compressed
+                                              when destined for rotational
+                                              media (unit: bytes)
+        :type compression_max_blob_size_hdd: Optional[int]
+        :param compression_max_blob_size_ssd: Chunks larger than this are
+                                              broken into
+                                              N * compression_max_blob_size_ssd
+                                              chunks before being compressed
+                                              when destined for flash media
+                                              (unit: bytes).
+        :type compression_max_blob_size_ssd: Optional[int]
+        :param group: Group to add pool to
+        :type group: Optional[str]
+        :param max_bytes: Maximum bytes quota to apply
+        :type max_bytes: Optional[int]
+        :param max_objects: Maximum objects quota to apply
+        :type max_objects: Optional[int]
+        :param namespace: Group namespace
+        :type namespace: Optional[str]
+        :param rbd_mirroring_mode: Pool mirroring mode used when Ceph RBD
+                                   mirroring is enabled.
+        :type rbd_mirroring_mode: Optional[str]
+        :param weight: The percentage of data that is expected to be contained
+                       in the pool from the total available space on the OSDs.
+                       Used to calculate number of Placement Groups to create
+                       for pool.
+        :type weight: Optional[float]
+        :returns: Dictionary with kwarg name as key.
+        :rtype: Dict[str,any]
+        :raises: AssertionError
+        """
+        return {
+            'app-name': app_name,
+            'compression-algorithm': compression_algorithm,
+            'compression-mode': compression_mode,
+            'compression-required-ratio': compression_required_ratio,
+            'compression-min-blob-size': compression_min_blob_size,
+            'compression-min-blob-size-hdd': compression_min_blob_size_hdd,
+            'compression-min-blob-size-ssd': compression_min_blob_size_ssd,
+            'compression-max-blob-size': compression_max_blob_size,
+            'compression-max-blob-size-hdd': compression_max_blob_size_hdd,
+            'compression-max-blob-size-ssd': compression_max_blob_size_ssd,
+            'group': group,
+            'max-bytes': max_bytes,
+            'max-objects': max_objects,
+            'group-namespace': namespace,
+            'rbd-mirroring-mode': rbd_mirroring_mode,
+            'weight': weight,
+        }
+
+    def add_op_create_replicated_pool(self, name, replica_count=3, pg_num=None,
+                                      crush_profile=None, **kwargs):
+        """Adds an operation to create a replicated pool.
+
+        Refer to docstring for ``_partial_build_common_op_create`` for
+        documentation of keyword arguments.
+
+        :param name: Name of pool to create
+        :type name: str
+        :param replica_count: Number of copies Ceph should keep of your data.
+        :type replica_count: int
+        :param pg_num: Request specific number of Placement Groups to create
+                       for pool.
+        :type pg_num: int
+        :param crush_profile: Name of crush profile to use. If not set the
+                              ceph-mon unit handling the broker request will
+                              set its default value.
+        :type crush_profile: Optional[str]
+        :raises: AssertionError if provided data is of invalid type/range
+        """
+        if pg_num and kwargs.get('weight'):
+            raise ValueError('pg_num and weight are mutually exclusive')
+
+        op = {
+            'op': 'create-pool',
+            'name': name,
+            'replicas': replica_count,
+            'pg_num': pg_num,
+            'crush-profile': crush_profile
+        }
+        op.update(self._partial_build_common_op_create(**kwargs))
+
+        # Initialize Pool-object to validate type and range of ops.
+        pool = ReplicatedPool('dummy-service', op=op)
+        pool.validate()
+
+        self.add_op(op)
+
+    def add_op_create_erasure_pool(self, name, erasure_profile=None,
+                                   allow_ec_overwrites=False, **kwargs):
+        """Adds an operation to create an erasure coded pool.
+
+        Refer to docstring for ``_partial_build_common_op_create`` for
+        documentation of keyword arguments.
+
+        :param name: Name of pool to create
+        :type name: str
+        :param erasure_profile: Name of erasure code profile to use. If not
+                                set the ceph-mon unit handling the broker
+                                request will set its default value.
+        :type erasure_profile: str
+        :param allow_ec_overwrites: allow overwrites in the erasure coded pool
+        :type allow_ec_overwrites: bool
+        :raises: AssertionError if provided data is of invalid type/range
+        """
+        op = {
+            'op': 'create-pool',
+            'name': name,
+            'pool-type': 'erasure',
+            'erasure-profile': erasure_profile,
+            'allow-ec-overwrites': allow_ec_overwrites,
+        }
+        op.update(self._partial_build_common_op_create(**kwargs))
+
+        # Initialize Pool-object to validate type and range of ops.
+        pool = ErasurePool('dummy-service', op)
+        pool.validate()
+
+        self.add_op(op)
+
+    def add_op_create_erasure_profile(self, name,
+                                      erasure_type='jerasure',
+                                      erasure_technique=None,
+                                      k=None, m=None,
+                                      failure_domain=None,
+                                      lrc_locality=None,
+                                      shec_durability_estimator=None,
+                                      clay_helper_chunks=None,
+                                      device_class=None,
+                                      clay_scalar_mds=None,
+                                      lrc_crush_locality=None):
+        """Adds an operation to create an erasure coding profile.
+
+        :param name: Name of profile to create
+        :type name: str
+        :param erasure_type: Which of the erasure coding plugins should be used
+        :type erasure_type: string
+        :param erasure_technique: EC plugin technique to use
+        :type erasure_technique: string
+        :param k: Number of data chunks
+        :type k: int
+        :param m: Number of coding chunks
+        :type m: int
+        :param lrc_locality: Group the coding and data chunks into sets of
+                             size locality (lrc plugin)
+        :type lrc_locality: int
+        :param shec_durability_estimator: The number of parity chunks each of
+                                          which includes a data chunk in its
+                                          calculation range (shec plugin)
+        :type shec_durability_estimator: int
+        :param clay_helper_chunks: The number of helper chunks to use for
+                                   recovery operations (clay plugin)
+        :type clay_helper_chunks: int
+        :param failure_domain: Type of failure domain from Ceph bucket types
+                               to be used
+        :type failure_domain: string
+        :param device_class: Device class to use for profile (ssd, hdd)
+        :type device_class: string
+        :param clay_scalar_mds: Plugin to use for CLAY layered construction
+                                (jerasure|isa|shec)
+        :type clay_scalar_mds: string
+        :param lrc_crush_locality: Type of crush bucket in which set of chunks
+                                   defined by lrc_locality will be stored.
+        :type lrc_crush_locality: string
+        """
+        self.add_op({'op': 'create-erasure-profile',
+                     'name': name,
+                     'k': k,
+                     'm': m,
+                     'l': lrc_locality,
+                     'c': shec_durability_estimator,
+                     'd': clay_helper_chunks,
+                     'erasure-type': erasure_type,
+                     'erasure-technique': erasure_technique,
+                     'failure-domain': failure_domain,
+                     'device-class': device_class,
+                     'scalar-mds': clay_scalar_mds,
+                     'crush-locality': lrc_crush_locality})
+
+    def set_ops(self, ops):
+        """Set request ops to provided value.
+
+        Useful for injecting ops that come from a previous request
+        to allow comparisons to ensure validity.
+        """
+        self.ops = ops
+        self.request_id = self._hash_ops()
+
+    @property
+    def request(self):
+        return json.dumps({'api-version': self.api_version, 'ops': self.ops,
+                           'request-id': self.request_id})
+
+    def _ops_equal(self, other):
+        keys_to_compare = [
+            'replicas', 'name', 'op', 'pg_num', 'group-permission',
+            'object-prefix-permissions',
+        ]
+        keys_to_compare += list(self._partial_build_common_op_create().keys())
+        if len(self.ops) == len(other.ops):
+            for req_no in range(0, len(self.ops)):
+                for key in keys_to_compare:
+                    if self.ops[req_no].get(key) != other.ops[req_no].get(key):
+                        return False
+        else:
+            return False
+        return True
+
+    def __eq__(self, other):
+        if not isinstance(other, self.__class__):
+            return False
+        if self.api_version == other.api_version and \
+                self._ops_equal(other):
+            return True
+        else:
+            return False
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+
+class CephBrokerRsp(object):
+    """Ceph broker response.
+
+    Response is json-decoded and contents provided as methods/properties.
+
+    The API is versioned and defaults to version 1.
+    """
+
+    def __init__(self, encoded_rsp):
+        self.api_version = None
+        self.rsp = json.loads(encoded_rsp)
+
+    @property
+    def request_id(self):
+        return self.rsp.get('request-id')
+
+    @property
+    def exit_code(self):
+        return self.rsp.get('exit-code')
+
+    @property
+    def exit_msg(self):
+        return self.rsp.get('stderr')
+
+
+# Ceph Broker Conversation:
+# If a charm needs an action to be taken by ceph it can create a CephBrokerRq
+# and send that request to ceph via the ceph relation. The CephBrokerRq has a
+# unique id so that the client can identify which CephBrokerRsp is associated
+# with the request.
+# Ceph will also respond to each client unit individually,
+# creating a response key per client unit, e.g. glance/0 will get a
+# CephBrokerRsp via key broker-rsp-glance-0
+#
+# To use this the charm can just do something like:
+#
+# from charmhelpers.contrib.storage.linux.ceph import (
+#     send_request_if_needed,
+#     is_request_complete,
+#     CephBrokerRq,
+# )
+#
+# @hooks.hook('ceph-relation-changed')
+# def ceph_changed():
+#     rq = CephBrokerRq()
+#     rq.add_op_create_pool(name='poolname', replica_count=3)
+#
+#     if is_request_complete(rq):
+#         <request-complete actions go here>
+#     else:
+#         send_request_if_needed(get_ceph_request())
+#
+# CephBrokerRq and CephBrokerRsp are serialized into JSON. Below is an example
+# of glance having sent a request to ceph which ceph has successfully processed
+#  'ceph:8': {
+#      'ceph/0': {
+#          'auth': 'cephx',
+#          'broker-rsp-glance-0': '{"request-id": "0bc7dc54", "exit-code": 0}',
+#          'broker_rsp': '{"request-id": "0da543b8", "exit-code": 0}',
+#          'ceph-public-address': '10.5.44.103',
+#          'key': 'AQCLDttVuHXINhAAvI144CB09dYchhHyTUY9BQ==',
+#          'private-address': '10.5.44.103',
+#      },
+#      'glance/0': {
+#          'broker_req': ('{"api-version": 1, "request-id": "0bc7dc54", '
+#                         '"ops": [{"replicas": 3, "name": "glance", '
+#                         '"op": "create-pool"}]}'),
+#          'private-address': '10.5.44.109',
+#      },
+#  }
+
+def get_previous_request(rid):
+    """Return the last ceph broker request sent on a given relation
+
+    :param rid: Relation id to query for request
+    :type rid: str
+    :returns: CephBrokerRq object or None if relation data not found.
+    :rtype: Optional[CephBrokerRq]
+    """
+    broker_req = relation_get(attribute='broker_req', rid=rid,
+                              unit=local_unit())
+    if broker_req:
+        return CephBrokerRq(raw_request_data=broker_req)
+
+
+def get_request_states(request, relation='ceph'):
+    """Return a dict of requests per relation id with their corresponding
+    completion state.
+
+    This allows a charm, which has a request for ceph, to see whether there is
+    an equivalent request already being processed and if so what state that
+    request is in.
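+
+    A sketch of the returned structure (hypothetical relation id):
+
+        {'ceph:8': {'sent': True, 'complete': False}}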
+
+    @param request: A CephBrokerRq object
+    """
+    requests = {}
+    for rid in relation_ids(relation):
+        complete = False
+        previous_request = get_previous_request(rid)
+        if request == previous_request:
+            sent = True
+            complete = is_request_complete_for_rid(previous_request, rid)
+        else:
+            sent = False
+            complete = False
+
+        requests[rid] = {
+            'sent': sent,
+            'complete': complete,
+        }
+
+    return requests
+
+
+def is_request_sent(request, relation='ceph'):
+    """Check to see if a functionally equivalent request has already been sent
+
+    Returns True if a similar request has been sent
+
+    @param request: A CephBrokerRq object
+    """
+    states = get_request_states(request, relation=relation)
+    for rid in states.keys():
+        if not states[rid]['sent']:
+            return False
+
+    return True
+
+
+def is_request_complete(request, relation='ceph'):
+    """Check to see if a functionally equivalent request has already been
+    completed
+
+    Returns True if a similar request has been completed
+
+    @param request: A CephBrokerRq object
+    """
+    states = get_request_states(request, relation=relation)
+    for rid in states.keys():
+        if not states[rid]['complete']:
+            return False
+
+    return True
+
+
+def is_request_complete_for_rid(request, rid):
+    """Check if a given request has been completed on the given relation
+
+    @param request: A CephBrokerRq object
+    @param rid: Relation ID
+    """
+    broker_key = get_broker_rsp_key()
+    for unit in related_units(rid):
+        rdata = relation_get(rid=rid, unit=unit)
+        if rdata.get(broker_key):
+            rsp = CephBrokerRsp(rdata.get(broker_key))
+            if rsp.request_id == request.request_id:
+                if not rsp.exit_code:
+                    return True
+        else:
+            # The remote unit sent no reply targeted at this unit so either the
+            # remote ceph cluster does not support unit targeted replies or it
+            # has not processed our request yet.
+            if rdata.get('broker_rsp'):
+                request_data = json.loads(rdata['broker_rsp'])
+                if request_data.get('request-id'):
+                    log('Ignoring legacy broker_rsp without unit key as remote '
+                        'service supports unit specific replies', level=DEBUG)
+                else:
+                    log('Using legacy broker_rsp as remote service does not '
+                        'support unit specific replies', level=DEBUG)
+                    rsp = CephBrokerRsp(rdata['broker_rsp'])
+                    if not rsp.exit_code:
+                        return True
+
+    return False
+
+
+def get_broker_rsp_key():
+    """Return broker response key for this unit
+
+    This is the key that ceph is going to use to pass request status
+    information back to this unit
+    """
+    return 'broker-rsp-' + local_unit().replace('/', '-')
+
+
+def send_request_if_needed(request, relation='ceph'):
+    """Send broker request if an equivalent request has not already been sent
+
+    @param request: A CephBrokerRq object
+    """
+    if is_request_sent(request, relation=relation):
+        log('Request already sent but not complete, not sending new request',
+            level=DEBUG)
+    else:
+        for rid in relation_ids(relation):
+            log('Sending request {}'.format(request.request_id), level=DEBUG)
+            relation_set(relation_id=rid, broker_req=request.request)
+            relation_set(relation_id=rid,
+                         relation_settings={'unit-name': local_unit()})
+
+
+def has_broker_rsp(rid=None, unit=None):
+    """Return True if the broker_rsp key is 'truthy' (i.e. set to something)
+    in the relation data.
+
+    :param rid: The relation to check (default of None means current relation)
+    :type rid: Union[str, None]
+    :param unit: The remote unit to check (default of None means current unit)
+    :type unit: Union[str, None]
+    :returns: True if broker key exists and is set to something 'truthy'
+    :rtype: bool
+    """
+    rdata = relation_get(rid=rid, unit=unit) or {}
+    broker_rsp = rdata.get(get_broker_rsp_key())
+    return True if broker_rsp else False
+
+
+def is_broker_action_done(action, rid=None, unit=None):
+    """Check whether broker action has completed yet.
+
+    @param action: name of action to be performed
+    @returns True if action complete otherwise False
+    """
+    rdata = relation_get(rid=rid, unit=unit) or {}
+    broker_rsp = rdata.get(get_broker_rsp_key())
+    if not broker_rsp:
+        return False
+
+    rsp = CephBrokerRsp(broker_rsp)
+    unit_name = local_unit().partition('/')[2]
+    key = "unit_{}_ceph_broker_action.{}".format(unit_name, action)
+    kvstore = kv()
+    val = kvstore.get(key=key)
+    if val and val == rsp.request_id:
+        return True
+
+    return False
+
+
+def mark_broker_action_done(action, rid=None, unit=None):
+    """Mark action as having been completed.
+
+    @param action: name of action to be performed
+    @returns None
+    """
+    rdata = relation_get(rid=rid, unit=unit) or {}
+    broker_rsp = rdata.get(get_broker_rsp_key())
+    if not broker_rsp:
+        return
+
+    rsp = CephBrokerRsp(broker_rsp)
+    unit_name = local_unit().partition('/')[2]
+    key = "unit_{}_ceph_broker_action.{}".format(unit_name, action)
+    kvstore = kv()
+    kvstore.set(key=key, value=rsp.request_id)
+    kvstore.flush()
+
+
+class CephConfContext(object):
+    """Ceph config (ceph.conf) context.
+
+    Supports user-provided Ceph configuration settings. Users can provide a
+    dictionary as the value for the config-flags charm option containing
+    Ceph configuration settings keyed by their section in ceph.conf.
+    """
+    def __init__(self, permitted_sections=None):
+        self.permitted_sections = permitted_sections or []
+
+    def __call__(self):
+        conf = config('config-flags')
+        if not conf:
+            return {}
+
+        conf = config_flags_parser(conf)
+        if not isinstance(conf, dict):
+            log("Provided config-flags is not a dictionary - ignoring",
+                level=WARNING)
+            return {}
+
+        permitted = self.permitted_sections
+        if permitted:
+            diff = set(conf.keys()).difference(set(permitted))
+            if diff:
+                log("Config-flags contains invalid keys '%s' - they will be "
+                    "ignored" % (', '.join(diff)), level=WARNING)
+
+        ceph_conf = {}
+        for key in conf:
+            if permitted and key not in permitted:
+                log("Ignoring key '%s'" % key, level=WARNING)
+                continue
+
+            ceph_conf[key] = conf[key]
+        return ceph_conf
+
+
+class CephOSDConfContext(CephConfContext):
+    """Ceph config (ceph.conf) context.
+
+    Consolidates settings from config-flags via CephConfContext with
+    settings provided by the mons. The config-flag values are preserved in
+    conf['osd'], settings from the mons which do not clash with config-flag
+    settings are in conf['osd_from_client'] and finally settings which do
+    clash are in conf['osd_from_client_conflict']. Rather than silently drop
+    the conflicting settings they are provided in the context so they can be
+    rendered commented out to give some visibility to the admin.
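+
+    For example (a sketch with hypothetical values): if config-flags sets
+    'osd max backfills' to '10' and the mons announce a value of '1' for
+    the same setting, the '10' stays in conf['osd'] while the conflicting
+    '1' is surfaced in conf['osd_from_client_conflict'].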
+    """
+
+    def __init__(self, permitted_sections=None):
+        super(CephOSDConfContext, self).__init__(
+            permitted_sections=permitted_sections)
+        try:
+            self.settings_from_mons = get_osd_settings('mon')
+        except OSDSettingConflict:
+            log(
+                "OSD settings from mons are inconsistent, ignoring them",
+                level=WARNING)
+            self.settings_from_mons = {}
+
+    def filter_osd_from_mon_settings(self):
+        """Filter settings from client relation against config-flags.
+
+        :returns: A tuple (
+            ,config-flag values,
+            ,client settings which do not conflict with config-flag values,
+            ,client settings which conflict with config-flag values)
+        :rtype: (OrderedDict, OrderedDict, OrderedDict)
+        """
+        ceph_conf = super(CephOSDConfContext, self).__call__()
+        conflicting_entries = {}
+        clear_entries = {}
+        for key, value in self.settings_from_mons.items():
+            if key in ceph_conf.get('osd', {}):
+                if ceph_conf['osd'][key] != value:
+                    conflicting_entries[key] = value
+            else:
+                clear_entries[key] = value
+        clear_entries = _order_dict_by_key(clear_entries)
+        conflicting_entries = _order_dict_by_key(conflicting_entries)
+        return ceph_conf, clear_entries, conflicting_entries
+
+    def __call__(self):
+        """Construct OSD config context.
+
+        Standard context with two additional special keys.
+            osd_from_client_conflict: client settings which conflict with
+                config-flag values
+            osd_from_client: settings which do not conflict with config-flag
+                values
+
+        :returns: OSD config context dict.
+        :rtype: dict
+        """
+        conf, osd_clear, osd_conflict = self.filter_osd_from_mon_settings()
+        conf['osd_from_client_conflict'] = osd_conflict
+        conf['osd_from_client'] = osd_clear
+        return conf
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/storage/linux/loopback.py b/ceph-radosgw/hooks/charmhelpers/contrib/storage/linux/loopback.py
new file mode 100644
index 00000000..04daea29
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/storage/linux/loopback.py
@@ -0,0 +1,88 @@
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import re
+from subprocess import (
+    check_call,
+    check_output,
+)
+
+
+##################################################
+# loopback device helpers.
+##################################################
+def loopback_devices():
+    '''
+    Parse through 'losetup -a' output to determine currently mapped
+    loopback devices. Output is expected to look like:
+
+        /dev/loop0: [0807]:961814 (/tmp/my.img)
+
+    or:
+
+        /dev/loop0: [0807]:961814 (/tmp/my.img (deleted))
+
+    :returns: dict: a dict mapping {loopback_dev: backing_file}
+    '''
+    loopbacks = {}
+    cmd = ['losetup', '-a']
+    output = check_output(cmd).decode('utf-8')
+    devs = [d.strip().split(' ', 2) for d in output.splitlines() if d != '']
+    for dev, _, f in devs:
+        loopbacks[dev.replace(':', '')] = re.search(r'\((.+)\)', f).groups()[0]
+    return loopbacks
+
+
+def create_loopback(file_path):
+    '''
+    Create a loopback device for a given backing file.
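+
+    Usage sketch (the device name depends on the host):
+
+        create_loopback('/tmp/my.img')  # e.g. returns '/dev/loop0'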
+
+    :returns: str: Full path to new loopback device (eg, /dev/loop0)
+    '''
+    file_path = os.path.abspath(file_path)
+    check_call(['losetup', '--find', file_path])
+    for d, f in loopback_devices().items():
+        if f == file_path:
+            return d
+
+
+def ensure_loopback_device(path, size):
+    '''
+    Ensure a loopback device exists for a given backing file path and size.
+    If a loopback device is not already mapped to the file, a new one will
+    be created.
+
+    TODO: Confirm size of found loopback device.
+
+    :returns: str: Full path to the ensured loopback device (eg, /dev/loop0)
+    '''
+    for d, f in loopback_devices().items():
+        if f == path:
+            return d
+
+    if not os.path.exists(path):
+        cmd = ['truncate', '--size', size, path]
+        check_call(cmd)
+
+    return create_loopback(path)
+
+
+def is_mapped_loopback_device(device):
+    """
+    Checks if a given device name is an existing/mapped loopback device.
+
+    :param device: str: Full path to the device (eg, /dev/loop1).
+    :returns: str: Path to the backing file if it is a loopback device,
+              empty string otherwise
+    """
+    return loopback_devices().get(device, "")
diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/storage/linux/lvm.py b/ceph-radosgw/hooks/charmhelpers/contrib/storage/linux/lvm.py
new file mode 100644
index 00000000..0d294c79
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/contrib/storage/linux/lvm.py
@@ -0,0 +1,178 @@
+# Copyright 2014-2021 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import functools
+from subprocess import (
+    CalledProcessError,
+    check_call,
+    check_output,
+)
+
+
+##################################################
+# LVM helpers.
+##################################################
+def deactivate_lvm_volume_group(block_device):
+    '''
+    Deactivate any volume group associated with an LVM physical volume.
+
+    :param block_device: str: Full path to LVM physical volume
+    '''
+    vg = list_lvm_volume_group(block_device)
+    if vg:
+        cmd = ['vgchange', '-an', vg]
+        check_call(cmd)
+
+
+def is_lvm_physical_volume(block_device):
+    '''
+    Determine whether a block device is initialized as an LVM PV.
+
+    :param block_device: str: Full path of block device to inspect.
+
+    :returns: boolean: True if block device is a PV, False if not.
+    '''
+    try:
+        check_output(['pvdisplay', block_device])
+        return True
+    except CalledProcessError:
+        return False
+
+
+def remove_lvm_physical_volume(block_device):
+    '''
+    Remove LVM PV signatures from a given block device.
+
+    :param block_device: str: Full path of block device to scrub.
+    '''
+    check_call(['pvremove', '-ff', '--yes', block_device])
+
+
+def list_lvm_volume_group(block_device):
+    '''
+    List LVM volume group associated with a given block device.
+
+    Assumes block device is a valid LVM PV.
+
+    :param block_device: str: Full path of block device to inspect.
+ + :returns: str: Name of volume group associated with block device or None + ''' + vg = None + pvd = check_output(['pvdisplay', block_device]).splitlines() + for lvm in pvd: + lvm = lvm.decode('UTF-8') + if lvm.strip().startswith('VG Name'): + vg = ' '.join(lvm.strip().split()[2:]) + return vg + + +def create_lvm_physical_volume(block_device): + ''' + Initialize a block device as an LVM physical volume. + + :param block_device: str: Full path of block device to initialize. + + ''' + check_call(['pvcreate', block_device]) + + +def create_lvm_volume_group(volume_group, block_device): + ''' + Create an LVM volume group backed by a given block device. + + Assumes block device has already been initialized as an LVM PV. + + :param volume_group: str: Name of volume group to create. + :block_device: str: Full path of PV-initialized block device. + ''' + check_call(['vgcreate', volume_group, block_device]) + + +def list_logical_volumes(select_criteria=None, path_mode=False): + ''' + List logical volumes + + :param select_criteria: str: Limit list to those volumes matching this + criteria (see 'lvs -S help' for more details) + :param path_mode: bool: return logical volume name in 'vg/lv' format, this + format is required for some commands like lvextend + :returns: [str]: List of logical volumes + ''' + lv_diplay_attr = 'lv_name' + if path_mode: + # Parsing output logic relies on the column order + lv_diplay_attr = 'vg_name,' + lv_diplay_attr + cmd = ['lvs', '--options', lv_diplay_attr, '--noheadings'] + if select_criteria: + cmd.extend(['--select', select_criteria]) + lvs = [] + for lv in check_output(cmd).decode('UTF-8').splitlines(): + if not lv: + continue + if path_mode: + lvs.append('/'.join(lv.strip().split())) + else: + lvs.append(lv.strip()) + return lvs + + +list_thin_logical_volume_pools = functools.partial( + list_logical_volumes, + select_criteria='lv_attr =~ ^t') + +list_thin_logical_volumes = functools.partial( + list_logical_volumes, + select_criteria='lv_attr =~ ^V') + + +def extend_logical_volume_by_device(lv_name, block_device): + ''' + Extends the size of logical volume lv_name by the amount of free space on + physical volume block_device. + + :param lv_name: str: name of logical volume to be extended (vg/lv format) + :param block_device: str: name of block_device to be allocated to lv_name + ''' + cmd = ['lvextend', lv_name, block_device] + check_call(cmd) + + +def create_logical_volume(lv_name, volume_group, size=None): + ''' + Create a new logical volume in an existing volume group + + :param lv_name: str: name of logical volume to be created. + :param volume_group: str: Name of volume group to use for the new volume. + :param size: str: Size of logical volume to create (100% if not supplied) + :raises subprocess.CalledProcessError: in the event that the lvcreate fails. + ''' + if size: + check_call([ + 'lvcreate', + '--yes', + '-L', + '{}'.format(size), + '-n', lv_name, volume_group + ]) + # create the lv with all the space available, this is needed because the + # system call is different for LVM + else: + check_call([ + 'lvcreate', + '--yes', + '-l', + '100%FREE', + '-n', lv_name, volume_group + ]) diff --git a/ceph-radosgw/hooks/charmhelpers/contrib/storage/linux/utils.py b/ceph-radosgw/hooks/charmhelpers/contrib/storage/linux/utils.py new file mode 100644 index 00000000..4d05b121 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/contrib/storage/linux/utils.py @@ -0,0 +1,143 @@ +# Copyright 2014-2015 Canonical Limited. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import re
+from stat import S_ISBLK
+
+from subprocess import (
+    CalledProcessError,
+    check_call,
+    check_output,
+    call
+)
+
+from charmhelpers.core.hookenv import (
+    log,
+    WARNING,
+    INFO
+)
+
+
+def _luks_uuid(dev):
+    """
+    Check to see if dev is a LUKS encrypted volume, returning the UUID
+    of volume if it is.
+
+    :param: dev: path to block device to check.
+    :returns: str. UUID of LUKS device or None if not a LUKS device
+    """
+    try:
+        cmd = ['cryptsetup', 'luksUUID', dev]
+        return check_output(cmd).decode('UTF-8').strip()
+    except CalledProcessError:
+        return None
+
+
+def is_luks_device(dev):
+    """
+    Determine if dev is a LUKS-formatted block device.
+
+    :param: dev: A full path to a block device to check for LUKS header
+    presence
+    :returns: boolean: indicates whether the device is LUKS formatted,
+              based on the presence of a LUKS header.
+    """
+    return True if _luks_uuid(dev) else False
+
+
+def is_mapped_luks_device(dev):
+    """
+    Determine if dev is a mapped LUKS device
+    :param: dev: A full path to a block device to be checked
+    :returns: boolean: indicates whether a device is mapped
+    """
+    _, dirs, _ = next(os.walk(
+        '/sys/class/block/{}/holders/'
+        .format(os.path.basename(os.path.realpath(dev))))
+    )
+    is_held = len(dirs) > 0
+    return is_held and is_luks_device(dev)
+
+
+def is_block_device(path):
+    '''
+    Confirm device at path is a valid block device node.
+
+    :returns: boolean: True if path is a block device, False if not.
+    '''
+    if not os.path.exists(path):
+        return False
+    return S_ISBLK(os.stat(path).st_mode)
+
+
+def zap_disk(block_device):
+    '''
+    Clear a block device of its partition table. Relies on sgdisk, which is
+    installed as part of the 'gdisk' package in Ubuntu.
+
+    :param block_device: str: Full path of block device to clean.
+    '''
+    # https://github.com/ceph/ceph/commit/fdd7f8d83afa25c4e09aaedd90ab93f3b64a677b
+    # sometimes sgdisk exits non-zero; this is OK, dd will clean up
+    call(['sgdisk', '--zap-all', '--', block_device])
+    call(['sgdisk', '--clear', '--mbrtogpt', '--', block_device])
+    dev_end = check_output(['blockdev', '--getsz',
+                            block_device]).decode('UTF-8')
+    gpt_end = int(dev_end.split()[0]) - 100
+    check_call(['dd', 'if=/dev/zero', 'of=%s' % (block_device),
+                'bs=1M', 'count=1'])
+    check_call(['dd', 'if=/dev/zero', 'of=%s' % (block_device),
+                'bs=512', 'count=100', 'seek=%s' % (gpt_end)])
+
+
+def is_device_mounted(device):
+    '''Given a device path, return True if that device is mounted, and False
+    if it isn't.
+
+    :param device: str: Full path of the device to check.
+    :returns: boolean: True if the path represents a mounted device, False if
+              it doesn't.
+    '''
+    try:
+        out = check_output(['lsblk', '-P', device]).decode('UTF-8')
+    except Exception:
+        return False
+    return bool(re.search(r'MOUNTPOINT=".+"', out))
+
+
+def mkfs_xfs(device, force=False, inode_size=None):
+    """Format device with XFS filesystem.
+
+    By default this should fail if the device already has a filesystem on it.
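+
+    Example usage (illustrative device path; assumes /dev/vdb is an
+    unused block device)::
+
+        mkfs_xfs('/dev/vdb', force=True, inode_size=1024)
+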
+    :param device: Full path to device to format
+    :ptype device: str
+    :param force: Force operation
+    :ptype force: boolean
+    :param inode_size: XFS inode size in bytes; if set to 0 or None,
+        the value used will be the XFS system default
+    :ptype inode_size: int"""
+    cmd = ['mkfs.xfs']
+    if force:
+        cmd.append("-f")
+
+    if inode_size:
+        if inode_size >= 256 and inode_size <= 2048:
+            cmd += ['-i', "size={}".format(inode_size)]
+        else:
+            log("Config value xfs-inode-size={} is invalid. Using system default.".format(inode_size), level=WARNING)
+    else:
+        log("Using XFS filesystem with system default inode size.", level=INFO)
+
+    cmd += [device]
+    check_call(cmd)
diff --git a/ceph-radosgw/hooks/charmhelpers/core/__init__.py b/ceph-radosgw/hooks/charmhelpers/core/__init__.py
new file mode 100644
index 00000000..d7567b86
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/core/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/ceph-radosgw/hooks/charmhelpers/core/decorators.py b/ceph-radosgw/hooks/charmhelpers/core/decorators.py
new file mode 100644
index 00000000..e7e95d17
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/core/decorators.py
@@ -0,0 +1,93 @@
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Copyright 2014 Canonical Ltd.
+#
+# Authors:
+#  Edward Hope-Morley
+#
+
+import time
+
+from charmhelpers.core.hookenv import (
+    log,
+    INFO,
+)
+
+
+def retry_on_exception(num_retries, base_delay=0, exc_type=Exception):
+    """If the decorated function raises exception exc_type, allow num_retries
+    retry attempts before raising the exception.
+    """
+    def _retry_on_exception_inner_1(f):
+        def _retry_on_exception_inner_2(*args, **kwargs):
+            retries = num_retries
+            multiplier = 1
+            while True:
+                try:
+                    return f(*args, **kwargs)
+                except exc_type:
+                    if not retries:
+                        raise
+
+                delay = base_delay * multiplier
+                multiplier += 1
+                log("Retrying '%s' %d more times (delay=%s)" %
+                    (f.__name__, retries, delay), level=INFO)
+                retries -= 1
+                if delay:
+                    time.sleep(delay)
+
+        return _retry_on_exception_inner_2
+
+    return _retry_on_exception_inner_1
+
+
+def retry_on_predicate(num_retries, predicate_fun, base_delay=0):
+    """Retry based on return value
+
+    The return value of the decorated function is passed to the given
+    predicate_fun. If the result of the predicate is False, retry the
+    decorated function up to num_retries times.
+
+    An exponential backoff of up to base_delay^num_retries seconds can be
+    introduced by setting base_delay to a nonzero value. The default is to
+    run with a zero (i.e. no) delay.
+
+    :param num_retries: Max. number of retries to perform
+    :type num_retries: int
+    :param predicate_fun: Predicate function to determine if a retry is necessary
+    :type predicate_fun: callable
+    :param base_delay: Starting value in seconds for exponential delay, defaults to 0 (no delay)
+    :type base_delay: float
+    """
+    def _retry_on_pred_inner_1(f):
+        def _retry_on_pred_inner_2(*args, **kwargs):
+            retries = num_retries
+            multiplier = 1
+            delay = base_delay
+            while True:
+                result = f(*args, **kwargs)
+                if predicate_fun(result) or retries <= 0:
+                    return result
+                delay *= multiplier
+                multiplier += 1
+                log("Result {}, retrying '{}' {} more times (delay={})".format(
+                    result, f.__name__, retries, delay), level=INFO)
+                retries -= 1
+                if delay:
+                    time.sleep(delay)
+
+        return _retry_on_pred_inner_2
+
+    return _retry_on_pred_inner_1
diff --git a/ceph-radosgw/hooks/charmhelpers/core/files.py b/ceph-radosgw/hooks/charmhelpers/core/files.py
new file mode 100644
index 00000000..fdd82b75
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/core/files.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__author__ = 'Jorge Niedbalski '
+
+import os
+import subprocess
+
+
+def sed(filename, before, after, flags='g'):
+    """
+    Search and replace the given pattern in the given filename.
+
+    :param filename: relative or absolute file path.
+    :param before: expression to be replaced (see 'man sed')
+    :param after: expression to replace with (see 'man sed')
+    :param flags: sed-compatible regex flags; for example, to make
+        the search and replace case insensitive, specify ``flags="i"``.
+        The ``g`` flag is always specified regardless, so you do not
+        need to remember to include it when overriding this parameter.
+    :returns: If the sed command exit code was zero then return,
+        otherwise raise CalledProcessError.
+    """
+    expression = r's/{0}/{1}/{2}'.format(before,
+                                         after, flags)
+
+    return subprocess.check_call(["sed", "-i", "-r", "-e",
+                                  expression,
+                                  os.path.expanduser(filename)])
diff --git a/ceph-radosgw/hooks/charmhelpers/core/fstab.py b/ceph-radosgw/hooks/charmhelpers/core/fstab.py
new file mode 100644
index 00000000..d9fa9152
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/core/fstab.py
@@ -0,0 +1,132 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import io +import os + +__author__ = 'Jorge Niedbalski R. ' + + +class Fstab(io.FileIO): + """This class extends file in order to implement a file reader/writer + for file `/etc/fstab` + """ + + class Entry(object): + """Entry class represents a non-comment line on the `/etc/fstab` file + """ + def __init__(self, device, mountpoint, filesystem, + options, d=0, p=0): + self.device = device + self.mountpoint = mountpoint + self.filesystem = filesystem + + if not options: + options = "defaults" + + self.options = options + self.d = int(d) + self.p = int(p) + + def __eq__(self, o): + return str(self) == str(o) + + def __str__(self): + return "{} {} {} {} {} {}".format(self.device, + self.mountpoint, + self.filesystem, + self.options, + self.d, + self.p) + + DEFAULT_PATH = os.path.join(os.path.sep, 'etc', 'fstab') + + def __init__(self, path=None): + if path: + self._path = path + else: + self._path = self.DEFAULT_PATH + super(Fstab, self).__init__(self._path, 'rb+') + + def _hydrate_entry(self, line): + # NOTE: use split with no arguments to split on any + # whitespace including tabs + return Fstab.Entry(*filter( + lambda x: x not in ('', None), + line.strip("\n").split())) + + @property + def entries(self): + self.seek(0) + for line in self.readlines(): + line = line.decode('us-ascii') + try: + if line.strip() and not line.strip().startswith("#"): + yield self._hydrate_entry(line) + except ValueError: + pass + + def get_entry_by_attr(self, attr, value): + for entry in self.entries: + e_attr = getattr(entry, attr) + if e_attr == value: + return entry + return None + + def add_entry(self, entry): + if self.get_entry_by_attr('device', entry.device): + return False + + self.write((str(entry) + '\n').encode('us-ascii')) + self.truncate() + return entry + + def remove_entry(self, entry): + self.seek(0) + + lines = [l.decode('us-ascii') for l in self.readlines()] + + found = False + for index, line in enumerate(lines): + if line.strip() and not line.strip().startswith("#"): + if self._hydrate_entry(line) == entry: + found = True + break + + if not found: + return False + + lines.remove(line) + + self.seek(0) + self.write(''.join(lines).encode('us-ascii')) + self.truncate() + return True + + @classmethod + def remove_by_mountpoint(cls, mountpoint, path=None): + fstab = cls(path=path) + entry = fstab.get_entry_by_attr('mountpoint', mountpoint) + if entry: + return fstab.remove_entry(entry) + return False + + @classmethod + def add(cls, device, mountpoint, filesystem, options=None, path=None): + return cls(path=path).add_entry(Fstab.Entry(device, + mountpoint, filesystem, + options=options)) diff --git a/ceph-radosgw/hooks/charmhelpers/core/hookenv.py b/ceph-radosgw/hooks/charmhelpers/core/hookenv.py new file mode 100644 index 00000000..370c3e8f --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/core/hookenv.py @@ -0,0 +1,1636 @@ +# Copyright 2013-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"Interactions with the Juju environment" +# +# Authors: +# Charm Helpers Developers + +import copy +from distutils.version import LooseVersion +from enum import Enum +from functools import wraps +from collections import namedtuple, UserDict +import glob +import os +import json +import yaml +import re +import subprocess +import sys +import errno +import tempfile +from subprocess import CalledProcessError + +from charmhelpers import deprecate + + +CRITICAL = "CRITICAL" +ERROR = "ERROR" +WARNING = "WARNING" +INFO = "INFO" +DEBUG = "DEBUG" +TRACE = "TRACE" +MARKER = object() +SH_MAX_ARG = 131071 + + +RANGE_WARNING = ('Passing NO_PROXY string that includes a cidr. ' + 'This may not be compatible with software you are ' + 'running in your shell.') + + +class WORKLOAD_STATES(Enum): + ACTIVE = 'active' + BLOCKED = 'blocked' + MAINTENANCE = 'maintenance' + WAITING = 'waiting' + + +cache = {} + + +def cached(func): + """Cache return values for multiple executions of func + args + + For example:: + + @cached + def unit_get(attribute): + pass + + unit_get('test') + + will cache the result of unit_get + 'test' for future calls. + """ + @wraps(func) + def wrapper(*args, **kwargs): + global cache + key = json.dumps((func, args, kwargs), sort_keys=True, default=str) + try: + return cache[key] + except KeyError: + pass # Drop out of the exception handler scope. + res = func(*args, **kwargs) + cache[key] = res + return res + wrapper._wrapped = func + return wrapper + + +def flush(key): + """Flushes any entries from function cache where the + key is found in the function+args """ + flush_list = [] + for item in cache: + if key in item: + flush_list.append(item) + for item in flush_list: + del cache[item] + + +def log(message, level=None): + """Write a message to the juju log""" + command = ['juju-log'] + if level: + command += ['-l', level] + if not isinstance(message, str): + message = repr(message) + command += [message[:SH_MAX_ARG]] + # Missing juju-log should not cause failures in unit tests + # Send log output to stderr + try: + subprocess.call(command) + except OSError as e: + if e.errno == errno.ENOENT: + if level: + message = "{}: {}".format(level, message) + message = "juju-log: {}".format(message) + print(message, file=sys.stderr) + else: + raise + + +def function_log(message): + """Write a function progress message""" + command = ['function-log'] + if not isinstance(message, str): + message = repr(message) + command += [message[:SH_MAX_ARG]] + # Missing function-log should not cause failures in unit tests + # Send function_log output to stderr + try: + subprocess.call(command) + except OSError as e: + if e.errno == errno.ENOENT: + message = "function-log: {}".format(message) + print(message, file=sys.stderr) + else: + raise + + +class Serializable(UserDict): + """Wrapper, an object that can be serialized to yaml or json""" + + def __init__(self, obj): + # wrap the object + UserDict.__init__(self) + self.data = obj + + def __getattr__(self, attr): + # See if this object has attribute. 
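+        # Lookup order: wrapper internals ('json', 'yaml', 'data') first,
+        # then attributes of the wrapped object, then keys of the wrapped
+        # object via its dict interface.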
+        if attr in ("json", "yaml", "data"):
+            return self.__dict__[attr]
+        # Check for attribute in wrapped object.
+        got = getattr(self.data, attr, MARKER)
+        if got is not MARKER:
+            return got
+        # Proxy to the wrapped object via dict interface.
+        try:
+            return self.data[attr]
+        except KeyError:
+            raise AttributeError(attr)
+
+    def __getstate__(self):
+        # Pickle as a standard dictionary.
+        return self.data
+
+    def __setstate__(self, state):
+        # Unpickle into our wrapper.
+        self.data = state
+
+    def json(self):
+        """Serialize the object to json"""
+        return json.dumps(self.data)
+
+    def yaml(self):
+        """Serialize the object to yaml"""
+        return yaml.dump(self.data)
+
+
+def execution_environment():
+    """A convenient bundling of the current execution context"""
+    context = {}
+    context['conf'] = config()
+    if relation_id():
+        context['reltype'] = relation_type()
+        context['relid'] = relation_id()
+        context['rel'] = relation_get()
+    context['unit'] = local_unit()
+    context['rels'] = relations()
+    context['env'] = os.environ
+    return context
+
+
+def in_relation_hook():
+    """Determine whether we're running in a relation hook"""
+    return 'JUJU_RELATION' in os.environ
+
+
+def relation_type():
+    """The scope for the current relation hook"""
+    return os.environ.get('JUJU_RELATION', None)
+
+
+@cached
+def relation_id(relation_name=None, service_or_unit=None):
+    """The relation ID for the current or a specified relation"""
+    if not relation_name and not service_or_unit:
+        return os.environ.get('JUJU_RELATION_ID', None)
+    elif relation_name and service_or_unit:
+        service_name = service_or_unit.split('/')[0]
+        for relid in relation_ids(relation_name):
+            remote_service = remote_service_name(relid)
+            if remote_service == service_name:
+                return relid
+    else:
+        raise ValueError('Must specify neither or both of relation_name and service_or_unit')
+
+
+def departing_unit():
+    """The departing unit for the current relation hook.
+
+    Available since juju 2.8.
+
+    :returns: the departing unit, or None if the information isn't available.
+    :rtype: Optional[str]
+    """
+    return os.environ.get('JUJU_DEPARTING_UNIT', None)
+
+
+def local_unit():
+    """Local unit ID"""
+    return os.environ['JUJU_UNIT_NAME']
+
+
+def remote_unit():
+    """The remote unit for the current relation hook"""
+    return os.environ.get('JUJU_REMOTE_UNIT', None)
+
+
+def application_name():
+    """
+    The name of the deployed application this unit belongs to.
+    """
+    return local_unit().split('/')[0]
+
+
+def service_name():
+    """
+    .. deprecated:: 0.19.1
+       Alias for :func:`application_name`.
+    """
+    return application_name()
+
+
+def model_name():
+    """
+    Name of the model that this unit is deployed in.
+    """
+    return os.environ['JUJU_MODEL_NAME']
+
+
+def model_uuid():
+    """
+    UUID of the model that this unit is deployed in.
+    """
+    return os.environ['JUJU_MODEL_UUID']
+
+
+def principal_unit():
+    """Returns the principal unit of this unit, otherwise None"""
+    # Juju 2.2 and above provides JUJU_PRINCIPAL_UNIT
+    principal_unit = os.environ.get('JUJU_PRINCIPAL_UNIT', None)
+    # If it's empty, then this unit is the principal
+    if principal_unit == '':
+        return os.environ['JUJU_UNIT_NAME']
+    elif principal_unit is not None:
+        return principal_unit
+    # For Juju 2.1 and below, let's try to work out the principal unit by
+    # inspecting the various charms' metadata.yaml.
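+    # Scan metadata for each co-located unit: the first related unit whose
+    # metadata does not mark it as a subordinate is assumed to be the
+    # principal.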
+ for reltype in relation_types(): + for rid in relation_ids(reltype): + for unit in related_units(rid): + md = _metadata_unit(unit) + if not md: + continue + subordinate = md.pop('subordinate', None) + if not subordinate: + return unit + return None + + +@cached +def remote_service_name(relid=None): + """The remote service name for a given relation-id (or the current relation)""" + if relid is None: + unit = remote_unit() + else: + units = related_units(relid) + unit = units[0] if units else None + return unit.split('/')[0] if unit else None + + +def hook_name(): + """The name of the currently executing hook""" + return os.environ.get('JUJU_HOOK_NAME', os.path.basename(sys.argv[0])) + + +class Config(dict): + """A dictionary representation of the charm's config.yaml, with some + extra features: + + - See which values in the dictionary have changed since the previous hook. + - For values that have changed, see what the previous value was. + - Store arbitrary data for use in a later hook. + + NOTE: Do not instantiate this object directly - instead call + ``hookenv.config()``, which will return an instance of :class:`Config`. + + Example usage:: + + >>> # inside a hook + >>> from charmhelpers.core import hookenv + >>> config = hookenv.config() + >>> config['foo'] + 'bar' + >>> # store a new key/value for later use + >>> config['mykey'] = 'myval' + + + >>> # user runs `juju set mycharm foo=baz` + >>> # now we're inside subsequent config-changed hook + >>> config = hookenv.config() + >>> config['foo'] + 'baz' + >>> # test to see if this val has changed since last hook + >>> config.changed('foo') + True + >>> # what was the previous value? + >>> config.previous('foo') + 'bar' + >>> # keys/values that we add are preserved across hooks + >>> config['mykey'] + 'myval' + + """ + CONFIG_FILE_NAME = '.juju-persistent-config' + + def __init__(self, *args, **kw): + super(Config, self).__init__(*args, **kw) + self.implicit_save = True + self._prev_dict = None + self.path = os.path.join(charm_dir(), Config.CONFIG_FILE_NAME) + if os.path.exists(self.path) and os.stat(self.path).st_size: + self.load_previous() + atexit(self._implicit_save) + + def load_previous(self, path=None): + """Load previous copy of config from disk. + + In normal usage you don't need to call this method directly - it + is called automatically at object initialization. + + :param path: + + File path from which to load the previous config. If `None`, + config is loaded from the default location. If `path` is + specified, subsequent `save()` calls will write to the same + path. + + """ + self.path = path or self.path + with open(self.path) as f: + try: + self._prev_dict = json.load(f) + except ValueError as e: + log('Found but was unable to parse previous config data, ' + 'ignoring which will report all values as changed - {}' + .format(str(e)), level=ERROR) + return + for k, v in copy.deepcopy(self._prev_dict).items(): + if k not in self: + self[k] = v + + def changed(self, key): + """Return True if the current value for this key is different from + the previous value. + + """ + if self._prev_dict is None: + return True + return self.previous(key) != self.get(key) + + def previous(self, key): + """Return previous value for this key, or None if there + is no previous value. + + """ + if self._prev_dict: + return self._prev_dict.get(key) + return None + + def save(self): + """Save this config to disk. 
+ + If the charm is using the :mod:`Services Framework ` + or :meth:'@hook ' decorator, this + is called automatically at the end of successful hook execution. + Otherwise, it should be called directly by user code. + + To disable automatic saves, set ``implicit_save=False`` on this + instance. + + """ + with open(self.path, 'w') as f: + os.fchmod(f.fileno(), 0o600) + json.dump(self, f) + + def _implicit_save(self): + if self.implicit_save: + self.save() + + +_cache_config = None + + +def config(scope=None): + """ + Get the juju charm configuration (scope==None) or individual key, + (scope=str). The returned value is a Python data structure loaded as + JSON from the Juju config command. + + :param scope: If set, return the value for the specified key. + :type scope: Optional[str] + :returns: Either the whole config as a Config, or a key from it. + :rtype: Any + """ + global _cache_config + config_cmd_line = ['config-get', '--all', '--format=json'] + try: + if _cache_config is None: + config_data = json.loads( + subprocess.check_output(config_cmd_line).decode('UTF-8')) + _cache_config = Config(config_data) + if scope is not None: + return _cache_config.get(scope) + return _cache_config + except (json.decoder.JSONDecodeError, UnicodeDecodeError) as e: + log('Unable to parse output from config-get: config_cmd_line="{}" ' + 'message="{}"' + .format(config_cmd_line, str(e)), level=ERROR) + return None + + +@cached +def relation_get(attribute=None, unit=None, rid=None, app=None): + """Get relation information""" + _args = ['relation-get', '--format=json'] + if app is not None: + if unit is not None: + raise ValueError("Cannot use both 'unit' and 'app'") + _args.append('--app') + if rid: + _args.append('-r') + _args.append(rid) + _args.append(attribute or '-') + # unit or application name + if unit or app: + _args.append(unit or app) + try: + return json.loads(subprocess.check_output(_args).decode('UTF-8')) + except ValueError: + return None + except CalledProcessError as e: + if e.returncode == 2: + return None + raise + + +@cached +def _relation_set_accepts_file(): + """Return True if the juju relation-set command accepts a file. + + Cache the result as it won't change during the execution of a hook, and + thus we can make relation_set() more efficient by only checking for the + first relation_set() call. + + :returns: True if relation_set accepts a file. + :rtype: bool + :raises: subprocess.CalledProcessError if the check fails. + """ + return "--file" in subprocess.check_output( + ["relation-set", "--help"], universal_newlines=True) + + +def relation_set(relation_id=None, relation_settings=None, app=False, **kwargs): + """Set relation information for the current unit""" + relation_settings = relation_settings if relation_settings else {} + relation_cmd_line = ['relation-set'] + if app: + relation_cmd_line.append('--app') + if relation_id is not None: + relation_cmd_line.extend(('-r', relation_id)) + settings = relation_settings.copy() + settings.update(kwargs) + for key, value in settings.items(): + # Force value to be a string: it always should, but some call + # sites pass in things like dicts or numbers. + if value is not None: + settings[key] = "{}".format(value) + if _relation_set_accepts_file(): + # --file was introduced in Juju 1.23.2. Use it by default if + # available, since otherwise we'll break if the relation data is + # too big. Ideally we should tell relation-set to read the data from + # stdin, but that feature is broken in 1.23.2: Bug #1454678. 
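+        # Dump the settings to a temporary YAML file, hand its path to
+        # relation-set via --file, then clean the file up afterwards.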
+ with tempfile.NamedTemporaryFile(delete=False) as settings_file: + settings_file.write(yaml.safe_dump(settings).encode("utf-8")) + subprocess.check_call( + relation_cmd_line + ["--file", settings_file.name]) + os.remove(settings_file.name) + else: + for key, value in settings.items(): + if value is None: + relation_cmd_line.append('{}='.format(key)) + else: + relation_cmd_line.append('{}={}'.format(key, value)) + subprocess.check_call(relation_cmd_line) + # Flush cache of any relation-gets for local unit + flush(local_unit()) + + +def relation_clear(r_id=None): + ''' Clears any relation data already set on relation r_id ''' + settings = relation_get(rid=r_id, + unit=local_unit()) + for setting in settings: + if setting not in ['public-address', 'private-address']: + settings[setting] = None + relation_set(relation_id=r_id, + **settings) + + +@cached +def relation_ids(reltype=None): + """A list of relation_ids""" + reltype = reltype or relation_type() + relid_cmd_line = ['relation-ids', '--format=json'] + if reltype is not None: + relid_cmd_line.append(reltype) + return json.loads( + subprocess.check_output(relid_cmd_line).decode('UTF-8')) or [] + return [] + + +@cached +def related_units(relid=None): + """A list of related units""" + relid = relid or relation_id() + units_cmd_line = ['relation-list', '--format=json'] + if relid is not None: + units_cmd_line.extend(('-r', relid)) + return json.loads( + subprocess.check_output(units_cmd_line).decode('UTF-8')) or [] + + +def expected_peer_units(): + """Get a generator for units we expect to join peer relation based on + goal-state. + + The local unit is excluded from the result to make it easy to gauge + completion of all peers joining the relation with existing hook tools. + + Example usage: + log('peer {} of {} joined peer relation' + .format(len(related_units()), + len(list(expected_peer_units())))) + + This function will raise NotImplementedError if used with juju versions + without goal-state support. + + :returns: iterator + :rtype: types.GeneratorType + :raises: NotImplementedError + """ + if not has_juju_version("2.4.0"): + # goal-state first appeared in 2.4.0. + raise NotImplementedError("goal-state") + _goal_state = goal_state() + return (key for key in _goal_state['units'] + if '/' in key and key != local_unit()) + + +def expected_related_units(reltype=None): + """Get a generator for units we expect to join relation based on + goal-state. + + Note that you can not use this function for the peer relation, take a look + at expected_peer_units() for that. + + This function will raise KeyError if you request information for a + relation type for which juju goal-state does not have information. It will + raise NotImplementedError if used with juju versions without goal-state + support. + + Example usage: + log('participant {} of {} joined relation {}' + .format(len(related_units()), + len(list(expected_related_units())), + relation_type())) + + :param reltype: Relation type to list data for, default is to list data for + the relation type we are currently executing a hook for. + :type reltype: str + :returns: iterator + :rtype: types.GeneratorType + :raises: KeyError, NotImplementedError + """ + if not has_juju_version("2.4.4"): + # goal-state existed in 2.4.0, but did not list individual units to + # join a relation in 2.4.1 through 2.4.3. 
(LP: #1794739) + raise NotImplementedError("goal-state relation unit count") + reltype = reltype or relation_type() + _goal_state = goal_state() + return (key for key in _goal_state['relations'][reltype] if '/' in key) + + +@cached +def relation_for_unit(unit=None, rid=None): + """Get the json representation of a unit's relation""" + unit = unit or remote_unit() + relation = relation_get(unit=unit, rid=rid) + for key in relation: + if key.endswith('-list'): + relation[key] = relation[key].split() + relation['__unit__'] = unit + return relation + + +@cached +def relations_for_id(relid=None): + """Get relations of a specific relation ID""" + relation_data = [] + relid = relid or relation_ids() + for unit in related_units(relid): + unit_data = relation_for_unit(unit, relid) + unit_data['__relid__'] = relid + relation_data.append(unit_data) + return relation_data + + +@cached +def relations_of_type(reltype=None): + """Get relations of a specific type""" + relation_data = [] + reltype = reltype or relation_type() + for relid in relation_ids(reltype): + for relation in relations_for_id(relid): + relation['__relid__'] = relid + relation_data.append(relation) + return relation_data + + +@cached +def metadata(): + """Get the current charm metadata.yaml contents as a python object""" + with open(os.path.join(charm_dir(), 'metadata.yaml')) as md: + return yaml.safe_load(md) + + +def _metadata_unit(unit): + """Given the name of a unit (e.g. apache2/0), get the unit charm's + metadata.yaml. Very similar to metadata() but allows us to inspect + other units. Unit needs to be co-located, such as a subordinate or + principal/primary. + + :returns: metadata.yaml as a python object. + + """ + basedir = os.sep.join(charm_dir().split(os.sep)[:-2]) + unitdir = 'unit-{}'.format(unit.replace(os.sep, '-')) + joineddir = os.path.join(basedir, unitdir, 'charm', 'metadata.yaml') + if not os.path.exists(joineddir): + return None + with open(joineddir) as md: + return yaml.safe_load(md) + + +@cached +def relation_types(): + """Get a list of relation types supported by this charm""" + rel_types = [] + md = metadata() + for key in ('provides', 'requires', 'peers'): + section = md.get(key) + if section: + rel_types.extend(section.keys()) + return rel_types + + +@cached +def peer_relation_id(): + '''Get the peers relation id if a peers relation has been joined, else None.''' + md = metadata() + section = md.get('peers') + if section: + for key in section: + relids = relation_ids(key) + if relids: + return relids[0] + return None + + +@cached +def relation_to_interface(relation_name): + """ + Given the name of a relation, return the interface that relation uses. + + :returns: The interface name, or ``None``. + """ + return relation_to_role_and_interface(relation_name)[1] + + +@cached +def relation_to_role_and_interface(relation_name): + """ + Given the name of a relation, return the role and the name of the interface + that relation uses (where role is one of ``provides``, ``requires``, or ``peers``). + + :returns: A tuple containing ``(role, interface)``, or ``(None, None)``. 
+ """ + _metadata = metadata() + for role in ('provides', 'requires', 'peers'): + interface = _metadata.get(role, {}).get(relation_name, {}).get('interface') + if interface: + return role, interface + return None, None + + +@cached +def role_and_interface_to_relations(role, interface_name): + """ + Given a role and interface name, return a list of relation names for the + current charm that use that interface under that role (where role is one + of ``provides``, ``requires``, or ``peers``). + + :returns: A list of relation names. + """ + _metadata = metadata() + results = [] + for relation_name, relation in _metadata.get(role, {}).items(): + if relation['interface'] == interface_name: + results.append(relation_name) + return results + + +@cached +def interface_to_relations(interface_name): + """ + Given an interface, return a list of relation names for the current + charm that use that interface. + + :returns: A list of relation names. + """ + results = [] + for role in ('provides', 'requires', 'peers'): + results.extend(role_and_interface_to_relations(role, interface_name)) + return results + + +@cached +def charm_name(): + """Get the name of the current charm as is specified on metadata.yaml""" + return metadata().get('name') + + +@cached +def relations(): + """Get a nested dictionary of relation data for all related units""" + rels = {} + for reltype in relation_types(): + relids = {} + for relid in relation_ids(reltype): + units = {local_unit(): relation_get(unit=local_unit(), rid=relid)} + for unit in related_units(relid): + reldata = relation_get(unit=unit, rid=relid) + units[unit] = reldata + relids[relid] = units + rels[reltype] = relids + return rels + + +@cached +def is_relation_made(relation, keys='private-address'): + ''' + Determine whether a relation is established by checking for + presence of key(s). If a list of keys is provided, they + must all be present for the relation to be identified as made + ''' + if isinstance(keys, str): + keys = [keys] + for r_id in relation_ids(relation): + for unit in related_units(r_id): + context = {} + for k in keys: + context[k] = relation_get(k, rid=r_id, + unit=unit) + if None not in context.values(): + return True + return False + + +def _port_op(op_name, port, protocol="TCP"): + """Open or close a service network port""" + _args = [op_name] + icmp = protocol.upper() == "ICMP" + if icmp: + _args.append(protocol) + else: + _args.append('{}/{}'.format(port, protocol)) + try: + subprocess.check_call(_args) + except subprocess.CalledProcessError: + # Older Juju pre 2.3 doesn't support ICMP + # so treat it as a no-op if it fails. 
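+        # Failures for TCP/UDP ports are still real errors, so re-raise.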
+ if not icmp: + raise + + +def open_port(port, protocol="TCP"): + """Open a service network port""" + _port_op('open-port', port, protocol) + + +def close_port(port, protocol="TCP"): + """Close a service network port""" + _port_op('close-port', port, protocol) + + +def open_ports(start, end, protocol="TCP"): + """Opens a range of service network ports""" + _args = ['open-port'] + _args.append('{}-{}/{}'.format(start, end, protocol)) + subprocess.check_call(_args) + + +def close_ports(start, end, protocol="TCP"): + """Close a range of service network ports""" + _args = ['close-port'] + _args.append('{}-{}/{}'.format(start, end, protocol)) + subprocess.check_call(_args) + + +def opened_ports(): + """Get the opened ports + + *Note that this will only show ports opened in a previous hook* + + :returns: Opened ports as a list of strings: ``['8080/tcp', '8081-8083/tcp']`` + """ + _args = ['opened-ports', '--format=json'] + return json.loads(subprocess.check_output(_args).decode('UTF-8')) + + +@cached +def unit_get(attribute): + """Get the unit ID for the remote unit""" + _args = ['unit-get', '--format=json', attribute] + try: + return json.loads(subprocess.check_output(_args).decode('UTF-8')) + except ValueError: + return None + + +def unit_public_ip(): + """Get this unit's public IP address""" + return unit_get('public-address') + + +def unit_private_ip(): + """Get this unit's private IP address""" + return unit_get('private-address') + + +@cached +def storage_get(attribute=None, storage_id=None): + """Get storage attributes""" + _args = ['storage-get', '--format=json'] + if storage_id: + _args.extend(('-s', storage_id)) + if attribute: + _args.append(attribute) + try: + return json.loads(subprocess.check_output(_args).decode('UTF-8')) + except ValueError: + return None + + +@cached +def storage_list(storage_name=None): + """List the storage IDs for the unit""" + _args = ['storage-list', '--format=json'] + if storage_name: + _args.append(storage_name) + try: + return json.loads(subprocess.check_output(_args).decode('UTF-8')) + except ValueError: + return None + except OSError as e: + import errno + if e.errno == errno.ENOENT: + # storage-list does not exist + return [] + raise + + +class UnregisteredHookError(Exception): + """Raised when an undefined hook is called""" + pass + + +class Hooks(object): + """A convenient handler for hook functions. + + Example:: + + hooks = Hooks() + + # register a hook, taking its name from the function name + @hooks.hook() + def install(): + pass # your code here + + # register a hook, providing a custom hook name + @hooks.hook("config-changed") + def config_changed(): + pass # your code here + + if __name__ == "__main__": + # execute a hook based on the name the program is called by + hooks.execute(sys.argv) + """ + + def __init__(self, config_save=None): + super(Hooks, self).__init__() + self._hooks = {} + + # For unknown reasons, we allow the Hooks constructor to override + # config().implicit_save. 
+ if config_save is not None: + config().implicit_save = config_save + + def register(self, name, function): + """Register a hook""" + self._hooks[name] = function + + def execute(self, args): + """Execute a registered hook based on args[0]""" + _run_atstart() + hook_name = os.path.basename(args[0]) + if hook_name in self._hooks: + try: + self._hooks[hook_name]() + except SystemExit as x: + if x.code is None or x.code == 0: + _run_atexit() + raise + _run_atexit() + else: + raise UnregisteredHookError(hook_name) + + def hook(self, *hook_names): + """Decorator, registering them as hooks""" + def wrapper(decorated): + for hook_name in hook_names: + self.register(hook_name, decorated) + else: + self.register(decorated.__name__, decorated) + if '_' in decorated.__name__: + self.register( + decorated.__name__.replace('_', '-'), decorated) + return decorated + return wrapper + + +class NoNetworkBinding(Exception): + pass + + +def charm_dir(): + """Return the root directory of the current charm""" + d = os.environ.get('JUJU_CHARM_DIR') + if d is not None: + return d + return os.environ.get('CHARM_DIR') + + +def cmd_exists(cmd): + """Return True if the specified cmd exists in the path""" + return any( + os.access(os.path.join(path, cmd), os.X_OK) + for path in os.environ["PATH"].split(os.pathsep) + ) + + +@cached +def action_get(key=None): + """Gets the value of an action parameter, or all key/value param pairs.""" + cmd = ['action-get'] + if key is not None: + cmd.append(key) + cmd.append('--format=json') + action_data = json.loads(subprocess.check_output(cmd).decode('UTF-8')) + return action_data + + +@cached +@deprecate("moved to action_get()", log=log) +def function_get(key=None): + """ + .. deprecated:: + Gets the value of an action parameter, or all key/value param pairs. + """ + cmd = ['function-get'] + # Fallback for older charms. + if not cmd_exists('function-get'): + cmd = ['action-get'] + + if key is not None: + cmd.append(key) + cmd.append('--format=json') + function_data = json.loads(subprocess.check_output(cmd).decode('UTF-8')) + return function_data + + +def action_set(values): + """Sets the values to be returned after the action finishes.""" + cmd = ['action-set'] + for k, v in list(values.items()): + cmd.append('{}={}'.format(k, v)) + subprocess.check_call(cmd) + + +@deprecate("moved to action_set()", log=log) +def function_set(values): + """ + .. deprecated:: + Sets the values to be returned after the function finishes. + """ + cmd = ['function-set'] + # Fallback for older charms. + if not cmd_exists('function-get'): + cmd = ['action-set'] + + for k, v in list(values.items()): + cmd.append('{}={}'.format(k, v)) + subprocess.check_call(cmd) + + +def action_fail(message): + """ + Sets the action status to failed and sets the error message. + + The results set by action_set are preserved. + """ + subprocess.check_call(['action-fail', message]) + + +@deprecate("moved to action_fail()", log=log) +def function_fail(message): + """ + .. deprecated:: + Sets the function status to failed and sets the error message. + + The results set by function_set are preserved. + """ + cmd = ['function-fail'] + # Fallback for older charms. 
+ if not cmd_exists('function-fail'): + cmd = ['action-fail'] + cmd.append(message) + + subprocess.check_call(cmd) + + +def action_name(): + """Get the name of the currently executing action.""" + return os.environ.get('JUJU_ACTION_NAME') + + +def function_name(): + """Get the name of the currently executing function.""" + return os.environ.get('JUJU_FUNCTION_NAME') or action_name() + + +def action_uuid(): + """Get the UUID of the currently executing action.""" + return os.environ.get('JUJU_ACTION_UUID') + + +def function_id(): + """Get the ID of the currently executing function.""" + return os.environ.get('JUJU_FUNCTION_ID') or action_uuid() + + +def action_tag(): + """Get the tag for the currently executing action.""" + return os.environ.get('JUJU_ACTION_TAG') + + +def function_tag(): + """Get the tag for the currently executing function.""" + return os.environ.get('JUJU_FUNCTION_TAG') or action_tag() + + +def status_set(workload_state, message, application=False): + """Set the workload state with a message + + Use status-set to set the workload state with a message which is visible + to the user via juju status. If the status-set command is not found then + assume this is juju < 1.23 and juju-log the message instead. + + workload_state -- valid juju workload state. str or WORKLOAD_STATES + message -- status update message + application -- Whether this is an application state set + """ + bad_state_msg = '{!r} is not a valid workload state' + + if isinstance(workload_state, str): + try: + # Convert string to enum. + workload_state = WORKLOAD_STATES[workload_state.upper()] + except KeyError: + raise ValueError(bad_state_msg.format(workload_state)) + + if workload_state not in WORKLOAD_STATES: + raise ValueError(bad_state_msg.format(workload_state)) + + cmd = ['status-set'] + if application: + cmd.append('--application') + cmd.extend([workload_state.value, message]) + try: + ret = subprocess.call(cmd) + if ret == 0: + return + except OSError as e: + if e.errno != errno.ENOENT: + raise + log_message = 'status-set failed: {} {}'.format(workload_state.value, + message) + log(log_message, level='INFO') + + +def status_get(): + """Retrieve the previously set juju workload state and message + + If the status-get command is not found then assume this is juju < 1.23 and + return 'unknown', "" + + """ + cmd = ['status-get', "--format=json", "--include-data"] + try: + raw_status = subprocess.check_output(cmd) + except OSError as e: + if e.errno == errno.ENOENT: + return ('unknown', "") + else: + raise + else: + status = json.loads(raw_status.decode("UTF-8")) + return (status["status"], status["message"]) + + +def translate_exc(from_exc, to_exc): + def inner_translate_exc1(f): + @wraps(f) + def inner_translate_exc2(*args, **kwargs): + try: + return f(*args, **kwargs) + except from_exc: + raise to_exc + + return inner_translate_exc2 + + return inner_translate_exc1 + + +def application_version_set(version): + """Charm authors may trigger this command from any hook to output what + version of the application is running. This could be a package version, + for instance postgres version 9.5. It could also be a build number or + version control revision identifier, for instance git sha 6fb7ba68. 
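+
+    Example usage, reporting the PostgreSQL version mentioned above::
+
+        application_version_set('9.5')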
""" + + cmd = ['application-version-set'] + cmd.append(version) + try: + subprocess.check_call(cmd) + except OSError: + log("Application Version: {}".format(version)) + + +@translate_exc(from_exc=OSError, to_exc=NotImplementedError) +@cached +def goal_state(): + """Juju goal state values""" + cmd = ['goal-state', '--format=json'] + return json.loads(subprocess.check_output(cmd).decode('UTF-8')) + + +@translate_exc(from_exc=OSError, to_exc=NotImplementedError) +def is_leader(): + """Does the current unit hold the juju leadership + + Uses juju to determine whether the current unit is the leader of its peers + """ + cmd = ['is-leader', '--format=json'] + return json.loads(subprocess.check_output(cmd).decode('UTF-8')) + + +@translate_exc(from_exc=OSError, to_exc=NotImplementedError) +def leader_get(attribute=None): + """Juju leader get value(s)""" + cmd = ['leader-get', '--format=json'] + [attribute or '-'] + return json.loads(subprocess.check_output(cmd).decode('UTF-8')) + + +@translate_exc(from_exc=OSError, to_exc=NotImplementedError) +def leader_set(settings=None, **kwargs): + """Juju leader set value(s)""" + # Don't log secrets. + # log("Juju leader-set '%s'" % (settings), level=DEBUG) + cmd = ['leader-set'] + settings = settings or {} + settings.update(kwargs) + for k, v in settings.items(): + if v is None: + cmd.append('{}='.format(k)) + else: + cmd.append('{}={}'.format(k, v)) + subprocess.check_call(cmd) + + +@translate_exc(from_exc=OSError, to_exc=NotImplementedError) +def payload_register(ptype, klass, pid): + """ is used while a hook is running to let Juju know that a + payload has been started.""" + cmd = ['payload-register'] + for x in [ptype, klass, pid]: + cmd.append(x) + subprocess.check_call(cmd) + + +@translate_exc(from_exc=OSError, to_exc=NotImplementedError) +def payload_unregister(klass, pid): + """ is used while a hook is running to let Juju know + that a payload has been manually stopped. The and provided + must match a payload that has been previously registered with juju using + payload-register.""" + cmd = ['payload-unregister'] + for x in [klass, pid]: + cmd.append(x) + subprocess.check_call(cmd) + + +@translate_exc(from_exc=OSError, to_exc=NotImplementedError) +def payload_status_set(klass, pid, status): + """is used to update the current status of a registered payload. + The and provided must match a payload that has been previously + registered with juju using payload-register. The must be one of the + follow: starting, started, stopping, stopped""" + cmd = ['payload-status-set'] + for x in [klass, pid, status]: + cmd.append(x) + subprocess.check_call(cmd) + + +@translate_exc(from_exc=OSError, to_exc=NotImplementedError) +def resource_get(name): + """used to fetch the resource path of the given name. + + must match a name of defined resource in metadata.yaml + + returns either a path or False if resource not available + """ + if not name: + return False + + cmd = ['resource-get', name] + try: + return subprocess.check_output(cmd).decode('UTF-8') + except subprocess.CalledProcessError: + return False + + +@cached +def juju_version(): + """Full version string (eg. 
'1.23.3.1-trusty-amd64')""" + # Per https://bugs.launchpad.net/juju-core/+bug/1455368/comments/1 + jujud = glob.glob('/var/lib/juju/tools/machine-*/jujud')[0] + return subprocess.check_output([jujud, 'version'], + universal_newlines=True).strip() + + +def has_juju_version(minimum_version): + """Return True if the Juju version is at least the provided version""" + return LooseVersion(juju_version()) >= LooseVersion(minimum_version) + + +_atexit = [] +_atstart = [] + + +def atstart(callback, *args, **kwargs): + '''Schedule a callback to run before the main hook. + + Callbacks are run in the order they were added. + + This is useful for modules and classes to perform initialization + and inject behavior. In particular: + + - Run common code before all of your hooks, such as logging + the hook name or interesting relation data. + - Defer object or module initialization that requires a hook + context until we know there actually is a hook context, + making testing easier. + - Rather than requiring charm authors to include boilerplate to + invoke your helper's behavior, have it run automatically if + your object is instantiated or module imported. + + This is not at all useful after your hook framework as been launched. + ''' + global _atstart + _atstart.append((callback, args, kwargs)) + + +def atexit(callback, *args, **kwargs): + '''Schedule a callback to run on successful hook completion. + + Callbacks are run in the reverse order that they were added.''' + _atexit.append((callback, args, kwargs)) + + +def _run_atstart(): + '''Hook frameworks must invoke this before running the main hook body.''' + global _atstart + for callback, args, kwargs in _atstart: + callback(*args, **kwargs) + del _atstart[:] + + +def _run_atexit(): + '''Hook frameworks must invoke this after the main hook body has + successfully completed. Do not invoke it if the hook fails.''' + global _atexit + for callback, args, kwargs in reversed(_atexit): + callback(*args, **kwargs) + del _atexit[:] + + +@translate_exc(from_exc=OSError, to_exc=NotImplementedError) +def network_get_primary_address(binding): + ''' + Deprecated since Juju 2.3; use network_get() + + Retrieve the primary network address for a named binding + + :param binding: string. The name of a relation of extra-binding + :return: string. The primary IP address for the named binding + :raise: NotImplementedError if run on Juju < 2.0 + ''' + cmd = ['network-get', '--primary-address', binding] + try: + response = subprocess.check_output( + cmd, + stderr=subprocess.STDOUT).decode('UTF-8').strip() + except CalledProcessError as e: + if 'no network config found for binding' in e.output.decode('UTF-8'): + raise NoNetworkBinding("No network binding for {}" + .format(binding)) + else: + raise + return response + + +def network_get(endpoint, relation_id=None): + """ + Retrieve the network details for a relation endpoint + + :param endpoint: string. The name of a relation endpoint + :param relation_id: int. The ID of the relation for the current context. + :return: dict. The loaded YAML output of the network-get query. + :raise: NotImplementedError if request not supported by the Juju version. 
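+
+    Example usage (illustrative endpoint name)::
+
+        details = network_get('cluster')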
+ """ + if not has_juju_version('2.2'): + raise NotImplementedError(juju_version()) # earlier versions require --primary-address + if relation_id and not has_juju_version('2.3'): + raise NotImplementedError # 2.3 added the -r option + + cmd = ['network-get', endpoint, '--format', 'yaml'] + if relation_id: + cmd.append('-r') + cmd.append(relation_id) + response = subprocess.check_output( + cmd, + stderr=subprocess.STDOUT).decode('UTF-8').strip() + return yaml.safe_load(response) + + +def add_metric(*args, **kwargs): + """Add metric values. Values may be expressed with keyword arguments. For + metric names containing dashes, these may be expressed as one or more + 'key=value' positional arguments. May only be called from the collect-metrics + hook.""" + _args = ['add-metric'] + _kvpairs = [] + _kvpairs.extend(args) + _kvpairs.extend(['{}={}'.format(k, v) for k, v in kwargs.items()]) + _args.extend(sorted(_kvpairs)) + try: + subprocess.check_call(_args) + return + except EnvironmentError as e: + if e.errno != errno.ENOENT: + raise + log_message = 'add-metric failed: {}'.format(' '.join(_kvpairs)) + log(log_message, level='INFO') + + +def meter_status(): + """Get the meter status, if running in the meter-status-changed hook.""" + return os.environ.get('JUJU_METER_STATUS') + + +def meter_info(): + """Get the meter status information, if running in the meter-status-changed + hook.""" + return os.environ.get('JUJU_METER_INFO') + + +def iter_units_for_relation_name(relation_name): + """Iterate through all units in a relation + + Generator that iterates through all the units in a relation and yields + a named tuple with rid and unit field names. + + Usage: + data = [(u.rid, u.unit) + for u in iter_units_for_relation_name(relation_name)] + + :param relation_name: string relation name + :yield: Named Tuple with rid and unit field names + """ + RelatedUnit = namedtuple('RelatedUnit', 'rid, unit') + for rid in relation_ids(relation_name): + for unit in related_units(rid): + yield RelatedUnit(rid, unit) + + +def ingress_address(rid=None, unit=None): + """ + Retrieve the ingress-address from a relation when available. + Otherwise, return the private-address. + + When used on the consuming side of the relation (unit is a remote + unit), the ingress-address is the IP address that this unit needs + to use to reach the provided service on the remote unit. + + When used on the providing side of the relation (unit == local_unit()), + the ingress-address is the IP address that is advertised to remote + units on this relation. Remote units need to use this address to + reach the local provided service on this unit. + + Note that charms may document some other method to use in + preference to the ingress_address(), such as an address provided + on a different relation attribute or a service discovery mechanism. + This allows charms to redirect inbound connections to their peers + or different applications such as load balancers. + + Usage: + addresses = [ingress_address(rid=u.rid, unit=u.unit) + for u in iter_units_for_relation_name(relation_name)] + + :param rid: string relation id + :param unit: string unit name + :side effect: calls relation_get + :return: string IP address + """ + settings = relation_get(rid=rid, unit=unit) + return (settings.get('ingress-address') or + settings.get('private-address')) + + +def egress_subnets(rid=None, unit=None): + """ + Retrieve the egress-subnets from a relation. 
+ + This function is to be used on the providing side of the + relation, and provides the ranges of addresses that client + connections may come from. The result is uninteresting on + the consuming side of a relation (unit == local_unit()). + + Returns a stable list of subnets in CIDR format. + eg. ['192.168.1.0/24', '2001::F00F/128'] + + If egress-subnets is not available, falls back to using the published + ingress-address, or finally private-address. + + :param rid: string relation id + :param unit: string unit name + :side effect: calls relation_get + :return: list of subnets in CIDR format. eg. ['192.168.1.0/24', '2001::F00F/128'] + """ + def _to_range(addr): + if re.search(r'^(?:\d{1,3}\.){3}\d{1,3}$', addr) is not None: + addr += '/32' + elif ':' in addr and '/' not in addr: # IPv6 + addr += '/128' + return addr + + settings = relation_get(rid=rid, unit=unit) + if 'egress-subnets' in settings: + return [n.strip() for n in settings['egress-subnets'].split(',') if n.strip()] + if 'ingress-address' in settings: + return [_to_range(settings['ingress-address'])] + if 'private-address' in settings: + return [_to_range(settings['private-address'])] + return [] # Should never happen + + +def unit_doomed(unit=None): + """Determines if the unit is being removed from the model + + Requires Juju 2.4.1. + + :param unit: string unit name, defaults to local_unit + :side effect: calls goal_state + :side effect: calls local_unit + :side effect: calls has_juju_version + :return: True if the unit is being removed, already gone, or never existed + """ + if not has_juju_version("2.4.1"): + # We cannot risk blindly returning False for 'we don't know', + # because that could cause data loss; if call sites don't + # need an accurate answer, they likely don't need this helper + # at all. + # goal-state existed in 2.4.0, but did not handle removals + # correctly until 2.4.1. + raise NotImplementedError("is_doomed") + if unit is None: + unit = local_unit() + gs = goal_state() + units = gs.get('units', {}) + if unit not in units: + return True + # I don't think 'dead' units ever show up in the goal-state, but + # check anyway in addition to 'dying'. + return units[unit]['status'] in ('dying', 'dead') + + +def env_proxy_settings(selected_settings=None): + """Get proxy settings from process environment variables. + + Get charm proxy settings from environment variables that correspond to + juju-http-proxy, juju-https-proxy juju-no-proxy (available as of 2.4.2, see + lp:1782236) and juju-ftp-proxy in a format suitable for passing to an + application that reacts to proxy settings passed as environment variables. + Some applications support lowercase or uppercase notation (e.g. curl), some + support only lowercase (e.g. wget), there are also subjectively rare cases + of only uppercase notation support. no_proxy CIDR and wildcard support also + varies between runtimes and applications as there is no enforced standard. + + Some applications may connect to multiple destinations and expose config + options that would affect only proxy settings for a specific destination + these should be handled in charms in an application-specific manner. 
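+
+    Example usage (illustrative; returned keys depend on the environment)::
+
+        proxy_env = env_proxy_settings(['http', 'https'])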
+
+    :param selected_settings: format only a subset of possible settings
+    :type selected_settings: list
+    :rtype: Optional[Dict[str, str]]
+    """
+    SUPPORTED_SETTINGS = {
+        'http': 'HTTP_PROXY',
+        'https': 'HTTPS_PROXY',
+        'no_proxy': 'NO_PROXY',
+        'ftp': 'FTP_PROXY'
+    }
+    if selected_settings is None:
+        selected_settings = SUPPORTED_SETTINGS
+
+    selected_vars = [v for k, v in SUPPORTED_SETTINGS.items()
+                     if k in selected_settings]
+    proxy_settings = {}
+    for var in selected_vars:
+        var_val = os.getenv(var)
+        if var_val:
+            proxy_settings[var] = var_val
+            proxy_settings[var.lower()] = var_val
+        # Now handle juju-prefixed environment variables. The legacy vs new
+        # environment variable usage is mutually exclusive.
+        charm_var_val = os.getenv('JUJU_CHARM_{}'.format(var))
+        if charm_var_val:
+            proxy_settings[var] = charm_var_val
+            proxy_settings[var.lower()] = charm_var_val
+    if 'no_proxy' in proxy_settings:
+        if _contains_range(proxy_settings['no_proxy']):
+            log(RANGE_WARNING, level=WARNING)
+    return proxy_settings if proxy_settings else None
+
+
+def _contains_range(addresses):
+    """Check for cidr or wildcard domain in a string.
+
+    Given a string comprising a comma-separated list of ip addresses
+    and domain names, determine whether the string contains IP ranges
+    or wildcard domains.
+
+    :param addresses: comma-separated list of domains and ip addresses.
+    :type addresses: str
+    """
+    return (
+        # Test for cidr (e.g. 10.20.20.0/24)
+        "/" in addresses or
+        # Test for wildcard domains (*.foo.com or .foo.com)
+        "*" in addresses or
+        addresses.startswith(".") or
+        ",." in addresses or
+        " ." in addresses)
+
+
+def is_subordinate():
+    """Check whether charm is subordinate in unit metadata.
+
+    :returns: True if unit is subordinate, False otherwise.
+    :rtype: bool
+    """
+    return metadata().get('subordinate') is True
diff --git a/ceph-radosgw/hooks/charmhelpers/core/host.py b/ceph-radosgw/hooks/charmhelpers/core/host.py
new file mode 100644
index 00000000..def403c5
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/core/host.py
@@ -0,0 +1,1309 @@
+# Copyright 2014-2021 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tools for working with the host system"""
+# Copyright 2012 Canonical Ltd.
+#
+# Authors:
+#  Nick Moffitt
+#  Matthew Wedgwood
+
+import errno
+import os
+import re
+import pwd
+import glob
+import grp
+import random
+import string
+import subprocess
+import hashlib
+import functools
+import itertools
+
+from contextlib import contextmanager
+from collections import OrderedDict, defaultdict
+from .hookenv import log, INFO, DEBUG, local_unit, charm_name
+from .fstab import Fstab
+from charmhelpers.osplatform import get_platform
+
+__platform__ = get_platform()
+if __platform__ == "ubuntu":
+    from charmhelpers.core.host_factory.ubuntu import (  # NOQA:F401
+        service_available,
+        add_new_group,
+        lsb_release,
+        cmp_pkgrevno,
+        CompareHostReleases,
+        get_distrib_codename,
+        arch
+    )  # flake8: noqa -- ignore F401 for this import
+elif __platform__ == "centos":
+    from charmhelpers.core.host_factory.centos import (  # NOQA:F401
+        service_available,
+        add_new_group,
+        lsb_release,
+        cmp_pkgrevno,
+        CompareHostReleases,
+    )  # flake8: noqa -- ignore F401 for this import
+
+UPDATEDB_PATH = '/etc/updatedb.conf'
+CA_CERT_DIR = '/usr/local/share/ca-certificates'
+
+
+def service_start(service_name, **kwargs):
+    """Start a system service.
+
+    The specified service name is managed via the system level init system.
+    Some init systems (e.g. upstart) require that additional arguments be
+    provided in order to directly control service instances whereas other init
+    systems allow for addressing instances of a service directly by name (e.g.
+    systemd).
+
+    The kwargs allow for the additional parameters to be passed to underlying
+    init systems for those systems which require/allow for them. For example,
+    the ceph-osd upstart script requires the id parameter to be passed along
+    in order to identify which running daemon should be started. The
+    following example starts the ceph-osd service for instance id=4:
+
+    service_start('ceph-osd', id=4)
+
+    :param service_name: the name of the service to start
+    :param **kwargs: additional parameters to pass to the init system when
+                     managing services. These will be passed as key=value
+                     parameters to the init system's commandline. kwargs
+                     are ignored for systemd enabled systems.
+    """
+    return service('start', service_name, **kwargs)
+
+
+def service_stop(service_name, **kwargs):
+    """Stop a system service.
+
+    The specified service name is managed via the system level init system.
+    Some init systems (e.g. upstart) require that additional arguments be
+    provided in order to directly control service instances whereas other init
+    systems allow for addressing instances of a service directly by name (e.g.
+    systemd).
+
+    The kwargs allow for the additional parameters to be passed to underlying
+    init systems for those systems which require/allow for them. For example,
+    the ceph-osd upstart script requires the id parameter to be passed along
+    in order to identify which running daemon should be stopped. The
+    following example stops the ceph-osd service for instance id=4:
+
+    service_stop('ceph-osd', id=4)
+
+    :param service_name: the name of the service to stop
+    :param **kwargs: additional parameters to pass to the init system when
+                     managing services. These will be passed as key=value
+                     parameters to the init system's commandline. kwargs
+                     are ignored for systemd enabled systems.
+    """
+    return service('stop', service_name, **kwargs)
+
+
+def service_enable(service_name, **kwargs):
+    """Enable a system service.
+
+    The specified service name is managed via the system level init system.
+    Some init systems (e.g. upstart) require that additional arguments be
+    provided in order to directly control service instances whereas other init
+    systems allow for addressing instances of a service directly by name (e.g.
+    systemd).
+
+    The kwargs allow for the additional parameters to be passed to underlying
+    init systems for those systems which require/allow for them. For example,
+    the ceph-osd upstart script requires the id parameter to be passed along
+    in order to identify which running daemon should be enabled. The
+    following example enables the ceph-osd service for instance id=4:
+
+    service_enable('ceph-osd', id=4)
+
+    :param service_name: the name of the service to enable
+    :param **kwargs: additional parameters to pass to the init system when
+                     managing services. These will be passed as key=value
+                     parameters to the init system's commandline. kwargs
+                     are ignored for init systems not allowing additional
+                     parameters via the commandline (systemd).
+    """
+    return service('enable', service_name, **kwargs)
+
+
+def service_restart(service_name, **kwargs):
+    """Restart a system service.
+
+    The specified service name is managed via the system level init system.
+    Some init systems (e.g. upstart) require that additional arguments be
+    provided in order to directly control service instances whereas other init
+    systems allow for addressing instances of a service directly by name (e.g.
+    systemd).
+
+    The kwargs allow for the additional parameters to be passed to underlying
+    init systems for those systems which require/allow for them. For example,
+    the ceph-osd upstart script requires the id parameter to be passed along
+    in order to identify which running daemon should be restarted. The
+    following example restarts the ceph-osd service for instance id=4:
+
+    service_restart('ceph-osd', id=4)
+
+    :param service_name: the name of the service to restart
+    :param **kwargs: additional parameters to pass to the init system when
+                     managing services. These will be passed as key=value
+                     parameters to the init system's commandline. kwargs
+                     are ignored for init systems not allowing additional
+                     parameters via the commandline (systemd).
+    """
+    # Pass kwargs through so instance arguments (e.g. id=4) reach init
+    # systems that accept them.
+    return service('restart', service_name, **kwargs)
+
+
+def service_reload(service_name, restart_on_failure=False, **kwargs):
+    """Reload a system service, optionally falling back to restart if
+    reload fails.
+
+    The specified service name is managed via the system level init system.
+    Some init systems (e.g. upstart) require that additional arguments be
+    provided in order to directly control service instances whereas other init
+    systems allow for addressing instances of a service directly by name (e.g.
+    systemd).
+
+    The kwargs allow for the additional parameters to be passed to underlying
+    init systems for those systems which require/allow for them. For example,
+    the ceph-osd upstart script requires the id parameter to be passed along
+    in order to identify which running daemon should be reloaded. The
+    following example reloads the ceph-osd service for instance id=4:
+
+    service_reload('ceph-osd', id=4)
+
+    :param service_name: the name of the service to reload
+    :param restart_on_failure: boolean indicating whether to fallback to a
+                               restart if the reload fails.
+    :param **kwargs: additional parameters to pass to the init system when
+                     managing services. These will be passed as key=value
+                     parameters to the init system's commandline. kwargs
+                     are ignored for init systems not allowing additional
+                     parameters via the commandline (systemd).
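+
+    A reload-or-restart sketch (the service name is a hypothetical value)::
+
+        service_reload('rsyslog', restart_on_failure=True)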
+    """
+    service_result = service('reload', service_name, **kwargs)
+    if not service_result and restart_on_failure:
+        service_result = service('restart', service_name, **kwargs)
+    return service_result
+
+
+def service_pause(service_name, init_dir="/etc/init", initd_dir="/etc/init.d",
+                  **kwargs):
+    """Pause a system service.
+
+    Stop it, and prevent it from starting again at boot.
+
+    :param service_name: the name of the service to pause
+    :param init_dir: path to the upstart init directory
+    :param initd_dir: path to the sysv init directory
+    :param **kwargs: additional parameters to pass to the init system when
+                     managing services. These will be passed as key=value
+                     parameters to the init system's commandline. kwargs
+                     are ignored for init systems which do not support
+                     key=value arguments via the commandline.
+    """
+    stopped = True
+    if service_running(service_name, **kwargs):
+        stopped = service_stop(service_name, **kwargs)
+    upstart_file = os.path.join(init_dir, "{}.conf".format(service_name))
+    sysv_file = os.path.join(initd_dir, service_name)
+    if init_is_systemd(service_name=service_name):
+        service('disable', service_name)
+        service('mask', service_name)
+    elif os.path.exists(upstart_file):
+        override_path = os.path.join(
+            init_dir, '{}.override'.format(service_name))
+        with open(override_path, 'w') as fh:
+            fh.write("manual\n")
+    elif os.path.exists(sysv_file):
+        subprocess.check_call(["update-rc.d", service_name, "disable"])
+    else:
+        raise ValueError(
+            "Unable to detect {0} as SystemD, Upstart {1} or"
+            " SysV {2}".format(
+                service_name, upstart_file, sysv_file))
+    return stopped
+
+
+def service_resume(service_name, init_dir="/etc/init",
+                   initd_dir="/etc/init.d", **kwargs):
+    """Resume a system service.
+
+    Re-enable starting again at boot. Start the service.
+
+    :param service_name: the name of the service to resume
+    :param init_dir: the path to the init dir
+    :param initd_dir: the path to the initd dir
+    :param **kwargs: additional parameters to pass to the init system when
+                     managing services. These will be passed as key=value
+                     parameters to the init system's commandline. kwargs
+                     are ignored for systemd enabled systems.
+    """
+    upstart_file = os.path.join(init_dir, "{}.conf".format(service_name))
+    sysv_file = os.path.join(initd_dir, service_name)
+    if init_is_systemd(service_name=service_name):
+        if service('is-enabled', service_name):
+            log('service {} already enabled'.format(service_name), level=DEBUG)
+        else:
+            service('unmask', service_name)
+            service('enable', service_name)
+    elif os.path.exists(upstart_file):
+        override_path = os.path.join(
+            init_dir, '{}.override'.format(service_name))
+        if os.path.exists(override_path):
+            os.unlink(override_path)
+    elif os.path.exists(sysv_file):
+        subprocess.check_call(["update-rc.d", service_name, "enable"])
+    else:
+        raise ValueError(
+            "Unable to detect {0} as SystemD, Upstart {1} or"
+            " SysV {2}".format(
+                service_name, upstart_file, sysv_file))
+    started = service_running(service_name, **kwargs)
+
+    if not started:
+        started = service_start(service_name, **kwargs)
+    return started
+
+
+def service(action, service_name=None, **kwargs):
+    """Control a system service.
+
+    :param action: the action to take on the service
+    :param service_name: the name of the service to perform the action on
+    :param **kwargs: additional params to be passed to the service command in
+                     the form of key=value.
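+
+    A usage sketch (the instance argument is a hypothetical value)::
+
+        service('status', 'ceph-osd', id=2)  # -> True if the command exits 0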
+ """ + if init_is_systemd(service_name=service_name): + cmd = ['systemctl', action] + if service_name is not None: + cmd.append(service_name) + else: + cmd = ['service', service_name, action] + for key, value in kwargs.items(): + parameter = '%s=%s' % (key, value) + cmd.append(parameter) + return subprocess.call(cmd) == 0 + + +_UPSTART_CONF = "/etc/init/{}.conf" +_INIT_D_CONF = "/etc/init.d/{}" + + +def service_running(service_name, **kwargs): + """Determine whether a system service is running. + + :param service_name: the name of the service + :param **kwargs: additional args to pass to the service command. This is + used to pass additional key=value arguments to the + service command line for managing specific instance + units (e.g. service ceph-osd status id=2). The kwargs + are ignored in systemd services. + """ + if init_is_systemd(service_name=service_name): + return service('is-active', service_name) + else: + if os.path.exists(_UPSTART_CONF.format(service_name)): + try: + cmd = ['status', service_name] + for key, value in kwargs.items(): + parameter = '%s=%s' % (key, value) + cmd.append(parameter) + output = subprocess.check_output( + cmd, stderr=subprocess.STDOUT).decode('UTF-8') + except subprocess.CalledProcessError: + return False + else: + # This works for upstart scripts where the 'service' command + # returns a consistent string to represent running + # 'start/running' + if ("start/running" in output or + "is running" in output or + "up and running" in output): + return True + elif os.path.exists(_INIT_D_CONF.format(service_name)): + # Check System V scripts init script return codes + return service('status', service_name) + return False + + +SYSTEMD_SYSTEM = '/run/systemd/system' + + +def init_is_systemd(service_name=None): + """ + Returns whether the host uses systemd for the specified service. + + @param Optional[str] service_name: specific name of service + """ + if str(service_name).startswith("snap."): + return True + if lsb_release()['DISTRIB_CODENAME'] == 'trusty': + return False + return os.path.isdir(SYSTEMD_SYSTEM) + + +def adduser(username, password=None, shell='/bin/bash', + system_user=False, primary_group=None, + secondary_groups=None, uid=None, home_dir=None): + """Add a user to the system. + + Will log but otherwise succeed if the user already exists. 
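+
+    A usage sketch (all values are hypothetical)::
+
+        adduser('radosgw', system_user=True, home_dir='/var/lib/radosgw')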
+ + :param str username: Username to create + :param str password: Password for user; if ``None``, create a system user + :param str shell: The default shell for the user + :param bool system_user: Whether to create a login or system user + :param str primary_group: Primary group for user; defaults to username + :param list secondary_groups: Optional list of additional groups + :param int uid: UID for user being created + :param str home_dir: Home directory for user + + :returns: The password database entry struct, as returned by `pwd.getpwnam` + """ + try: + user_info = pwd.getpwnam(username) + log('user {0} already exists!'.format(username)) + if uid: + user_info = pwd.getpwuid(int(uid)) + log('user with uid {0} already exists!'.format(uid)) + except KeyError: + log('creating user {0}'.format(username)) + cmd = ['useradd'] + if uid: + cmd.extend(['--uid', str(uid)]) + if home_dir: + cmd.extend(['--home', str(home_dir)]) + if system_user or password is None: + cmd.append('--system') + else: + cmd.extend([ + '--create-home', + '--shell', shell, + '--password', password, + ]) + if not primary_group: + try: + grp.getgrnam(username) + primary_group = username # avoid "group exists" error + except KeyError: + pass + if primary_group: + cmd.extend(['-g', primary_group]) + if secondary_groups: + cmd.extend(['-G', ','.join(secondary_groups)]) + cmd.append(username) + subprocess.check_call(cmd) + user_info = pwd.getpwnam(username) + return user_info + + +def user_exists(username): + """Check if a user exists""" + try: + pwd.getpwnam(username) + user_exists = True + except KeyError: + user_exists = False + return user_exists + + +def uid_exists(uid): + """Check if a uid exists""" + try: + pwd.getpwuid(uid) + uid_exists = True + except KeyError: + uid_exists = False + return uid_exists + + +def group_exists(groupname): + """Check if a group exists""" + try: + grp.getgrnam(groupname) + group_exists = True + except KeyError: + group_exists = False + return group_exists + + +def gid_exists(gid): + """Check if a gid exists""" + try: + grp.getgrgid(gid) + gid_exists = True + except KeyError: + gid_exists = False + return gid_exists + + +def add_group(group_name, system_group=False, gid=None): + """Add a group to the system + + Will log but otherwise succeed if the group already exists. + + :param str group_name: group to create + :param bool system_group: Create system group + :param int gid: GID for user being created + + :returns: The password database entry struct, as returned by `grp.getgrnam` + """ + try: + group_info = grp.getgrnam(group_name) + log('group {0} already exists!'.format(group_name)) + if gid: + group_info = grp.getgrgid(gid) + log('group with gid {0} already exists!'.format(gid)) + except KeyError: + log('creating group {0}'.format(group_name)) + add_new_group(group_name, system_group, gid) + group_info = grp.getgrnam(group_name) + return group_info + + +def add_user_to_group(username, group): + """Add a user to a group""" + cmd = ['gpasswd', '-a', username, group] + log("Adding user {} to group {}".format(username, group)) + subprocess.check_call(cmd) + + +def chage(username, lastday=None, expiredate=None, inactive=None, + mindays=None, maxdays=None, root=None, warndays=None): + """Change user password expiry information + + :param str username: User to update + :param str lastday: Set when password was changed in YYYY-MM-DD format + :param str expiredate: Set when user's account will no longer be + accessible in YYYY-MM-DD format. + -1 will remove an account expiration date. 
+    :param str inactive: Set the number of days of inactivity after a password
+                         has expired before the account is locked.
+                         -1 will remove an account's inactivity.
+    :param str mindays: Set the minimum number of days between password
+                        changes to MIN_DAYS.
+                        0 indicates the password can be changed anytime.
+    :param str maxdays: Set the maximum number of days during which a
+                        password is valid.
+                        -1 as MAX_DAYS will remove checking maxdays
+    :param str root: Apply changes in the CHROOT_DIR directory
+    :param str warndays: Set the number of days of warning before a password
+                         change is required
+    :raises subprocess.CalledProcessError: if call to chage fails
+    """
+    cmd = ['chage']
+    if root:
+        cmd.extend(['--root', root])
+    if lastday:
+        cmd.extend(['--lastday', lastday])
+    if expiredate:
+        cmd.extend(['--expiredate', expiredate])
+    if inactive:
+        cmd.extend(['--inactive', inactive])
+    if mindays:
+        cmd.extend(['--mindays', mindays])
+    if maxdays:
+        cmd.extend(['--maxdays', maxdays])
+    if warndays:
+        cmd.extend(['--warndays', warndays])
+    cmd.append(username)
+    subprocess.check_call(cmd)
+
+
+remove_password_expiry = functools.partial(chage, expiredate='-1', inactive='-1', mindays='0', maxdays='-1')
+
+
+def rsync(from_path, to_path, flags='-r', options=None, timeout=None):
+    """Replicate the contents of a path"""
+    options = options or ['--delete', '--executability']
+    cmd = ['/usr/bin/rsync', flags]
+    if timeout:
+        cmd = ['timeout', str(timeout)] + cmd
+    cmd.extend(options)
+    cmd.append(from_path)
+    cmd.append(to_path)
+    log(" ".join(cmd))
+    return subprocess.check_output(cmd, stderr=subprocess.STDOUT).decode('UTF-8').strip()
+
+
+def symlink(source, destination):
+    """Create a symbolic link"""
+    log("Symlinking {} as {}".format(source, destination))
+    cmd = [
+        'ln',
+        '-sf',
+        source,
+        destination,
+    ]
+    subprocess.check_call(cmd)
+
+
+def mkdir(path, owner='root', group='root', perms=0o555, force=False):
+    """Create a directory"""
+    log("Making dir {} {}:{} {:o}".format(path, owner, group,
+                                          perms))
+    uid = pwd.getpwnam(owner).pw_uid
+    gid = grp.getgrnam(group).gr_gid
+    realpath = os.path.abspath(path)
+    path_exists = os.path.exists(realpath)
+    if path_exists and force:
+        if not os.path.isdir(realpath):
+            log("Removing non-directory file {} prior to mkdir()".format(path))
+            os.unlink(realpath)
+            os.makedirs(realpath, perms)
+    elif not path_exists:
+        os.makedirs(realpath, perms)
+    os.chown(realpath, uid, gid)
+    os.chmod(realpath, perms)
+
+
+def write_file(path, content, owner='root', group='root', perms=0o444):
+    """Create or overwrite a file with the contents of a byte string."""
+    uid = pwd.getpwnam(owner).pw_uid
+    gid = grp.getgrnam(group).gr_gid
+    # Let's see if we can grab the file and compare the content, to avoid
+    # doing a write.
+    existing_content = None
+    existing_uid, existing_gid, existing_perms = None, None, None
+    try:
+        with open(path, 'rb') as target:
+            existing_content = target.read()
+        stat = os.stat(path)
+        existing_uid, existing_gid, existing_perms = (
+            stat.st_uid, stat.st_gid, stat.st_mode
+        )
+    except Exception:
+        pass
+    if content != existing_content:
+        log("Writing file {} {}:{} {:o}".format(path, owner, group, perms),
+            level=DEBUG)
+        with open(path, 'wb') as target:
+            os.fchown(target.fileno(), uid, gid)
+            os.fchmod(target.fileno(), perms)
+            if isinstance(content, str):
+                content = content.encode('UTF-8')
+            target.write(content)
+        return
+    # The contents were the same, but we might still need to change the
+    # ownership or permissions.
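+    # Note: os.chown with -1 for uid or gid leaves that attribute unchanged,
+    # so each check below adjusts only the attribute that differs.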
+    if existing_uid != uid:
+        log("Changing uid on already existing content: {} -> {}"
+            .format(existing_uid, uid), level=DEBUG)
+        os.chown(path, uid, -1)
+    if existing_gid != gid:
+        log("Changing gid on already existing content: {} -> {}"
+            .format(existing_gid, gid), level=DEBUG)
+        os.chown(path, -1, gid)
+    if existing_perms != perms:
+        log("Changing permissions on existing content: {} -> {}"
+            .format(existing_perms, perms), level=DEBUG)
+        os.chmod(path, perms)
+
+
+def fstab_remove(mp):
+    """Remove the given mountpoint entry from /etc/fstab"""
+    return Fstab.remove_by_mountpoint(mp)
+
+
+def fstab_add(dev, mp, fs, options=None):
+    """Adds the given device entry to the /etc/fstab file"""
+    return Fstab.add(dev, mp, fs, options=options)
+
+
+def mount(device, mountpoint, options=None, persist=False, filesystem="ext3"):
+    """Mount a filesystem at a particular mountpoint"""
+    cmd_args = ['mount']
+    if options is not None:
+        cmd_args.extend(['-o', options])
+    cmd_args.extend([device, mountpoint])
+    try:
+        subprocess.check_output(cmd_args)
+    except subprocess.CalledProcessError as e:
+        log('Error mounting {} at {}\n{}'.format(device, mountpoint, e.output))
+        return False
+
+    if persist:
+        return fstab_add(device, mountpoint, filesystem, options=options)
+    return True
+
+
+def umount(mountpoint, persist=False):
+    """Unmount a filesystem"""
+    cmd_args = ['umount', mountpoint]
+    try:
+        subprocess.check_output(cmd_args)
+    except subprocess.CalledProcessError as e:
+        log('Error unmounting {}\n{}'.format(mountpoint, e.output))
+        return False
+
+    if persist:
+        return fstab_remove(mountpoint)
+    return True
+
+
+def mounts():
+    """Get a list of all mounted volumes as [[mountpoint,device],[...]]"""
+    with open('/proc/mounts') as f:
+        # [['/mount/point','/dev/path'],[...]]
+        system_mounts = [m[1::-1] for m in [l.strip().split()
+                                            for l in f.readlines()]]
+    return system_mounts
+
+
+def fstab_mount(mountpoint):
+    """Mount filesystem using fstab"""
+    cmd_args = ['mount', mountpoint]
+    try:
+        subprocess.check_output(cmd_args)
+    except subprocess.CalledProcessError as e:
+        log('Error mounting {}\n{}'.format(mountpoint, e.output))
+        return False
+    return True
+
+
+def file_hash(path, hash_type='md5'):
+    """Generate a hash checksum of the contents of 'path' or None if not found.
+
+    :param str hash_type: Any hash algorithm supported by :mod:`hashlib`,
+                          such as md5, sha1, sha256, sha512, etc.
+    """
+    if os.path.exists(path):
+        h = getattr(hashlib, hash_type)()
+        with open(path, 'rb') as source:
+            h.update(source.read())
+        return h.hexdigest()
+    else:
+        return None
+
+
+def path_hash(path):
+    """Generate a hash checksum of all files matching 'path'. Standard
+    wildcards like '*' and '?' are supported, see documentation for the 'glob'
+    module for more information.
+
+    :return: dict: A { filename: hash } dictionary for all matched files.
+                   Empty if none found.
+    """
+    return {
+        filename: file_hash(filename)
+        for filename in glob.iglob(path)
+    }
+
+
+def check_hash(path, checksum, hash_type='md5'):
+    """Validate a file using a cryptographic checksum.
+
+    :param str checksum: Value of the checksum used to validate the file.
+    :param str hash_type: Hash algorithm used to generate `checksum`.
+                          Can be any hash algorithm supported by :mod:`hashlib`,
+                          such as md5, sha1, sha256, sha512, etc.
+    :raises ChecksumError: If the file fails the checksum
+
+    """
+    actual_checksum = file_hash(path, hash_type)
+    if checksum != actual_checksum:
+        raise ChecksumError("'%s' != '%s'" % (checksum, actual_checksum))
+
+
+class ChecksumError(ValueError):
+    """A class derived from ValueError to indicate the checksum failed."""
+    pass
+
+
+class restart_on_change(object):
+    """Decorator and context manager to handle restarts.
+
+    Usage:
+
+       @restart_on_change(restart_map, ...)
+       def function_that_might_trigger_a_restart(...)
+           ...
+
+    Or:
+
+       with restart_on_change(restart_map, ...):
+           do_stuff_that_might_trigger_a_restart()
+           ...
+    """
+
+    def __init__(self, restart_map, stopstart=False, restart_functions=None,
+                 can_restart_now_f=None, post_svc_restart_f=None,
+                 pre_restarts_wait_f=None):
+        """
+        :param restart_map: {file: [service, ...]}
+        :type restart_map: Dict[str, List[str,]]
+        :param stopstart: whether to stop, start or restart a service
+        :type stopstart: boolean
+        :param restart_functions: nonstandard functions to use to restart
+                                  services {svc: func, ...}
+        :type restart_functions: Dict[str, Callable[[str], None]]
+        :param can_restart_now_f: A function used to check if the restart is
+                                  permitted.
+        :type can_restart_now_f: Callable[[str, List[str]], boolean]
+        :param post_svc_restart_f: A function run after a service has
+                                   restarted.
+        :type post_svc_restart_f: Callable[[str], None]
+        :param pre_restarts_wait_f: A function called before any restarts.
+        :type pre_restarts_wait_f: Callable[None, None]
+        """
+        self.restart_map = restart_map
+        self.stopstart = stopstart
+        self.restart_functions = restart_functions
+        self.can_restart_now_f = can_restart_now_f
+        self.post_svc_restart_f = post_svc_restart_f
+        self.pre_restarts_wait_f = pre_restarts_wait_f
+
+    def __call__(self, f):
+        """Work like a decorator.
+
+        Returns a wrapped function that performs the restart if triggered.
+
+        :param f: The function that is being wrapped.
+        :type f: Callable[[Any], Any]
+        :returns: the wrapped function
+        :rtype: Callable[[Any], Any]
+        """
+        @functools.wraps(f)
+        def wrapped_f(*args, **kwargs):
+            return restart_on_change_helper(
+                (lambda: f(*args, **kwargs)),
+                self.restart_map,
+                stopstart=self.stopstart,
+                restart_functions=self.restart_functions,
+                can_restart_now_f=self.can_restart_now_f,
+                post_svc_restart_f=self.post_svc_restart_f,
+                pre_restarts_wait_f=self.pre_restarts_wait_f)
+        return wrapped_f
+
+    def __enter__(self):
+        """Enter the runtime context related to this object."""
+        self.checksums = _pre_restart_on_change_helper(self.restart_map)
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Exit the runtime context related to this object.
+
+        The parameters describe the exception that caused the context to be
+        exited. If the context was exited without an exception, all three
+        arguments will be None.
+        """
+        if exc_type is None:
+            _post_restart_on_change_helper(
+                self.checksums,
+                self.restart_map,
+                stopstart=self.stopstart,
+                restart_functions=self.restart_functions,
+                can_restart_now_f=self.can_restart_now_f,
+                post_svc_restart_f=self.post_svc_restart_f,
+                pre_restarts_wait_f=self.pre_restarts_wait_f)
+        # All is good, so return False; any exceptions will propagate.
+        return False
+
+
+def restart_on_change_helper(lambda_f, restart_map, stopstart=False,
+                             restart_functions=None,
+                             can_restart_now_f=None,
+                             post_svc_restart_f=None,
+                             pre_restarts_wait_f=None):
+    """Helper function to perform the restart_on_change function.
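+
+    A usage sketch (`write_config` and the paths are hypothetical names)::
+
+        result = restart_on_change_helper(
+            lambda: write_config('/etc/demo.conf'),
+            {'/etc/demo.conf': ['demo-service']})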
+
+    This is provided for decorators to restart services if files described
+    in the restart_map have changed after an invocation of lambda_f().
+
+    This function allows for a number of helper functions to be passed.
+
+    `restart_functions` is a map with a service as the key and the
+    corresponding value being the function to call to restart the service. For
+    example if `restart_functions={'some-service': my_restart_func}` then
+    `my_restart_func` should be a function which takes one argument which is
+    the service name to be restarted.
+
+    `can_restart_now_f` is a function which checks that a restart is permitted.
+    It should return a bool which indicates if a restart is allowed and should
+    take a service name (str) and a list of changed files (List[str]) as
+    arguments.
+
+    `post_svc_restart_f` is a function which runs after a service has been
+    restarted. It takes the service name that was restarted as an argument.
+
+    `pre_restarts_wait_f` is a function which is called before any restarts
+    occur. The use case for this is an application which wants to try and
+    stagger restarts between units.
+
+    :param lambda_f: function to call.
+    :type lambda_f: Callable[[], ANY]
+    :param restart_map: {file: [service, ...]}
+    :type restart_map: Dict[str, List[str,]]
+    :param stopstart: whether to stop, start or restart a service
+    :type stopstart: boolean
+    :param restart_functions: nonstandard functions to use to restart services
+                              {svc: func, ...}
+    :type restart_functions: Dict[str, Callable[[str], None]]
+    :param can_restart_now_f: A function used to check if the restart is
+                              permitted.
+    :type can_restart_now_f: Callable[[str, List[str]], boolean]
+    :param post_svc_restart_f: A function run after a service has
+                               restarted.
+    :type post_svc_restart_f: Callable[[str], None]
+    :param pre_restarts_wait_f: A function called before any restarts.
+    :type pre_restarts_wait_f: Callable[None, None]
+    :returns: result of lambda_f()
+    :rtype: ANY
+    """
+    checksums = _pre_restart_on_change_helper(restart_map)
+    r = lambda_f()
+    _post_restart_on_change_helper(checksums,
+                                   restart_map,
+                                   stopstart,
+                                   restart_functions,
+                                   can_restart_now_f,
+                                   post_svc_restart_f,
+                                   pre_restarts_wait_f)
+    return r
+
+
+def _pre_restart_on_change_helper(restart_map):
+    """Take a snapshot of file hashes.
+
+    :param restart_map: {file: [service, ...]}
+    :type restart_map: Dict[str, List[str,]]
+    :returns: Dictionary of file paths and the files checksum.
+    :rtype: Dict[str, str]
+    """
+    return {path: path_hash(path) for path in restart_map}
+
+
+def _post_restart_on_change_helper(checksums,
+                                   restart_map,
+                                   stopstart=False,
+                                   restart_functions=None,
+                                   can_restart_now_f=None,
+                                   post_svc_restart_f=None,
+                                   pre_restarts_wait_f=None):
+    """Check whether files have changed.
+
+    :param checksums: Dictionary of file paths and the files checksum.
+    :type checksums: Dict[str, str]
+    :param restart_map: {file: [service, ...]}
+    :type restart_map: Dict[str, List[str,]]
+    :param stopstart: whether to stop, start or restart a service
+    :type stopstart: boolean
+    :param restart_functions: nonstandard functions to use to restart services
+                              {svc: func, ...}
+    :type restart_functions: Dict[str, Callable[[str], None]]
+    :param can_restart_now_f: A function used to check if the restart is
+                              permitted.
+    :type can_restart_now_f: Callable[[str, List[str]], boolean]
+    :param post_svc_restart_f: A function run after a service has
+                               restarted.
+    :type post_svc_restart_f: Callable[[str], None]
+    :param pre_restarts_wait_f: A function called before any restarts.
+    :type pre_restarts_wait_f: Callable[None, None]
+    """
+    if restart_functions is None:
+        restart_functions = {}
+    changed_files = defaultdict(list)
+    restarts = []
+    # create a list of lists of the services to restart
+    for path, services in restart_map.items():
+        if path_hash(path) != checksums[path]:
+            restarts.append(services)
+            for svc in services:
+                changed_files[svc].append(path)
+    # create a flat list of ordered services without duplicates from lists
+    services_list = list(OrderedDict.fromkeys(itertools.chain(*restarts)))
+    if services_list:
+        if pre_restarts_wait_f:
+            pre_restarts_wait_f()
+        actions = ('stop', 'start') if stopstart else ('restart',)
+        for service_name in services_list:
+            if can_restart_now_f:
+                if not can_restart_now_f(service_name,
+                                         changed_files[service_name]):
+                    continue
+            if service_name in restart_functions:
+                restart_functions[service_name](service_name)
+            else:
+                for action in actions:
+                    service(action, service_name)
+            if post_svc_restart_f:
+                post_svc_restart_f(service_name)
+
+
+def pwgen(length=None):
+    """Generate a random password."""
+    if length is None:
+        # It's fine to pick a random length with a weak PRNG.
+        length = random.choice(range(35, 45))
+    alphanumeric_chars = [
+        l for l in (string.ascii_letters + string.digits)
+        if l not in 'l0QD1vAEIOUaeiou']
+    # Use a crypto-friendly PRNG (e.g. /dev/urandom) for making the
+    # actual password
+    random_generator = random.SystemRandom()
+    random_chars = [
+        random_generator.choice(alphanumeric_chars) for _ in range(length)]
+    return ''.join(random_chars)
+
+
+def is_phy_iface(interface):
+    """Returns True if interface is not virtual, otherwise False."""
+    if interface:
+        sys_net = '/sys/class/net'
+        if os.path.isdir(sys_net):
+            for iface in glob.glob(os.path.join(sys_net, '*')):
+                if '/virtual/' in os.path.realpath(iface):
+                    continue
+
+                if interface == os.path.basename(iface):
+                    return True
+
+    return False
+
+
+def get_bond_master(interface):
+    """Returns bond master if interface is bond slave otherwise None.
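+
+    Example (interface names are hypothetical)::
+
+        get_bond_master('eth0')  # -> 'bond0' if eth0 is enslaved, else None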
+ + NOTE: the provided interface is expected to be physical + """ + if interface: + iface_path = '/sys/class/net/%s' % (interface) + if os.path.exists(iface_path): + if '/virtual/' in os.path.realpath(iface_path): + return None + + master = os.path.join(iface_path, 'master') + if os.path.exists(master): + master = os.path.realpath(master) + # make sure it is a bond master + if os.path.exists(os.path.join(master, 'bonding')): + return os.path.basename(master) + + return None + + +def list_nics(nic_type=None): + """Return a list of nics of given type(s)""" + if isinstance(nic_type, str): + int_types = [nic_type] + else: + int_types = nic_type + + interfaces = [] + if nic_type: + for int_type in int_types: + cmd = ['ip', 'addr', 'show', 'label', int_type + '*'] + ip_output = subprocess.check_output( + cmd).decode('UTF-8', errors='replace') + ip_output = ip_output.split('\n') + ip_output = (line for line in ip_output if line) + for line in ip_output: + if line.split()[1].startswith(int_type): + matched = re.search('.*: (' + int_type + + r'[0-9]+\.[0-9]+)@.*', line) + if matched: + iface = matched.groups()[0] + else: + iface = line.split()[1].replace(":", "") + + if iface not in interfaces: + interfaces.append(iface) + else: + cmd = ['ip', 'a'] + ip_output = subprocess.check_output( + cmd).decode('UTF-8', errors='replace').split('\n') + ip_output = (line.strip() for line in ip_output if line) + + key = re.compile(r'^[0-9]+:\s+(.+):') + for line in ip_output: + matched = re.search(key, line) + if matched: + iface = matched.group(1) + iface = iface.partition("@")[0] + if iface not in interfaces: + interfaces.append(iface) + + return interfaces + + +def set_nic_mtu(nic, mtu): + """Set the Maximum Transmission Unit (MTU) on a network interface.""" + cmd = ['ip', 'link', 'set', nic, 'mtu', mtu] + subprocess.check_call(cmd) + + +def get_nic_mtu(nic): + """Return the Maximum Transmission Unit (MTU) for a network interface.""" + cmd = ['ip', 'addr', 'show', nic] + ip_output = subprocess.check_output( + cmd).decode('UTF-8', errors='replace').split('\n') + mtu = "" + for line in ip_output: + words = line.split() + if 'mtu' in words: + mtu = words[words.index("mtu") + 1] + return mtu + + +def get_nic_hwaddr(nic): + """Return the Media Access Control (MAC) for a network interface.""" + cmd = ['ip', '-o', '-0', 'addr', 'show', nic] + ip_output = subprocess.check_output(cmd).decode('UTF-8', errors='replace') + hwaddr = "" + words = ip_output.split() + if 'link/ether' in words: + hwaddr = words[words.index('link/ether') + 1] + return hwaddr + + +@contextmanager +def chdir(directory): + """Change the current working directory to a different directory for a code + block and return the previous directory after the block exits. Useful to + run commands from a specified directory. + + :param str directory: The directory path to change to for this context. + """ + cur = os.getcwd() + try: + yield os.chdir(directory) + finally: + os.chdir(cur) + + +def chownr(path, owner, group, follow_links=True, chowntopdir=False): + """Recursively change user and group ownership of files and directories + in given path. Doesn't chown path itself by default, only its children. + + :param str path: The string path to start changing ownership. + :param str owner: The owner string to use when looking up the uid. + :param str group: The group string to use when looking up the gid. 
+    :param bool follow_links: Also follow and chown links if True
+    :param bool chowntopdir: Also chown path itself if True
+    """
+    uid = pwd.getpwnam(owner).pw_uid
+    gid = grp.getgrnam(group).gr_gid
+    if follow_links:
+        chown = os.chown
+    else:
+        chown = os.lchown
+
+    if chowntopdir:
+        broken_symlink = os.path.lexists(path) and not os.path.exists(path)
+        if not broken_symlink:
+            chown(path, uid, gid)
+    for root, dirs, files in os.walk(path, followlinks=follow_links):
+        for name in dirs + files:
+            full = os.path.join(root, name)
+            try:
+                chown(full, uid, gid)
+            except (IOError, OSError) as e:
+                # Intended to ignore "file not found" only; anything else
+                # is unexpected and should propagate.
+                if e.errno != errno.ENOENT:
+                    raise
+
+
+def lchownr(path, owner, group):
+    """Recursively change user and group ownership of files and directories
+    in a given path, not following symbolic links. See the documentation for
+    'os.lchown' for more information.
+
+    :param str path: The string path to start changing ownership.
+    :param str owner: The owner string to use when looking up the uid.
+    :param str group: The group string to use when looking up the gid.
+    """
+    chownr(path, owner, group, follow_links=False)
+
+
+def owner(path):
+    """Returns a tuple containing the username & groupname owning the path.
+
+    :param str path: the string path to retrieve the ownership
+    :return tuple(str, str): A (username, groupname) tuple containing the
+                             name of the user and group owning the path.
+    :raises OSError: if the specified path does not exist
+    """
+    stat = os.stat(path)
+    username = pwd.getpwuid(stat.st_uid)[0]
+    groupname = grp.getgrgid(stat.st_gid)[0]
+    return username, groupname
+
+
+def get_total_ram():
+    """The total amount of system RAM in bytes.
+
+    This is what is reported by the OS, and may be overcommitted when
+    there are multiple containers hosted on the same machine.
+    """
+    with open('/proc/meminfo', 'r') as f:
+        for line in f.readlines():
+            if line:
+                key, value, unit = line.split()
+                if key == 'MemTotal:':
+                    assert unit == 'kB', 'Unknown unit'
+                    return int(value) * 1024  # Classic, not KiB.
+        raise NotImplementedError()
+
+
+UPSTART_CONTAINER_TYPE = '/run/container_type'
+
+
+def is_container():
+    """Determine whether unit is running in a container
+
+    @return: boolean indicating if unit is in a container
+    """
+    if init_is_systemd():
+        # Detect using systemd-detect-virt
+        return subprocess.call(['systemd-detect-virt',
+                                '--container']) == 0
+    else:
+        # Detect using upstart container file marker
+        return os.path.exists(UPSTART_CONTAINER_TYPE)
+
+
+def add_to_updatedb_prunepath(path, updatedb_path=UPDATEDB_PATH):
+    """Adds the specified path to the mlocate's updatedb.conf PRUNEPATHS list.
+
+    This method has no effect if the path specified by updatedb_path does not
+    exist or is not a file.
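+
+    Example (assuming mlocate's updatedb.conf is present; the path is a
+    hypothetical value)::
+
+        add_to_updatedb_prunepath('/srv/backups')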
+
+    @param path: string the path to add to the updatedb.conf PRUNEPATHS value
+    @param updatedb_path: the path to the updatedb.conf file
+    """
+    if not os.path.exists(updatedb_path) or os.path.isdir(updatedb_path):
+        # If the updatedb.conf file doesn't exist then don't attempt to update
+        # the file as the package providing mlocate may not be installed on
+        # the local system
+        return
+
+    with open(updatedb_path, 'r+') as f_id:
+        updatedb_text = f_id.read()
+        output = updatedb(updatedb_text, path)
+        f_id.seek(0)
+        f_id.write(output)
+        f_id.truncate()
+
+
+def updatedb(updatedb_text, new_path):
+    """Add new_path to the PRUNEPATHS line in the given updatedb.conf text."""
+    lines = [line for line in updatedb_text.split("\n")]
+    for i, line in enumerate(lines):
+        if line.startswith("PRUNEPATHS="):
+            paths_line = line.split("=")[1].replace('"', '')
+            paths = paths_line.split(" ")
+            if new_path not in paths:
+                paths.append(new_path)
+                lines[i] = 'PRUNEPATHS="{}"'.format(' '.join(paths))
+    output = "\n".join(lines)
+    return output
+
+
+def modulo_distribution(modulo=3, wait=30, non_zero_wait=False):
+    """ Modulo distribution
+
+    This helper uses the unit number, a modulo value and a constant wait time
+    to produce a calculated wait time distribution. This is useful in large
+    scale deployments to distribute load during an expensive operation such as
+    service restarts.
+
+    If you have 1000 nodes that need to restart, 100 at a time, 1 minute
+    apart:
+
+        time.wait(modulo_distribution(modulo=100, wait=60))
+        restart()
+
+    If you need restarts to happen serially, set modulo to the exact number of
+    nodes and set a high constant wait time:
+
+        time.wait(modulo_distribution(modulo=10, wait=120))
+        restart()
+
+    @param modulo: int The modulo number creates the group distribution
+    @param wait: int The constant time wait value
+    @param non_zero_wait: boolean Override unit % modulo == 0,
+                          return modulo * wait. Used to avoid collisions with
+                          leader nodes which are often given priority.
+    @return: int Calculated time to wait for unit operation
+    """
+    unit_number = int(local_unit().split('/')[1])
+    calculated_wait_time = (unit_number % modulo) * wait
+    if non_zero_wait and calculated_wait_time == 0:
+        return modulo * wait
+    else:
+        return calculated_wait_time
+
+
+def ca_cert_absolute_path(basename_without_extension):
+    """Returns absolute path to CA certificate.
+
+    :param basename_without_extension: Filename without extension
+    :type basename_without_extension: str
+    :returns: Absolute full path
+    :rtype: str
+    """
+    return '{}/{}.crt'.format(CA_CERT_DIR, basename_without_extension)
+
+
+def install_ca_cert(ca_cert, name=None):
+    """
+    Install the given cert as a trusted CA.
+
+    The ``name`` is the stem of the filename where the cert is written, and if
+    not provided, it will default to ``juju-{charm_name}``.
+
+    If the cert is empty or None, or is unchanged, nothing is done.
+    """
+    if not ca_cert:
+        return
+    if not isinstance(ca_cert, bytes):
+        ca_cert = ca_cert.encode('utf8')
+    if not name:
+        name = 'juju-{}'.format(charm_name())
+    cert_file = ca_cert_absolute_path(name)
+    new_hash = hashlib.md5(ca_cert).hexdigest()
+    if file_hash(cert_file) == new_hash:
+        return
+    log("Installing new CA cert at: {}".format(cert_file), level=INFO)
+    write_file(cert_file, ca_cert)
+    subprocess.check_call(['update-ca-certificates', '--fresh'])
+
+
+def get_system_env(key, default=None):
+    """Get data from system environment as represented in ``/etc/environment``.
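+
+    Example (the key is a hypothetical value)::
+
+        path = get_system_env('PATH', default='')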
+
+    :param key: Key to look up
+    :type key: str
+    :param default: Value to return if key is not found
+    :type default: any
+    :returns: Value for key if found or contents of default parameter
+    :rtype: any
+    :raises: subprocess.CalledProcessError
+    """
+    env_file = '/etc/environment'
+    # use the shell and env(1) to parse the global environments file. This is
+    # done to get the correct result even if the user has shell variable
+    # substitutions or other shell logic in that file.
+    output = subprocess.check_output(
+        ['env', '-i', '/bin/bash', '-c',
+         'set -a && source {} && env'.format(env_file)],
+        universal_newlines=True)
+    for k, v in (line.split('=', 1)
+                 for line in output.splitlines() if '=' in line):
+        if k == key:
+            return v
+    else:
+        return default
diff --git a/ceph-radosgw/hooks/charmhelpers/core/host_factory/__init__.py b/ceph-radosgw/hooks/charmhelpers/core/host_factory/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/ceph-radosgw/hooks/charmhelpers/core/host_factory/centos.py b/ceph-radosgw/hooks/charmhelpers/core/host_factory/centos.py
new file mode 100644
index 00000000..7781a396
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/core/host_factory/centos.py
@@ -0,0 +1,72 @@
+import subprocess
+import yum
+import os
+
+from charmhelpers.core.strutils import BasicStringComparator
+
+
+class CompareHostReleases(BasicStringComparator):
+    """Provide comparisons of Host releases.
+
+    Use in the form of
+
+    if CompareHostReleases(release) > 'trusty':
+        # do something with mitaka
+    """
+
+    def __init__(self, item):
+        raise NotImplementedError(
+            "CompareHostReleases() is not implemented for CentOS")
+
+
+def service_available(service_name):
+    """Determine whether a system service is available."""
+    if os.path.isdir('/run/systemd/system'):
+        cmd = ['systemctl', 'is-enabled', service_name]
+    else:
+        cmd = ['service', service_name, 'is-enabled']
+    return subprocess.call(cmd) == 0
+
+
+def add_new_group(group_name, system_group=False, gid=None):
+    cmd = ['groupadd']
+    if gid:
+        cmd.extend(['--gid', str(gid)])
+    if system_group:
+        cmd.append('-r')
+    cmd.append(group_name)
+    subprocess.check_call(cmd)
+
+
+def lsb_release():
+    """Return /etc/os-release in a dict."""
+    d = {}
+    with open('/etc/os-release', 'r') as lsb:
+        for l in lsb:
+            s = l.split('=')
+            if len(s) != 2:
+                continue
+            d[s[0].strip()] = s[1].strip()
+    return d
+
+
+def cmp_pkgrevno(package, revno, pkgcache=None):
+    """Compare supplied revno with the revno of the installed package.
+
+    *  1 => Installed revno is greater than supplied arg
+    *  0 => Installed revno is the same as supplied arg
+    * -1 => Installed revno is less than supplied arg
+
+    This function builds a package cache via YumBase if the pkgcache
+    argument is None.
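+
+    Example (assuming the named package is installed)::
+
+        if cmp_pkgrevno('ceph-radosgw', '10.2.0') >= 0:
+            pass  # installed version is at least 10.2.0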
+ """ + if not pkgcache: + y = yum.YumBase() + packages = y.doPackageLists() + pkgcache = {i.Name: i.version for i in packages['installed']} + pkg = pkgcache[package] + if pkg > revno: + return 1 + if pkg < revno: + return -1 + return 0 diff --git a/ceph-radosgw/hooks/charmhelpers/core/host_factory/ubuntu.py b/ceph-radosgw/hooks/charmhelpers/core/host_factory/ubuntu.py new file mode 100644 index 00000000..732d76c3 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/core/host_factory/ubuntu.py @@ -0,0 +1,125 @@ +import subprocess + +from charmhelpers.core.hookenv import cached +from charmhelpers.core.strutils import BasicStringComparator + + +UBUNTU_RELEASES = ( + 'lucid', + 'maverick', + 'natty', + 'oneiric', + 'precise', + 'quantal', + 'raring', + 'saucy', + 'trusty', + 'utopic', + 'vivid', + 'wily', + 'xenial', + 'yakkety', + 'zesty', + 'artful', + 'bionic', + 'cosmic', + 'disco', + 'eoan', + 'focal', + 'groovy', + 'hirsute', + 'impish', + 'jammy', + 'kinetic', + 'lunar', + 'mantic', +) + + +class CompareHostReleases(BasicStringComparator): + """Provide comparisons of Ubuntu releases. + + Use in the form of + + if CompareHostReleases(release) > 'trusty': + # do something with mitaka + """ + _list = UBUNTU_RELEASES + + +def service_available(service_name): + """Determine whether a system service is available""" + try: + subprocess.check_output( + ['service', service_name, 'status'], + stderr=subprocess.STDOUT).decode('UTF-8') + except subprocess.CalledProcessError as e: + return b'unrecognized service' not in e.output + else: + return True + + +def add_new_group(group_name, system_group=False, gid=None): + cmd = ['addgroup'] + if gid: + cmd.extend(['--gid', str(gid)]) + if system_group: + cmd.append('--system') + else: + cmd.extend([ + '--group', + ]) + cmd.append(group_name) + subprocess.check_call(cmd) + + +def lsb_release(): + """Return /etc/lsb-release in a dict""" + d = {} + with open('/etc/lsb-release', 'r') as lsb: + for l in lsb: + k, v = l.split('=') + d[k.strip()] = v.strip() + return d + + +def get_distrib_codename(): + """Return the codename of the distribution + :returns: The codename + :rtype: str + """ + return lsb_release()['DISTRIB_CODENAME'].lower() + + +def cmp_pkgrevno(package, revno, pkgcache=None): + """Compare supplied revno with the revno of the installed package. + + * 1 => Installed revno is greater than supplied arg + * 0 => Installed revno is the same as supplied arg + * -1 => Installed revno is less than supplied arg + + This function imports apt_cache function from charmhelpers.fetch if + the pkgcache argument is None. Be sure to add charmhelpers.fetch if + you call this function, or pass an apt_pkg.Cache() instance. + """ + from charmhelpers.fetch import apt_pkg, get_installed_version + if not pkgcache: + current_ver = get_installed_version(package) + else: + pkg = pkgcache[package] + current_ver = pkg.current_ver + + return apt_pkg.version_compare(current_ver.ver_str, revno) + + +@cached +def arch(): + """Return the package architecture as a string. 
+
+    :returns: the architecture
+    :rtype: str
+    :raises: subprocess.CalledProcessError if dpkg command fails
+    """
+    return subprocess.check_output(
+        ['dpkg', '--print-architecture']
+    ).rstrip().decode('UTF-8')
diff --git a/ceph-radosgw/hooks/charmhelpers/core/hugepage.py b/ceph-radosgw/hooks/charmhelpers/core/hugepage.py
new file mode 100644
index 00000000..54b5b5e2
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/core/hugepage.py
@@ -0,0 +1,69 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import yaml
+from charmhelpers.core import fstab
+from charmhelpers.core import sysctl
+from charmhelpers.core.host import (
+    add_group,
+    add_user_to_group,
+    fstab_mount,
+    mkdir,
+)
+from charmhelpers.core.strutils import bytes_from_string
+from subprocess import check_output
+
+
+def hugepage_support(user, group='hugetlb', nr_hugepages=256,
+                     max_map_count=65536, mnt_point='/run/hugepages/kvm',
+                     pagesize='2MB', mount=True, set_shmmax=False):
+    """Enable hugepages on system.
+
+    Args:
+        user (str)  -- Username to allow access to hugepages to
+        group (str) -- Group name to own hugepages
+        nr_hugepages (int) -- Number of pages to reserve
+        max_map_count (int) -- Number of Virtual Memory Areas a process can own
+        mnt_point (str) -- Directory to mount hugepages on
+        pagesize (str) -- Size of hugepages
+        mount (bool) -- Whether to mount hugepages
+        set_shmmax (bool) -- Whether to raise kernel.shmmax to cover the
+                             reserved hugepage allocation
+    """
+    group_info = add_group(group)
+    gid = group_info.gr_gid
+    add_user_to_group(user, group)
+    if max_map_count < 2 * nr_hugepages:
+        max_map_count = 2 * nr_hugepages
+    sysctl_settings = {
+        'vm.nr_hugepages': nr_hugepages,
+        'vm.max_map_count': max_map_count,
+        'vm.hugetlb_shm_group': gid,
+    }
+    if set_shmmax:
+        shmmax_current = int(check_output(['sysctl', '-n', 'kernel.shmmax']))
+        shmmax_minsize = bytes_from_string(pagesize) * nr_hugepages
+        if shmmax_minsize > shmmax_current:
+            sysctl_settings['kernel.shmmax'] = shmmax_minsize
+    sysctl.create(yaml.dump(sysctl_settings), '/etc/sysctl.d/10-hugepage.conf')
+    mkdir(mnt_point, owner='root', group='root', perms=0o755, force=False)
+    lfstab = fstab.Fstab()
+    fstab_entry = lfstab.get_entry_by_attr('mountpoint', mnt_point)
+    if fstab_entry:
+        lfstab.remove_entry(fstab_entry)
+    entry = lfstab.Entry('nodev', mnt_point, 'hugetlbfs',
+                         'mode=1770,gid={},pagesize={}'.format(gid, pagesize), 0, 0)
+    lfstab.add_entry(entry)
+    if mount:
+        fstab_mount(mnt_point)
diff --git a/ceph-radosgw/hooks/charmhelpers/core/kernel.py b/ceph-radosgw/hooks/charmhelpers/core/kernel.py
new file mode 100644
index 00000000..e01f4f8b
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/core/kernel.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+import subprocess
+
+from charmhelpers.osplatform import get_platform
+from charmhelpers.core.hookenv import (
+    log,
+    INFO
+)
+
+__platform__ = get_platform()
+if __platform__ == "ubuntu":
+    from charmhelpers.core.kernel_factory.ubuntu import (  # NOQA:F401
+        persistent_modprobe,
+        update_initramfs,
+    )  # flake8: noqa -- ignore F401 for this import
+elif __platform__ == "centos":
+    from charmhelpers.core.kernel_factory.centos import (  # NOQA:F401
+        persistent_modprobe,
+        update_initramfs,
+    )  # flake8: noqa -- ignore F401 for this import
+
+__author__ = "Jorge Niedbalski"
+
+
+def modprobe(module, persist=True):
+    """Load a kernel module and configure for auto-load on reboot."""
+    cmd = ['modprobe', module]
+
+    log('Loading kernel module %s' % module, level=INFO)
+
+    subprocess.check_call(cmd)
+    if persist:
+        persistent_modprobe(module)
+
+
+def rmmod(module, force=False):
+    """Remove a module from the linux kernel"""
+    cmd = ['rmmod']
+    if force:
+        cmd.append('-f')
+    cmd.append(module)
+    log('Removing kernel module %s' % module, level=INFO)
+    return subprocess.check_call(cmd)
+
+
+def lsmod():
+    """Shows what kernel modules are currently loaded"""
+    return subprocess.check_output(['lsmod'],
+                                   universal_newlines=True)
+
+
+def is_module_loaded(module):
+    """Checks if a kernel module is already loaded"""
+    matches = re.findall('^%s[ ]+' % module, lsmod(), re.M)
+    return len(matches) > 0
diff --git a/ceph-radosgw/hooks/charmhelpers/core/kernel_factory/__init__.py b/ceph-radosgw/hooks/charmhelpers/core/kernel_factory/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/ceph-radosgw/hooks/charmhelpers/core/kernel_factory/centos.py b/ceph-radosgw/hooks/charmhelpers/core/kernel_factory/centos.py
new file mode 100644
index 00000000..1c402c11
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/core/kernel_factory/centos.py
@@ -0,0 +1,17 @@
+import subprocess
+import os
+
+
+def persistent_modprobe(module):
+    """Load a kernel module and configure for auto-load on reboot."""
+    if not os.path.exists('/etc/rc.modules'):
+        open('/etc/rc.modules', 'a')
+        # Use octal 0o111 (--x--x--x) so the boot script is executable; a
+        # bare decimal 111 would set an unintended mode.
+        os.chmod('/etc/rc.modules', 0o111)
+    with open('/etc/rc.modules', 'r+') as modules:
+        if module not in modules.read():
+            modules.write('modprobe %s\n' % module)
+
+
+def update_initramfs(version='all'):
+    """Updates an initramfs image."""
+    return subprocess.check_call(["dracut", "-f", version])
diff --git a/ceph-radosgw/hooks/charmhelpers/core/kernel_factory/ubuntu.py b/ceph-radosgw/hooks/charmhelpers/core/kernel_factory/ubuntu.py
new file mode 100644
index 00000000..3de372fd
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/core/kernel_factory/ubuntu.py
@@ -0,0 +1,13 @@
+import subprocess
+
+
+def persistent_modprobe(module):
+    """Load a kernel module and configure for auto-load on reboot."""
+    with open('/etc/modules', 'r+') as modules:
+        if module not in modules.read():
+            modules.write(module + "\n")
+
+
+def update_initramfs(version='all'):
+    """Updates an initramfs image."""
+    return subprocess.check_call(["update-initramfs", "-k", version, "-u"])
a/ceph-radosgw/hooks/charmhelpers/core/services/__init__.py b/ceph-radosgw/hooks/charmhelpers/core/services/__init__.py new file mode 100644 index 00000000..61fd074e --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/core/services/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .base import * # NOQA +from .helpers import * # NOQA diff --git a/ceph-radosgw/hooks/charmhelpers/core/services/base.py b/ceph-radosgw/hooks/charmhelpers/core/services/base.py new file mode 100644 index 00000000..8d217b59 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/core/services/base.py @@ -0,0 +1,363 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import json +import inspect +from collections import OrderedDict +from collections.abc import Iterable + +from charmhelpers.core import host +from charmhelpers.core import hookenv + + +__all__ = ['ServiceManager', 'ManagerCallback', + 'PortManagerCallback', 'open_ports', 'close_ports', 'manage_ports', + 'service_restart', 'service_stop'] + + +class ServiceManager(object): + def __init__(self, services=None): + """ + Register a list of services, given their definitions. + + Service definitions are dicts in the following formats (all keys except + 'service' are optional):: + + { + "service": , + "required_data": , + "provided_data": , + "data_ready": , + "data_lost": , + "start": , + "stop": , + "ports": , + } + + The 'required_data' list should contain dicts of required data (or + dependency managers that act like dicts and know how to collect the data). + Only when all items in the 'required_data' list are populated are the list + of 'data_ready' and 'start' callbacks executed. See `is_ready()` for more + information. + + The 'provided_data' list should contain relation data providers, most likely + a subclass of :class:`charmhelpers.core.services.helpers.RelationContext`, + that will indicate a set of data to set on a given relation. + + The 'data_ready' value should be either a single callback, or a list of + callbacks, to be called when all items in 'required_data' pass `is_ready()`. + Each callback will be called with the service name as the only parameter. + After all of the 'data_ready' callbacks are called, the 'start' callbacks + are fired. + + The 'data_lost' value should be either a single callback, or a list of + callbacks, to be called when a 'required_data' item no longer passes + `is_ready()`. 
Each callback will be called with the service name as the + only parameter. After all of the 'data_lost' callbacks are called, + the 'stop' callbacks are fired. + + The 'start' value should be either a single callback, or a list of + callbacks, to be called when starting the service, after the 'data_ready' + callbacks are complete. Each callback will be called with the service + name as the only parameter. This defaults to + `[host.service_start, services.open_ports]`. + + The 'stop' value should be either a single callback, or a list of + callbacks, to be called when stopping the service. If the service is + being stopped because it no longer has all of its 'required_data', this + will be called after all of the 'data_lost' callbacks are complete. + Each callback will be called with the service name as the only parameter. + This defaults to `[services.close_ports, host.service_stop]`. + + The 'ports' value should be a list of ports to manage. The default + 'start' handler will open the ports after the service is started, + and the default 'stop' handler will close the ports prior to stopping + the service. + + + Examples: + + The following registers an Upstart service called bingod that depends on + a mongodb relation and which runs a custom `db_migrate` function prior to + restarting the service, and a Runit service called spadesd:: + + manager = services.ServiceManager([ + { + 'service': 'bingod', + 'ports': [80, 443], + 'required_data': [MongoRelation(), config(), {'my': 'data'}], + 'data_ready': [ + services.template(source='bingod.conf'), + services.template(source='bingod.ini', + target='/etc/bingod.ini', + owner='bingo', perms=0400), + ], + }, + { + 'service': 'spadesd', + 'data_ready': services.template(source='spadesd_run.j2', + target='/etc/sv/spadesd/run', + perms=0555), + 'start': runit_start, + 'stop': runit_stop, + }, + ]) + manager.manage() + """ + self._ready_file = os.path.join(hookenv.charm_dir(), 'READY-SERVICES.json') + self._ready = None + self.services = OrderedDict() + for service in services or []: + service_name = service['service'] + self.services[service_name] = service + + def manage(self): + """ + Handle the current hook by doing The Right Thing with the registered services. + """ + hookenv._run_atstart() + try: + hook_name = hookenv.hook_name() + if hook_name == 'stop': + self.stop_services() + else: + self.reconfigure_services() + self.provide_data() + except SystemExit as x: + if x.code is None or x.code == 0: + hookenv._run_atexit() + hookenv._run_atexit() + + def provide_data(self): + """ + Set the relation data for each provider in the ``provided_data`` list. + + A provider must have a `name` attribute, which indicates which relation + to set data on, and a `provide_data()` method, which returns a dict of + data to set. + + The `provide_data()` method can optionally accept two parameters: + + * ``remote_service`` The name of the remote service that the data will + be provided to. The `provide_data()` method will be called once + for each connected service (not unit). This allows the method to + tailor its data to the given service. + * ``service_ready`` Whether or not the service definition had all of + its requirements met, and thus the ``data_ready`` callbacks run. + + Note that the ``provided_data`` methods are now called **after** the + ``data_ready`` callbacks are run. This gives the ``data_ready`` callbacks + a chance to generate any data necessary for the providing to the remote + services. 
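+
+        A minimal sketch of a conforming provider (hypothetical; a
+        ``WebsiteProvider`` class is not part of this module)::
+
+            class WebsiteProvider(object):
+                name = 'website'
+
+                def provide_data(self, remote_service, service_ready):
+                    # Called once per connected service; the data may be
+                    # tailored to the remote service and readiness state.
+                    return {'hostname': hookenv.unit_get('private-address'),
+                            'ready': str(service_ready)}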
+ """ + for service_name, service in self.services.items(): + service_ready = self.is_ready(service_name) + for provider in service.get('provided_data', []): + for relid in hookenv.relation_ids(provider.name): + units = hookenv.related_units(relid) + if not units: + continue + remote_service = units[0].split('/')[0] + argspec = inspect.getfullargspec(provider.provide_data) + if len(argspec.args) > 1: + data = provider.provide_data(remote_service, service_ready) + else: + data = provider.provide_data() + if data: + hookenv.relation_set(relid, data) + + def reconfigure_services(self, *service_names): + """ + Update all files for one or more registered services, and, + if ready, optionally restart them. + + If no service names are given, reconfigures all registered services. + """ + for service_name in service_names or self.services.keys(): + if self.is_ready(service_name): + self.fire_event('data_ready', service_name) + self.fire_event('start', service_name, default=[ + service_restart, + manage_ports]) + self.save_ready(service_name) + else: + if self.was_ready(service_name): + self.fire_event('data_lost', service_name) + self.fire_event('stop', service_name, default=[ + manage_ports, + service_stop]) + self.save_lost(service_name) + + def stop_services(self, *service_names): + """ + Stop one or more registered services, by name. + + If no service names are given, stops all registered services. + """ + for service_name in service_names or self.services.keys(): + self.fire_event('stop', service_name, default=[ + manage_ports, + service_stop]) + + def get_service(self, service_name): + """ + Given the name of a registered service, return its service definition. + """ + service = self.services.get(service_name) + if not service: + raise KeyError('Service not registered: %s' % service_name) + return service + + def fire_event(self, event_name, service_name, default=None): + """ + Fire a data_ready, data_lost, start, or stop event on a given service. + """ + service = self.get_service(service_name) + callbacks = service.get(event_name, default) + if not callbacks: + return + if not isinstance(callbacks, Iterable): + callbacks = [callbacks] + for callback in callbacks: + if isinstance(callback, ManagerCallback): + callback(self, service_name, event_name) + else: + callback(service_name) + + def is_ready(self, service_name): + """ + Determine if a registered service is ready, by checking its 'required_data'. + + A 'required_data' item can be any mapping type, and is considered ready + if `bool(item)` evaluates as True. + """ + service = self.get_service(service_name) + reqs = service.get('required_data', []) + return all(bool(req) for req in reqs) + + def _load_ready_file(self): + if self._ready is not None: + return + if os.path.exists(self._ready_file): + with open(self._ready_file) as fp: + self._ready = set(json.load(fp)) + else: + self._ready = set() + + def _save_ready_file(self): + if self._ready is None: + return + with open(self._ready_file, 'w') as fp: + json.dump(list(self._ready), fp) + + def save_ready(self, service_name): + """ + Save an indicator that the given service is now data_ready. + """ + self._load_ready_file() + self._ready.add(service_name) + self._save_ready_file() + + def save_lost(self, service_name): + """ + Save an indicator that the given service is no longer data_ready. + """ + self._load_ready_file() + self._ready.discard(service_name) + self._save_ready_file() + + def was_ready(self, service_name): + """ + Determine if the given service was previously data_ready. 
+ """ + self._load_ready_file() + return service_name in self._ready + + +class ManagerCallback(object): + """ + Special case of a callback that takes the `ServiceManager` instance + in addition to the service name. + + Subclasses should implement `__call__` which should accept three parameters: + + * `manager` The `ServiceManager` instance + * `service_name` The name of the service it's being triggered for + * `event_name` The name of the event that this callback is handling + """ + def __call__(self, manager, service_name, event_name): + raise NotImplementedError() + + +class PortManagerCallback(ManagerCallback): + """ + Callback class that will open or close ports, for use as either + a start or stop action. + """ + def __call__(self, manager, service_name, event_name): + service = manager.get_service(service_name) + # turn this generator into a list, + # as we'll be going over it multiple times + new_ports = list(service.get('ports', [])) + port_file = os.path.join(hookenv.charm_dir(), '.{}.ports'.format(service_name)) + if os.path.exists(port_file): + with open(port_file) as fp: + old_ports = fp.read().split(',') + for old_port in old_ports: + if bool(old_port) and not self.ports_contains(old_port, new_ports): + hookenv.close_port(old_port) + with open(port_file, 'w') as fp: + fp.write(','.join(str(port) for port in new_ports)) + for port in new_ports: + # A port is either a number or 'ICMP' + protocol = 'TCP' + if str(port).upper() == 'ICMP': + protocol = 'ICMP' + if event_name == 'start': + hookenv.open_port(port, protocol) + elif event_name == 'stop': + hookenv.close_port(port, protocol) + + def ports_contains(self, port, ports): + if not bool(port): + return False + if str(port).upper() != 'ICMP': + port = int(port) + return port in ports + + +def service_stop(service_name): + """ + Wrapper around host.service_stop to prevent spurious "unknown service" + messages in the logs. + """ + if host.service_running(service_name): + host.service_stop(service_name) + + +def service_restart(service_name): + """ + Wrapper around host.service_restart to prevent spurious "unknown service" + messages in the logs. + """ + if host.service_available(service_name): + if host.service_running(service_name): + host.service_restart(service_name) + else: + host.service_start(service_name) + + +# Convenience aliases +open_ports = close_ports = manage_ports = PortManagerCallback() diff --git a/ceph-radosgw/hooks/charmhelpers/core/services/helpers.py b/ceph-radosgw/hooks/charmhelpers/core/services/helpers.py new file mode 100644 index 00000000..5bf62dd5 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/core/services/helpers.py @@ -0,0 +1,290 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import os
+import yaml
+
+from charmhelpers.core import hookenv
+from charmhelpers.core import host
+from charmhelpers.core import templating
+
+from charmhelpers.core.services.base import ManagerCallback
+
+
+__all__ = ['RelationContext', 'TemplateCallback',
+           'render_template', 'template']
+
+
+class RelationContext(dict):
+    """
+    Base class for a context generator that gets relation data from juju.
+
+    Subclasses must provide the attributes `name`, which is the name of the
+    interface of interest, `interface`, which is the type of the interface of
+    interest, and `required_keys`, which is the set of keys required for the
+    relation to be considered complete. The data for all interfaces matching
+    the `name` attribute that are complete will be used to populate the
+    dictionary values (see `get_data`, below).
+
+    The generated context will be namespaced under the relation :attr:`name`,
+    to prevent potential naming conflicts.
+
+    :param str name: Override the relation :attr:`name`, since it can vary from charm to charm
+    :param list additional_required_keys: Extend the list of :attr:`required_keys`
+    """
+    name = None
+    interface = None
+
+    def __init__(self, name=None, additional_required_keys=None):
+        if not hasattr(self, 'required_keys'):
+            self.required_keys = []
+
+        if name is not None:
+            self.name = name
+        if additional_required_keys:
+            self.required_keys.extend(additional_required_keys)
+        self.get_data()
+
+    def __bool__(self):
+        """
+        Returns True if all of the required_keys are available.
+        """
+        return self.is_ready()
+
+    __nonzero__ = __bool__
+
+    def __repr__(self):
+        return super(RelationContext, self).__repr__()
+
+    def is_ready(self):
+        """
+        Returns True if all of the `required_keys` are available from any units.
+        """
+        ready = len(self.get(self.name, [])) > 0
+        if not ready:
+            hookenv.log('Incomplete relation: {}'.format(self.__class__.__name__), hookenv.DEBUG)
+        return ready
+
+    def _is_ready(self, unit_data):
+        """
+        Helper method that tests a set of relation data and returns True if
+        all of the `required_keys` are present.
+        """
+        return set(unit_data.keys()).issuperset(set(self.required_keys))
+
+    def get_data(self):
+        """
+        Retrieve the relation data for each unit involved in a relation and,
+        if complete, store it in a list under `self[self.name]`. This
+        is automatically called when the RelationContext is instantiated.
+
+        The units are sorted lexicographically first by the service ID, then by
+        the unit ID. Thus, if an interface has two other services, 'db:1'
+        and 'db:2', with 'db:1' having two units, 'wordpress/0' and 'wordpress/1',
+        and 'db:2' having one unit, 'mediawiki/0', all of which have a complete
+        set of data, the relation data for the units will be stored in the
+        order: 'wordpress/0', 'wordpress/1', 'mediawiki/0'.
+
+        If you only care about a single unit on the relation, you can just
+        access it as `{{ interface[0]['key'] }}`. However, if you can at all
+        support multiple units on a relation, you should iterate over the list,
+        like::
+
+            {% for unit in interface -%}
+                {{ unit['key'] }}{% if not loop.last %},{% endif %}
+            {%- endfor %}
+
+        Note that since all sets of relation data from all related services and
+        units are in a single list, if you need to know which service or unit a
+        set of data came from, you'll need to extend this class to preserve
+        that information.
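+
+        A minimal sketch of such an extension (hypothetical; the
+        '__unit__' key is purely illustrative)::
+
+            class NamedRelationContext(RelationContext):
+                def get_data(self):
+                    ns = self.setdefault(self.name, [])
+                    for rid in sorted(hookenv.relation_ids(self.name)):
+                        for unit in sorted(hookenv.related_units(rid)):
+                            reldata = hookenv.relation_get(rid=rid, unit=unit)
+                            if self._is_ready(reldata):
+                                # Remember which unit the data came from.
+                                reldata['__unit__'] = unit
+                                ns.append(reldata)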
+ """ + if not hookenv.relation_ids(self.name): + return + + ns = self.setdefault(self.name, []) + for rid in sorted(hookenv.relation_ids(self.name)): + for unit in sorted(hookenv.related_units(rid)): + reldata = hookenv.relation_get(rid=rid, unit=unit) + if self._is_ready(reldata): + ns.append(reldata) + + def provide_data(self): + """ + Return data to be relation_set for this interface. + """ + return {} + + +class MysqlRelation(RelationContext): + """ + Relation context for the `mysql` interface. + + :param str name: Override the relation :attr:`name`, since it can vary from charm to charm + :param list additional_required_keys: Extend the list of :attr:`required_keys` + """ + name = 'db' + interface = 'mysql' + + def __init__(self, *args, **kwargs): + self.required_keys = ['host', 'user', 'password', 'database'] + RelationContext.__init__(self, *args, **kwargs) + + +class HttpRelation(RelationContext): + """ + Relation context for the `http` interface. + + :param str name: Override the relation :attr:`name`, since it can vary from charm to charm + :param list additional_required_keys: Extend the list of :attr:`required_keys` + """ + name = 'website' + interface = 'http' + + def __init__(self, *args, **kwargs): + self.required_keys = ['host', 'port'] + RelationContext.__init__(self, *args, **kwargs) + + def provide_data(self): + return { + 'host': hookenv.unit_get('private-address'), + 'port': 80, + } + + +class RequiredConfig(dict): + """ + Data context that loads config options with one or more mandatory options. + + Once the required options have been changed from their default values, all + config options will be available, namespaced under `config` to prevent + potential naming conflicts (for example, between a config option and a + relation property). + + :param list *args: List of options that must be changed from their default values. + """ + + def __init__(self, *args): + self.required_options = args + self['config'] = hookenv.config() + with open(os.path.join(hookenv.charm_dir(), 'config.yaml')) as fp: + self.config = yaml.safe_load(fp).get('options', {}) + + def __bool__(self): + for option in self.required_options: + if option not in self['config']: + return False + current_value = self['config'][option] + default_value = self.config[option].get('default') + if current_value == default_value: + return False + if current_value in (None, '') and default_value in (None, ''): + return False + return True + + def __nonzero__(self): + return self.__bool__() + + +class StoredContext(dict): + """ + A data context that always returns the data that it was first created with. + + This is useful to do a one-time generation of things like passwords, that + will thereafter use the same value that was originally generated, instead + of generating a new value each time it is run. + """ + def __init__(self, file_name, config_data): + """ + If the file exists, populate `self` with the data from the file. + Otherwise, populate with the given data and persist it to the file. 
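+
+        Example of the intended one-time generation pattern (a sketch;
+        ``host.pwgen`` is assumed to be importable from
+        ``charmhelpers.core.host``)::
+
+            ctx = StoredContext('secrets.yaml', {'password': host.pwgen()})
+            ctx['password']  # same value on every subsequent hook run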
+ """ + if os.path.exists(file_name): + self.update(self.read_context(file_name)) + else: + self.store_context(file_name, config_data) + self.update(config_data) + + def store_context(self, file_name, config_data): + if not os.path.isabs(file_name): + file_name = os.path.join(hookenv.charm_dir(), file_name) + with open(file_name, 'w') as file_stream: + os.fchmod(file_stream.fileno(), 0o600) + yaml.dump(config_data, file_stream) + + def read_context(self, file_name): + if not os.path.isabs(file_name): + file_name = os.path.join(hookenv.charm_dir(), file_name) + with open(file_name, 'r') as file_stream: + data = yaml.safe_load(file_stream) + if not data: + raise OSError("%s is empty" % file_name) + return data + + +class TemplateCallback(ManagerCallback): + """ + Callback class that will render a Jinja2 template, for use as a ready + action. + + :param str source: The template source file, relative to + `$CHARM_DIR/templates` + + :param str target: The target to write the rendered template to (or None) + :param str owner: The owner of the rendered file + :param str group: The group of the rendered file + :param int perms: The permissions of the rendered file + :param partial on_change_action: functools partial to be executed when + rendered file changes + :param jinja2 loader template_loader: A jinja2 template loader + + :return str: The rendered template + """ + def __init__(self, source, target, + owner='root', group='root', perms=0o444, + on_change_action=None, template_loader=None): + self.source = source + self.target = target + self.owner = owner + self.group = group + self.perms = perms + self.on_change_action = on_change_action + self.template_loader = template_loader + + def __call__(self, manager, service_name, event_name): + pre_checksum = '' + if self.on_change_action and os.path.isfile(self.target): + pre_checksum = host.file_hash(self.target) + service = manager.get_service(service_name) + context = {'ctx': {}} + for ctx in service.get('required_data', []): + context.update(ctx) + context['ctx'].update(ctx) + + result = templating.render(self.source, self.target, context, + self.owner, self.group, self.perms, + template_loader=self.template_loader) + if self.on_change_action: + if pre_checksum == host.file_hash(self.target): + hookenv.log( + 'No change detected: {}'.format(self.target), + hookenv.DEBUG) + else: + self.on_change_action() + + return result + + +# Convenience aliases for templates +render_template = template = TemplateCallback diff --git a/ceph-radosgw/hooks/charmhelpers/core/strutils.py b/ceph-radosgw/hooks/charmhelpers/core/strutils.py new file mode 100644 index 00000000..31366871 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/core/strutils.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import re + +TRUTHY_STRINGS = {'y', 'yes', 'true', 't', 'on'} +FALSEY_STRINGS = {'n', 'no', 'false', 'f', 'off'} + + +def bool_from_string(value, truthy_strings=TRUTHY_STRINGS, falsey_strings=FALSEY_STRINGS, assume_false=False): + """Interpret string value as boolean. + + Returns True if value translates to True otherwise False. + """ + if isinstance(value, str): + value = str(value) + else: + msg = "Unable to interpret non-string value '%s' as boolean" % (value) + raise ValueError(msg) + + value = value.strip().lower() + + if value in truthy_strings: + return True + elif value in falsey_strings or assume_false: + return False + + msg = "Unable to interpret string value '%s' as boolean" % (value) + raise ValueError(msg) + + +def bytes_from_string(value): + """Interpret human readable string value as bytes. + + Returns int + """ + BYTE_POWER = { + 'K': 1, + 'KB': 1, + 'M': 2, + 'MB': 2, + 'G': 3, + 'GB': 3, + 'T': 4, + 'TB': 4, + 'P': 5, + 'PB': 5, + } + if isinstance(value, str): + value = str(value) + else: + msg = "Unable to interpret non-string value '%s' as bytes" % (value) + raise ValueError(msg) + matches = re.match("([0-9]+)([a-zA-Z]+)", value) + if matches: + size = int(matches.group(1)) * (1024 ** BYTE_POWER[matches.group(2)]) + else: + # Assume that value passed in is bytes + try: + size = int(value) + except ValueError: + msg = "Unable to interpret string value '%s' as bytes" % (value) + raise ValueError(msg) + return size + + +class BasicStringComparator(object): + """Provides a class that will compare strings from an iterator type object. + Used to provide > and < comparisons on strings that may not necessarily be + alphanumerically ordered. e.g. OpenStack or Ubuntu releases AFTER the + z-wrap. + """ + + _list = None + + def __init__(self, item): + if self._list is None: + raise Exception("Must define the _list in the class definition!") + try: + self.index = self._list.index(item) + except Exception: + raise KeyError("Item '{}' is not in list '{}'" + .format(item, self._list)) + + def __eq__(self, other): + assert isinstance(other, str) or isinstance(other, self.__class__) + return self.index == self._list.index(other) + + def __ne__(self, other): + return not self.__eq__(other) + + def __lt__(self, other): + assert isinstance(other, str) or isinstance(other, self.__class__) + return self.index < self._list.index(other) + + def __ge__(self, other): + return not self.__lt__(other) + + def __gt__(self, other): + assert isinstance(other, str) or isinstance(other, self.__class__) + return self.index > self._list.index(other) + + def __le__(self, other): + return not self.__gt__(other) + + def __str__(self): + """Always give back the item at the index so it can be used in + comparisons like: + + s_mitaka = CompareOpenStack('mitaka') + s_newton = CompareOpenstack('newton') + + assert s_newton > s_mitaka + + @returns: + """ + return self._list[self.index] diff --git a/ceph-radosgw/hooks/charmhelpers/core/sysctl.py b/ceph-radosgw/hooks/charmhelpers/core/sysctl.py new file mode 100644 index 00000000..386428d6 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/core/sysctl.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import yaml + +from subprocess import check_call, CalledProcessError + +from charmhelpers.core.hookenv import ( + log, + DEBUG, + ERROR, + WARNING, +) + +from charmhelpers.core.host import is_container + +__author__ = 'Jorge Niedbalski R. ' + + +def create(sysctl_dict, sysctl_file, ignore=False): + """Creates a sysctl.conf file from a YAML associative array + + :param sysctl_dict: a dict or YAML-formatted string of sysctl + options eg "{ 'kernel.max_pid': 1337 }" + :type sysctl_dict: str + :param sysctl_file: path to the sysctl file to be saved + :type sysctl_file: str or unicode + :param ignore: If True, ignore "unknown variable" errors. + :type ignore: bool + :returns: None + """ + if type(sysctl_dict) is not dict: + try: + sysctl_dict_parsed = yaml.safe_load(sysctl_dict) + except yaml.YAMLError: + log("Error parsing YAML sysctl_dict: {}".format(sysctl_dict), + level=ERROR) + return + else: + sysctl_dict_parsed = sysctl_dict + + with open(sysctl_file, "w") as fd: + for key, value in sysctl_dict_parsed.items(): + fd.write("{}={}\n".format(key, value)) + + log("Updating sysctl_file: {} values: {}".format(sysctl_file, + sysctl_dict_parsed), + level=DEBUG) + + call = ["sysctl", "-p", sysctl_file] + if ignore: + call.append("-e") + + try: + check_call(call) + except CalledProcessError as e: + if is_container(): + log("Error setting some sysctl keys in this container: {}".format(e.output), + level=WARNING) + else: + raise e diff --git a/ceph-radosgw/hooks/charmhelpers/core/templating.py b/ceph-radosgw/hooks/charmhelpers/core/templating.py new file mode 100644 index 00000000..cb0213dc --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/core/templating.py @@ -0,0 +1,88 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from charmhelpers.core import host +from charmhelpers.core import hookenv + + +def render(source, target, context, owner='root', group='root', + perms=0o444, templates_dir=None, encoding='UTF-8', + template_loader=None, config_template=None): + """ + Render a template. + + The `source` path, if not absolute, is relative to the `templates_dir`. + + The `target` path should be absolute. It can also be `None`, in which + case no file will be written. + + The context should be a dict containing the values to be replaced in the + template. + + config_template may be provided to render from a provided template instead + of loading from a file. + + The `owner`, `group`, and `perms` options will be passed to `write_file`. + + If omitted, `templates_dir` defaults to the `templates` folder in the charm. 
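+
+    A typical call looks like this (the template name, target path, and
+    context here are illustrative only)::
+
+        render('ceph.conf', '/etc/ceph/ceph.conf',
+               {'mon_hosts': '10.0.0.1'}, perms=0o644)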
+ + The rendered template will be written to the file as well as being returned + as a string. + + Note: Using this requires python3-jinja2; if it is not installed, calling + this will attempt to use charmhelpers.fetch.apt_install to install it. + """ + try: + from jinja2 import FileSystemLoader, Environment, exceptions + except ImportError: + try: + from charmhelpers.fetch import apt_install + except ImportError: + hookenv.log('Could not import jinja2, and could not import ' + 'charmhelpers.fetch to install it', + level=hookenv.ERROR) + raise + apt_install('python3-jinja2', fatal=True) + from jinja2 import FileSystemLoader, Environment, exceptions + + if template_loader: + template_env = Environment(loader=template_loader) + else: + if templates_dir is None: + templates_dir = os.path.join(hookenv.charm_dir(), 'templates') + template_env = Environment(loader=FileSystemLoader(templates_dir)) + + # load from a string if provided explicitly + if config_template is not None: + template = template_env.from_string(config_template) + else: + try: + source = source + template = template_env.get_template(source) + except exceptions.TemplateNotFound as e: + hookenv.log('Could not load template %s from %s.' % + (source, templates_dir), + level=hookenv.ERROR) + raise e + content = template.render(context) + if target is not None: + target_dir = os.path.dirname(target) + if not os.path.exists(target_dir): + # This is a terrible default directory permission, as the file + # or its siblings will often contain secrets. + host.mkdir(os.path.dirname(target), owner, group, perms=0o755) + host.write_file(target, content.encode(encoding), owner, group, perms) + return content diff --git a/ceph-radosgw/hooks/charmhelpers/core/unitdata.py b/ceph-radosgw/hooks/charmhelpers/core/unitdata.py new file mode 100644 index 00000000..65153f1f --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/core/unitdata.py @@ -0,0 +1,563 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Authors: +# Kapil Thangavelu +# +""" +Intro +----- + +A simple way to store state in units. This provides a key value +storage with support for versioned, transactional operation, +and can calculate deltas from previous values to simplify unit logic +when processing changes. + + +Hook Integration +---------------- + +There are several extant frameworks for hook execution, including + + - charmhelpers.core.hookenv.Hooks + - charmhelpers.core.services.ServiceManager + +The storage classes are framework agnostic, one simple integration is +via the HookData contextmanager. It will record the current hook +execution environment (including relation data, config data, etc.), +setup a transaction and allow easy access to the changes from +previously seen values. One consequence of the integration is the +reservation of particular keys ('rels', 'unit', 'env', 'config', +'charm_revisions') for their respective values. 
+
+Here's a fully worked integration example using hookenv.Hooks::
+
+    from charmhelpers.core import hookenv, unitdata
+
+    hook_data = unitdata.HookData()
+    db = unitdata.kv()
+    hooks = hookenv.Hooks()
+
+    @hooks.hook
+    def config_changed():
+        # Print all changes to configuration from previously seen
+        # values.
+        for changed, (prev, cur) in hook_data.conf.items():
+            print('config changed', changed,
+                  'previous value', prev,
+                  'current value', cur)
+
+        # Get some unit specific bookkeeping
+        if not db.get('pkg_key'):
+            key = urllib.urlopen('https://example.com/pkg_key').read()
+            db.set('pkg_key', key)
+
+        # Directly access all charm config as a mapping.
+        conf = db.getrange('config', True)
+
+        # Directly access all relation data as a mapping
+        rels = db.getrange('rels', True)
+
+    if __name__ == '__main__':
+        with hook_data():
+            hooks.execute(sys.argv)
+
+
+A more basic integration is via the hook_scope context manager which simply
+manages transaction scope (and records hook name and timestamp)::
+
+    >>> from unitdata import kv
+    >>> db = kv()
+    >>> with db.hook_scope('install'):
+    ...    # do work, in transactional scope.
+    ...    db.set('x', 1)
+    >>> db.get('x')
+    1
+
+
+Usage
+-----
+
+Values are automatically json de/serialized to preserve basic typing
+and complex data struct capabilities (dicts, lists, ints, booleans, etc).
+
+Individual values can be manipulated via get/set::
+
+    >>> kv.set('y', True)
+    >>> kv.get('y')
+    True
+
+    # We can set complex values (dicts, lists) as a single key.
+    >>> kv.set('config', {'a': 1, 'b': True})
+
+    # Also supports returning dictionaries as a record which
+    # provides attribute access.
+    >>> config = kv.get('config', record=True)
+    >>> config.b
+    True
+
+
+Groups of keys can be manipulated with update/getrange::
+
+    >>> kv.update({'z': 1, 'y': 2}, prefix="gui.")
+    >>> kv.getrange('gui.', strip=True)
+    {'z': 1, 'y': 2}
+
+When updating values, it's very helpful to understand which values
+have actually changed and how they have changed. The storage
+provides a delta method for this::
+
+    >>> data = {'debug': True, 'option': 2}
+    >>> delta = kv.delta(data, 'config.')
+    >>> delta.debug.previous
+    None
+    >>> delta.debug.current
+    True
+    >>> delta
+    {'debug': (None, True), 'option': (None, 2)}
+
+Note that the delta method does not persist the actual change; it needs to
+be explicitly saved via the 'update' method::
+
+    >>> kv.update(data, 'config.')
+
+Values modified in the context of a hook scope retain historical values
+associated with the hook name::
+
+    >>> with db.hook_scope('config-changed'):
+    ...     db.set('x', 42)
+    >>> db.gethistory('x')
+    [(1, u'x', 1, u'install', u'2015-01-21T16:49:30.038372'),
+     (2, u'x', 42, u'config-changed', u'2015-01-21T16:49:30.038786')]
+
+"""
+
+import collections
+import contextlib
+import datetime
+import itertools
+import json
+import logging
+import os
+import pprint
+import sqlite3
+import sys
+
+__author__ = 'Kapil Thangavelu '
+
+
+class Storage(object):
+    """Simple key value database for local unit state within charms.
+
+    Modifications are not persisted unless :meth:`flush` is called.
+
+    To support dicts, lists, integers, floats, and booleans, values
+    are automatically json encoded/decoded.
+
+    Note: to facilitate unit testing, ':memory:' can be passed as the
+    path parameter which causes sqlite3 to only build the db in memory.
+    This should only be used for testing purposes.
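+
+    A minimal sketch of stand-alone use, relying on the ':memory:' path
+    described above::
+
+        db = Storage(':memory:')
+        db.set('greeting', 'hello')
+        assert db.get('greeting') == 'hello'
+        db.flush()   # modifications persist only once flush() commits
+        db.close()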
+ """ + def __init__(self, path=None, keep_revisions=False): + self.db_path = path + self.keep_revisions = keep_revisions + if path is None: + if 'UNIT_STATE_DB' in os.environ: + self.db_path = os.environ['UNIT_STATE_DB'] + else: + self.db_path = os.path.join( + os.environ.get('CHARM_DIR', ''), '.unit-state.db') + if self.db_path != ':memory:': + with open(self.db_path, 'a') as f: + os.fchmod(f.fileno(), 0o600) + self.conn = sqlite3.connect('%s' % self.db_path) + self.cursor = self.conn.cursor() + self.revision = None + self._closed = False + self._init() + + def close(self): + if self._closed: + return + self.flush(False) + self.cursor.close() + self.conn.close() + self._closed = True + + def get(self, key, default=None, record=False): + self.cursor.execute('select data from kv where key=?', [key]) + result = self.cursor.fetchone() + if not result: + return default + if record: + return Record(json.loads(result[0])) + return json.loads(result[0]) + + def getrange(self, key_prefix, strip=False): + """ + Get a range of keys starting with a common prefix as a mapping of + keys to values. + + :param str key_prefix: Common prefix among all keys + :param bool strip: Optionally strip the common prefix from the key + names in the returned dict + :return dict: A (possibly empty) dict of key-value mappings + """ + self.cursor.execute("select key, data from kv where key like ?", + ['%s%%' % key_prefix]) + result = self.cursor.fetchall() + + if not result: + return {} + if not strip: + key_prefix = '' + return dict([ + (k[len(key_prefix):], json.loads(v)) for k, v in result]) + + def update(self, mapping, prefix=""): + """ + Set the values of multiple keys at once. + + :param dict mapping: Mapping of keys to values + :param str prefix: Optional prefix to apply to all keys in `mapping` + before setting + """ + for k, v in mapping.items(): + self.set("%s%s" % (prefix, k), v) + + def unset(self, key): + """ + Remove a key from the database entirely. + """ + self.cursor.execute('delete from kv where key=?', [key]) + if self.keep_revisions and self.revision and self.cursor.rowcount: + self.cursor.execute( + 'insert into kv_revisions values (?, ?, ?)', + [key, self.revision, json.dumps('DELETED')]) + + def unsetrange(self, keys=None, prefix=""): + """ + Remove a range of keys starting with a common prefix, from the database + entirely. + + :param list keys: List of keys to remove. + :param str prefix: Optional prefix to apply to all keys in ``keys`` + before removing. + """ + if keys is not None: + keys = ['%s%s' % (prefix, key) for key in keys] + self.cursor.execute('delete from kv where key in (%s)' % ','.join(['?'] * len(keys)), keys) + if self.keep_revisions and self.revision and self.cursor.rowcount: + self.cursor.execute( + 'insert into kv_revisions values %s' % ','.join(['(?, ?, ?)'] * len(keys)), + list(itertools.chain.from_iterable((key, self.revision, json.dumps('DELETED')) for key in keys))) + else: + self.cursor.execute('delete from kv where key like ?', + ['%s%%' % prefix]) + if self.keep_revisions and self.revision and self.cursor.rowcount: + self.cursor.execute( + 'insert into kv_revisions values (?, ?, ?)', + ['%s%%' % prefix, self.revision, json.dumps('DELETED')]) + + def set(self, key, value): + """ + Set a value in the database. 
+ + :param str key: Key to set the value for + :param value: Any JSON-serializable value to be set + """ + serialized = json.dumps(value) + + self.cursor.execute('select data from kv where key=?', [key]) + exists = self.cursor.fetchone() + + # Skip mutations to the same value + if exists: + if exists[0] == serialized: + return value + + if not exists: + self.cursor.execute( + 'insert into kv (key, data) values (?, ?)', + (key, serialized)) + else: + self.cursor.execute(''' + update kv + set data = ? + where key = ?''', [serialized, key]) + + # Save + if (not self.keep_revisions) or (not self.revision): + return value + + self.cursor.execute( + 'select 1 from kv_revisions where key=? and revision=?', + [key, self.revision]) + exists = self.cursor.fetchone() + + if not exists: + self.cursor.execute( + '''insert into kv_revisions ( + revision, key, data) values (?, ?, ?)''', + (self.revision, key, serialized)) + else: + self.cursor.execute( + ''' + update kv_revisions + set data = ? + where key = ? + and revision = ?''', + [serialized, key, self.revision]) + + return value + + def delta(self, mapping, prefix): + """ + return a delta containing values that have changed. + """ + previous = self.getrange(prefix, strip=True) + if not previous: + pk = set() + else: + pk = set(previous.keys()) + ck = set(mapping.keys()) + delta = DeltaSet() + + # added + for k in ck.difference(pk): + delta[k] = Delta(None, mapping[k]) + + # removed + for k in pk.difference(ck): + delta[k] = Delta(previous[k], None) + + # changed + for k in pk.intersection(ck): + c = mapping[k] + p = previous[k] + if c != p: + delta[k] = Delta(p, c) + + return delta + + @contextlib.contextmanager + def hook_scope(self, name=""): + """Scope all future interactions to the current hook execution + revision.""" + assert not self.revision + self.cursor.execute( + 'insert into hooks (hook, date) values (?, ?)', + (name or sys.argv[0], + datetime.datetime.utcnow().isoformat())) + self.revision = self.cursor.lastrowid + try: + yield self.revision + self.revision = None + except Exception: + self.flush(False) + self.revision = None + raise + else: + self.flush() + + def flush(self, save=True): + if save: + self.conn.commit() + elif self._closed: + return + else: + self.conn.rollback() + + def _init(self): + self.cursor.execute(''' + create table if not exists kv ( + key text, + data text, + primary key (key) + )''') + self.cursor.execute(''' + create table if not exists kv_revisions ( + key text, + revision integer, + data text, + primary key (key, revision) + )''') + self.cursor.execute(''' + create table if not exists hooks ( + version integer primary key autoincrement, + hook text, + date text + )''') + self.conn.commit() + + def gethistory(self, key, deserialize=False): + self.cursor.execute( + ''' + select kv.revision, kv.key, kv.data, h.hook, h.date + from kv_revisions kv, + hooks h + where kv.key=? + and kv.revision = h.version + ''', [key]) + if deserialize is False: + return self.cursor.fetchall() + return map(_parse_history, self.cursor.fetchall()) + + def debug(self, fh=sys.stderr): + self.cursor.execute('select * from kv') + pprint.pprint(self.cursor.fetchall(), stream=fh) + self.cursor.execute('select * from kv_revisions') + pprint.pprint(self.cursor.fetchall(), stream=fh) + + +def _parse_history(d): + return (d[0], d[1], json.loads(d[2]), d[3], + datetime.datetime.strptime(d[-1], "%Y-%m-%dT%H:%M:%S.%f")) + + +class HookData(object): + """Simple integration for existing hook exec frameworks. 
+
+    Records all unit information, and stores deltas for processing
+    by the hook.
+
+    Sample::
+
+       from charmhelpers.core import hookenv, unitdata
+
+       changes = unitdata.HookData()
+       db = unitdata.kv()
+       hooks = hookenv.Hooks()
+
+       @hooks.hook
+       def config_changed():
+           # View all changes to configuration
+           for changed, (prev, cur) in changes.conf.items():
+               print('config changed', changed,
+                     'previous value', prev,
+                     'current value', cur)
+
+           # Get some unit specific bookkeeping
+           if not db.get('pkg_key'):
+               key = urllib.urlopen('https://example.com/pkg_key').read()
+               db.set('pkg_key', key)
+
+       if __name__ == '__main__':
+           with changes():
+               hooks.execute(sys.argv)
+
+    """
+    def __init__(self):
+        self.kv = kv()
+        self.conf = None
+        self.rels = None
+
+    @contextlib.contextmanager
+    def __call__(self):
+        from charmhelpers.core import hookenv
+        hook_name = hookenv.hook_name()
+
+        with self.kv.hook_scope(hook_name):
+            self._record_charm_version(hookenv.charm_dir())
+            delta_config, delta_relation = self._record_hook(hookenv)
+            yield self.kv, delta_config, delta_relation
+
+    def _record_charm_version(self, charm_dir):
+        # Record revisions. Charm revisions are meaningless to charm
+        # authors, as they don't control the revision, so logic dependent
+        # on revision is not particularly useful; it is, however, useful
+        # for debugging and analysis.
+        charm_rev = open(
+            os.path.join(charm_dir, 'revision')).read().strip()
+        charm_rev = charm_rev or '0'
+        revs = self.kv.get('charm_revisions', [])
+        if charm_rev not in revs:
+            revs.append(charm_rev.strip() or '0')
+            self.kv.set('charm_revisions', revs)
+
+    def _record_hook(self, hookenv):
+        data = hookenv.execution_environment()
+        self.conf = conf_delta = self.kv.delta(data['conf'], 'config')
+        self.rels = rels_delta = self.kv.delta(data['rels'], 'rels')
+        self.kv.set('env', dict(data['env']))
+        self.kv.set('unit', data['unit'])
+        self.kv.set('relid', data.get('relid'))
+        return conf_delta, rels_delta
+
+
+class Record(dict):
+
+    __slots__ = ()
+
+    def __getattr__(self, k):
+        if k in self:
+            return self[k]
+        raise AttributeError(k)
+
+
+class DeltaSet(Record):
+
+    __slots__ = ()
+
+
+Delta = collections.namedtuple('Delta', ['previous', 'current'])
+
+
+_KV = None
+
+
+def kv():
+    global _KV
+
+    # If we are running unit tests, it is useful to use a memory-backed KV
+    # store to avoid concurrency issues when running multiple tests. This
+    # is not a problem when juju is running normally.
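+    #
+    # For example, a test suite might opt in explicitly (illustrative
+    # only; normal charm runs never need to set this):
+    #
+    #     os.environ['CHARM_HELPERS_TESTMODE'] = 'yes'
+    #     db = kv()   # now backed by ':memory:'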
+ + env_var = os.environ.get("CHARM_HELPERS_TESTMODE", "auto").lower() + if env_var not in ["auto", "no", "yes"]: + logging.warning("Unknown value for CHARM_HELPERS_TESTMODE '%s'" + ", assuming 'no'", env_var) + env_var = "no" + + if env_var == "no": + in_memory_db = False + elif env_var == "yes": + in_memory_db = True + elif env_var == "auto": + # If UNIT_STATE_DB is set, respect this request + if "UNIT_STATE_DB" in os.environ: + in_memory_db = False + # Autodetect normal juju execution by looking for juju variables + elif "JUJU_CHARM_DIR" in os.environ or "JUJU_UNIT_NAME" in os.environ: + in_memory_db = False + else: + # We are probably running in unit test mode + logging.warning("Auto-detected unit test environment for KV store.") + in_memory_db = True + else: + # Help the linter realise that in_memory_db is always set + raise Exception("Cannot reach this line") + + if _KV is None: + if in_memory_db: + _KV = Storage(":memory:") + else: + _KV = Storage() + else: + if in_memory_db and _KV.db_path != ":memory:": + logging.warning("Running with in_memory_db and KV is not set to :memory:") + return _KV diff --git a/ceph-radosgw/hooks/charmhelpers/fetch/__init__.py b/ceph-radosgw/hooks/charmhelpers/fetch/__init__.py new file mode 100644 index 00000000..1283f25b --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/fetch/__init__.py @@ -0,0 +1,208 @@ +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import importlib +from charmhelpers.osplatform import get_platform +from yaml import safe_load +from charmhelpers.core.hookenv import ( + config, + log, +) + +from urllib.parse import urlparse, urlunparse + + +# The order of this list is very important. Handlers should be listed in from +# least- to most-specific URL matching. +FETCH_HANDLERS = ( + 'charmhelpers.fetch.archiveurl.ArchiveUrlFetchHandler', + 'charmhelpers.fetch.bzrurl.BzrUrlFetchHandler', + 'charmhelpers.fetch.giturl.GitUrlFetchHandler', +) + + +class SourceConfigError(Exception): + pass + + +class UnhandledSource(Exception): + pass + + +class AptLockError(Exception): + pass + + +class GPGKeyError(Exception): + """Exception occurs when a GPG key cannot be fetched or used. The message + indicates what the problem is. + """ + pass + + +class BaseFetchHandler(object): + + """Base class for FetchHandler implementations in fetch plugins""" + + def can_handle(self, source): + """Returns True if the source can be handled. Otherwise returns + a string explaining why it cannot""" + return "Wrong source type" + + def install(self, source): + """Try to download and unpack the source. 
Return the path to the + unpacked files or raise UnhandledSource.""" + raise UnhandledSource("Wrong source type {}".format(source)) + + def parse_url(self, url): + return urlparse(url) + + def base_url(self, url): + """Return url without querystring or fragment""" + parts = list(self.parse_url(url)) + parts[4:] = ['' for i in parts[4:]] + return urlunparse(parts) + + +__platform__ = get_platform() +module = "charmhelpers.fetch.%s" % __platform__ +fetch = importlib.import_module(module) + +filter_installed_packages = fetch.filter_installed_packages +filter_missing_packages = fetch.filter_missing_packages +install = fetch.apt_install +upgrade = fetch.apt_upgrade +update = _fetch_update = fetch.apt_update +purge = fetch.apt_purge +add_source = fetch.add_source + +if __platform__ == "ubuntu": + apt_cache = fetch.apt_cache + apt_install = fetch.apt_install + apt_update = fetch.apt_update + apt_upgrade = fetch.apt_upgrade + apt_purge = fetch.apt_purge + apt_autoremove = fetch.apt_autoremove + apt_mark = fetch.apt_mark + apt_hold = fetch.apt_hold + apt_unhold = fetch.apt_unhold + import_key = fetch.import_key + get_upstream_version = fetch.get_upstream_version + apt_pkg = fetch.ubuntu_apt_pkg + get_apt_dpkg_env = fetch.get_apt_dpkg_env + get_installed_version = fetch.get_installed_version + OPENSTACK_RELEASES = fetch.OPENSTACK_RELEASES + UBUNTU_OPENSTACK_RELEASE = fetch.UBUNTU_OPENSTACK_RELEASE +elif __platform__ == "centos": + yum_search = fetch.yum_search + + +def configure_sources(update=False, + sources_var='install_sources', + keys_var='install_keys'): + """Configure multiple sources from charm configuration. + + The lists are encoded as yaml fragments in the configuration. + The fragment needs to be included as a string. Sources and their + corresponding keys are of the types supported by add_source(). + + Example config: + install_sources: | + - "ppa:foo" + - "http://example.com/repo precise main" + install_keys: | + - null + - "a1b2c3d4" + + Note that 'null' (a.k.a. None) should not be quoted. + """ + sources = safe_load((config(sources_var) or '').strip()) or [] + keys = safe_load((config(keys_var) or '').strip()) or None + + if isinstance(sources, str): + sources = [sources] + + if keys is None: + for source in sources: + add_source(source, None) + else: + if isinstance(keys, str): + keys = [keys] + + if len(sources) != len(keys): + raise SourceConfigError( + 'Install sources and keys lists are different lengths') + for source, key in zip(sources, keys): + add_source(source, key) + if update: + _fetch_update(fatal=True) + + +def install_remote(source, *args, **kwargs): + """Install a file tree from a remote source. + + The specified source should be a url of the form: + scheme://[host]/path[#[option=value][&...]] + + Schemes supported are based on this modules submodules. + Options supported are submodule-specific. + Additional arguments are passed through to the submodule. + + For example:: + + dest = install_remote('http://example.com/archive.tgz', + checksum='deadbeef', + hash_type='sha1') + + This will download `archive.tgz`, validate it using SHA1 and, if + the file is ok, extract it and return the directory in which it + was extracted. If the checksum fails, it will raise + :class:`charmhelpers.core.host.ChecksumError`. + """ + # We ONLY check for True here because can_handle may return a string + # explaining why it can't handle a given source. 
+ handlers = [h for h in plugins() if h.can_handle(source) is True] + for handler in handlers: + try: + return handler.install(source, *args, **kwargs) + except UnhandledSource as e: + log('Install source attempt unsuccessful: {}'.format(e), + level='WARNING') + raise UnhandledSource("No handler found for source {}".format(source)) + + +def install_from_config(config_var_name): + """Install a file from config.""" + charm_config = config() + source = charm_config[config_var_name] + return install_remote(source) + + +def plugins(fetch_handlers=None): + if not fetch_handlers: + fetch_handlers = FETCH_HANDLERS + plugin_list = [] + for handler_name in fetch_handlers: + package, classname = handler_name.rsplit('.', 1) + try: + handler_class = getattr( + importlib.import_module(package), + classname) + plugin_list.append(handler_class()) + except NotImplementedError: + # Skip missing plugins so that they can be omitted from + # installation if desired + log("FetchHandler {} not found, skipping plugin".format( + handler_name)) + return plugin_list diff --git a/ceph-radosgw/hooks/charmhelpers/fetch/archiveurl.py b/ceph-radosgw/hooks/charmhelpers/fetch/archiveurl.py new file mode 100644 index 00000000..0e35c901 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/fetch/archiveurl.py @@ -0,0 +1,173 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import contextlib +import os +import hashlib +import re + +from charmhelpers.fetch import ( + BaseFetchHandler, + UnhandledSource +) +from charmhelpers.payload.archive import ( + get_archive_handler, + extract, +) +from charmhelpers.core.hookenv import ( + env_proxy_settings, +) +from charmhelpers.core.host import mkdir, check_hash + +from urllib.request import ( + build_opener, install_opener, urlopen, urlretrieve, + HTTPPasswordMgrWithDefaultRealm, HTTPBasicAuthHandler, + ProxyHandler +) +from urllib.parse import urlparse, urlunparse, parse_qs +from urllib.error import URLError + + +def splituser(host): + _userprog = re.compile('^(.*)@(.*)$') + match = _userprog.match(host) + if match: + return match.group(1, 2) + return None, host + + +def splitpasswd(user): + _passwdprog = re.compile('^([^:]*):(.*)$', re.S) + match = _passwdprog.match(user) + if match: + return match.group(1, 2) + return user, None + + +@contextlib.contextmanager +def proxy_env(): + """ + Creates a context which temporarily modifies the proxy settings in os.environ. + """ + restore = {**os.environ} # Copy the current os.environ + juju_proxies = env_proxy_settings() or {} + os.environ.update(**juju_proxies) # Insert or Update the os.environ + yield os.environ + for key in juju_proxies: + del os.environ[key] # remove any keys which were added or updated + os.environ.update(**restore) # restore any original values + + +class ArchiveUrlFetchHandler(BaseFetchHandler): + """ + Handler to download archive files from arbitrary URLs. + + Can fetch from http, https, ftp, and file URLs. + + Can install either tarballs (.tar, .tgz, .tbz2, etc) or zip files. 
+ + Installs the contents of the archive in $CHARM_DIR/fetched/. + """ + def can_handle(self, source): + url_parts = self.parse_url(source) + if url_parts.scheme not in ('http', 'https', 'ftp', 'file'): + # XXX: Why is this returning a boolean and a string? It's + # doomed to fail since "bool(can_handle('foo://'))" will be True. + return "Wrong source type" + if get_archive_handler(self.base_url(source)): + return True + return False + + def download(self, source, dest): + """ + Download an archive file. + + :param str source: URL pointing to an archive file. + :param str dest: Local path location to download archive file to. + """ + # propagate all exceptions + # URLError, OSError, etc + proto, netloc, path, params, query, fragment = urlparse(source) + handlers = [] + if proto in ('http', 'https'): + auth, barehost = splituser(netloc) + if auth is not None: + source = urlunparse((proto, barehost, path, params, query, fragment)) + username, password = splitpasswd(auth) + passman = HTTPPasswordMgrWithDefaultRealm() + # Realm is set to None in add_password to force the username and password + # to be used whatever the realm + passman.add_password(None, source, username, password) + handlers.append(HTTPBasicAuthHandler(passman)) + + with proxy_env(): + handlers.append(ProxyHandler()) + opener = build_opener(*handlers) + install_opener(opener) + response = urlopen(source) + try: + with open(dest, 'wb') as dest_file: + dest_file.write(response.read()) + except Exception as e: + if os.path.isfile(dest): + os.unlink(dest) + raise e + + # Mandatory file validation via Sha1 or MD5 hashing. + def download_and_validate(self, url, hashsum, validate="sha1"): + tempfile, headers = urlretrieve(url) + check_hash(tempfile, hashsum, validate) + return tempfile + + def install(self, source, dest=None, checksum=None, hash_type='sha1'): + """ + Download and install an archive file, with optional checksum validation. + + The checksum can also be given on the `source` URL's fragment. + For example:: + + handler.install('http://example.com/file.tgz#sha1=deadbeef') + + :param str source: URL pointing to an archive file. + :param str dest: Local destination path to install to. If not given, + installs to `$CHARM_DIR/archives/archive_file_name`. + :param str checksum: If given, validate the archive file after download. + :param str hash_type: Algorithm used to generate `checksum`. + Can be any hash alrgorithm supported by :mod:`hashlib`, + such as md5, sha1, sha256, sha512, etc. 
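+
+        The explicit parameter form is equivalent (values here are
+        illustrative)::
+
+            handler.install('http://example.com/file.tgz',
+                            checksum='deadbeef', hash_type='sha1')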
+ + """ + url_parts = self.parse_url(source) + dest_dir = os.path.join(os.environ.get('CHARM_DIR'), 'fetched') + if not os.path.exists(dest_dir): + mkdir(dest_dir, perms=0o755) + dld_file = os.path.join(dest_dir, os.path.basename(url_parts.path)) + try: + self.download(source, dld_file) + except URLError as e: + raise UnhandledSource(e.reason) + except OSError as e: + raise UnhandledSource(e.strerror) + options = parse_qs(url_parts.fragment) + for key, value in options.items(): + algorithms = hashlib.algorithms_available + if key in algorithms: + if len(value) != 1: + raise TypeError( + "Expected 1 hash value, not %d" % len(value)) + expected = value[0] + check_hash(dld_file, expected, key) + if checksum: + check_hash(dld_file, checksum, hash_type) + return extract(dld_file, dest) diff --git a/ceph-radosgw/hooks/charmhelpers/fetch/bzrurl.py b/ceph-radosgw/hooks/charmhelpers/fetch/bzrurl.py new file mode 100644 index 00000000..c4ab3ff1 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/fetch/bzrurl.py @@ -0,0 +1,76 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from subprocess import STDOUT, check_output +from charmhelpers.fetch import ( + BaseFetchHandler, + UnhandledSource, + filter_installed_packages, + install, +) +from charmhelpers.core.host import mkdir + + +if filter_installed_packages(['bzr']) != []: + install(['bzr']) + if filter_installed_packages(['bzr']) != []: + raise NotImplementedError('Unable to install bzr') + + +class BzrUrlFetchHandler(BaseFetchHandler): + """Handler for bazaar branches via generic and lp URLs.""" + + def can_handle(self, source): + url_parts = self.parse_url(source) + if url_parts.scheme not in ('bzr+ssh', 'lp', ''): + return False + elif not url_parts.scheme: + return os.path.exists(os.path.join(source, '.bzr')) + else: + return True + + def branch(self, source, dest, revno=None): + if not self.can_handle(source): + raise UnhandledSource("Cannot handle {}".format(source)) + cmd_opts = [] + if revno: + cmd_opts += ['-r', str(revno)] + if os.path.exists(dest): + cmd = ['bzr', 'pull'] + cmd += cmd_opts + cmd += ['--overwrite', '-d', dest, source] + else: + cmd = ['bzr', 'branch'] + cmd += cmd_opts + cmd += [source, dest] + check_output(cmd, stderr=STDOUT) + + def install(self, source, dest=None, revno=None): + url_parts = self.parse_url(source) + branch_name = url_parts.path.strip("/").split("/")[-1] + if dest: + dest_dir = os.path.join(dest, branch_name) + else: + dest_dir = os.path.join(os.environ.get('CHARM_DIR'), "fetched", + branch_name) + + if dest and not os.path.exists(dest): + mkdir(dest, perms=0o755) + + try: + self.branch(source, dest_dir, revno) + except OSError as e: + raise UnhandledSource(e.strerror) + return dest_dir diff --git a/ceph-radosgw/hooks/charmhelpers/fetch/centos.py b/ceph-radosgw/hooks/charmhelpers/fetch/centos.py new file mode 100644 index 00000000..f8492018 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/fetch/centos.py @@ -0,0 +1,170 @@ +# Copyright 2014-2015 
Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import subprocess
+import os
+import time
+import yum
+
+from tempfile import NamedTemporaryFile
+from charmhelpers.core.hookenv import log
+
+YUM_NO_LOCK = 1  # The return code for "couldn't acquire lock" in YUM.
+YUM_NO_LOCK_RETRY_DELAY = 10  # Wait 10 seconds between YUM lock checks.
+YUM_NO_LOCK_RETRY_COUNT = 30  # Retry to acquire the lock X times.
+
+
+def filter_installed_packages(packages):
+    """Return a list of packages that require installation."""
+    yb = yum.YumBase()
+    package_list = yb.doPackageLists()
+    temp_cache = {p.base_package_name: 1 for p in package_list['installed']}
+
+    _pkgs = [p for p in packages if not temp_cache.get(p, False)]
+    return _pkgs
+
+
+def install(packages, options=None, fatal=False):
+    """Install one or more packages."""
+    cmd = ['yum', '--assumeyes']
+    if options is not None:
+        cmd.extend(options)
+    cmd.append('install')
+    if isinstance(packages, str):
+        cmd.append(packages)
+    else:
+        cmd.extend(packages)
+    log("Installing {} with options: {}".format(packages,
+                                                options))
+    _run_yum_command(cmd, fatal)
+
+
+def upgrade(options=None, fatal=False, dist=False):
+    """Upgrade all packages."""
+    cmd = ['yum', '--assumeyes']
+    if options is not None:
+        cmd.extend(options)
+    cmd.append('upgrade')
+    log("Upgrading with options: {}".format(options))
+    _run_yum_command(cmd, fatal)
+
+
+def update(fatal=False):
+    """Update local yum cache."""
+    cmd = ['yum', '--assumeyes', 'update']
+    log("Update with fatal: {}".format(fatal))
+    _run_yum_command(cmd, fatal)
+
+
+def purge(packages, fatal=False):
+    """Purge one or more packages."""
+    cmd = ['yum', '--assumeyes', 'remove']
+    if isinstance(packages, str):
+        cmd.append(packages)
+    else:
+        cmd.extend(packages)
+    log("Purging {}".format(packages))
+    _run_yum_command(cmd, fatal)
+
+
+def yum_search(packages):
+    """Search for a package."""
+    output = {}
+    cmd = ['yum', 'search']
+    if isinstance(packages, str):
+        cmd.append(packages)
+    else:
+        cmd.extend(packages)
+    log("Searching for {}".format(packages))
+    # Decode so the membership test below compares str against str.
+    result = subprocess.check_output(cmd).decode('utf-8')
+    for package in list(packages):
+        output[package] = package in result
+    return output
+
+
+def add_source(source, key=None):
+    """Add a package source to this system.
+
+    @param source: a URL with a rpm package
+
+    @param key: A key to be added to the system's keyring and used
+    to verify the signatures on packages. Ideally, this should be an
+    ASCII format GPG public key including the block headers. A GPG key
+    id may also be used, but be aware that only insecure protocols are
+    available to retrieve the actual public key from a public keyserver
+    placing your Juju environment at risk.
+    """
+    if source is None:
+        log('Source is not present. Skipping')
+        return
+
+    if source.startswith('http'):
+        directory = '/etc/yum.repos.d/'
+        for filename in os.listdir(directory):
+            with open(directory + filename, 'r') as rpm_file:
+                if source in rpm_file.read():
+                    break
+        else:
+            log("Add source: {!r}".format(source))
+            # write in the charms.repo
+            with open(directory + 'Charms.repo', 'a') as rpm_file:
+                rpm_file.write('[%s]\n' % source[7:].replace('/', '_'))
+                rpm_file.write('name=%s\n' % source[7:])
+                rpm_file.write('baseurl=%s\n\n' % source)
+    else:
+        log("Unknown source: {!r}".format(source))
+
+    if key:
+        if '-----BEGIN PGP PUBLIC KEY BLOCK-----' in key:
+            with NamedTemporaryFile('w+') as key_file:
+                key_file.write(key)
+                key_file.flush()
+                key_file.seek(0)
+                subprocess.check_call(['rpm', '--import', key_file.name])
+        else:
+            subprocess.check_call(['rpm', '--import', key])
+
+
+def _run_yum_command(cmd, fatal=False):
+    """Run a YUM command.
+
+    Checks the output and retries if the fatal flag is set to True.
+
+    :param: cmd: str: The yum command to run.
+    :param: fatal: bool: Whether the command's output should be checked and
+        retried.
+    """
+    env = os.environ.copy()
+
+    if fatal:
+        retry_count = 0
+        result = None
+
+        # If the command is considered "fatal", we need to retry if the yum
+        # lock was not acquired.
+
+        while result is None or result == YUM_NO_LOCK:
+            try:
+                result = subprocess.check_call(cmd, env=env)
+            except subprocess.CalledProcessError as e:
+                retry_count = retry_count + 1
+                if retry_count > YUM_NO_LOCK_RETRY_COUNT:
+                    raise
+                result = e.returncode
+                log("Couldn't acquire YUM lock. Will retry in {} seconds."
+                    "".format(YUM_NO_LOCK_RETRY_DELAY))
+                time.sleep(YUM_NO_LOCK_RETRY_DELAY)
+
+    else:
+        subprocess.call(cmd, env=env)
diff --git a/ceph-radosgw/hooks/charmhelpers/fetch/giturl.py b/ceph-radosgw/hooks/charmhelpers/fetch/giturl.py
new file mode 100644
index 00000000..070ca9bb
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/fetch/giturl.py
@@ -0,0 +1,69 @@
+# Copyright 2014-2015 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
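+
+# Usage sketch (illustrative only; the repository URL is a placeholder,
+# not a real dependency of this charm):
+#
+#     handler = GitUrlFetchHandler()
+#     if handler.can_handle('https://github.com/example/repo'):
+#         path = handler.install('https://github.com/example/repo',
+#                                branch='main')
+#     # 'path' then points at $CHARM_DIR/fetched/repo (or <dest>/repo).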
+ +import os +from subprocess import check_output, CalledProcessError, STDOUT +from charmhelpers.fetch import ( + BaseFetchHandler, + UnhandledSource, + filter_installed_packages, + install, +) + +if filter_installed_packages(['git']) != []: + install(['git']) + if filter_installed_packages(['git']) != []: + raise NotImplementedError('Unable to install git') + + +class GitUrlFetchHandler(BaseFetchHandler): + """Handler for git branches via generic and github URLs.""" + + def can_handle(self, source): + url_parts = self.parse_url(source) + # TODO (mattyw) no support for ssh git@ yet + if url_parts.scheme not in ('http', 'https', 'git', ''): + return False + elif not url_parts.scheme: + return os.path.exists(os.path.join(source, '.git')) + else: + return True + + def clone(self, source, dest, branch="master", depth=None): + if not self.can_handle(source): + raise UnhandledSource("Cannot handle {}".format(source)) + + if os.path.exists(dest): + cmd = ['git', '-C', dest, 'pull', source, branch] + else: + cmd = ['git', 'clone', source, dest, '--branch', branch] + if depth: + cmd.extend(['--depth', depth]) + check_output(cmd, stderr=STDOUT) + + def install(self, source, branch="master", dest=None, depth=None): + url_parts = self.parse_url(source) + branch_name = url_parts.path.strip("/").split("/")[-1] + if dest: + dest_dir = os.path.join(dest, branch_name) + else: + dest_dir = os.path.join(os.environ.get('CHARM_DIR'), "fetched", + branch_name) + try: + self.clone(source, dest_dir, branch, depth) + except CalledProcessError as e: + raise UnhandledSource(e) + except OSError as e: + raise UnhandledSource(e.strerror) + return dest_dir diff --git a/ceph-radosgw/hooks/charmhelpers/fetch/python/__init__.py b/ceph-radosgw/hooks/charmhelpers/fetch/python/__init__.py new file mode 100644 index 00000000..bff99dc9 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/fetch/python/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2014-2019 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-radosgw/hooks/charmhelpers/fetch/python/debug.py b/ceph-radosgw/hooks/charmhelpers/fetch/python/debug.py new file mode 100644 index 00000000..dd5cca80 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/fetch/python/debug.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python +# coding: utf-8 + +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
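+
+# Usage sketch (illustrative): drop this into a misbehaving hook to get a
+# remote pdb prompt, then attach from another host. Port 4444 is merely the
+# module default below, not a requirement:
+#
+#     from charmhelpers.fetch.python.debug import set_trace
+#     set_trace(addr="0.0.0.0", port=4444)
+#     # ...then, from a workstation:  telnet <unit-address> 4444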
+
+import atexit
+import sys
+
+from charmhelpers.fetch.python.rpdb import Rpdb
+from charmhelpers.core.hookenv import (
+    open_port,
+    close_port,
+    ERROR,
+    log
+)
+
+__author__ = "Jorge Niedbalski "
+
+DEFAULT_ADDR = "0.0.0.0"
+DEFAULT_PORT = 4444
+
+
+def _error(message):
+    log(message, level=ERROR)
+
+
+def set_trace(addr=DEFAULT_ADDR, port=DEFAULT_PORT):
+    """
+    Set a trace point using the remote debugger
+    """
+    atexit.register(close_port, port)
+    try:
+        log("Starting a remote python debugger session on %s:%s" % (addr,
+                                                                    port))
+        open_port(port)
+        debugger = Rpdb(addr=addr, port=port)
+        debugger.set_trace(sys._getframe().f_back)
+    except Exception:
+        _error("Cannot start a remote debug session on %s:%s" % (addr,
+                                                                 port))
diff --git a/ceph-radosgw/hooks/charmhelpers/fetch/python/packages.py b/ceph-radosgw/hooks/charmhelpers/fetch/python/packages.py
new file mode 100644
index 00000000..93f1fa3f
--- /dev/null
+++ b/ceph-radosgw/hooks/charmhelpers/fetch/python/packages.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# Copyright 2014-2021 Canonical Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import subprocess
+import sys
+
+from charmhelpers.fetch import apt_install, apt_update
+from charmhelpers.core.hookenv import charm_dir, log
+
+__author__ = "Jorge Niedbalski "
+
+
+def pip_execute(*args, **kwargs):
+    """Overridden pip_execute() to stop sys.path being changed.
+
+    The act of importing main from the pip module seems to add wheels
+    from /usr/share/python-wheels (which are installed by various tools)
+    to sys.path. This function ensures that sys.path remains the same
+    after the call is executed.
+    """
+    try:
+        _path = sys.path
+        try:
+            from pip import main as _pip_execute
+        except ImportError:
+            apt_update()
+            apt_install('python3-pip')
+            from pip import main as _pip_execute
+        _pip_execute(*args, **kwargs)
+    finally:
+        sys.path = _path
+
+
+def parse_options(given, available):
+    """Given a set of options, check if available"""
+    for key, value in sorted(given.items()):
+        if not value:
+            continue
+        if key in available:
+            yield "--{0}={1}".format(key, value)
+
+
+def pip_install_requirements(requirements, constraints=None, **options):
+    """Install a requirements file.
+
+    :param constraints: Path to pip constraints file.
+ http://pip.readthedocs.org/en/stable/user_guide/#constraints-files + """ + command = ["install"] + + available_options = ('proxy', 'src', 'log', ) + for option in parse_options(options, available_options): + command.append(option) + + command.append("-r {0}".format(requirements)) + if constraints: + command.append("-c {0}".format(constraints)) + log("Installing from file: {} with constraints {} " + "and options: {}".format(requirements, constraints, command)) + else: + log("Installing from file: {} with options: {}".format(requirements, + command)) + pip_execute(command) + + +def pip_install(package, fatal=False, upgrade=False, venv=None, + constraints=None, **options): + """Install a python package""" + if venv: + venv_python = os.path.join(venv, 'bin/pip') + command = [venv_python, "install"] + else: + command = ["install"] + + available_options = ('proxy', 'src', 'log', 'index-url', ) + for option in parse_options(options, available_options): + command.append(option) + + if upgrade: + command.append('--upgrade') + + if constraints: + command.extend(['-c', constraints]) + + if isinstance(package, list): + command.extend(package) + else: + command.append(package) + + log("Installing {} package with options: {}".format(package, + command)) + if venv: + subprocess.check_call(command) + else: + pip_execute(command) + + +def pip_uninstall(package, **options): + """Uninstall a python package""" + command = ["uninstall", "-q", "-y"] + + available_options = ('proxy', 'log', ) + for option in parse_options(options, available_options): + command.append(option) + + if isinstance(package, list): + command.extend(package) + else: + command.append(package) + + log("Uninstalling {} package with options: {}".format(package, + command)) + pip_execute(command) + + +def pip_list(): + """Returns the list of current python installed packages + """ + return pip_execute(["list"]) + + +def pip_create_virtualenv(path=None): + """Create an isolated Python environment.""" + apt_install(['python3-virtualenv', 'virtualenv']) + extra_flags = ['--python=python3'] + + if path: + venv_path = path + else: + venv_path = os.path.join(charm_dir(), 'venv') + + if not os.path.exists(venv_path): + subprocess.check_call(['virtualenv', venv_path] + extra_flags) diff --git a/ceph-radosgw/hooks/charmhelpers/fetch/python/rpdb.py b/ceph-radosgw/hooks/charmhelpers/fetch/python/rpdb.py new file mode 100644 index 00000000..9b31610c --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/fetch/python/rpdb.py @@ -0,0 +1,56 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
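+
+# Usage sketch (illustrative): Rpdb swaps sys.stdin/stdout for a socket so
+# that any TCP client can drive the debugger; 'continue' (or EOF) detaches
+# and restores the original streams:
+#
+#     from charmhelpers.fetch.python.rpdb import Rpdb
+#     Rpdb(addr="127.0.0.1", port=4444).set_trace()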
+ +"""Remote Python Debugger (pdb wrapper).""" + +import pdb +import socket +import sys + +__author__ = "Bertrand Janin " +__version__ = "0.1.3" + + +class Rpdb(pdb.Pdb): + + def __init__(self, addr="127.0.0.1", port=4444): + """Initialize the socket and initialize pdb.""" + + # Backup stdin and stdout before replacing them by the socket handle + self.old_stdout = sys.stdout + self.old_stdin = sys.stdin + + # Open a 'reusable' socket to let the webapp reload on the same port + self.skt = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.skt.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, True) + self.skt.bind((addr, port)) + self.skt.listen(1) + (clientsocket, address) = self.skt.accept() + handle = clientsocket.makefile('rw') + pdb.Pdb.__init__(self, completekey='tab', stdin=handle, stdout=handle) + sys.stdout = sys.stdin = handle + + def shutdown(self): + """Revert stdin and stdout, close the socket.""" + sys.stdout = self.old_stdout + sys.stdin = self.old_stdin + self.skt.close() + self.set_continue() + + def do_continue(self, arg): + """Stop all operation on ``continue``.""" + self.shutdown() + return 1 + + do_EOF = do_quit = do_exit = do_c = do_cont = do_continue diff --git a/ceph-radosgw/hooks/charmhelpers/fetch/python/version.py b/ceph-radosgw/hooks/charmhelpers/fetch/python/version.py new file mode 100644 index 00000000..3eb42103 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/fetch/python/version.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python +# coding: utf-8 + +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys + +__author__ = "Jorge Niedbalski " + + +def current_version(): + """Current system python version""" + return sys.version_info + + +def current_version_string(): + """Current system python version as string major.minor.micro""" + return "{0}.{1}.{2}".format(sys.version_info.major, + sys.version_info.minor, + sys.version_info.micro) diff --git a/ceph-radosgw/hooks/charmhelpers/fetch/snap.py b/ceph-radosgw/hooks/charmhelpers/fetch/snap.py new file mode 100644 index 00000000..7ab7ce3e --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/fetch/snap.py @@ -0,0 +1,150 @@ +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Charm helpers snap for classic charms. 
+
+If writing reactive charms, use the snap layer:
+https://lists.ubuntu.com/archives/snapcraft/2016-September/001114.html
+"""
+import subprocess
+import os
+from time import sleep
+from charmhelpers.core.hookenv import log
+
+__author__ = 'Joseph Borg '
+
+# The return code for "couldn't acquire lock" in Snap
+# (hopefully this will be improved).
+SNAP_NO_LOCK = 1
+SNAP_NO_LOCK_RETRY_DELAY = 10  # Wait X seconds between Snap lock checks.
+SNAP_NO_LOCK_RETRY_COUNT = 30  # Retry to acquire the lock X times.
+SNAP_CHANNELS = [
+    'edge',
+    'beta',
+    'candidate',
+    'stable',
+]
+
+
+class CouldNotAcquireLockException(Exception):
+    pass
+
+
+class InvalidSnapChannel(Exception):
+    pass
+
+
+def _snap_exec(commands):
+    """
+    Execute snap commands.
+
+    :param commands: List commands
+    :return: Integer exit code
+    """
+    assert isinstance(commands, list)
+
+    retry_count = 0
+    return_code = None
+
+    while return_code is None or return_code == SNAP_NO_LOCK:
+        try:
+            return_code = subprocess.check_call(['snap'] + commands,
+                                                env=os.environ)
+        except subprocess.CalledProcessError as e:
+            retry_count += 1
+            if retry_count > SNAP_NO_LOCK_RETRY_COUNT:
+                raise CouldNotAcquireLockException(
+                    'Could not acquire lock after {} attempts'
+                    .format(SNAP_NO_LOCK_RETRY_COUNT))
+            return_code = e.returncode
+            log('Snap failed to acquire lock, trying again in {} seconds.'
+                .format(SNAP_NO_LOCK_RETRY_DELAY), level='WARN')
+            sleep(SNAP_NO_LOCK_RETRY_DELAY)
+
+    return return_code
+
+
+def snap_install(packages, *flags):
+    """
+    Install a snap package.
+
+    :param packages: String or List String package name
+    :param flags: List String flags to pass to install command
+    :return: Integer return code from snap
+    """
+    if type(packages) is not list:
+        packages = [packages]
+
+    flags = list(flags)
+
+    message = 'Installing snap(s) "%s"' % ', '.join(packages)
+    if flags:
+        message += ' with option(s) "%s"' % ', '.join(flags)
+
+    log(message, level='INFO')
+    return _snap_exec(['install'] + flags + packages)
+
+
+def snap_remove(packages, *flags):
+    """
+    Remove a snap package.
+
+    :param packages: String or List String package name
+    :param flags: List String flags to pass to remove command
+    :return: Integer return code from snap
+    """
+    if type(packages) is not list:
+        packages = [packages]
+
+    flags = list(flags)
+
+    message = 'Removing snap(s) "%s"' % ', '.join(packages)
+    if flags:
+        message += ' with options "%s"' % ', '.join(flags)
+
+    log(message, level='INFO')
+    return _snap_exec(['remove'] + flags + packages)
+
+
+def snap_refresh(packages, *flags):
+    """
+    Refresh / Update snap package.
+ + :param packages: String or List String package name + :param flags: List String flags to pass to refresh command + :return: Integer return code from snap + """ + if type(packages) is not list: + packages = [packages] + + flags = list(flags) + + message = 'Refreshing snap(s) "%s"' % ', '.join(packages) + if flags: + message += ' with options "%s"' % ', '.join(flags) + + log(message, level='INFO') + return _snap_exec(['refresh'] + flags + packages) + + +def valid_snap_channel(channel): + """ Validate snap channel exists + + :raises InvalidSnapChannel: When channel does not exist + :return: Boolean + """ + if channel.lower() in SNAP_CHANNELS: + return True + else: + raise InvalidSnapChannel("Invalid Snap Channel: {}".format(channel)) diff --git a/ceph-radosgw/hooks/charmhelpers/fetch/ubuntu.py b/ceph-radosgw/hooks/charmhelpers/fetch/ubuntu.py new file mode 100644 index 00000000..d0089eb7 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/fetch/ubuntu.py @@ -0,0 +1,1061 @@ +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import OrderedDict +import platform +import re +import subprocess +import sys +import time + +from charmhelpers import deprecate +from charmhelpers.core.host import get_distrib_codename, get_system_env + +from charmhelpers.core.hookenv import ( + log, + DEBUG, + WARNING, + env_proxy_settings, +) +from charmhelpers.fetch import SourceConfigError, GPGKeyError +from charmhelpers.fetch import ubuntu_apt_pkg + +PROPOSED_POCKET = ( + "# Proposed\n" + "deb http://archive.ubuntu.com/ubuntu {}-proposed main universe " + "multiverse restricted\n") +PROPOSED_PORTS_POCKET = ( + "# Proposed\n" + "deb http://ports.ubuntu.com/ubuntu-ports {}-proposed main universe " + "multiverse restricted\n") +# Only supports 64bit and ppc64 at the moment. 
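+# (Lookup sketch, illustrative: _add_proposed() further down does
+#  ARCH_TO_PROPOSED_POCKET[platform.machine()].format(release) to pick the
+#  deb line for this machine; note the mapping also covers aarch64 and
+#  s390x via the ports archive.)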
+ARCH_TO_PROPOSED_POCKET = { + 'x86_64': PROPOSED_POCKET, + 'ppc64le': PROPOSED_PORTS_POCKET, + 'aarch64': PROPOSED_PORTS_POCKET, + 's390x': PROPOSED_PORTS_POCKET, +} +CLOUD_ARCHIVE_URL = "http://ubuntu-cloud.archive.canonical.com/ubuntu" +CLOUD_ARCHIVE_KEY_ID = '5EDB1B62EC4926EA' +CLOUD_ARCHIVE = """# Ubuntu Cloud Archive +deb http://ubuntu-cloud.archive.canonical.com/ubuntu {} main +""" +CLOUD_ARCHIVE_POCKETS = { + # Folsom + 'folsom': 'precise-updates/folsom', + 'folsom/updates': 'precise-updates/folsom', + 'precise-folsom': 'precise-updates/folsom', + 'precise-folsom/updates': 'precise-updates/folsom', + 'precise-updates/folsom': 'precise-updates/folsom', + 'folsom/proposed': 'precise-proposed/folsom', + 'precise-folsom/proposed': 'precise-proposed/folsom', + 'precise-proposed/folsom': 'precise-proposed/folsom', + # Grizzly + 'grizzly': 'precise-updates/grizzly', + 'grizzly/updates': 'precise-updates/grizzly', + 'precise-grizzly': 'precise-updates/grizzly', + 'precise-grizzly/updates': 'precise-updates/grizzly', + 'precise-updates/grizzly': 'precise-updates/grizzly', + 'grizzly/proposed': 'precise-proposed/grizzly', + 'precise-grizzly/proposed': 'precise-proposed/grizzly', + 'precise-proposed/grizzly': 'precise-proposed/grizzly', + # Havana + 'havana': 'precise-updates/havana', + 'havana/updates': 'precise-updates/havana', + 'precise-havana': 'precise-updates/havana', + 'precise-havana/updates': 'precise-updates/havana', + 'precise-updates/havana': 'precise-updates/havana', + 'havana/proposed': 'precise-proposed/havana', + 'precise-havana/proposed': 'precise-proposed/havana', + 'precise-proposed/havana': 'precise-proposed/havana', + # Icehouse + 'icehouse': 'precise-updates/icehouse', + 'icehouse/updates': 'precise-updates/icehouse', + 'precise-icehouse': 'precise-updates/icehouse', + 'precise-icehouse/updates': 'precise-updates/icehouse', + 'precise-updates/icehouse': 'precise-updates/icehouse', + 'icehouse/proposed': 'precise-proposed/icehouse', + 'precise-icehouse/proposed': 'precise-proposed/icehouse', + 'precise-proposed/icehouse': 'precise-proposed/icehouse', + # Juno + 'juno': 'trusty-updates/juno', + 'juno/updates': 'trusty-updates/juno', + 'trusty-juno': 'trusty-updates/juno', + 'trusty-juno/updates': 'trusty-updates/juno', + 'trusty-updates/juno': 'trusty-updates/juno', + 'juno/proposed': 'trusty-proposed/juno', + 'trusty-juno/proposed': 'trusty-proposed/juno', + 'trusty-proposed/juno': 'trusty-proposed/juno', + # Kilo + 'kilo': 'trusty-updates/kilo', + 'kilo/updates': 'trusty-updates/kilo', + 'trusty-kilo': 'trusty-updates/kilo', + 'trusty-kilo/updates': 'trusty-updates/kilo', + 'trusty-updates/kilo': 'trusty-updates/kilo', + 'kilo/proposed': 'trusty-proposed/kilo', + 'trusty-kilo/proposed': 'trusty-proposed/kilo', + 'trusty-proposed/kilo': 'trusty-proposed/kilo', + # Liberty + 'liberty': 'trusty-updates/liberty', + 'liberty/updates': 'trusty-updates/liberty', + 'trusty-liberty': 'trusty-updates/liberty', + 'trusty-liberty/updates': 'trusty-updates/liberty', + 'trusty-updates/liberty': 'trusty-updates/liberty', + 'liberty/proposed': 'trusty-proposed/liberty', + 'trusty-liberty/proposed': 'trusty-proposed/liberty', + 'trusty-proposed/liberty': 'trusty-proposed/liberty', + # Mitaka + 'mitaka': 'trusty-updates/mitaka', + 'mitaka/updates': 'trusty-updates/mitaka', + 'trusty-mitaka': 'trusty-updates/mitaka', + 'trusty-mitaka/updates': 'trusty-updates/mitaka', + 'trusty-updates/mitaka': 'trusty-updates/mitaka', + 'mitaka/proposed': 'trusty-proposed/mitaka', + 
'trusty-mitaka/proposed': 'trusty-proposed/mitaka', + 'trusty-proposed/mitaka': 'trusty-proposed/mitaka', + # Newton + 'newton': 'xenial-updates/newton', + 'newton/updates': 'xenial-updates/newton', + 'xenial-newton': 'xenial-updates/newton', + 'xenial-newton/updates': 'xenial-updates/newton', + 'xenial-updates/newton': 'xenial-updates/newton', + 'newton/proposed': 'xenial-proposed/newton', + 'xenial-newton/proposed': 'xenial-proposed/newton', + 'xenial-proposed/newton': 'xenial-proposed/newton', + # Ocata + 'ocata': 'xenial-updates/ocata', + 'ocata/updates': 'xenial-updates/ocata', + 'xenial-ocata': 'xenial-updates/ocata', + 'xenial-ocata/updates': 'xenial-updates/ocata', + 'xenial-updates/ocata': 'xenial-updates/ocata', + 'ocata/proposed': 'xenial-proposed/ocata', + 'xenial-ocata/proposed': 'xenial-proposed/ocata', + 'xenial-proposed/ocata': 'xenial-proposed/ocata', + # Pike + 'pike': 'xenial-updates/pike', + 'xenial-pike': 'xenial-updates/pike', + 'xenial-pike/updates': 'xenial-updates/pike', + 'xenial-updates/pike': 'xenial-updates/pike', + 'pike/proposed': 'xenial-proposed/pike', + 'xenial-pike/proposed': 'xenial-proposed/pike', + 'xenial-proposed/pike': 'xenial-proposed/pike', + # Queens + 'queens': 'xenial-updates/queens', + 'xenial-queens': 'xenial-updates/queens', + 'xenial-queens/updates': 'xenial-updates/queens', + 'xenial-updates/queens': 'xenial-updates/queens', + 'queens/proposed': 'xenial-proposed/queens', + 'xenial-queens/proposed': 'xenial-proposed/queens', + 'xenial-proposed/queens': 'xenial-proposed/queens', + # Rocky + 'rocky': 'bionic-updates/rocky', + 'bionic-rocky': 'bionic-updates/rocky', + 'bionic-rocky/updates': 'bionic-updates/rocky', + 'bionic-updates/rocky': 'bionic-updates/rocky', + 'rocky/proposed': 'bionic-proposed/rocky', + 'bionic-rocky/proposed': 'bionic-proposed/rocky', + 'bionic-proposed/rocky': 'bionic-proposed/rocky', + # Stein + 'stein': 'bionic-updates/stein', + 'bionic-stein': 'bionic-updates/stein', + 'bionic-stein/updates': 'bionic-updates/stein', + 'bionic-updates/stein': 'bionic-updates/stein', + 'stein/proposed': 'bionic-proposed/stein', + 'bionic-stein/proposed': 'bionic-proposed/stein', + 'bionic-proposed/stein': 'bionic-proposed/stein', + # Train + 'train': 'bionic-updates/train', + 'bionic-train': 'bionic-updates/train', + 'bionic-train/updates': 'bionic-updates/train', + 'bionic-updates/train': 'bionic-updates/train', + 'train/proposed': 'bionic-proposed/train', + 'bionic-train/proposed': 'bionic-proposed/train', + 'bionic-proposed/train': 'bionic-proposed/train', + # Ussuri + 'ussuri': 'bionic-updates/ussuri', + 'bionic-ussuri': 'bionic-updates/ussuri', + 'bionic-ussuri/updates': 'bionic-updates/ussuri', + 'bionic-updates/ussuri': 'bionic-updates/ussuri', + 'ussuri/proposed': 'bionic-proposed/ussuri', + 'bionic-ussuri/proposed': 'bionic-proposed/ussuri', + 'bionic-proposed/ussuri': 'bionic-proposed/ussuri', + # Victoria + 'victoria': 'focal-updates/victoria', + 'focal-victoria': 'focal-updates/victoria', + 'focal-victoria/updates': 'focal-updates/victoria', + 'focal-updates/victoria': 'focal-updates/victoria', + 'victoria/proposed': 'focal-proposed/victoria', + 'focal-victoria/proposed': 'focal-proposed/victoria', + 'focal-proposed/victoria': 'focal-proposed/victoria', + # Wallaby + 'wallaby': 'focal-updates/wallaby', + 'focal-wallaby': 'focal-updates/wallaby', + 'focal-wallaby/updates': 'focal-updates/wallaby', + 'focal-updates/wallaby': 'focal-updates/wallaby', + 'wallaby/proposed': 'focal-proposed/wallaby', + 'focal-wallaby/proposed': 
'focal-proposed/wallaby', + 'focal-proposed/wallaby': 'focal-proposed/wallaby', + # Xena + 'xena': 'focal-updates/xena', + 'focal-xena': 'focal-updates/xena', + 'focal-xena/updates': 'focal-updates/xena', + 'focal-updates/xena': 'focal-updates/xena', + 'xena/proposed': 'focal-proposed/xena', + 'focal-xena/proposed': 'focal-proposed/xena', + 'focal-proposed/xena': 'focal-proposed/xena', + # Yoga + 'yoga': 'focal-updates/yoga', + 'focal-yoga': 'focal-updates/yoga', + 'focal-yoga/updates': 'focal-updates/yoga', + 'focal-updates/yoga': 'focal-updates/yoga', + 'yoga/proposed': 'focal-proposed/yoga', + 'focal-yoga/proposed': 'focal-proposed/yoga', + 'focal-proposed/yoga': 'focal-proposed/yoga', + # Zed + 'zed': 'jammy-updates/zed', + 'jammy-zed': 'jammy-updates/zed', + 'jammy-zed/updates': 'jammy-updates/zed', + 'jammy-updates/zed': 'jammy-updates/zed', + 'zed/proposed': 'jammy-proposed/zed', + 'jammy-zed/proposed': 'jammy-proposed/zed', + 'jammy-proposed/zed': 'jammy-proposed/zed', + # antelope + 'antelope': 'jammy-updates/antelope', + 'jammy-antelope': 'jammy-updates/antelope', + 'jammy-antelope/updates': 'jammy-updates/antelope', + 'jammy-updates/antelope': 'jammy-updates/antelope', + 'antelope/proposed': 'jammy-proposed/antelope', + 'jammy-antelope/proposed': 'jammy-proposed/antelope', + 'jammy-proposed/antelope': 'jammy-proposed/antelope', + # bobcat + 'bobcat': 'jammy-updates/bobcat', + 'jammy-bobcat': 'jammy-updates/bobcat', + 'jammy-bobcat/updates': 'jammy-updates/bobcat', + 'jammy-updates/bobcat': 'jammy-updates/bobcat', + 'bobcat/proposed': 'jammy-proposed/bobcat', + 'jammy-bobcat/proposed': 'jammy-proposed/bobcat', + 'jammy-proposed/bobcat': 'jammy-proposed/bobcat', + # caracal + 'caracal': 'jammy-updates/caracal', + 'jammy-caracal': 'jammy-updates/caracal', + 'jammy-caracal/updates': 'jammy-updates/caracal', + 'jammy-updates/caracal': 'jammy-updates/caracal', + 'caracal/proposed': 'jammy-proposed/caracal', + 'jammy-caracal/proposed': 'jammy-proposed/caracal', + 'jammy-proposed/caracal': 'jammy-proposed/caracal', + + # OVN + 'focal-ovn-22.03': 'focal-updates/ovn-22.03', + 'focal-ovn-22.03/proposed': 'focal-proposed/ovn-22.03', +} + + +OPENSTACK_RELEASES = ( + 'diablo', + 'essex', + 'folsom', + 'grizzly', + 'havana', + 'icehouse', + 'juno', + 'kilo', + 'liberty', + 'mitaka', + 'newton', + 'ocata', + 'pike', + 'queens', + 'rocky', + 'stein', + 'train', + 'ussuri', + 'victoria', + 'wallaby', + 'xena', + 'yoga', + 'zed', + 'antelope', + 'bobcat', + 'caracal', +) + + +UBUNTU_OPENSTACK_RELEASE = OrderedDict([ + ('oneiric', 'diablo'), + ('precise', 'essex'), + ('quantal', 'folsom'), + ('raring', 'grizzly'), + ('saucy', 'havana'), + ('trusty', 'icehouse'), + ('utopic', 'juno'), + ('vivid', 'kilo'), + ('wily', 'liberty'), + ('xenial', 'mitaka'), + ('yakkety', 'newton'), + ('zesty', 'ocata'), + ('artful', 'pike'), + ('bionic', 'queens'), + ('cosmic', 'rocky'), + ('disco', 'stein'), + ('eoan', 'train'), + ('focal', 'ussuri'), + ('groovy', 'victoria'), + ('hirsute', 'wallaby'), + ('impish', 'xena'), + ('jammy', 'yoga'), + ('kinetic', 'zed'), + ('lunar', 'antelope'), + ('mantic', 'bobcat'), + ('noble', 'caracal'), +]) + + +APT_NO_LOCK = 100 # The return code for "couldn't acquire lock" in APT. +CMD_RETRY_DELAY = 10 # Wait 10 seconds between command retries. +CMD_RETRY_COUNT = 10 # Retry a failing fatal command X times. 
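+
+# Usage sketch (illustrative): CLOUD_ARCHIVE_POCKETS normalises the many
+# accepted spellings of an Ubuntu Cloud Archive pocket to a canonical name,
+# which add_source() then substitutes into the CLOUD_ARCHIVE stanza above:
+#
+#     CLOUD_ARCHIVE_POCKETS['yoga']           # -> 'focal-updates/yoga'
+#     CLOUD_ARCHIVE_POCKETS['yoga/proposed']  # -> 'focal-proposed/yoga'
+#     CLOUD_ARCHIVE.format('focal-updates/yoga')
+#     # -> '# Ubuntu Cloud Archive\ndeb http://ubuntu-cloud.archive.canonical.com/ubuntu focal-updates/yoga main\n'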
+
+
+def filter_installed_packages(packages):
+    """Return a list of packages that require installation."""
+    cache = apt_cache()
+    _pkgs = []
+    for package in packages:
+        try:
+            p = cache[package]
+            p.current_ver or _pkgs.append(package)
+        except KeyError:
+            log('Package {} has no installation candidate.'.format(package),
+                level='WARNING')
+            _pkgs.append(package)
+    return _pkgs
+
+
+def filter_missing_packages(packages):
+    """Return a list of packages that are installed.
+
+    :param packages: list of packages to evaluate.
+    :returns list: Packages that are installed.
+    """
+    return list(
+        set(packages) -
+        set(filter_installed_packages(packages))
+    )
+
+
+def apt_cache(*_, **__):
+    """Shim returning an object simulating the apt_pkg Cache.
+
+    :param _: Accept arguments for compatibility, not used.
+    :type _: any
+    :param __: Accept keyword arguments for compatibility, not used.
+    :type __: any
+    :returns: Object used to interrogate the system apt and dpkg databases.
+    :rtype: ubuntu_apt_pkg.Cache
+    """
+    if 'apt_pkg' in sys.modules:
+        # NOTE(fnordahl): When consumers use the upstream ``apt_pkg`` module
+        # in conjunction with the apt_cache helper function, they may expect us
+        # to call ``apt_pkg.init()`` for them.
+        #
+        # Detect this situation, log a warning and make the call to
+        # ``apt_pkg.init()`` to keep the consumer Python interpreter from
+        # crashing with a segmentation fault.
+        @deprecate(
+            'Support for use of upstream ``apt_pkg`` module in conjunction '
+            'with charm-helpers is deprecated since 2019-06-25',
+            date=None, log=lambda x: log(x, level=WARNING))
+        def one_shot_log():
+            pass
+
+        one_shot_log()
+        sys.modules['apt_pkg'].init()
+    return ubuntu_apt_pkg.Cache()
+
+
+def apt_install(packages, options=None, fatal=False, quiet=False):
+    """Install one or more packages.
+
+    :param packages: Package(s) to install
+    :type packages: Option[str, List[str]]
+    :param options: Options to pass on to apt-get
+    :type options: Option[None, List[str]]
+    :param fatal: Whether the command's output should be checked and
+        retried.
+    :type fatal: bool
+    :param quiet: if True, suppress log message to stdout/stderr
+    :type quiet: bool
+    :raises: subprocess.CalledProcessError
+    """
+    if not packages:
+        log("Nothing to install", level=DEBUG)
+        return
+    if options is None:
+        options = ['--option=Dpkg::Options::=--force-confold']
+
+    cmd = ['apt-get', '--assume-yes']
+    cmd.extend(options)
+    cmd.append('install')
+    if isinstance(packages, str):
+        cmd.append(packages)
+    else:
+        cmd.extend(packages)
+    if not quiet:
+        log("Installing {} with options: {}"
+            .format(packages, options))
+    _run_apt_command(cmd, fatal, quiet=quiet)
+
+
+def apt_upgrade(options=None, fatal=False, dist=False):
+    """Upgrade all packages.
+
+    :param options: Options to pass on to apt-get
+    :type options: Option[None, List[str]]
+    :param fatal: Whether the command's output should be checked and
+        retried.
+    :type fatal: bool
+    :param dist: Whether ``dist-upgrade`` should be used over ``upgrade``
+    :type dist: bool
+    :raises: subprocess.CalledProcessError
+    """
+    if options is None:
+        options = ['--option=Dpkg::Options::=--force-confold']
+
+    cmd = ['apt-get', '--assume-yes']
+    cmd.extend(options)
+    if dist:
+        cmd.append('dist-upgrade')
+    else:
+        cmd.append('upgrade')
+    log("Upgrading with options: {}".format(options))
+    _run_apt_command(cmd, fatal)
+
+
+def apt_update(fatal=False):
+    """Update local apt cache."""
+    cmd = ['apt-get', 'update']
+    _run_apt_command(cmd, fatal)
+
+
+def apt_purge(packages, fatal=False):
+    """Purge one or more packages.
+
+    :param packages: Package(s) to purge
+    :type packages: Option[str, List[str]]
+    :param fatal: Whether the command's output should be checked and
+        retried.
+    :type fatal: bool
+    :raises: subprocess.CalledProcessError
+    """
+    cmd = ['apt-get', '--assume-yes', 'purge']
+    if isinstance(packages, str):
+        cmd.append(packages)
+    else:
+        cmd.extend(packages)
+    log("Purging {}".format(packages))
+    _run_apt_command(cmd, fatal)
+
+
+def apt_autoremove(purge=True, fatal=False):
+    """Remove packages that are no longer required.
+    :param purge: Whether the ``--purge`` option should be passed on or not.
+    :type purge: bool
+    :param fatal: Whether the command's output should be checked and
+        retried.
+    :type fatal: bool
+    :raises: subprocess.CalledProcessError
+    """
+    cmd = ['apt-get', '--assume-yes', 'autoremove']
+    if purge:
+        cmd.append('--purge')
+    _run_apt_command(cmd, fatal)
+
+
+def apt_mark(packages, mark, fatal=False):
+    """Flag one or more packages using apt-mark."""
+    log("Marking {} as {}".format(packages, mark))
+    cmd = ['apt-mark', mark]
+    if isinstance(packages, str):
+        cmd.append(packages)
+    else:
+        cmd.extend(packages)
+
+    if fatal:
+        subprocess.check_call(cmd, universal_newlines=True)
+    else:
+        subprocess.call(cmd, universal_newlines=True)
+
+
+def apt_hold(packages, fatal=False):
+    return apt_mark(packages, 'hold', fatal=fatal)
+
+
+def apt_unhold(packages, fatal=False):
+    return apt_mark(packages, 'unhold', fatal=fatal)
+
+
+def import_key(key):
+    """Import an ASCII Armor key.
+
+    A Radix64 format keyid is also supported for backwards
+    compatibility. In this case Ubuntu keyserver will be
+    queried for a key via HTTPS by its keyid. This method
+    is less preferable because https proxy servers may
+    require traffic decryption which is equivalent to a
+    man-in-the-middle attack (a proxy server impersonates
+    keyserver TLS certificates and has to be explicitly
+    trusted by the system).
+
+    :param key: A GPG key in ASCII armor format,
+                including BEGIN and END markers or a keyid.
+    :type key: (bytes, str)
+    :raises: GPGKeyError if the key could not be imported
+    """
+    key = key.strip()
+    if '-' in key or '\n' in key:
+        # Send everything not obviously a keyid to GPG to import, as
+        # we trust its validation better than our own. eg. handling
+        # comments before the key.
+ log("PGP key found (looks like ASCII Armor format)", level=DEBUG) + if ('-----BEGIN PGP PUBLIC KEY BLOCK-----' in key and + '-----END PGP PUBLIC KEY BLOCK-----' in key): + log("Writing provided PGP key in the binary format", level=DEBUG) + key_bytes = key.encode('utf-8') + key_name = _get_keyid_by_gpg_key(key_bytes) + key_gpg = _dearmor_gpg_key(key_bytes) + _write_apt_gpg_keyfile(key_name=key_name, key_material=key_gpg) + else: + raise GPGKeyError("ASCII armor markers missing from GPG key") + else: + log("PGP key found (looks like Radix64 format)", level=WARNING) + log("SECURELY importing PGP key from keyserver; " + "full key not provided.", level=WARNING) + # as of bionic add-apt-repository uses curl with an HTTPS keyserver URL + # to retrieve GPG keys. `apt-key adv` command is deprecated as is + # apt-key in general as noted in its manpage. See lp:1433761 for more + # history. Instead, /etc/apt/trusted.gpg.d is used directly to drop + # gpg + key_asc = _get_key_by_keyid(key) + # write the key in GPG format so that apt-key list shows it + key_gpg = _dearmor_gpg_key(key_asc) + _write_apt_gpg_keyfile(key_name=key, key_material=key_gpg) + + +def _get_keyid_by_gpg_key(key_material): + """Get a GPG key fingerprint by GPG key material. + Gets a GPG key fingerprint (40-digit, 160-bit) by the ASCII armor-encoded + or binary GPG key material. Can be used, for example, to generate file + names for keys passed via charm options. + + :param key_material: ASCII armor-encoded or binary GPG key material + :type key_material: bytes + :raises: GPGKeyError if invalid key material has been provided + :returns: A GPG key fingerprint + :rtype: str + """ + # Use the same gpg command for both Xenial and Bionic + cmd = 'gpg --with-colons --with-fingerprint' + ps = subprocess.Popen(cmd.split(), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + stdin=subprocess.PIPE) + out, err = ps.communicate(input=key_material) + out = out.decode('utf-8') + err = err.decode('utf-8') + if 'gpg: no valid OpenPGP data found.' in err: + raise GPGKeyError('Invalid GPG key material provided') + # from gnupg2 docs: fpr :: Fingerprint (fingerprint is in field 10) + return re.search(r"^fpr:{9}([0-9A-F]{40}):$", out, re.MULTILINE).group(1) + + +def _get_key_by_keyid(keyid): + """Get a key via HTTPS from the Ubuntu keyserver. + Different key ID formats are supported by SKS keyservers (the longer ones + are more secure, see "dead beef attack" and https://evil32.com/). Since + HTTPS is used, if SSLBump-like HTTPS proxies are in place, they will + impersonate keyserver.ubuntu.com and generate a certificate with + keyserver.ubuntu.com in the CN field or in SubjAltName fields of a + certificate. If such proxy behavior is expected it is necessary to add the + CA certificate chain containing the intermediate CA of the SSLBump proxy to + every machine that this code runs on via ca-certs cloud-init directive (via + cloudinit-userdata model-config) or via other means (such as through a + custom charm option). Also note that DNS resolution for the hostname in a + URL is done at a proxy server - not at the client side. 
+
+    8-digit (32 bit) key ID
+    https://keyserver.ubuntu.com/pks/lookup?search=0x4652B4E6
+    16-digit (64 bit) key ID
+    https://keyserver.ubuntu.com/pks/lookup?search=0x6E85A86E4652B4E6
+    40-digit key ID:
+    https://keyserver.ubuntu.com/pks/lookup?search=0x35F77D63B5CEC106C577ED856E85A86E4652B4E6
+
+    :param keyid: An 8, 16 or 40 hex digit keyid to find a key for
+    :type keyid: (bytes, str)
+    :returns: A key material for the specified GPG key id
+    :rtype: (str, bytes)
+    :raises: subprocess.CalledProcessError
+    """
+    # options=mr - machine-readable output (disables html wrappers)
+    keyserver_url = ('https://keyserver.ubuntu.com'
+                     '/pks/lookup?op=get&options=mr&exact=on&search=0x{}')
+    curl_cmd = ['curl', keyserver_url.format(keyid)]
+    # use proxy server settings in order to retrieve the key
+    return subprocess.check_output(curl_cmd,
+                                   env=env_proxy_settings(['https', 'no_proxy']))
+
+
+def _dearmor_gpg_key(key_asc):
+    """Converts a GPG key in the ASCII armor format to the binary format.
+
+    :param key_asc: A GPG key in ASCII armor format.
+    :type key_asc: (str, bytes)
+    :returns: A GPG key in binary format
+    :rtype: (str, bytes)
+    :raises: GPGKeyError
+    """
+    ps = subprocess.Popen(['gpg', '--dearmor'],
+                          stdout=subprocess.PIPE,
+                          stderr=subprocess.PIPE,
+                          stdin=subprocess.PIPE)
+    out, err = ps.communicate(input=key_asc)
+    # no need to decode output as it is binary (invalid utf-8), only error
+    err = err.decode('utf-8')
+    if 'gpg: no valid OpenPGP data found.' in err:
+        raise GPGKeyError('Invalid GPG key material. Check your network setup'
+                          ' (MTU, routing, DNS) and/or proxy server settings'
+                          ' as well as destination keyserver status.')
+    else:
+        return out
+
+
+def _write_apt_gpg_keyfile(key_name, key_material):
+    """Writes GPG key material into a file at a provided path.
+
+    :param key_name: A key name to use for a key file (could be a fingerprint)
+    :type key_name: str
+    :param key_material: A GPG key material (binary)
+    :type key_material: (str, bytes)
+    """
+    with open('/etc/apt/trusted.gpg.d/{}.gpg'.format(key_name),
+              'wb') as keyf:
+        keyf.write(key_material)
+
+
+def add_source(source, key=None, fail_invalid=False):
+    """Add a package source to this system.
+
+    @param source: a URL or sources.list entry, as supported by
+    add-apt-repository(1). Examples::
+
+        ppa:charmers/example
+        deb https://stub:key@private.example.com/ubuntu trusty main
+
+    In addition:
+        'proposed:' may be used to enable the standard 'proposed'
+        pocket for the release.
+        'cloud:' may be used to activate official cloud archive pockets,
+        such as 'cloud:icehouse'
+        'distro' may be used as a noop
+
+    Full list of source specifications supported by the function are:
+
+    'distro': A NOP; i.e. it has no effect.
+    'proposed': the proposed deb spec [2] is written to
+      /etc/apt/sources.list/proposed
+    'distro-proposed': adds <version>-proposed to the debs [2]
+    'ppa:<ppa-name>': add-apt-repository --yes <ppa-name>
+    'deb <deb-spec>': add-apt-repository --yes deb <deb-spec>
+    'http://....': add-apt-repository --yes http://...
+    'cloud-archive:<spec>': add-apt-repository --yes cloud-archive:<spec>
+    'cloud:<release>[-staging]': specify a Cloud Archive pocket with
+      optional staging version. If staging is used then the staging PPA [2]
+      will be used. If staging is NOT used then the cloud archive [3] will be
+      added, and the 'ubuntu-cloud-keyring' package will be added for the
+      current distro.
+    '<openstack-version>': translate to cloud:<release> based on the current
+      distro version (i.e. for 'ussuri' this will either be 'bionic-ussuri' or
+      'distro').
+    '<openstack-version>/proposed': as above, but for proposed.
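+
+    For example (all values illustrative)::
+
+        add_source('distro')                 # no-op
+        add_source('cloud:focal-yoga')       # enable a UCA pocket
+        add_source('ppa:charmers/example')   # enable a PPA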
+
+    Otherwise the source is not recognised and this is logged to the juju log.
+    However, no error is raised, unless fail_invalid is True.
+
+    [1] deb http://ubuntu-cloud.archive.canonical.com/ubuntu {} main
+        where {} is replaced with the derived pocket name.
+    [2] deb http://archive.ubuntu.com/ubuntu {}-proposed \
+        main universe multiverse restricted
+        where {} is replaced with the lsb_release codename (e.g. xenial)
+    [3] deb http://ubuntu-cloud.archive.canonical.com/ubuntu <pocket>
+        to /etc/apt/sources.list.d/cloud-archive-list
+
+    @param key: A key to be added to the system's APT keyring and used
+    to verify the signatures on packages. Ideally, this should be an
+    ASCII format GPG public key including the block headers. A GPG key
+    id may also be used, but be aware that only insecure protocols are
+    available to retrieve the actual public key from a public keyserver
+    placing your Juju environment at risk. ppa and cloud archive keys
+    are securely added automatically, so should not be provided.
+
+    @param fail_invalid: (boolean) if True, then the function raises a
+    SourceConfigError if there is no matching installation source.
+
+    @raises SourceConfigError() if for cloud:<pocket>, the <pocket> is not a
+    valid pocket in CLOUD_ARCHIVE_POCKETS
+    """
+    # extract the OpenStack versions from the CLOUD_ARCHIVE_POCKETS; can't use
+    # the list in contrib.openstack.utils as it might not be included in
+    # classic charms and would break everything. Having OpenStack specific
+    # code in this file is a bit of an antipattern, anyway.
+    os_versions_regex = "({})".format("|".join(OPENSTACK_RELEASES))
+
+    _mapping = OrderedDict([
+        (r"^distro$", lambda: None),  # This is a NOP
+        (r"^(?:proposed|distro-proposed)$", _add_proposed),
+        (r"^cloud-archive:(.*)$", _add_apt_repository),
+        (r"^((?:deb |http:|https:|ppa:).*)$", _add_apt_repository),
+        (r"^cloud:(.*)-(.*)\/staging$", _add_cloud_staging),
+        (r"^cloud:(.*)-(ovn-.*)$", _add_cloud_distro_check),
+        (r"^cloud:(.*)-(.*)$", _add_cloud_distro_check),
+        (r"^cloud:(.*)$", _add_cloud_pocket),
+        (r"^snap:.*-(.*)-(.*)$", _add_cloud_distro_check),
+        (r"^{}\/proposed$".format(os_versions_regex),
+         _add_bare_openstack_proposed),
+        (r"^{}$".format(os_versions_regex), _add_bare_openstack),
+    ])
+    if source is None:
+        source = ''
+    for r, fn in _mapping.items():
+        m = re.match(r, source)
+        if m:
+            if key:
+                # Import key before adding the source which depends on it,
+                # as refreshing packages could fail otherwise.
+                try:
+                    import_key(key)
+                except GPGKeyError as e:
+                    raise SourceConfigError(str(e))
+            # call the associated function with the captured groups
+            # raises SourceConfigError on error.
+            fn(*m.groups())
+            break
+    else:
+        # nothing matched. log an error and maybe sys.exit
+        err = "Unknown source: {!r}".format(source)
+        log(err)
+        if fail_invalid:
+            raise SourceConfigError(err)
+
+
+def _add_proposed():
+    """Add the PROPOSED_POCKET as /etc/apt/sources.list.d/proposed.list
+
+    Uses get_distrib_codename to determine the correct stanza for
+    the deb line.
+
+    For Intel architectures PROPOSED_POCKET is used for the release, but for
+    other architectures PROPOSED_PORTS_POCKET is used for the release.
+ """ + release = get_distrib_codename() + arch = platform.machine() + if arch not in ARCH_TO_PROPOSED_POCKET.keys(): + raise SourceConfigError("Arch {} not supported for (distro-)proposed" + .format(arch)) + with open('/etc/apt/sources.list.d/proposed.list', 'w') as apt: + apt.write(ARCH_TO_PROPOSED_POCKET[arch].format(release)) + + +def _add_apt_repository(spec): + """Add the spec using add_apt_repository + + :param spec: the parameter to pass to add_apt_repository + :type spec: str + """ + if '{series}' in spec: + series = get_distrib_codename() + spec = spec.replace('{series}', series) + _run_with_retries(['add-apt-repository', '--yes', spec], + cmd_env=env_proxy_settings(['https', 'http', 'no_proxy']) + ) + + +def __write_sources_list_d_actual_pocket(file, actual_pocket): + with open('/etc/apt/sources.list.d/{}'.format(file), 'w') as apt: + apt.write(CLOUD_ARCHIVE.format(actual_pocket)) + + +def _add_cloud_pocket(pocket): + """Add a cloud pocket as /etc/apt/sources.d/cloud-archive.list + + Note that this overwrites the existing file if there is one. + + This function also converts the simple pocket in to the actual pocket using + the CLOUD_ARCHIVE_POCKETS mapping. + + :param pocket: string representing the pocket to add a deb spec for. + :raises: SourceConfigError if the cloud pocket doesn't exist or the + requested release doesn't match the current distro version. + """ + apt_install(filter_installed_packages(['ubuntu-cloud-keyring']), + fatal=True) + if pocket not in CLOUD_ARCHIVE_POCKETS: + raise SourceConfigError( + 'Unsupported cloud: source option %s' % + pocket) + actual_pocket = CLOUD_ARCHIVE_POCKETS[pocket] + __write_sources_list_d_actual_pocket( + 'cloud-archive{}.list'.format('' if 'ovn' not in pocket else '-ovn'), + actual_pocket) + + +def _add_cloud_staging(cloud_archive_release, openstack_release): + """Add the cloud staging repository which is in + ppa:ubuntu-cloud-archive/-staging + + This function checks that the cloud_archive_release matches the current + codename for the distro that charm is being installed on. + + :param cloud_archive_release: string, codename for the release. + :param openstack_release: String, codename for the openstack release. + :raises: SourceConfigError if the cloud_archive_release doesn't match the + current version of the os. + """ + _verify_is_ubuntu_rel(cloud_archive_release, openstack_release) + ppa = 'ppa:ubuntu-cloud-archive/{}-staging'.format(openstack_release) + cmd = 'add-apt-repository -y {}'.format(ppa) + _run_with_retries(cmd.split(' ')) + + +def _add_cloud_distro_check(cloud_archive_release, openstack_release): + """Add the cloud pocket, but also check the cloud_archive_release against + the current distro, and use the openstack_release as the full lookup. + + This just calls _add_cloud_pocket() with the openstack_release as pocket + to get the correct cloud-archive.list for dpkg to work with. + + :param cloud_archive_release:String, codename for the distro release. + :param openstack_release: String, spec for the release to look up in the + CLOUD_ARCHIVE_POCKETS + :raises: SourceConfigError if this is the wrong distro, or the pocket spec + doesn't exist. + """ + _verify_is_ubuntu_rel(cloud_archive_release, openstack_release) + _add_cloud_pocket("{}-{}".format(cloud_archive_release, openstack_release)) + + +def _verify_is_ubuntu_rel(release, os_release): + """Verify that the release is in the same as the current ubuntu release. + + :param release: String, lowercase for the release. 
+    :param os_release: String, the os_release being asked for
+    :raises: SourceConfigError if the release is not the same as the ubuntu
+        release.
+    """
+    ubuntu_rel = get_distrib_codename()
+    if release != ubuntu_rel:
+        raise SourceConfigError(
+            'Invalid Cloud Archive release specified: {}-{} on this Ubuntu '
+            'version ({})'.format(release, os_release, ubuntu_rel))
+
+
+def _add_bare_openstack(openstack_release):
+    """Add cloud or distro based on the release given.
+
+    The spec given is, say, 'ussuri', but this could apply cloud:bionic-ussuri
+    or 'distro' depending on whether the ubuntu release is bionic or focal.
+
+    :param openstack_release: the OpenStack codename to determine the release
+        for.
+    :type openstack_release: str
+    :raises: SourceConfigError
+    """
+    # TODO(ajkavanagh) - surely this means we should be removing cloud archives
+    # if they exist?
+    __add_bare_helper(openstack_release, "{}-{}", lambda: None)
+
+
+def _add_bare_openstack_proposed(openstack_release):
+    """Add cloud or distro but with proposed.
+
+    The spec given is, say, 'ussuri' but this could apply
+    cloud:bionic-ussuri/proposed or 'distro/proposed' depending on whether the
+    ubuntu release is bionic or focal.
+
+    :param openstack_release: the OpenStack codename to determine the release
+        for.
+    :type openstack_release: str
+    :raises: SourceConfigError
+    """
+    __add_bare_helper(openstack_release, "{}-{}/proposed", _add_proposed)
+
+
+def __add_bare_helper(openstack_release, pocket_format, final_function):
+    """Helper for _add_bare_openstack[_proposed]
+
+    The bulk of the work between the two functions is exactly the same except
+    for the pocket format and the function that is run if it's the distro
+    version.
+
+    :param openstack_release: the OpenStack codename. e.g. ussuri
+    :type openstack_release: str
+    :param pocket_format: the pocket formatter string to construct a pocket str
+        from the openstack_release and the current ubuntu version.
+    :type pocket_format: str
+    :param final_function: the function to call if it is the distro version.
+    :type final_function: Callable
+    :raises SourceConfigError on error
+    """
+    ubuntu_version = get_distrib_codename()
+    possible_pocket = pocket_format.format(ubuntu_version, openstack_release)
+    if possible_pocket in CLOUD_ARCHIVE_POCKETS:
+        _add_cloud_pocket(possible_pocket)
+        return
+    # Otherwise it's almost certainly the distro version; verify that it
+    # exists.
+    try:
+        assert UBUNTU_OPENSTACK_RELEASE[ubuntu_version] == openstack_release
+    except KeyError:
+        raise SourceConfigError(
+            "Invalid ubuntu version {} isn't known to this library"
+            .format(ubuntu_version))
+    except AssertionError:
+        raise SourceConfigError(
+            'Invalid OpenStack release specified: {} for Ubuntu version {}'
+            .format(openstack_release, ubuntu_version))
+    final_function()
+
+
+def _run_with_retries(cmd, max_retries=CMD_RETRY_COUNT, retry_exitcodes=(1,),
+                      retry_message="", cmd_env=None, quiet=False):
+    """Run a command and retry until success or max_retries is reached.
+
+    :param cmd: The apt command to run.
+    :type cmd: str
+    :param max_retries: The number of retries to attempt on a fatal
+        command. Defaults to CMD_RETRY_COUNT.
+    :type max_retries: int
+    :param retry_exitcodes: Optional additional exit codes to retry.
+        Defaults to retry on exit code 1.
+    :type retry_exitcodes: tuple
+    :param retry_message: Optional log prefix emitted during retries.
+    :type retry_message: str
+    :param: cmd_env: Environment variables to add to the command run.
+    :type cmd_env: Optional[Dict[str, str]]
+    :param quiet: if True, silence the output of the command from stdout and
+        stderr
+    :type quiet: bool
+    """
+    env = get_apt_dpkg_env()
+    if cmd_env:
+        env.update(cmd_env)
+
+    kwargs = {}
+    if quiet:
+        kwargs['stdout'] = subprocess.DEVNULL
+        kwargs['stderr'] = subprocess.DEVNULL
+
+    if not retry_message:
+        retry_message = "Failed executing '{}'".format(" ".join(cmd))
+    retry_message += ". Will retry in {} seconds".format(CMD_RETRY_DELAY)
+
+    retry_count = 0
+    result = None
+
+    retry_results = (None,) + retry_exitcodes
+    while result in retry_results:
+        try:
+            result = subprocess.check_call(cmd, env=env, **kwargs)
+        except subprocess.CalledProcessError as e:
+            result = e.returncode
+            if result not in retry_results:
+                # a non-retriable exitcode was produced
+                raise
+            retry_count += 1
+            if retry_count > max_retries:
+                # a retriable exitcode was produced more than {max_retries}
+                # times
+                raise
+            log(retry_message)
+            time.sleep(CMD_RETRY_DELAY)
+
+
+def _run_apt_command(cmd, fatal=False, quiet=False):
+    """Run an apt command with optional retries.
+
+    :param cmd: The apt command to run.
+    :type cmd: str
+    :param fatal: Whether the command's output should be checked and
+        retried.
+    :type fatal: bool
+    :param quiet: if True, silence the output of the command from stdout and
+        stderr
+    :type quiet: bool
+    """
+    if fatal:
+        _run_with_retries(
+            cmd, retry_exitcodes=(1, APT_NO_LOCK,),
+            retry_message="Couldn't acquire DPKG lock",
+            quiet=quiet)
+    else:
+        kwargs = {}
+        if quiet:
+            kwargs['stdout'] = subprocess.DEVNULL
+            kwargs['stderr'] = subprocess.DEVNULL
+        subprocess.call(cmd, env=get_apt_dpkg_env(), **kwargs)
+
+
+def get_upstream_version(package):
+    """Determine upstream version based on installed package
+
+    @returns None (if not installed) or the upstream version
+    """
+    cache = apt_cache()
+    try:
+        pkg = cache[package]
+    except Exception:
+        # the package is unknown to the current apt cache.
+        return None
+
+    if not pkg.current_ver:
+        # package is known, but no version is currently installed.
+        return None
+
+    return ubuntu_apt_pkg.upstream_version(pkg.current_ver.ver_str)
+
+
+def get_installed_version(package):
+    """Determine installed version of a package
+
+    @returns None (if not installed) or the installed version as
+        Version object
+    """
+    cache = apt_cache()
+    dpkg_result = cache.dpkg_list([package]).get(package, {})
+    current_ver = None
+    installed_version = dpkg_result.get('version')
+
+    if installed_version:
+        current_ver = ubuntu_apt_pkg.Version({'ver_str': installed_version})
+    return current_ver
+
+
+def get_apt_dpkg_env():
+    """Get environment suitable for execution of APT and DPKG tools.
+
+    We keep this in a helper function instead of in a global constant to
+    avoid execution on import of the library.
+
+    :returns: Environment suitable for execution of APT and DPKG tools.
+    :rtype: Dict[str, str]
+    """
+    # The fallback is used in the event of ``/etc/environment`` not containing
+    # a valid PATH variable.
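+    # For example (illustrative), the returned environment typically looks
+    # like:
+    #   {'DEBIAN_FRONTEND': 'noninteractive',
+    #    'PATH': '/usr/sbin:/usr/bin:/sbin:/bin'}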
+ return {'DEBIAN_FRONTEND': 'noninteractive', + 'PATH': get_system_env('PATH', '/usr/sbin:/usr/bin:/sbin:/bin')} diff --git a/ceph-radosgw/hooks/charmhelpers/fetch/ubuntu_apt_pkg.py b/ceph-radosgw/hooks/charmhelpers/fetch/ubuntu_apt_pkg.py new file mode 100644 index 00000000..f4dde4a9 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/fetch/ubuntu_apt_pkg.py @@ -0,0 +1,327 @@ +# Copyright 2019-2021 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Provide a subset of the ``python-apt`` module API. + +Data collection is done through subprocess calls to ``apt-cache`` and +``dpkg-query`` commands. + +The main purpose for this module is to avoid dependency on the +``python-apt`` python module. + +The indicated python module is a wrapper around the ``apt`` C++ library +which is tightly connected to the version of the distribution it was +shipped on. It is not developed in a backward/forward compatible manner. + +This in turn makes it incredibly hard to distribute as a wheel for a piece +of python software that supports a span of distro releases [0][1]. + +Upstream feedback like [2] does not give confidence in this ever changing, +so with this we get rid of the dependency. + +0: https://github.com/juju-solutions/layer-basic/pull/135 +1: https://bugs.launchpad.net/charm-octavia/+bug/1824112 +2: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=845330#10 +""" + +import locale +import os +import subprocess +import sys + +from charmhelpers import deprecate +from charmhelpers.core.hookenv import log + + +class _container(dict): + """Simple container for attributes.""" + __getattr__ = dict.__getitem__ + __setattr__ = dict.__setitem__ + + +class Package(_container): + """Simple container for package attributes.""" + + +class Version(_container): + """Simple container for version attributes.""" + + +class Cache(object): + """Simulation of ``apt_pkg`` Cache object.""" + def __init__(self, progress=None): + pass + + def __contains__(self, package): + try: + pkg = self.__getitem__(package) + return pkg is not None + except KeyError: + return False + + def __getitem__(self, package): + """Get information about a package from apt and dpkg databases. + + :param package: Name of package + :type package: str + :returns: Package object + :rtype: object + :raises: KeyError, subprocess.CalledProcessError + """ + apt_result = self._apt_cache_show([package])[package] + apt_result['name'] = apt_result.pop('package') + pkg = Package(apt_result) + dpkg_result = self.dpkg_list([package]).get(package, {}) + current_ver = None + installed_version = dpkg_result.get('version') + if installed_version: + current_ver = Version({'ver_str': installed_version}) + pkg.current_ver = current_ver + pkg.architecture = dpkg_result.get('architecture') + return pkg + + @deprecate("use dpkg_list() instead.", "2022-05", log=log) + def _dpkg_list(self, packages): + return self.dpkg_list(packages) + + def dpkg_list(self, packages): + """Get data from system dpkg database for package. 
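+
+        Only rows that ``dpkg-query`` reports as installed (``ii`` or ``hi``
+        status) are included in the result.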
+ + Note that this method is also useful for querying package names + containing wildcards, for example + + apt_cache().dpkg_list(['nvidia-vgpu-ubuntu-*']) + + may return + + { + 'nvidia-vgpu-ubuntu-470': { + 'name': 'nvidia-vgpu-ubuntu-470', + 'version': '470.68', + 'architecture': 'amd64', + 'description': 'NVIDIA vGPU driver - version 470.68' + } + } + + :param packages: Packages to get data from + :type packages: List[str] + :returns: Structured data about installed packages, keys like + ``dpkg-query --list`` + :rtype: dict + :raises: subprocess.CalledProcessError + """ + pkgs = {} + cmd = [ + 'dpkg-query', '--show', + '--showformat', + r'${db:Status-Abbrev}\t${Package}\t${Version}\t${Architecture}\t${binary:Summary}\n' + ] + cmd.extend(packages) + try: + output = subprocess.check_output(cmd, + stderr=subprocess.STDOUT, + universal_newlines=True) + except subprocess.CalledProcessError as cp: + # ``dpkg-query`` may return error and at the same time have + # produced useful output, for example when asked for multiple + # packages where some are not installed + if cp.returncode != 1: + raise + output = cp.output + for line in output.splitlines(): + # only process lines for successfully installed packages + if not (line.startswith('ii ') or line.startswith('hi ')): + continue + status, name, version, arch, desc = line.split('\t', 4) + pkgs[name] = { + 'name': name, + 'version': version, + 'architecture': arch, + 'description': desc, + } + return pkgs + + def _apt_cache_show(self, packages): + """Get data from system apt cache for package. + + :param packages: Packages to get data from + :type packages: List[str] + :returns: Structured data about package, keys like + ``apt-cache show`` + :rtype: dict + :raises: subprocess.CalledProcessError + """ + pkgs = {} + cmd = ['apt-cache', 'show', '--no-all-versions'] + cmd.extend(packages) + if locale.getlocale() == (None, None): + # subprocess calls out to locale.getpreferredencoding(False) to + # determine encoding. Workaround for Trusty where the + # environment appears to not be set up correctly. + locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') + try: + output = subprocess.check_output(cmd, + stderr=subprocess.STDOUT, + universal_newlines=True) + previous = None + pkg = {} + for line in output.splitlines(): + if not line: + if 'package' in pkg: + pkgs.update({pkg['package']: pkg}) + pkg = {} + continue + if line.startswith(' '): + if previous and previous in pkg: + pkg[previous] += os.linesep + line.lstrip() + continue + if ':' in line: + kv = line.split(':', 1) + key = kv[0].lower() + if key == 'n': + continue + previous = key + pkg.update({key: kv[1].lstrip()}) + except subprocess.CalledProcessError as cp: + # ``apt-cache`` returns 100 if none of the packages asked for + # exist in the apt cache. + if cp.returncode != 100: + raise + return pkgs + + +class Config(_container): + def __init__(self): + super(Config, self).__init__(self._populate()) + + def _populate(self): + cfgs = {} + cmd = ['apt-config', 'dump'] + output = subprocess.check_output(cmd, + stderr=subprocess.STDOUT, + universal_newlines=True) + for line in output.splitlines(): + if not line.startswith("CommandLine"): + k, v = line.split(" ", 1) + cfgs[k] = v.strip(";").strip("\"") + + return cfgs + + +# Backwards compatibility with old apt_pkg module +sys.modules[__name__].config = Config() + + +def init(): + """Compatibility shim that does nothing.""" + pass + + +def upstream_version(version): + """Extracts upstream version from a version string. 
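+
+    For example (illustrative)::
+
+        upstream_version('2:16.2.6-0ubuntu1')  # -> '16.2.6'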
+ + Upstream reference: https://salsa.debian.org/apt-team/apt/blob/master/ + apt-pkg/deb/debversion.cc#L259 + + :param version: Version string + :type version: str + :returns: Upstream version + :rtype: str + """ + if version: + version = version.split(':')[-1] + version = version.split('-')[0] + return version + + +def version_compare(a, b): + """Compare the given versions. + + Call out to ``dpkg`` to make sure the code doing the comparison is + compatible with what the ``apt`` library would do. Mimic the return + values. + + Upstream reference: + https://apt-team.pages.debian.net/python-apt/library/apt_pkg.html + ?highlight=version_compare#apt_pkg.version_compare + + :param a: version string + :type a: str + :param b: version string + :type b: str + :returns: >0 if ``a`` is greater than ``b``, 0 if a equals b, + <0 if ``a`` is smaller than ``b`` + :rtype: int + :raises: subprocess.CalledProcessError, RuntimeError + """ + for op in ('gt', 1), ('eq', 0), ('lt', -1): + try: + subprocess.check_call(['dpkg', '--compare-versions', + a, op[0], b], + stderr=subprocess.STDOUT, + universal_newlines=True) + return op[1] + except subprocess.CalledProcessError as cp: + if cp.returncode == 1: + continue + raise + else: + raise RuntimeError('Unable to compare "{}" and "{}", according to ' + 'our logic they are neither greater, equal nor ' + 'less than each other.'.format(a, b)) + + +class PkgVersion(): + """Allow package versions to be compared. + + For example:: + + >>> import charmhelpers.fetch as fetch + >>> (fetch.apt_pkg.PkgVersion('2:20.4.0') < + ... fetch.apt_pkg.PkgVersion('2:20.5.0')) + True + >>> pkgs = [fetch.apt_pkg.PkgVersion('2:20.4.0'), + ... fetch.apt_pkg.PkgVersion('2:21.4.0'), + ... fetch.apt_pkg.PkgVersion('2:17.4.0')] + >>> pkgs.sort() + >>> pkgs + [2:17.4.0, 2:20.4.0, 2:21.4.0] + """ + + def __init__(self, version): + self.version = version + + def __lt__(self, other): + return version_compare(self.version, other.version) == -1 + + def __le__(self, other): + return self.__lt__(other) or self.__eq__(other) + + def __gt__(self, other): + return version_compare(self.version, other.version) == 1 + + def __ge__(self, other): + return self.__gt__(other) or self.__eq__(other) + + def __eq__(self, other): + return version_compare(self.version, other.version) == 0 + + def __ne__(self, other): + return not self.__eq__(other) + + def __repr__(self): + return self.version + + def __hash__(self): + return hash(repr(self)) diff --git a/ceph-radosgw/hooks/charmhelpers/osplatform.py b/ceph-radosgw/hooks/charmhelpers/osplatform.py new file mode 100644 index 00000000..5d121866 --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/osplatform.py @@ -0,0 +1,61 @@ +import platform +import os + + +def get_platform(): + """Return the current OS platform. + + For example: if current os platform is Ubuntu then a string "ubuntu" + will be returned (which is the name of the module). + This string is used to decide which platform module should be imported. + """ + current_platform = _get_current_platform() + + if "Ubuntu" in current_platform: + return "ubuntu" + elif "CentOS" in current_platform: + return "centos" + elif "debian" in current_platform or "Debian" in current_platform: + # Stock Python does not detect Ubuntu and instead returns debian. + # Or at least it does in some build environments like Travis CI + return "ubuntu" + elif "elementary" in current_platform: + # ElementaryOS fails to run tests locally without this. 
+ return "ubuntu" + elif "Pop!_OS" in current_platform: + # Pop!_OS also fails to run tests locally without this. + return "ubuntu" + else: + raise RuntimeError("This module is not supported on {}." + .format(current_platform)) + + +def _get_current_platform(): + """Return the current platform information for the OS. + + Attempts to lookup linux distribution information from the platform + module for releases of python < 3.7. For newer versions of python, + the platform is determined from the /etc/os-release file. + """ + # linux_distribution is deprecated and will be removed in Python 3.7 + # Warnings *not* disabled, as we certainly need to fix this. + if hasattr(platform, 'linux_distribution'): + tuple_platform = platform.linux_distribution() + current_platform = tuple_platform[0] + else: + current_platform = _get_platform_from_fs() + + return current_platform + + +def _get_platform_from_fs(): + """Get Platform from /etc/os-release.""" + with open(os.path.join(os.sep, 'etc', 'os-release')) as fin: + content = dict( + line.split('=', 1) + for line in fin.read().splitlines() + if '=' in line + ) + for k, v in content.items(): + content[k] = v.strip('"') + return content["NAME"] diff --git a/ceph-radosgw/hooks/charmhelpers/payload/__init__.py b/ceph-radosgw/hooks/charmhelpers/payload/__init__.py new file mode 100644 index 00000000..ee55cb3d --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/payload/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"Tools for working with files injected into a charm just before deployment." diff --git a/ceph-radosgw/hooks/charmhelpers/payload/execd.py b/ceph-radosgw/hooks/charmhelpers/payload/execd.py new file mode 100644 index 00000000..1502aa0b --- /dev/null +++ b/ceph-radosgw/hooks/charmhelpers/payload/execd.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python + +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys +import subprocess +from charmhelpers.core import hookenv + + +def default_execd_dir(): + return os.path.join(os.environ['CHARM_DIR'], 'exec.d') + + +def execd_module_paths(execd_dir=None): + """Generate a list of full paths to modules within execd_dir.""" + if not execd_dir: + execd_dir = default_execd_dir() + + if not os.path.exists(execd_dir): + return + + for subpath in os.listdir(execd_dir): + module = os.path.join(execd_dir, subpath) + if os.path.isdir(module): + yield module + + +def execd_submodule_paths(command, execd_dir=None): + """Generate a list of full paths to the specified command within exec_dir. + """ + for module_path in execd_module_paths(execd_dir): + path = os.path.join(module_path, command) + if os.access(path, os.X_OK) and os.path.isfile(path): + yield path + + +def execd_run(command, execd_dir=None, die_on_error=True, stderr=subprocess.STDOUT): + """Run command for each module within execd_dir which defines it.""" + for submodule_path in execd_submodule_paths(command, execd_dir): + try: + subprocess.check_output(submodule_path, stderr=stderr, + universal_newlines=True) + except subprocess.CalledProcessError as e: + hookenv.log("Error ({}) running {}. Output: {}".format( + e.returncode, e.cmd, e.output)) + if die_on_error: + sys.exit(e.returncode) + + +def execd_preinstall(execd_dir=None): + """Run charm-pre-install for each module within execd_dir.""" + execd_run('charm-pre-install', execd_dir=execd_dir) diff --git a/ceph-radosgw/hooks/cluster-relation-changed b/ceph-radosgw/hooks/cluster-relation-changed new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/cluster-relation-changed @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/cluster-relation-joined b/ceph-radosgw/hooks/cluster-relation-joined new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/cluster-relation-joined @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/config-changed b/ceph-radosgw/hooks/config-changed new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/config-changed @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/gateway-relation-joined b/ceph-radosgw/hooks/gateway-relation-joined new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/gateway-relation-joined @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/ha-relation-changed b/ceph-radosgw/hooks/ha-relation-changed new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/ha-relation-changed @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/ha-relation-joined b/ceph-radosgw/hooks/ha-relation-joined new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/ha-relation-joined @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/hooks.py b/ceph-radosgw/hooks/hooks.py new file mode 100755 index 00000000..5d54a4f4 --- /dev/null +++ b/ceph-radosgw/hooks/hooks.py @@ -0,0 +1,1299 @@ +#!/usr/bin/env python3 + +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import base64 +import json +import os +import subprocess +import sys +import socket +import uuid + +import utils + +sys.path.append('lib') + +import ceph_rgw as ceph +import charms_ceph.utils as ceph_utils +import multisite + +from charmhelpers.core.hookenv import ( + ERROR, + relation_get, + relation_id as ch_relation_id, + relation_ids, + related_units, + config, + open_port, + opened_ports, + close_port, + relation_set, + log, + DEBUG, + WARNING, + Hooks, UnregisteredHookError, + status_set, + is_leader, + leader_set, + leader_get, + remote_service_name, + WORKLOAD_STATES, +) +from charmhelpers.core.strutils import bool_from_string +from charmhelpers.fetch import ( + apt_update, + apt_install, + apt_purge, + add_source, + filter_installed_packages, + filter_missing_packages, +) +from charmhelpers.payload.execd import execd_preinstall +from charmhelpers.core.host import ( + cmp_pkgrevno, + service, + service_pause, + service_reload, + service_restart, + service_resume, + service_stop, +) +from charmhelpers.contrib.network.ip import ( + get_relation_ip, +) +from charmhelpers.contrib.openstack.context import ADDRESS_TYPES +from charmhelpers.contrib.openstack.ip import ( + canonical_url, + PUBLIC, INTERNAL, ADMIN, +) +from charmhelpers.contrib.storage.linux.ceph import ( + send_request_if_needed, + is_request_complete, +) +from charmhelpers.contrib.openstack.utils import ( + is_unit_paused_set, + pausable_restart_on_change as restart_on_change, + series_upgrade_prepare, + series_upgrade_complete, +) +from charmhelpers.contrib.openstack.ha.utils import ( + generate_ha_relation_data, +) +from utils import ( + assess_status, + boto_client, + disable_unused_apache_sites, + listen_port, + multisite_deployment, + pause_unit_helper, + ready_for_service, + register_configs, + request_per_unit_key, + restart_map, + restart_nonce_changed, + resume_unit_helper, + s3_app, + service_name, + services, + set_s3_app, + clear_s3_app, + setup_ipv6, + systemd_based_radosgw, +) +from charmhelpers.contrib.charmsupport import nrpe +from charmhelpers.contrib.hardening.harden import harden + +from charmhelpers.contrib.openstack.cert_utils import ( + get_certificate_request, + process_certificates, +) + +hooks = Hooks() +CONFIGS = register_configs() + +PACKAGES = [ + 'haproxy', + 'radosgw', + 'apache2', +] + +APACHE_PACKAGES = [ + 'libapache2-mod-fastcgi', +] + +MULTISITE_SYSTEM_USER = 'multisite-sync' +MULTISITE_DEFAULT_SYNC_GROUP_ID = 'default' + + +def upgrade_available(): + """Check for upgrade for ceph + + :returns: whether an upgrade is available + :rtype: boolean + """ + c = config() + old_version = ceph_utils.resolve_ceph_version(c.previous('source') or + 'distro') + new_version = ceph_utils.resolve_ceph_version(c.get('source')) + if (old_version in ceph_utils.UPGRADE_PATHS and + new_version == ceph_utils.UPGRADE_PATHS[old_version]): + return True + return False + + +def install_packages(): + """Installs necessary packages for the ceph-radosgw service. + + Calling this method when the source config value has changed + will cause an upgrade of ceph packages to be performed. 
+ + :returns: whether packages were installed or not + :rtype: boolean + """ + pkgs_installed = False + c = config() + if c.changed('source') or c.changed('key'): + add_source(c.get('source'), c.get('key')) + apt_update(fatal=True) + + # NOTE: just use full package list if we're in an upgrade + # config-changed execution + pkgs = ( + PACKAGES if upgrade_available() else + filter_installed_packages(PACKAGES) + ) + if pkgs: + status_set('maintenance', 'Installing radosgw packages') + if ('apache2' in pkgs): + # NOTE(lourot): Apache's default config makes it listen on port 80, + # which will prevent HAProxy from listening on that same port. We + # use Apache in this setup however for SSL (different port). We + # need to let Apache free port 80 before we can install HAProxy + # otherwise HAProxy will crash. See lp:1904411 + log('Installing Apache') + apt_install(['apache2'], fatal=True) + disable_unused_apache_sites() + apt_install(pkgs, fatal=True) + pkgs_installed = True + + pkgs = filter_missing_packages(APACHE_PACKAGES) + if pkgs: + apt_purge(pkgs) + + return pkgs_installed + + +@hooks.hook('install.real') +@harden() +def install(): + status_set('maintenance', 'Executing pre-install') + execd_preinstall() + install_packages() + # hold the service down until we have keys from ceph + log('Disable service "{}" until we have keys for it.' + .format(service_name()), level=DEBUG) + service_pause(service_name()) + if not os.path.exists('/etc/ceph'): + os.makedirs('/etc/ceph') + if is_leader(): + leader_set(namespace_tenants=config('namespace-tenants')) + + +@hooks.hook('object-store-relation-joined') +def object_store_joined(relation_id=None): + relation_data = { + 'swift-url': + "{}:{}".format(canonical_url(CONFIGS, INTERNAL), listen_port()) + } + relation_set(relation_id=relation_id, relation_settings=relation_data) + + +@hooks.hook('upgrade-charm.real') +def upgrade_charm(): + if is_leader() and not leader_get('namespace_tenants') == 'True': + leader_set(namespace_tenants=False) + + +@hooks.hook('config-changed') +@harden() +def config_changed(): + @restart_on_change(restart_map()) + def _config_changed(): + # if we are paused, delay doing any config changed hooks. + # It is forced on the resume. + if is_unit_paused_set(): + log("Unit is pause or upgrading. Skipping config_changed", "WARN") + return + + # NOTE(wolsen) if an upgrade has been applied, then the radosgw + # service needs to be restarted as the package doesn't do it by + # itself. See LP#1906707 + if install_packages(): + log("Packages have been installed/upgraded... 
restarting", "INFO") + service_restart(service_name()) + + if config('prefer-ipv6'): + status_set('maintenance', 'configuring ipv6') + setup_ipv6() + + for r_id in relation_ids('identity-service'): + identity_changed(relid=r_id) + + for r_id in relation_ids('cluster'): + cluster_joined(rid=r_id) + + # NOTE(jamespage): Re-exec mon relation for any changes to + # enable ceph pool permissions restrictions + for r_id in relation_ids('mon'): + for unit in related_units(r_id): + mon_relation(r_id, unit) + + # Re-trigger hacluster relations to switch to ifaceless + # vip configuration + for r_id in relation_ids('ha'): + ha_relation_joined(r_id) + + # Refire certificates relations for VIP changes + for r_id in relation_ids('certificates'): + certs_joined(r_id) + + # Refire object-store relations for VIP/port changes + for r_id in relation_ids('object-store'): + object_store_joined(r_id) + + for r_id in relation_ids('radosgw-user'): + radosgw_user_changed(r_id) + + process_multisite_relations() + + CONFIGS.write_all() + configure_https() + + update_nrpe_config() + + port = listen_port() + open_port(port) + for opened_port in opened_ports(): + opened_port_number = opened_port.split('/')[0] + if str(opened_port_number) != str(port): + close_port(opened_port_number) + log('Closed port %s in favor of port %s' % + (opened_port_number, port)) + _config_changed() + + # Update s3 apps with ssl-ca, if available + ssl_ca = config('ssl-ca') + if ssl_ca: + update_s3_ca_info([ssl_ca]) + + +@hooks.hook('mon-relation-departed', + 'mon-relation-changed') +def mon_relation(rid=None, unit=None): + @restart_on_change(restart_map()) + def _mon_relation(): + key_name = 'rgw.{}'.format(socket.gethostname()) + legacy = True + if request_per_unit_key(): + legacy = False + relation_set(relation_id=rid, + key_name=key_name) + try: + rq = ceph.get_create_rgw_pools_rq( + prefix=config('zone') or config('pool-prefix')) + except ValueError as e: + # The end user has most likely provided a invalid value for + # a configuration option. Just log the traceback here, the + # end user will be notified by assess_status() called at + # the end of the hook execution. + log('Caught ValueError, invalid value provided for ' + 'configuration?: "{}"'.format(str(e)), + level=DEBUG) + return + + if is_request_complete(rq, relation='mon'): + log('Broker request complete', level=DEBUG) + CONFIGS.write_all() + # New style per unit keys + key = relation_get(attribute='{}_key'.format(key_name), + rid=rid, unit=unit) + if not key: + # Fallback to old style global key + key = relation_get(attribute='radosgw_key', + rid=rid, unit=unit) + key_name = None + + if key: + ceph.import_radosgw_key(key, name=key_name) + # NOTE(jamespage): + # Deal with switch from radosgw init script to + # systemd named units for radosgw instances by + # stopping and disabling the radosgw unit + if systemd_based_radosgw(): + service_stop('radosgw') + service('disable', 'radosgw') + # Update the nrpe config. If we wait for the below + # to be called elsewhere, there exists a period + # where nagios will report the radosgw service as + # down, and also not be monitoring the per + # host services. + update_nrpe_config(checks_to_remove=['radosgw']) + + # NOTE(jamespage): + # Multi-site deployments need to defer restart as the + # zone is not created until the primary relation is + # joined; restarting here will cause a restart burst + # in systemd and stop the process restarting once + # zone configuration is complete. 
+ if (not is_unit_paused_set() and + not multisite_deployment()): + log('Resume service "{}" as we now have keys for it.' + .format(service_name()), level=DEBUG) + service_resume(service_name()) + + if multisite_deployment(): + process_multisite_relations() + elif (ready_for_service(legacy=legacy) and is_leader() and + 'mon' in CONFIGS.complete_contexts()): + # In a non multi-site deployment create the + # zone using the default zonegroup and restart the service + internal_url = '{}:{}'.format( + canonical_url(CONFIGS, INTERNAL), + listen_port(), + ) + endpoints = [internal_url] + zonegroup = 'default' + zone = config('zone') + existing_zones = multisite.list_zones() + log('Existing zones {}'.format(existing_zones), level=DEBUG) + if zone not in existing_zones: + log("zone '{}' doesn't exist, creating".format(zone)) + try: + multisite.create_zone(zone, + endpoints=endpoints, + default=True, master=True, + zonegroup=zonegroup) + except subprocess.CalledProcessError: + if zone in multisite.list_zones(retry_on_empty=True): + log("zone '{}' existed already after all" + .format(zone)) + else: + raise + + existing_zones = multisite.list_zones(retry_on_empty=True) + log('Existing zones {}'.format(existing_zones), + level=DEBUG) + if zone not in existing_zones: + raise RuntimeError("Could not create zone '{}'".format( + zone)) + + service_restart(service_name()) + + for r_id in relation_ids('radosgw-user'): + radosgw_user_changed(r_id) + + else: + send_request_if_needed(rq, relation='mon') + _mon_relation() + + +@hooks.hook('gateway-relation-joined') +def gateway_relation(): + relation_set(hostname=get_relation_ip('gateway-relation'), + port=listen_port()) + + +@hooks.hook('identity-service-relation-joined') +def identity_joined(relid=None): + if cmp_pkgrevno('radosgw', '0.55') < 0: + log('Integration with keystone requires ceph >= 0.55') + sys.exit(1) + + port = listen_port() + admin_url = '%s:%i/swift' % (canonical_url(CONFIGS, ADMIN), port) + if leader_get('namespace_tenants') == 'True': + internal_url = '%s:%s/swift/v1/AUTH_$(project_id)s' % \ + (canonical_url(CONFIGS, INTERNAL), port) + public_url = '%s:%s/swift/v1/AUTH_$(project_id)s' % \ + (canonical_url(CONFIGS, PUBLIC), port) + else: + internal_url = '%s:%s/swift/v1' % \ + (canonical_url(CONFIGS, INTERNAL), port) + public_url = '%s:%s/swift/v1' % \ + (canonical_url(CONFIGS, PUBLIC), port) + roles = [x for x in [config('operator-roles'), config('admin-roles')] if x] + requested_roles = '' + if roles: + requested_roles = ','.join(roles) if len(roles) > 1 else roles[0] + # remove stale settings without service prefix left by old charms, + # which cause the keystone charm to ignore new settings w/ prefix. 
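+    # For example (illustrative): blanking 'service', 'region' and the
+    # '*_url' keys below clears data a pre-prefix charm revision may have
+    # left, so keystone honours the 'swift_'-prefixed values that follow.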
+    relation_set(service='',
+                 region='',
+                 public_url='',
+                 internal_url='',
+                 admin_url='',
+                 relation_id=relid)
+    relation_set(swift_service='swift',
+                 swift_region=config('region'),
+                 swift_public_url=public_url,
+                 swift_internal_url=internal_url,
+                 swift_admin_url=admin_url,
+                 requested_roles=requested_roles,
+                 relation_id=relid)
+    if cmp_pkgrevno('radosgw', '12.2') >= 0:
+        relation_set(s3_service='s3',
+                     s3_region=config('region'),
+                     s3_public_url='{}:{}/'.format(
+                         canonical_url(CONFIGS, PUBLIC), port),
+                     s3_internal_url='{}:{}/'.format(
+                         canonical_url(CONFIGS, INTERNAL), port),
+                     s3_admin_url='{}:{}/'.format(
+                         canonical_url(CONFIGS, ADMIN), port),
+                     relation_id=relid)
+
+
+@hooks.hook('identity-service-relation-changed')
+def identity_changed(relid=None):
+    @restart_on_change(restart_map())
+    def _identity_changed():
+        identity_joined(relid)
+        CONFIGS.write_all()
+    _identity_changed()
+
+
+@hooks.hook('cluster-relation-joined')
+def cluster_joined(rid=None):
+    @restart_on_change(restart_map())
+    def _cluster_joined():
+        settings = {}
+
+        for addr_type in ADDRESS_TYPES:
+            address = get_relation_ip(
+                addr_type,
+                cidr_network=config('os-{}-network'.format(addr_type)))
+            if address:
+                settings['{}-address'.format(addr_type)] = address
+
+        settings['private-address'] = get_relation_ip('cluster')
+
+        relation_set(relation_id=rid, relation_settings=settings)
+    _cluster_joined()
+
+
+@hooks.hook('cluster-relation-changed')
+def cluster_changed():
+    @restart_on_change(restart_map())
+    def _cluster_changed():
+        CONFIGS.write_all()
+        for r_id in relation_ids('identity-service'):
+            identity_joined(relid=r_id)
+        for r_id in relation_ids('certificates'):
+            for unit in related_units(r_id):
+                certs_changed(r_id, unit)
+    _cluster_changed()
+
+
+@hooks.hook('ha-relation-joined')
+def ha_relation_joined(relation_id=None):
+    settings = generate_ha_relation_data('cephrg')
+    relation_set(relation_id=relation_id, **settings)
+
+
+@hooks.hook('ha-relation-changed')
+def ha_relation_changed():
+    clustered = relation_get('clustered')
+    if clustered:
+        log('Cluster configured, notifying other services and '
+            'updating keystone endpoint configuration')
+        # Tell all related services to start using
+        # the VIP instead
+        for r_id in relation_ids('identity-service'):
+            identity_joined(relid=r_id)
+
+
+@hooks.hook('nrpe-external-master-relation-joined',
+            'nrpe-external-master-relation-changed')
+def update_nrpe_config(checks_to_remove=None):
+    """
+    Update the checks for the nagios plugin.
+
+    :param checks_to_remove: list of short names of nrpe checks to
+        remove. For example, pass ['radosgw'] to remove the check for
+        the default systemd radosgw service, to make way for per host
+        services.
+ :type checks_to_remove: list + + """ + # python-dbus is used by check_upstart_job + apt_install('python-dbus') + hostname = nrpe.get_nagios_hostname() + current_unit = nrpe.get_nagios_unit_name() + nrpe_setup = nrpe.NRPE(hostname=hostname) + nrpe.copy_nrpe_checks() + if checks_to_remove is not None: + log("Removing the following nrpe checks: {}".format(checks_to_remove), + level=DEBUG) + for svc in checks_to_remove: + nrpe_setup.remove_check(shortname=svc) + nrpe.add_init_service_checks(nrpe_setup, services(), current_unit) + nrpe.add_haproxy_checks(nrpe_setup, current_unit) + nrpe_setup.write() + + +def configure_https(): + '''Enables SSL API Apache config if appropriate and kicks + identity-service and image-service with any required + updates + ''' + CONFIGS.write_all() + if 'https' in CONFIGS.complete_contexts(): + cmd = ['a2ensite', 'openstack_https_frontend'] + subprocess.check_call(cmd) + else: + cmd = ['a2dissite', 'openstack_https_frontend'] + try: + subprocess.check_call(cmd) + except subprocess.CalledProcessError: + # The site is not yet enabled or + # https is not configured + pass + + # TODO: improve this by checking if local CN certs are available + # first then checking reload status (see LP #1433114). + if not is_unit_paused_set(): + service_reload('apache2', restart_on_failure=True) + + +@hooks.hook('update-status') +@harden() +def update_status(): + log('Updating status.') + + +@hooks.hook('pre-series-upgrade') +def pre_series_upgrade(): + log("Running prepare series upgrade hook", "INFO") + series_upgrade_prepare( + pause_unit_helper, CONFIGS) + + +@hooks.hook('post-series-upgrade') +def post_series_upgrade(): + log("Running complete series upgrade hook", "INFO") + series_upgrade_complete( + resume_unit_helper, CONFIGS) + + +@hooks.hook('certificates-relation-joined') +def certs_joined(relation_id=None): + cert_req_obj = get_certificate_request() + if config('virtual-hosted-bucket-enabled'): + import json + cert_req = json.loads(cert_req_obj["cert_requests"]) + for cn in cert_req.keys(): + if cn == config('os-public-hostname'): + log("Adding wildcard hostname for virtual hosted buckets", + "INFO") + cert_req[cn]["sans"].append("*."+config('os-public-hostname')) + cert_req_obj['cert_requests'] = json.dumps(cert_req, + sort_keys=True) + log("Cert request: {}".format(cert_req_obj), "INFO") + relation_set( + relation_id=relation_id, + relation_settings=cert_req_obj) + + +@hooks.hook('certificates-relation-changed') +def certs_changed(relation_id=None, unit=None): + @restart_on_change(restart_map(), stopstart=True) + def _certs_changed(): + process_certificates('ceph-radosgw', relation_id, unit) + configure_https() + _certs_changed() + for r_id in relation_ids('identity-service'): + identity_joined(relid=r_id) + # Update s3 apps with ca material, if available + ca_chains = cert_rel_ca() + if ca_chains: + update_s3_ca_info(ca_chains) + + +def get_radosgw_username(r_id): + """Generate a username based on a relation id""" + gw_user = 'juju-' + r_id.replace(":", "-") + return gw_user + + +def get_radosgw_system_username(r_id): + """Generate a username for a system user based on a relation id""" + gw_user = get_radosgw_username(r_id) + # There is no way to switch a user from being a system user to a + # non-system user, so add the '-system' suffix to ensure there is + # no clash if the user request is updated in the future. 
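+    # For example (illustrative), relation id 'radosgw-user:3' maps to
+    # 'juju-radosgw-user-3-system'.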
+ gw_user = gw_user + "-system" + return gw_user + + +@hooks.hook('radosgw-user-relation-departed') +def radosgw_user_departed(): + # If there are no related units then the last unit + # is currently departing. + if not related_units(): + r_id = ch_relation_id() + for user in [get_radosgw_system_username(r_id), + get_radosgw_username(r_id)]: + multisite.suspend_user(user) + + +@hooks.hook('radosgw-user-relation-changed') +def radosgw_user_changed(relation_id=None): + if not ready_for_service(legacy=False): + log('unit not ready, deferring radosgw_user configuration') + return + if relation_id: + r_ids = [relation_id] + else: + r_ids = relation_ids('radosgw-user') + # The leader manages the users and sets the credentials using the + # the application relation data bag. + if is_leader(): + for r_id in r_ids: + remote_app = remote_service_name(r_id) + relation_data = relation_get( + rid=r_id, + app=remote_app) + if 'system-role' not in relation_data: + log('system-role not in relation data, cannot create user', + level=DEBUG) + return + system_user = bool_from_string( + relation_data.get('system-role', 'false')) + if system_user: + gw_user = get_radosgw_system_username(r_id) + # If there is a pre-existing non-system user then ensure it is + # suspended + multisite.suspend_user(get_radosgw_username(r_id)) + else: + gw_user = get_radosgw_username(r_id) + # If there is a pre-existing system user then ensure it is + # suspended + multisite.suspend_user(get_radosgw_system_username(r_id)) + if gw_user in multisite.list_users(): + (access_key, secret_key) = multisite.get_user_creds(gw_user) + else: + (access_key, secret_key) = multisite.create_user( + gw_user, + system_user=system_user) + relation_set( + app=remote_app, + relation_id=r_id, + relation_settings={ + 'uid': gw_user, + 'access-key': access_key, + 'secret-key': secret_key}) + # Each unit publishes its own endpoint data and daemon id using the + # unit relation data bag. + for r_id in r_ids: + relation_set( + relation_id=r_id, + relation_settings={ + 'internal-url': "{}:{}".format( + canonical_url(CONFIGS, INTERNAL), + listen_port()), + 'daemon-id': socket.gethostname()}) + + +@hooks.hook('primary-relation-joined') +def primary_relation_joined(relation_id=None): + if not ready_for_service(legacy=False): + log('unit not ready, deferring multisite configuration') + return + + public_url = '{}:{}'.format( + canonical_url(CONFIGS, PUBLIC), + listen_port(), + ) + endpoints = [public_url] + realm = config('realm') + zonegroup = config('zonegroup') + zone = config('zone') + access_key = leader_get('access_key') + secret = leader_get('secret') + + if not all((realm, zonegroup, zone)): + log('Cannot setup multisite configuration, required config is ' + 'missing. realm, zonegroup and zone charm config options must all ' + 'be set', + 'WARN') + return + + relation_set(relation_id=relation_id, + realm=realm, + zonegroup=zonegroup, + url=endpoints[0], + access_key=access_key, + secret=secret) + + if not is_leader(): + log('Cannot setup multisite configuration, this unit is not the ' + 'leader') + return + + if not leader_get('restart_nonce'): + log('No restart_nonce found') + # NOTE(jamespage): + # This is an ugly kludge to force creation of the required data + # items in the .rgw.root pool prior to the radosgw process being + # started; radosgw-admin does not currently have a way of doing + # this operation but a period update will force it to be created. 
+ multisite.update_period(fatal=False) + + mutation = False + + if realm not in multisite.list_realms(): + log('Realm {} not found, creating now'.format(realm)) + multisite.create_realm(realm, default=True) + mutation = True + + # Migration if primary site has buckets configured. + # Migration involves renaming existing zone/zongroups such that existing + # buckets and their objects can be preserved on the primary site. + if multisite.check_cluster_has_buckets() is True: + log('Migrating to multisite with zone ({}) and zonegroup ({})' + .format(zone, zonegroup), level=DEBUG) + zones = multisite.list_zones() + zonegroups = multisite.list_zonegroups() + + if (len(zonegroups) > 1) and (zonegroup not in zonegroups): + log('Multiple zonegroups found {}, aborting.' + .format(zonegroups), level=ERROR) + return + + if (len(zones) > 1) and (zone not in zones): + log('Multiple zones found {}, aborting.' + .format(zones), level=ERROR) + return + + rename_result = multisite.rename_multisite_config( + zonegroups, zonegroup, + zones, zone + ) + if rename_result is None: + return + + modify_result = multisite.modify_multisite_config( + zone, zonegroup, + endpoints=endpoints, + realm=realm + ) + if modify_result is None: + return + mutation = True + + if zonegroup not in multisite.list_zonegroups(): + log('zonegroup {} not found, creating now'.format(zonegroup)) + multisite.create_zonegroup(zonegroup, + endpoints=endpoints, + default=True, master=True, + realm=realm) + mutation = True + + if zone not in multisite.list_zones(): + log('zone {} not found, creating now'.format(zone)) + multisite.create_zone(zone, + endpoints=endpoints, + default=True, master=True, + zonegroup=zonegroup) + mutation = True + + if MULTISITE_SYSTEM_USER not in multisite.list_users(): + log('User {} not found, creating now'.format(MULTISITE_SYSTEM_USER)) + access_key, secret = multisite.create_system_user( + MULTISITE_SYSTEM_USER + ) + multisite.modify_zone(zone, + access_key=access_key, + secret=secret) + leader_set(access_key=access_key, + secret=secret) + mutation = True + + if mutation: + log( + 'Mutation detected. Restarting {}.'.format(service_name()), + 'INFO') + multisite.update_period(zonegroup=zonegroup, zone=zone) + CONFIGS.write_all() + service_restart(service_name()) + leader_set(restart_nonce=str(uuid.uuid4())) + else: + log('No mutation detected.', 'INFO') + + relation_set(relation_id=relation_id, + access_key=access_key, + secret=secret) + + +@hooks.hook('primary-relation-changed') +def primary_relation_changed(relation_id=None, unit=None): + if not is_leader(): + log('Cannot setup multisite configuration, this unit is not the ' + 'leader') + return + if not ready_for_service(legacy=False): + log('unit not ready, deferring multisite configuration') + return + + sync_policy_state = config('sync-policy-state') + if not sync_policy_state: + log("The config sync-policy-state is not set. Skipping zone group " + "default sync policy configuration") + return + + secondary_data = relation_get(rid=relation_id, unit=unit) + if not all((secondary_data.get('zone'), + secondary_data.get('sync_policy_flow_type'))): + log("Defer processing until secondary RGW has provided required data") + return + + zonegroup = config('zonegroup') + primary_zone = config('zone') + secondary_zone = secondary_data['zone'] + sync_flow_type = secondary_data['sync_policy_flow_type'] + + if (secondary_data.get('zone_tier_type') == 'cloud' and + sync_flow_type != multisite.SYNC_FLOW_DIRECTIONAL): + log("The secondary zone is set with cloud tier type. 
Ignoring " + "configured {} sync policy flow, and using {}.".format( + sync_flow_type, + multisite.SYNC_FLOW_DIRECTIONAL), + level=WARNING) + sync_flow_type = multisite.SYNC_FLOW_DIRECTIONAL + + flow_id = '{}-{}'.format(primary_zone, secondary_zone) + pipe_id = '{}-{}'.format(primary_zone, secondary_zone) + + mutation = multisite.is_sync_group_update_needed( + group_id=MULTISITE_DEFAULT_SYNC_GROUP_ID, + flow_id=flow_id, + pipe_id=pipe_id, + source_zone=primary_zone, + dest_zone=secondary_zone, + desired_status=sync_policy_state, + desired_flow_type=sync_flow_type, + ) + + if mutation: + multisite.create_sync_group( + group_id=MULTISITE_DEFAULT_SYNC_GROUP_ID, + status=sync_policy_state) + multisite.create_sync_group_flow( + group_id=MULTISITE_DEFAULT_SYNC_GROUP_ID, + flow_id=flow_id, + flow_type=sync_flow_type, + source_zone=primary_zone, + dest_zone=secondary_zone) + source_zones = [primary_zone, secondary_zone] + dest_zones = [primary_zone, secondary_zone] + if sync_flow_type == multisite.SYNC_FLOW_DIRECTIONAL: + source_zones = [primary_zone] + dest_zones = [secondary_zone] + multisite.create_sync_group_pipe( + group_id=MULTISITE_DEFAULT_SYNC_GROUP_ID, + pipe_id=pipe_id, + source_zones=source_zones, + dest_zones=dest_zones) + log( + 'Mutation detected. Restarting {}.'.format(service_name()), + 'INFO') + multisite.update_period(zonegroup=zonegroup, zone=primary_zone) + CONFIGS.write_all() + service_restart(service_name()) + leader_set(restart_nonce=str(uuid.uuid4())) + else: + log('No mutation detected.', 'INFO') + + +@hooks.hook('primary-relation-departed') +@hooks.hook('secondary-relation-departed') +def multisite_relation_departed(): + if not is_leader(): + log('Cannot remove multisite relation, this unit is not the leader') + return + + if not ready_for_service(legacy=False): + raise RuntimeError("Leader unit not ready for service.") + + zone = config('zone') + zonegroup = config('zonegroup') + realm = config('realm') + + # If config zone/zonegroup not present on site, + # remove-relation is called prematurely + if not multisite.is_multisite_configured(zone=zone, + zonegroup=zonegroup): + log('Multisite is not configured, skipping scaledown.') + return + + zonegroup_info = multisite.get_zonegroup_info(zonegroup) + # remove other zones from zonegroup + for zone_info in zonegroup_info['zones']: + if zone_info['name'] is not zone: + multisite.remove_zone_from_zonegroup( + zone_info['name'], zonegroup + ) + + # modify self as master zone. + multisite.modify_zone(zone, default=True, master=True, + zonegroup=zonegroup) + + # Update period. + multisite.update_period( + fatal=True, zonegroup=zonegroup, + zone=zone, realm=realm + ) + + # Verify multisite is not configured. 
+ if multisite.is_multisite_configured(zone=zone, + zonegroup=zonegroup): + status_set(WORKLOAD_STATES.BLOCKED, + "Failed to do a clean scaledown.") + raise RuntimeError("Residual multisite config at local site.") + + +@hooks.hook('secondary-relation-changed') +def secondary_relation_changed(relation_id=None, unit=None): + if not is_leader(): + log('Cannot setup multisite configuration, this unit is not the ' + 'leader') + return + if not ready_for_service(legacy=False): + log('unit not ready, deferring multisite configuration') + return + + master_data = relation_get(rid=relation_id, unit=unit) + if not all((master_data.get('realm'), + master_data.get('zonegroup'), + master_data.get('access_key'), + master_data.get('secret'), + master_data.get('url'))): + log("Defer processing until primary RGW has provided required data") + return + + public_url = '{}:{}'.format( + canonical_url(CONFIGS, PUBLIC), + listen_port(), + ) + endpoints = [public_url] + + realm = config('realm') + zonegroup = config('zonegroup') + zone = config('zone') + + if (realm, zonegroup) != (master_data['realm'], + master_data['zonegroup']): + log("Mismatched configuration so stop multi-site configuration now") + return + + if not leader_get('restart_nonce'): + log('No restart_nonce found') + # NOTE(jamespage): + # This is an ugly kludge to force creation of the required data + # items in the .rgw.root pool prior to the radosgw process being + # started; radosgw-admin does not currently have a way of doing + # this operation but a period update will force it to be created. + multisite.update_period(fatal=False) + + relation_set(relation_id=relation_id, + sync_policy_flow_type=config('sync-policy-flow-type')) + + mutation = False + + # NOTE(utkarshbhatthere): + # A site with existing data can create inconsistencies when added as a + # secondary site for RGW. Hence it must be pristine. + if multisite.check_cluster_has_buckets(): + log("Non-Pristine site can't be used as secondary", level=ERROR) + return + + if realm not in multisite.list_realms(): + log('Realm {} not found, pulling now'.format(realm)) + multisite.pull_realm(url=master_data['url'], + access_key=master_data['access_key'], + secret=master_data['secret']) + multisite.pull_period(url=master_data['url'], + access_key=master_data['access_key'], + secret=master_data['secret']) + multisite.set_default_realm(realm) + mutation = True + + if zone not in multisite.list_zones(): + log('zone {} not found, creating now'.format(zone)) + multisite.pull_period(url=master_data['url'], + access_key=master_data['access_key'], + secret=master_data['secret']) + multisite.create_zone(zone, + endpoints=endpoints, + default=False, master=False, + zonegroup=zonegroup, + access_key=master_data['access_key'], + secret=master_data['secret']) + mutation = True + + if mutation: + log( + 'Mutation detected. 
Restarting {}.'.format(service_name()), + 'INFO') + multisite.update_period(zonegroup=zonegroup, zone=zone) + CONFIGS.write_all() + service_restart(service_name()) + leader_set(restart_nonce=str(uuid.uuid4())) + else: + log('No mutation detected.', 'INFO') + + relation_set(relation_id=relation_id, zone=zone) + + +@hooks.hook('master-relation-departed') +@hooks.hook('slave-relation-departed') +def master_slave_relation_departed(): + log("departed relation is deprecated", "WARN") + multisite_relation_departed() + + +@hooks.hook('master-relation-joined') +def master_relation_joined(relation_id=None): + log("This relation is deprecated, use primary-secondary relation instead", + "WARN") + primary_relation_joined(relation_id) + + +@hooks.hook('slave-relation-changed') +def slave_relation_changed(relation_id=None, unit=None): + log("This relation is deprecated, use primary-secondary relation instead", + "WARN") + secondary_relation_changed(relation_id, unit) + + +@hooks.hook('leader-settings-changed') +def leader_settings_changed(): + # NOTE: leader unit will only ever set leader storage + # data when multi-site realm, zonegroup, zone or user + # data has been created/changed - trigger restarts + # of rgw services. + if restart_nonce_changed(leader_get('restart_nonce')): + service_restart(service_name()) + if not is_leader(): + # Deprecated Master/Slave relation + for r_id in relation_ids('master'): + master_relation_joined(r_id) + # Primary/Secondary relation + for r_id in relation_ids('primary'): + primary_relation_joined(r_id) + for unit in related_units(r_id): + primary_relation_changed(r_id, unit) + for r_id in relation_ids('radosgw-user'): + radosgw_user_changed(r_id) + + +def process_multisite_relations(): + """Re-trigger any pending multisite relations""" + # Deprecated Master/Slave relation + for r_id in relation_ids('master'): + master_relation_joined(r_id) + for r_id in relation_ids('slave'): + for unit in related_units(r_id): + slave_relation_changed(r_id, unit) + # Primary/Secondary relation + for r_id in relation_ids('primary'): + primary_relation_joined(r_id) + for unit in related_units(r_id): + primary_relation_changed(r_id, unit) + for r_id in relation_ids('secondary'): + for unit in related_units(r_id): + secondary_relation_changed(r_id, unit) + + +def cert_rel_ca(): + """Get ca material from the certificates relation. + + Returns a list of base64 encoded strings + """ + data = None + for r_id in relation_ids('certificates'): + # First check for app data + remote_app = remote_service_name(r_id) + data = relation_get(rid=r_id, app=remote_app) + if data: + break + # No app data, check for unit data + for unit in related_units(r_id): + data = relation_get(rid=r_id, unit=unit) + if data: + break + if not data: + log('No certificates rel data found', level=DEBUG) + return + ca_chain = [base64.b64encode(d.encode('utf-8')).decode() + for d in (data.get('chain'), data.get('ca')) if d] + return ca_chain + + +def update_s3_ca_info(ca_chains): + """Update tls ca info for s3 connected apps. 
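+
+    Called when fresh CA material arrives, either through the 'ssl-ca'
+    config option or via the certificates relation.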
+
+    Takes a list of base64 encoded ca chains and sets them on the s3
+    relations
+    """
+    apps = utils.all_s3_apps()
+    if not apps:
+        return
+    for app, s3_info in apps.items():
+        s3_info['tls-ca-chain'] = ca_chains
+        for rid in relation_ids('s3'):
+            relation_set(relation_id=rid, app=app, relation_settings=s3_info)
+
+
+def get_relation_info(relation_id):
+    rid = relation_id or ch_relation_id()
+    remote_app = remote_service_name(rid)
+    bucket = relation_get(app=remote_app, attribute='bucket')
+    return rid, remote_app, bucket
+
+
+def create_new_s3_user(remote_app):
+    user = f"{remote_app}-{uuid.uuid4()}"
+    access_key, secret_key = multisite.create_user(user)
+    if not access_key or not secret_key:
+        raise RuntimeError("Failed to create user: {}".format(user))
+    return user, access_key, secret_key
+
+
+def handle_existing_s3_info(
+        rid, remote_app,
+        bucket, endpoint, ca_chains,
+        existing_s3_info):
+    log(
+        "s3 info found, not adding new user/bucket: {}".format(rid),
+        level=DEBUG
+    )
+    # Pass back previously computed data for convenience, but omit the
+    # secret key
+    update = {
+        "bucket": bucket,
+        "access-key": existing_s3_info['access-key'],
+        "endpoint": endpoint,
+        "tls-ca-chain": json.dumps(ca_chains)
+    }
+    relation_set(relation_id=rid, app=remote_app, relation_settings=update)
+
+
+def create_bucket(user, access_key, secret_key, bucket, endpoint, ca_chains):
+    client = boto_client(access_key, secret_key, endpoint)
+    try:
+        client.create_bucket(Bucket=bucket)
+    # Ignore an already existing bucket; just log it
+    except client.meta.client.exceptions.BucketAlreadyExists as e:
+        log("Bucket {} already exists: {}".format(bucket, e))
+    log(
+        "s3: added user={}, bucket: {}".format(user, bucket),
+        level=DEBUG
+    )
+
+
+@hooks.hook('s3-relation-changed')
+def s3_relation_changed(relation_id=None):
+    """
+    Handle the s3 relation changed hook.
+
+    If this unit is the leader, the charm will set up a user with access
+    and secret keys and a bucket, then set this data on the relation,
+    along with the endpoint info.
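+
+    The app data published on the relation ends up looking roughly like
+    this (illustrative values)::
+
+        {'bucket': 'mybucket', 'access-key': '...', 'secret-key': '...',
+         'endpoint': 'https://radosgw.example:443',
+         'tls-ca-chain': '["<base64 CA>"]'}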
+ """ + if not is_leader(): + log('Not leader, defer s3 relation changed hook') + return + + if not ready_for_service(legacy=False): + log('Not ready for service, defer s3 relation changed hook') + return + + rid, remote_app, bucket = get_relation_info(relation_id) + if not bucket: + # Non-leader remote unit or otherwise missing bucket info + log( + 'No bucket app={}, rid={}, skip s3 rel'.format(remote_app, rid), + level=DEBUG + ) + return + + endpoint = '{}:{}'.format( + canonical_url(CONFIGS, PUBLIC), + listen_port(), + ) + + ssl_ca = config('ssl-ca') + if ssl_ca: + ca_chains = [ssl_ca] + else: + ca_chains = cert_rel_ca() + + existing_s3_info = s3_app(remote_app) + if existing_s3_info: + handle_existing_s3_info( + rid, remote_app, bucket, endpoint, ca_chains, existing_s3_info) + return + + # This is a new request, create user and bucket + user, access_key, secret_key = create_new_s3_user(remote_app) + create_bucket(user, access_key, secret_key, bucket, endpoint, ca_chains) + + # Store bucket, creds, endpoint in the app databag + update = { + "bucket": bucket, + "access-key": access_key, + "secret-key": secret_key, + "endpoint": endpoint, + "tls-ca-chain": json.dumps(ca_chains) + } + relation_set(app=remote_app, relation_settings=update) + set_s3_app(remote_app, bucket, access_key, secret_key) + log("Added new s3 app update: {}".format(update), level=DEBUG) + + +@hooks.hook("s3-relation-departed") +def s3_relation_departed(relation_id=None): + """Handle the s3 relation departed hook.""" + if not is_leader() or not ready_for_service(legacy=False): + log('Not leader or not ready, skip depart s3 rel') + return + + remote_app = remote_service_name() + clear_s3_app(remote_app) + log("Removed s3 app for: {}, {}".format( + relation_id, remote_app), level=DEBUG) + + +if __name__ == '__main__': + try: + hooks.execute(sys.argv) + except UnregisteredHookError as e: + log('Unknown hook {} - skipping.'.format(e)) + except ValueError as e: + # Handle any invalid configuration values + status_set(WORKLOAD_STATES.BLOCKED, str(e)) + else: + assess_status(CONFIGS) diff --git a/ceph-radosgw/hooks/identity-service-relation-changed b/ceph-radosgw/hooks/identity-service-relation-changed new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/identity-service-relation-changed @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/identity-service-relation-joined b/ceph-radosgw/hooks/identity-service-relation-joined new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/identity-service-relation-joined @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/install b/ceph-radosgw/hooks/install new file mode 100755 index 00000000..e9027a88 --- /dev/null +++ b/ceph-radosgw/hooks/install @@ -0,0 +1,7 @@ +#!/bin/bash -e +# Wrapper to deal with newer Ubuntu versions that don't have py2 installed +# by default. 
+ +./hooks/install_deps + +exec ./hooks/install.real diff --git a/ceph-radosgw/hooks/install.real b/ceph-radosgw/hooks/install.real new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/install.real @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/install_deps b/ceph-radosgw/hooks/install_deps new file mode 100755 index 00000000..9d4f7646 --- /dev/null +++ b/ceph-radosgw/hooks/install_deps @@ -0,0 +1,17 @@ +#!/bin/bash -e +# Install required dependencies for charm runtime + +declare -a DEPS=('apt' 'netaddr' 'netifaces' 'yaml' 'jinja2' 'dnspython' 'pyudev' 'boto3') + +check_and_install() { + pkg="${1}-${2}" + if ! dpkg -s ${pkg} 2>&1 > /dev/null; then + apt-get -y install ${pkg} + fi +} + +PYTHON="python3" + +for dep in ${DEPS[@]}; do + check_and_install ${PYTHON} ${dep} +done diff --git a/ceph-radosgw/hooks/leader-settings-changed b/ceph-radosgw/hooks/leader-settings-changed new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/leader-settings-changed @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/master-relation-departed b/ceph-radosgw/hooks/master-relation-departed new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/master-relation-departed @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/master-relation-joined b/ceph-radosgw/hooks/master-relation-joined new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/master-relation-joined @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/mon-relation-changed b/ceph-radosgw/hooks/mon-relation-changed new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/mon-relation-changed @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/mon-relation-departed b/ceph-radosgw/hooks/mon-relation-departed new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/mon-relation-departed @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/multisite.py b/ceph-radosgw/hooks/multisite.py new file mode 100644 index 00000000..57f8878f --- /dev/null +++ b/ceph-radosgw/hooks/multisite.py @@ -0,0 +1,1291 @@ +# +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
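+
+# Note: every helper in this module shells out to the 'radosgw-admin' CLI
+# (see RGW_ADMIN below) via small subprocess wrappers, and most parse the
+# JSON the tool prints; on unparseable output they return None or [].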
+ +import json +import functools +import subprocess +import socket +import utils + +import charmhelpers.core.hookenv as hookenv +import charmhelpers.core.decorators as decorators + +RGW_ADMIN = 'radosgw-admin' + +SYNC_POLICY_ENABLED = 'enabled' +SYNC_POLICY_ALLOWED = 'allowed' +SYNC_POLICY_FORBIDDEN = 'forbidden' +SYNC_POLICY_STATES = [ + SYNC_POLICY_ENABLED, + SYNC_POLICY_ALLOWED, + SYNC_POLICY_FORBIDDEN +] +SYNC_FLOW_DIRECTIONAL = 'directional' +SYNC_FLOW_SYMMETRICAL = 'symmetrical' +SYNC_FLOW_TYPES = [ + SYNC_FLOW_DIRECTIONAL, + SYNC_FLOW_SYMMETRICAL, +] + + +class UnknownSyncPolicyState(Exception): + """Raised when an unknown sync policy state is encountered""" + pass + + +class UnknownSyncFlowType(Exception): + """Raised when an unknown sync flow type is encountered""" + pass + + +@decorators.retry_on_exception(num_retries=10, base_delay=5, + exc_type=subprocess.CalledProcessError) +def _check_output(cmd): + """Logging wrapper for subprocess.check_ouput""" + hookenv.log("Executing: {}".format(' '.join(cmd)), level=hookenv.DEBUG) + return subprocess.check_output(cmd).decode('UTF-8') + + +@decorators.retry_on_exception(num_retries=5, base_delay=3, + exc_type=subprocess.CalledProcessError) +def _check_call(cmd): + """Logging wrapper for subprocess.check_call""" + hookenv.log("Executing: {}".format(' '.join(cmd)), level=hookenv.DEBUG) + return subprocess.check_call(cmd) + + +def _call(cmd): + """Logging wrapper for subprocess.call""" + hookenv.log("Executing: {}".format(' '.join(cmd)), level=hookenv.DEBUG) + return subprocess.call(cmd) + + +def _key_name(): + """Determine the name of the cephx key for the local unit""" + if utils.request_per_unit_key(): + return 'rgw.{}'.format(socket.gethostname()) + else: + return 'radosgw.gateway' + + +def _list(key): + """ + Internal implementation for list_* functions + + :param key: string for required entity (zone, zonegroup, realm, user) + :type key: str + :return: List of specified entities found + :rtype: list + """ + cmd = [ + RGW_ADMIN, '--id={}'.format(_key_name()), + key, 'list' + ] + try: + result = json.loads(_check_output(cmd)) + hookenv.log("Results: {}".format( + result), + level=hookenv.DEBUG) + if isinstance(result, dict): + return result['{}s'.format(key)] + else: + return result + except TypeError: + return [] + + +def plain_list(key): + """Simple Implementation for list_*, where execution may fail expectedly. + + On failure, retries are not attempted and empty list is returned. + + :param key: string for required resource (zone, zonegroup, realm, user) + :type key: str + :return: list of specified entities found + :rtype: list + """ + cmd = [ + RGW_ADMIN, '--id={}'.format(_key_name()), + key, 'list' + ] + try: + result = json.loads(subprocess.check_output( + cmd, stderr=subprocess.PIPE + ).decode('UTF-8')) + hookenv.log("Results: {}".format(result), level=hookenv.DEBUG) + if isinstance(result, dict): + return result['{}s'.format(key)] + else: + return result + except subprocess.CalledProcessError: + return [] + except TypeError: + return [] + + +@decorators.retry_on_exception(num_retries=5, base_delay=3, + exc_type=ValueError) +def list_zones(retry_on_empty=False): + """ + List zones + + :param retry_on_empty: Whether to retry if no zones are returned. 
+ :type retry_on_empty: bool + :return: List of specified entities found + :rtype: list + :raises: ValueError + """ + _zones = _list('zone') + if retry_on_empty and not _zones: + hookenv.log("No zones found", level=hookenv.DEBUG) + raise ValueError("No zones found") + return _zones + + +list_realms = functools.partial(_list, 'realm') +list_zonegroups = functools.partial(_list, 'zonegroup') +list_users = functools.partial(_list, 'user') + + +def list_buckets(zone, zonegroup): + """List Buckets served under the provided zone and zonegroup pair. + + :param zonegroup: Parent zonegroup. + :type zonegroup: str + :param zone: Parent zone. + :type zone: str + :returns: List of buckets found + :rtype: list + """ + cmd = [ + RGW_ADMIN, '--id={}'.format(_key_name()), + 'bucket', 'list', + '--rgw-zone={}'.format(zone), + '--rgw-zonegroup={}'.format(zonegroup), + ] + try: + return json.loads(_check_output(cmd)) + except subprocess.CalledProcessError: + hookenv.log("Bucket queried for incorrect zone({})-zonegroup({}) " + "pair".format(zone, zonegroup), level=hookenv.ERROR) + return None + except TypeError: + return None + + +def create_realm(name, default=False): + """ + Create a new RADOS Gateway Realm. + + :param name: name of realm to create + :type name: str + :param default: set new realm as the default realm + :type default: boolean + :return: realm configuration + :rtype: dict + """ + cmd = [ + RGW_ADMIN, '--id={}'.format(_key_name()), + 'realm', 'create', + '--rgw-realm={}'.format(name) + ] + if default: + cmd += ['--default'] + try: + return json.loads(_check_output(cmd)) + except TypeError: + return None + + +def set_default_realm(name): + """ + Set the default RADOS Gateway Realm + + :param name: name of realm to create + :type name: str + """ + cmd = [ + RGW_ADMIN, '--id={}'.format(_key_name()), + 'realm', 'default', + '--rgw-realm={}'.format(name) + ] + _check_call(cmd) + + +def create_zonegroup(name, endpoints, default=False, master=False, realm=None): + """ + Create a new RADOS Gateway zone Group + + :param name: name of zonegroup to create + :type name: str + :param endpoints: list of URLs to endpoints for zonegroup + :type endpoints: list[str] + :param default: set new zonegroup as the default zonegroup + :type default: boolean + :param master: set new zonegroup as the master zonegroup + :type master: boolean + :param realm: realm to use for zonegroup + :type realm: str + :return: zonegroup configuration + :rtype: dict + """ + cmd = [ + RGW_ADMIN, '--id={}'.format(_key_name()), + 'zonegroup', 'create', + '--rgw-zonegroup={}'.format(name), + '--endpoints={}'.format(','.join(endpoints)), + ] + if realm: + cmd.append('--rgw-realm={}'.format(realm)) + if default: + cmd.append('--default') + if master: + cmd.append('--master') + try: + return json.loads(_check_output(cmd)) + except TypeError: + return None + + +def modify_zonegroup(name, endpoints=None, default=False, + master=False, realm=None): + """Modify an existing RADOS Gateway zonegroup + + An empty list of endpoints would cause NO-CHANGE in the configured + endpoints for the zonegroup. 
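+
+    Roughly equivalent to the following invocation, with optional flags
+    appended only for the arguments actually provided::
+
+        radosgw-admin zonegroup modify --rgw-zonegroup=<name> \
+            [--rgw-realm=<realm>] [--endpoints=<url1,url2>] \
+            [--default] [--master]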
+ + :param name: name of zonegroup to modify + :type name: str + :param endpoints: list of URLs to endpoints for zonegroup + :type endpoints: list[str] + :param default: set zonegroup as the default zonegroup + :type default: boolean + :param master: set zonegroup as the master zonegroup + :type master: boolean + :param realm: realm name for provided zonegroup + :type realm: str + :return: zonegroup configuration + :rtype: dict + """ + cmd = [ + RGW_ADMIN, '--id={}'.format(_key_name()), + 'zonegroup', 'modify', + '--rgw-zonegroup={}'.format(name), + ] + if realm: + cmd.append('--rgw-realm={}'.format(realm)) + if endpoints: + cmd.append('--endpoints={}'.format(','.join(endpoints))) + if default: + cmd.append('--default') + if master: + cmd.append('--master') + try: + return json.loads(_check_output(cmd)) + except TypeError: + return None + + +def create_zone(name, endpoints, default=False, master=False, zonegroup=None, + access_key=None, secret=None, readonly=False): + """ + Create a new RADOS Gateway zone + + :param name: name of zone to create + :type name: str + :param endpoints: list of URLs to endpoints for zone + :type endpoints: list[str] + :param default: set new zone as the default zone + :type default: boolean + :param master: set new zone as the master zone + :type master: boolean + :param zonegroup: zonegroup to use for zone + :type zonegroup: str + :param access_key: access-key to use for the zone + :type access_key: str + :param secret: secret to use with access-key for the zone + :type secret: str + :param readonly: set zone as read only + :type: readonly: boolean + :return: dict of zone configuration + :rtype: dict + """ + cmd = [ + RGW_ADMIN, '--id={}'.format(_key_name()), + 'zone', 'create', + '--rgw-zone={}'.format(name), + '--endpoints={}'.format(','.join(endpoints)), + ] + if zonegroup: + cmd.append('--rgw-zonegroup={}'.format(zonegroup)) + if default: + cmd.append('--default') + if master: + cmd.append('--master') + if access_key and secret: + cmd.append('--access-key={}'.format(access_key)) + cmd.append('--secret={}'.format(secret)) + cmd.append('--read-only={}'.format(1 if readonly else 0)) + try: + return json.loads(_check_output(cmd)) + except TypeError: + return None + + +def modify_zone(name, endpoints=None, default=False, master=False, + access_key=None, secret=None, readonly=False, + realm=None, zonegroup=None): + """Modify an existing RADOS Gateway zone + + :param name: name of zone to create + :type name: str + :param endpoints: list of URLs to endpoints for zone + :type endpoints: list[str] + :param default: set zone as the default zone + :type default: boolean + :param master: set zone as the master zone + :type master: boolean + :param access_key: access-key to use for the zone + :type access_key: str + :param secret: secret to use with access-key for the zone + :type secret: str + :param readonly: set zone as read only + :type readonly: boolean + :param realm: realm to use for zone + :type realm: str + :param zonegroup: zonegroup to use for zone + :type zonegroup: str + :return: zone configuration + :rtype: dict + """ + cmd = [ + RGW_ADMIN, '--id={}'.format(_key_name()), + 'zone', 'modify', + '--rgw-zone={}'.format(name), + ] + if realm: + cmd.append('--rgw-realm={}'.format(realm)) + if zonegroup: + cmd.append('--rgw-zonegroup={}'.format(zonegroup)) + if endpoints: + cmd.append('--endpoints={}'.format(','.join(endpoints))) + if access_key and secret: + cmd.append('--access-key={}'.format(access_key)) + cmd.append('--secret={}'.format(secret)) + if master: 
+        cmd.append('--master')
+    if default:
+        cmd.append('--default')
+    cmd.append('--read-only={}'.format(1 if readonly else 0))
+    try:
+        return json.loads(_check_output(cmd))
+    except TypeError:
+        return None
+
+
+def get_zone_info(name, zonegroup=None):
+    """Fetch detailed info for the provided zone
+
+    :param name: zone name
+    :type name: str
+    :param zonegroup: parent zonegroup name
+    :type zonegroup: str
+    :rtype: dict
+    """
+    cmd = [
+        RGW_ADMIN, '--id={}'.format(_key_name()),
+        'zone', 'get',
+        '--rgw-zone={}'.format(name),
+    ]
+    if zonegroup:
+        cmd.append('--rgw-zonegroup={}'.format(zonegroup))
+    try:
+        return json.loads(_check_output(cmd))
+    except TypeError:
+        return None
+
+
+def remove_zone_from_zonegroup(zone, zonegroup):
+    """Remove RADOS Gateway zone from the provided parent zonegroup
+
+    Removal is different from deletion: this operation removes the
+    zone/zonegroup affiliation but does not delete the actual zone.
+
+    :param zonegroup: parent zonegroup name
+    :type zonegroup: str
+    :param zone: zone name
+    :type zone: str
+    :return: modified zonegroup config
+    :rtype: dict
+    """
+    cmd = [
+        RGW_ADMIN, '--id={}'.format(_key_name()),
+        'zonegroup', 'remove',
+        '--rgw-zonegroup={}'.format(zonegroup),
+        '--rgw-zone={}'.format(zone),
+    ]
+    # Initialise so the error path below cannot hit an unbound variable
+    # when _check_output raises before assignment.
+    result = None
+    try:
+        result = _check_output(cmd)
+        return json.loads(result)
+    except (TypeError, subprocess.CalledProcessError) as exc:
+        raise RuntimeError(
+            "Error removing zone {} from zonegroup {}. Result: {}"
+            .format(zone, zonegroup, result)) from exc
+
+
+def add_zone_to_zonegroup(zone, zonegroup):
+    """Add RADOS Gateway zone to the provided zonegroup
+
+    :param zonegroup: parent zonegroup name
+    :type zonegroup: str
+    :param zone: zone name
+    :type zone: str
+    :return: modified zonegroup config
+    :rtype: dict
+    """
+    cmd = [
+        RGW_ADMIN, '--id={}'.format(_key_name()),
+        'zonegroup', 'add',
+        '--rgw-zonegroup={}'.format(zonegroup),
+        '--rgw-zone={}'.format(zone),
+    ]
+    # Initialise so the error path below cannot hit an unbound variable.
+    result = None
+    try:
+        result = _check_output(cmd)
+        return json.loads(result)
+    except (TypeError, subprocess.CalledProcessError) as exc:
+        raise RuntimeError(
+            "Error adding zone {} to zonegroup {}. Result: {}"
+            .format(zone, zonegroup, result)) from exc
+
+
+def update_period(fatal=True, zonegroup=None, zone=None, realm=None):
+    """Update the RADOS Gateway configuration period
+
+    :param fatal: In failure case, whether CalledProcessError is to be raised.
+ :type fatal: boolean + :param zonegroup: zonegroup name + :type zonegroup: str + :param zone: zone name + :type zone: str + :param realm: realm name + :type realm: str + """ + cmd = [ + RGW_ADMIN, '--id={}'.format(_key_name()), + 'period', 'update', '--commit' + ] + if zonegroup is not None: + cmd.append('--rgw-zonegroup={}'.format(zonegroup)) + if zone is not None: + cmd.append('--rgw-zone={}'.format(zone)) + if realm is not None: + cmd.append('--rgw-realm={}'.format(realm)) + if fatal: + _check_call(cmd) + else: + _call(cmd) + + +def tidy_defaults(): + """ + Purge any default zonegroup and zone definitions + """ + if ('default' in list_zonegroups() and + 'default' in list_zones()): + cmd = [ + RGW_ADMIN, '--id={}'.format(_key_name()), + 'zonegroup', 'remove', + '--rgw-zonegroup=default', + '--rgw-zone=default' + ] + _call(cmd) + update_period() + + if 'default' in list_zones(): + cmd = [ + RGW_ADMIN, '--id={}'.format(_key_name()), + 'zone', 'delete', + '--rgw-zone=default' + ] + _call(cmd) + update_period() + + if 'default' in list_zonegroups(): + cmd = [ + RGW_ADMIN, '--id={}'.format(_key_name()), + 'zonegroup', 'delete', + '--rgw-zonegroup=default' + ] + _call(cmd) + update_period() + + +def get_user_creds(username): + cmd = [ + RGW_ADMIN, '--id={}'.format(_key_name()), + 'user', 'info', + '--uid={}'.format(username) + ] + result = json.loads(_check_output(cmd)) + return (result['keys'][0]['access_key'], + result['keys'][0]['secret_key']) + + +def suspend_user(username): + """ + Suspend a RADOS Gateway user + + :param username: username of user to create + :type username: str + """ + if username not in list_users(): + hookenv.log( + "Cannot suspended user {}. User not found.".format(username), + level=hookenv.DEBUG) + return + cmd = [ + RGW_ADMIN, '--id={}'.format(_key_name()), + 'user', 'suspend', + '--uid={}'.format(username) + ] + _check_output(cmd) + hookenv.log( + "Suspended user {}".format(username), + level=hookenv.DEBUG) + + +def create_user(username, system_user=False): + """ + Create a RADOS Gateway user + + :param username: username of user to create + :type username: str + :param system_user: Whether to grant system user role + :type system_user: bool + :return: access key and secret + :rtype: (str, str) + """ + cmd = [ + RGW_ADMIN, '--id={}'.format(_key_name()), + 'user', 'create', + '--uid={}'.format(username), + '--display-name=Synchronization User' + ] + if system_user: + cmd.append('--system') + try: + result = json.loads(_check_output(cmd)) + return (result['keys'][0]['access_key'], + result['keys'][0]['secret_key']) + except TypeError: + return (None, None) + + +def create_system_user(username): + """ + Create a RADOS Gateway system user + + :param username: username of user to create + :type username: str + :return: access key and secret + :rtype: (str, str) + """ + return create_user(username, system_user=True) + + +def pull_realm(url, access_key, secret): + """ + Pull in a RADOS Gateway Realm from a master RGW instance + + :param url: url of remote rgw deployment + :type url: str + :param access_key: access-key for remote rgw deployment + :type access_key: str + :param secret: secret for remote rgw deployment + :type secret: str + :return: realm configuration + :rtype: dict + """ + cmd = [ + RGW_ADMIN, '--id={}'.format(_key_name()), + 'realm', 'pull', + '--url={}'.format(url), + '--access-key={}'.format(access_key), + '--secret={}'.format(secret), + ] + try: + return json.loads(_check_output(cmd)) + except TypeError: + return None + + +def pull_period(url, 
access_key, secret): + """ + Pull in a RADOS Gateway period from a master RGW instance + + :param url: url of remote rgw deployment + :type url: str + :param access_key: access-key for remote rgw deployment + :type access_key: str + :param secret: secret for remote rgw deployment + :type secret: str + :return: realm configuration + :rtype: dict + """ + cmd = [ + RGW_ADMIN, '--id={}'.format(_key_name()), + 'period', 'pull', + '--url={}'.format(url), + '--access-key={}'.format(access_key), + '--secret={}'.format(secret), + ] + try: + return json.loads(_check_output(cmd)) + except TypeError: + return None + + +def rename_zone(name, new_name, zonegroup): + """Rename an existing RADOS Gateway zone + + If the command execution succeeds, 0 is returned, otherwise + None is returned to the caller. + + :param name: current name for the zone being renamed + :type name: str + :param new_name: new name for the zone being renamed + :type new_name: str + :rtype: int + """ + cmd = [ + RGW_ADMIN, '--id={}'.format(_key_name()), + 'zone', 'rename', + '--rgw-zone={}'.format(name), + '--zone-new-name={}'.format(new_name), + '--rgw-zonegroup={}'.format(zonegroup) + ] + result = _call(cmd) + return 0 if result == 0 else None + + +def rename_zonegroup(name, new_name): + """Rename an existing RADOS Gateway zonegroup + + If the command execution succeeds, 0 is returned, otherwise + None is returned to the caller. + + :param name: current name for the zonegroup being renamed + :type name: str + :param new_name: new name for the zonegroup being renamed + :type new_name: str + :rtype: int + """ + cmd = [ + RGW_ADMIN, '--id={}'.format(_key_name()), + 'zonegroup', 'rename', + '--rgw-zonegroup={}'.format(name), + '--zonegroup-new-name={}'.format(new_name), + ] + result = _call(cmd) + return 0 if result == 0 else None + + +def get_zonegroup_info(zonegroup): + """Fetch detailed info for the provided zonegroup + + :param zonegroup: zonegroup Name for detailed query + :type zonegroup: str + :rtype: dict + """ + cmd = [ + RGW_ADMIN, '--id={}'.format(_key_name()), + 'zonegroup', 'get', + '--rgw-zonegroup={}'.format(zonegroup), + ] + try: + return json.loads(_check_output(cmd)) + except TypeError: + return None + + +def get_sync_status(): + """ + Get sync status + :returns: Sync Status Report from radosgw-admin + :rtype: str + """ + cmd = [ + RGW_ADMIN, '--id={}'.format(_key_name()), + 'sync', 'status', + ] + try: + return _check_output(cmd) + except subprocess.CalledProcessError: + hookenv.log("Failed to fetch sync status", level=hookenv.ERROR) + return None + + +def is_multisite_configured(zone, zonegroup): + """Check if system is already multisite configured + + Checks if zone and zonegroup are configured appropriately and + remote data sync source is detected in sync status + + :rtype: Boolean + """ + local_zones = list_zones() + if zone not in local_zones: + hookenv.log("zone {} not found in local zones {}" + .format(zone, local_zones), level=hookenv.ERROR) + return False + + local_zonegroups = list_zonegroups() + if zonegroup not in local_zonegroups: + hookenv.log("zonegroup {} not found in local zonegroups {}" + .format(zonegroup, local_zonegroups), level=hookenv.ERROR) + return False + + sync_status = get_sync_status() + hookenv.log("Multisite sync status {}".format(sync_status), + level=hookenv.DEBUG) + if sync_status is not None: + return ('data sync source:' in sync_status) + + return False + + +def get_local_zone(zonegroup): + """Get local zone to provided parent zonegroup. 
+ + In multisite systems, zonegroup contains both local and remote zone info + this method is used to fetch the zone local to querying site. + + :param zonegroup: parent zonegroup name. + :type zonegroup: str + :returns: tuple with parent zonegroup and local zone name + :rtype: tuple + """ + local_zones = list_zones() + zonegroup_info = get_zonegroup_info(zonegroup) + + if zonegroup_info is None: + hookenv.log("Failed to fetch zonegroup ({}) info".format(zonegroup), + level=hookenv.ERROR) + return None, None + + # zonegroup info always contains self name and zones list so fetching + # directly is safe. + master_zonegroup = zonegroup_info['name'] + for zone_info in zonegroup_info['zones']: + zone = zone_info['name'] + if zone in local_zones: + return zone, master_zonegroup + + hookenv.log( + "No local zone configured for zonegroup ({})".format(zonegroup), + level=hookenv.ERROR + ) + return None, None + + +def rename_multisite_config(zonegroups, new_zonegroup_name, + zones, new_zone_name): + """Rename zone and zonegroup to provided new names. + + If zone list (zones) or zonegroup list (zonegroups) contain 1 element + rename the only element present in the list to provided (new_) value. + + :param zonegroups: List of zonegroups available at site. + :type zonegroups: list[str] + :param new_zonegroup_name: Desired new name for master zonegroup. + :type new_zonegroup_name: str + :param zones: List of zones available at site. + :type zones: list[str] + :param new_zonegroup_name: Desired new name for master zone. + :type new_zonegroup_name: str + + :return: Whether any of the zone or zonegroup is renamed. + :rtype: Boolean + """ + mutation = False + if (len(zonegroups) == 1) and (len(zones) == 1): + if new_zonegroup_name not in zonegroups: + result = rename_zonegroup(zonegroups[0], new_zonegroup_name) + if result is None: + hookenv.log( + "Failed renaming zonegroup from {} to {}" + .format(zonegroups[0], new_zonegroup_name), + level=hookenv.ERROR + ) + return None + mutation = True + + if new_zone_name not in zones: + result = rename_zone(zones[0], new_zone_name, new_zonegroup_name) + if result is None: + hookenv.log( + "Failed renaming zone from {} to {}" + .format(zones[0], new_zone_name), level=hookenv.ERROR + ) + return None + mutation = True + + if mutation: + hookenv.log("Renamed zonegroup {} to {}, and zone {} to {}".format( + zonegroups[0], new_zonegroup_name, + zones[0], new_zone_name)) + return True + + return False + + +def modify_multisite_config(zone, zonegroup, endpoints=None, realm=None): + """Configure zone and zonegroup as master for multisite system. 
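+
+    As the body below shows, this promotes the pair to default/master via
+    modify_zonegroup() and modify_zone(), then commits the change with
+    update_period().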
+
+    :param zonegroup: zonegroup name being configured for multisite
+    :type zonegroup: str
+    :param zone: zone name being configured for multisite
+    :type zone: str
+    :param endpoints: list of URLs to RGW endpoints
+    :type endpoints: list[str]
+    :param realm: realm to use for multisite
+    :type realm: str
+    :rtype: Boolean
+    """
+    if modify_zonegroup(zonegroup, endpoints=endpoints, default=True,
+                        master=True, realm=realm) is None:
+        hookenv.log(
+            "Failed configuring zonegroup {}".format(zonegroup),
+            level=hookenv.ERROR
+        )
+        return None
+
+    if modify_zone(zone, endpoints=endpoints, default=True,
+                   master=True, zonegroup=zonegroup, realm=realm) is None:
+        hookenv.log(
+            "Failed configuring zone {}".format(zone), level=hookenv.ERROR
+        )
+        return None
+
+    update_period(zonegroup=zonegroup, zone=zone)
+    hookenv.log("Configured zonegroup {}, and zone {} for multisite".format(
+        zonegroup, zone))
+    return True
+
+
+def check_zone_has_buckets(zone, zonegroup):
+    """Checks whether the provided zone-zonegroup pair contains any bucket.
+
+    :param zone: zone name to query buckets in.
+    :type zone: str
+    :param zonegroup: Parent zonegroup of zone.
+    :type zonegroup: str
+    :rtype: Boolean
+    """
+    buckets = list_buckets(zone, zonegroup)
+    if buckets is not None:
+        return (len(buckets) > 0)
+    hookenv.log(
+        "Failed to query buckets for zone {} zonegroup {}"
+        .format(zone, zonegroup),
+        level=hookenv.WARNING
+    )
+    return False
+
+
+def check_zonegroup_has_buckets(zonegroup):
+    """Checks whether any bucket exists in the master zone of a zonegroup.
+
+    :param zonegroup: zonegroup name to query buckets in.
+    :type zonegroup: str
+    :rtype: Boolean
+    """
+    # NOTE(utkarshbhatthere): sometimes querying against a particular
+    # zonegroup results in info of an entirely different zonegroup, thus to
+    # prevent a query against an incorrect pair in such cases, both zone and
+    # zonegroup names are taken from the zonegroup info.
+    master_zone, master_zonegroup = get_local_zone(zonegroup)
+
+    # If no master zone is configured for the zonegroup
+    if master_zone is None:
+        hookenv.log("No master zone configured for zonegroup {}"
+                    .format(master_zonegroup), level=hookenv.WARNING)
+        return False
+    return check_zone_has_buckets(master_zone, master_zonegroup)
+
+
+def check_cluster_has_buckets():
+    """Iteratively check if ANY zonegroup has buckets on the cluster.
+
+    :rtype: Boolean
+    """
+    for zonegroup in list_zonegroups():
+        if check_zonegroup_has_buckets(zonegroup):
+            return True
+    return False
+
+
+def list_sync_groups(bucket=None):
+    """List sync policy groups.
+
+    :param bucket: Bucket name. If this is given, the bucket level group
+                   policies are listed.
+    :type bucket: str
+
+    :return: List of sync policy groups.
+    :rtype: list
+    """
+    cmd = [
+        RGW_ADMIN, '--id={}'.format(_key_name()),
+        'sync', 'group', 'get',
+    ]
+    if bucket:
+        cmd.append('--bucket={}'.format(bucket))
+    try:
+        return json.loads(_check_output(cmd))
+    except TypeError:
+        return []
+
+
+def sync_group_exists(group_id, bucket=None):
+    """Check if the sync policy group exists.
+
+    :param group_id: Sync policy group id.
+    :type group_id: str
+    :param bucket: Bucket name. If this is given, the bucket level group
+                   policy is checked.
+    :type bucket: str
+
+    :rtype: Boolean
+    """
+    for group in list_sync_groups(bucket=bucket):
+        if group['key'] == group_id:
+            return True
+    return False
+
+
+def get_sync_group(group_id, bucket=None):
+    """Get the sync policy group configuration.
+
+    :param group_id: Sync policy group id.
+    :type group_id: str
+    :param bucket: Bucket name. If this is given, the bucket level group
+                   policy is returned.
+    :type bucket: str
+
+    :return: Sync policy group configuration.
+    :rtype: dict
+    """
+    cmd = [
+        RGW_ADMIN, '--id={}'.format(_key_name()),
+        'sync', 'group', 'get',
+        '--group-id={}'.format(group_id),
+    ]
+    if bucket:
+        cmd.append('--bucket={}'.format(bucket))
+    try:
+        return json.loads(_check_output(cmd))
+    except TypeError:
+        return None
+
+
+def create_sync_group(group_id, status, bucket=None):
+    """Create a sync policy group.
+
+    :param group_id: ID of the sync policy group to be created.
+    :type group_id: str
+    :param status: Status of the sync policy group to be created. Must be one
+                   of the following: 'enabled', 'allowed', 'forbidden'.
+    :type status: str
+    :param bucket: Bucket name. If this is given, the bucket level group
+                   policy is created.
+    :type bucket: str
+
+    :raises UnknownSyncPolicyState: if the provided status is not one of the
+                                    allowed values.
+
+    :return: Sync policy group configuration.
+    :rtype: dict
+    """
+    if status not in SYNC_POLICY_STATES:
+        raise UnknownSyncPolicyState(
+            'Unknown sync policy state: {}'.format(status))
+    cmd = [
+        RGW_ADMIN, '--id={}'.format(_key_name()),
+        'sync', 'group', 'create',
+        '--group-id={}'.format(group_id),
+        '--status={}'.format(status),
+    ]
+    if bucket:
+        cmd.append('--bucket={}'.format(bucket))
+    try:
+        return json.loads(_check_output(cmd))
+    except TypeError:
+        return None
+
+
+def remove_sync_group(group_id, bucket=None):
+    """Remove a sync group with the given group ID and optional bucket.
+
+    :param group_id: The ID of the sync group to remove.
+    :type group_id: str
+    :param bucket: Bucket name. If this is given, the bucket level group
+                   policy is removed.
+    :type bucket: str
+
+    :return: The output of the command as a dict.
+    :rtype: dict
+    """
+    cmd = [
+        RGW_ADMIN, '--id={}'.format(_key_name()),
+        'sync', 'group', 'remove',
+        '--group-id={}'.format(group_id),
+    ]
+    if bucket:
+        cmd.append('--bucket={}'.format(bucket))
+    try:
+        return json.loads(_check_output(cmd))
+    except TypeError:
+        return None
+
+
+def is_sync_group_update_needed(group_id, flow_id, pipe_id, source_zone,
+                                dest_zone, desired_status, desired_flow_type):
+    """Check if the sync group (with the given ID) needs updating.
+
+    :param group_id: The ID of the sync group to check.
+    :type group_id: str
+    :param flow_id: The ID of the sync group flow to check.
+    :type flow_id: str
+    :param pipe_id: The ID of the sync group pipe to check.
+    :type pipe_id: str
+    :param source_zone: Source zone of the sync group flow to check.
+    :type source_zone: str
+    :param dest_zone: Destination zone of the sync group flow to check.
+    :type dest_zone: str
+    :param desired_status: Desired status of the sync group.
+    :type desired_status: str
+    :param desired_flow_type: Desired flow type of the sync group data flow.
+    :type desired_flow_type: str
+
+    :rtype: Boolean
+    """
+    # Check if sync group exists.
+    if not sync_group_exists(group_id):
+        hookenv.log('Sync group "{}" not configured yet'.format(group_id))
+        return True
+    group = get_sync_group(group_id)
+
+    # Check sync group status.
+    if group.get('status') != desired_status:
+        hookenv.log('Sync group "{}" status changed to "{}"'.format(
+            group["id"], desired_status))
+        return True
+
+    # Check if data flow needs to be created or updated.
+    if is_sync_group_flow_update_needed(group=group,
+                                        flow_id=flow_id,
+                                        source_zone=source_zone,
+                                        dest_zone=dest_zone,
+                                        desired_flow_type=desired_flow_type):
+        return True
+
+    # Check if data pipe needs to be created.
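+    # (Shape assumed from the access pattern below: each entry in the
+    # group's 'pipes' list is a dict with at least an 'id' key; only the
+    # pipe IDs are compared, not their contents.)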
+ pipes = group.get('pipes', []) + pipes_ids = [pipe['id'] for pipe in pipes] + if pipe_id not in pipes_ids: + hookenv.log('Sync group pipe "{}" not created yet'.format(pipe_id)) + return True + + # Sync group configuration is up-to-date. + return False + + +def create_sync_group_flow(group_id, flow_id, flow_type, source_zone, + dest_zone): + """Create a new sync group data flow with the given parameters. + + :param group_id: The ID of the sync group to create the data flow for. + :type group_id: str + :param flow_id: The ID of the new data flow. + :type flow_id: str + :param flow_type: The type of the new data flow. + :type flow_type: str + :param source_zone: The source zone for the new data flow. + :type source_zone: str + :param dest_zone: The destination zone for the new data flow. + :type dest_zone: str + + :raises UnknownSyncFlowType: If an unknown sync flow type is provided. + + :return: Sync group data flow configuration. + :rtype: dict + """ + cmd = [ + RGW_ADMIN, '--id={}'.format(_key_name()), + 'sync', 'group', 'flow', 'create', + '--group-id={}'.format(group_id), + '--flow-id={}'.format(flow_id), + '--flow-type={}'.format(flow_type), + ] + if flow_type == SYNC_FLOW_SYMMETRICAL: + cmd.append('--zones={},{}'.format(source_zone, dest_zone)) + elif flow_type == SYNC_FLOW_DIRECTIONAL: + cmd.append('--source-zone={}'.format(source_zone)) + cmd.append('--dest-zone={}'.format(dest_zone)) + else: + raise UnknownSyncFlowType( + 'Unknown sync flow type {}'.format(flow_type)) + try: + return json.loads(_check_output(cmd)) + except TypeError: + return None + + +def remove_sync_group_flow(group_id, flow_id, flow_type, source_zone=None, + dest_zone=None): + """Remove a sync group data flow. + + :param group_id: The ID of the sync group. + :type group_id: str + :param flow_id: The ID of the flow to remove. + :type flow_id: str + :param flow_type: The type of the flow to remove. + :type flow_type: str + :param source_zone: The source zone of the flow to remove (only for + directional flows). + :type source_zone: str + :param dest_zone: The destination zone of the flow to remove (only for + directional flows). + :type dest_zone: str + + :return: The output of the command as a dict. + :rtype: dict + """ + cmd = [ + RGW_ADMIN, '--id={}'.format(_key_name()), + 'sync', 'group', 'flow', 'remove', + '--group-id={}'.format(group_id), + '--flow-id={}'.format(flow_id), + '--flow-type={}'.format(flow_type), + ] + if flow_type == SYNC_FLOW_DIRECTIONAL: + cmd.append('--source-zone={}'.format(source_zone)) + cmd.append('--dest-zone={}'.format(dest_zone)) + try: + return json.loads(_check_output(cmd)) + except TypeError: + return None + + +def create_sync_group_pipe(group_id, pipe_id, source_zones, dest_zones, + source_bucket='*', dest_bucket='*', bucket=None): + """Create a sync group pipe between source and destination zones. + + :param group_id: The ID of the sync group. + :type group_id: str + :param pipe_id: The ID of the sync group pipe. + :type pipe_id: str + :param source_zones: A list of source zones. + :type source_zones: list + :param dest_zones: A list of destination zones. + :type dest_zones: list + :param source_bucket: The source bucket name. Default is '*'. + :type source_bucket: str + :param dest_bucket: The destination bucket name. Default is '*'. + :type dest_bucket: str + :param bucket: The bucket name. If specified, the sync group pipe will be + created for this bucket only. + :type bucket: str + + :return: Sync group pipe configuration. 
+ :rtype: dict + """ + cmd = [ + RGW_ADMIN, '--id={}'.format(_key_name()), + 'sync', 'group', 'pipe', 'create', + '--group-id={}'.format(group_id), + '--pipe-id={}'.format(pipe_id), + '--source-zones={}'.format(','.join(source_zones)), + '--source-bucket={}'.format(source_bucket), + '--dest-zones={}'.format(','.join(dest_zones)), + '--dest-bucket={}'.format(dest_bucket), + ] + if bucket: + cmd.append('--bucket={}'.format(bucket)) + try: + return json.loads(_check_output(cmd)) + except TypeError: + return None + + +def is_sync_group_flow_update_needed(group, flow_id, source_zone, dest_zone, + desired_flow_type): + """Check if the given sync group flow needs updating. + + :param group: The sync policy group configuration. + :type group: dict + :param flow_id: The ID of the sync group flow to check. + :type flow_id: str + :param source_zone: Source zone of the sync group flow to check. + :type source_zone: str + :param dest_zone: Dest zone of the sync group flow to check. + :type dest_zone: str + :param desired_flow_type: Desired flow type of the sync group data flow. + :type desired_flow_type: str + + :rtype: Boolean + """ + symmetrical_flows = group['data_flow'].get('symmetrical', []) + symmetrical_flows_ids = [flow['id'] for flow in symmetrical_flows] + + directional_flows = group['data_flow'].get('directional', []) + directional_flows_ids = [ + # NOTE: Directional flows IDs are not present in the sync group + # configuration. We assume that the ID is a concatenation of the source + # zone and destination zone, as currently configured by the charm code. + # This is a safe assumption, because there are unique directional + # flows for each pair of zones. + "{}-{}".format(flow['source_zone'], flow['dest_zone']) + for flow in directional_flows + ] + + data_flows_ids = symmetrical_flows_ids + directional_flows_ids + if flow_id not in data_flows_ids: + hookenv.log('Data flow "{}" not configured yet'.format(flow_id)) + return True + + # Check if the flow type is consistent with the current configuration. + is_symmetrical = (desired_flow_type == SYNC_FLOW_SYMMETRICAL and + flow_id in symmetrical_flows_ids) + is_directional = (desired_flow_type == SYNC_FLOW_DIRECTIONAL and + flow_id in directional_flows_ids) + if is_symmetrical or is_directional: + # Data flow is consistent with the current configuration. + return False + + # Data flow type has changed. We need to remove the old data flow. 
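+    # Only two flow types exist, so the old type is simply the opposite of
+    # the desired one; removing the stale flow first avoids two flows
+    # sharing the same ID with different types.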
+ hookenv.log('Data flow "{}" type changed to "{}"'.format( + flow_id, desired_flow_type)) + old_flow_type = ( + SYNC_FLOW_SYMMETRICAL if desired_flow_type == SYNC_FLOW_DIRECTIONAL + else SYNC_FLOW_DIRECTIONAL) + hookenv.log( + 'Removing old data flow "{}" before configuring the new one'.format( + flow_id)) + remove_sync_group_flow( + group_id=group["id"], flow_id=flow_id, flow_type=old_flow_type, + source_zone=source_zone, dest_zone=dest_zone) + return True diff --git a/ceph-radosgw/hooks/nrpe-external-master-relation-changed b/ceph-radosgw/hooks/nrpe-external-master-relation-changed new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/nrpe-external-master-relation-changed @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/nrpe-external-master-relation-joined b/ceph-radosgw/hooks/nrpe-external-master-relation-joined new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/nrpe-external-master-relation-joined @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/object-store-relation-joined b/ceph-radosgw/hooks/object-store-relation-joined new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/object-store-relation-joined @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/post-series-upgrade b/ceph-radosgw/hooks/post-series-upgrade new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/post-series-upgrade @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/pre-series-upgrade b/ceph-radosgw/hooks/pre-series-upgrade new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/pre-series-upgrade @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/primary-relation-changed b/ceph-radosgw/hooks/primary-relation-changed new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/primary-relation-changed @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/primary-relation-departed b/ceph-radosgw/hooks/primary-relation-departed new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/primary-relation-departed @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/primary-relation-joined b/ceph-radosgw/hooks/primary-relation-joined new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/primary-relation-joined @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/radosgw-user-relation-changed b/ceph-radosgw/hooks/radosgw-user-relation-changed new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/radosgw-user-relation-changed @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/radosgw-user-relation-departed b/ceph-radosgw/hooks/radosgw-user-relation-departed new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/radosgw-user-relation-departed @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/s3-relation-changed b/ceph-radosgw/hooks/s3-relation-changed new file mode 120000 index 00000000..f94593a0 --- /dev/null +++ b/ceph-radosgw/hooks/s3-relation-changed @@ -0,0 +1 @@ +./hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/s3-relation-departed b/ceph-radosgw/hooks/s3-relation-departed new 
file mode 120000 index 00000000..f94593a0 --- /dev/null +++ b/ceph-radosgw/hooks/s3-relation-departed @@ -0,0 +1 @@ +./hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/secondary-relation-changed b/ceph-radosgw/hooks/secondary-relation-changed new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/secondary-relation-changed @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/secondary-relation-departed b/ceph-radosgw/hooks/secondary-relation-departed new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/secondary-relation-departed @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/slave-relation-changed b/ceph-radosgw/hooks/slave-relation-changed new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/slave-relation-changed @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/slave-relation-departed b/ceph-radosgw/hooks/slave-relation-departed new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/slave-relation-departed @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/start b/ceph-radosgw/hooks/start new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/start @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/stop b/ceph-radosgw/hooks/stop new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/stop @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/update-status b/ceph-radosgw/hooks/update-status new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/update-status @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/upgrade-charm b/ceph-radosgw/hooks/upgrade-charm new file mode 100755 index 00000000..4ae2e75f --- /dev/null +++ b/ceph-radosgw/hooks/upgrade-charm @@ -0,0 +1,11 @@ +#!/bin/bash -e + +# Wrapper to ensure that old python bytecode isn't hanging around +# after we upgrade the charm with newer libraries +find . -iname '*.pyc' -delete +find . -name '__pycache__' -prune -exec rm -rf "{}" \; + +# Re-install dependencies to deal with py2->py3 switch for charm +./hooks/install_deps + +./hooks/upgrade-charm.real diff --git a/ceph-radosgw/hooks/upgrade-charm.real b/ceph-radosgw/hooks/upgrade-charm.real new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/ceph-radosgw/hooks/upgrade-charm.real @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/ceph-radosgw/hooks/utils.py b/ceph-radosgw/hooks/utils.py new file mode 100644 index 00000000..b861162c --- /dev/null +++ b/ceph-radosgw/hooks/utils.py @@ -0,0 +1,562 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
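+
+# Helper module for the ceph-radosgw charm: config/restart maps used for
+# rendering, workload status assessment, and the leader-stored registry of
+# s3 applications (see set_s3_app()/all_s3_apps() near the end of the file).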
+ +import json +import os +import socket +import subprocess + +from collections import OrderedDict +from copy import deepcopy + +import boto3 + +import ceph_radosgw_context +import multisite + +from charmhelpers.core.hookenv import ( + relation_get, + relation_ids, + related_units, + application_version_set, + config, + leader_get, + leader_set, + log, +) +from charmhelpers.contrib.openstack import ( + context, + templating, +) +from charmhelpers.contrib.openstack.utils import ( + make_assess_status_func, + pause_unit, + resume_unit, +) +from charmhelpers.contrib.hahelpers.cluster import ( + get_hacluster_config, + https, +) +from charmhelpers.core.host import ( + cmp_pkgrevno, + lsb_release, + CompareHostReleases, + init_is_systemd, + service, + service_running, +) +from charmhelpers.fetch import ( + apt_cache, + apt_install, + apt_pkg, + apt_update, + add_source, + filter_installed_packages, + get_upstream_version, +) +from charmhelpers.core import unitdata + +# The interface is said to be satisfied if anyone of the interfaces in the +# list has a complete context. +REQUIRED_INTERFACES = { + 'mon': ['mon'], +} +CEPHRG_HA_RES = 'grp_cephrg_vips' +TEMPLATES_DIR = 'templates' +TEMPLATES = 'templates/' +HAPROXY_CONF = '/etc/haproxy/haproxy.cfg' +CEPH_DIR = '/etc/ceph' +CEPH_CONF = '{}/ceph.conf'.format(CEPH_DIR) + +VERSION_PACKAGE = 'radosgw' + +UNUSED_APACHE_SITE_FILES = ["/etc/apache2/sites-available/000-default.conf"] +APACHE_PORTS_FILE = "/etc/apache2/ports.conf" +APACHE_SITE_CONF = '/etc/apache2/sites-available/openstack_https_frontend' +APACHE_SITE_24_CONF = '/etc/apache2/sites-available/' \ + 'openstack_https_frontend.conf' + +BASE_RESOURCE_MAP = OrderedDict([ + (HAPROXY_CONF, { + 'contexts': [context.HAProxyContext(singlenode_mode=True), + ceph_radosgw_context.HAProxyContext()], + 'services': ['haproxy'], + }), + (CEPH_CONF, { + 'contexts': [ceph_radosgw_context.MonContext()], + 'services': [], + }), + (APACHE_SITE_CONF, { + 'contexts': [ceph_radosgw_context.ApacheSSLContext()], + 'services': ['apache2'], + }), + (APACHE_SITE_24_CONF, { + 'contexts': [ceph_radosgw_context.ApacheSSLContext()], + 'services': ['apache2'], + }), +]) + + +def listen_port(): + """Determine port to listen to. + + The value in configuration will be used if specified, otherwise the default + will be determined based on presence of TLS configuration. + + :returns: Port number + :rtype: int + """ + if https(): + default_port = 443 + else: + default_port = 80 + return config('port') or default_port + + +def resource_map(): + """Dynamically generate a map of resources. + + These will be managed for a single hook execution. 
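+
+    For illustration (shape only; actual contexts and service names depend
+    on the deployment)::
+
+        {'/etc/haproxy/haproxy.cfg': {'contexts': [...],
+                                      'services': ['haproxy']},
+         '/etc/ceph/ceph.conf': {'contexts': [...],
+                                 'services': ['ceph-radosgw@rgw.<host>']}}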
+ """ + resource_map = deepcopy(BASE_RESOURCE_MAP) + + if not https(): + resource_map.pop(APACHE_SITE_CONF) + resource_map.pop(APACHE_SITE_24_CONF) + else: + if os.path.exists('/etc/apache2/conf-available'): + resource_map.pop(APACHE_SITE_CONF) + else: + resource_map.pop(APACHE_SITE_24_CONF) + + resource_map[CEPH_CONF]['services'] = [service_name()] + return resource_map + + +def restart_map(): + return OrderedDict([(cfg, v['services']) + for cfg, v in resource_map().items() + if v['services']]) + + +# Hardcoded to icehouse to enable use of charmhelper templating/context tools +# Ideally these function would support non-OpenStack services +def register_configs(release='icehouse'): + configs = templating.OSConfigRenderer(templates_dir=TEMPLATES, + openstack_release=release) + CONFIGS = resource_map() + pkg = 'radosgw' + if not filter_installed_packages([pkg]) and cmp_pkgrevno(pkg, '0.55') >= 0: + # Add keystone configuration if found + CONFIGS[CEPH_CONF]['contexts'].append( + ceph_radosgw_context.IdentityServiceContext() + ) + for cfg, rscs in CONFIGS.items(): + configs.register(cfg, rscs['contexts']) + return configs + + +def services(): + """Returns a list of services associate with this charm.""" + _services = [] + for v in resource_map().values(): + _services.extend(v.get('services', [])) + return list(set(_services)) + + +def get_optional_interfaces(): + """Return the optional interfaces that should be checked if the relavent + relations have appeared. + :returns: {general_interface: [specific_int1, specific_int2, ...], ...} + """ + optional_interfaces = {} + if relation_ids('ha'): + optional_interfaces['ha'] = ['cluster'] + if (cmp_pkgrevno('radosgw', '0.55') >= 0 and + relation_ids('identity-service')): + optional_interfaces['identity'] = ['identity-service'] + return optional_interfaces + + +def get_zones_zonegroups(): + """Get a tuple with lists of zones and zonegroups existing on site + + :rtype: tuple + """ + return multisite.list_zones(), multisite.list_zonegroups() + + +def check_optional_config_and_relations(configs): + """Check that if we have a relation_id for high availability that we can + get the hacluster config. If we can't then we are blocked. This function + is called from assess_status/set_os_workload_status as the charm_func and + needs to return either 'unknown', '' if there is no problem or the status, + message if there is a problem. + + :param configs: an OSConfigRender() instance. + :return 2-tuple: (string, string) = (status, message) + """ + if relation_ids('ha'): + try: + get_hacluster_config() + except Exception: + return ('blocked', + 'hacluster missing configuration: ' + 'vip, vip_iface, vip_cidr') + + multisite_config = (config('realm'), + config('zonegroup'), + config('zone')) + master_configured = (leader_get('access_key'), + leader_get('secret'), + leader_get('restart_nonce')) + + # An operator may have deployed both relations + primary_rids = relation_ids('master') + relation_ids('primary') + secondary_rids = relation_ids('slave') + relation_ids('secondary') + multisite_rids = primary_rids + secondary_rids + + # Any realm or zonegroup config is present, multisite checks can be done. + # zone config can't be used because it's used by default. + if config('realm') or config('zonegroup') or multisite_rids: + # All of Realm, zonegroup, and zone must be configured. 
+        if not all(multisite_config):
+            return ('blocked',
+                    'multi-site configuration incomplete '
+                    '(realm={realm}, zonegroup={zonegroup}'
+                    ', zone={zone})'.format(**config()))
+
+        # Primary/Secondary relation should be configured.
+        if not multisite_rids:
+            return ('blocked',
+                    'multi-site configuration but primary/secondary '
+                    'relation missing')
+
+        # Primary site status check
+        if primary_rids:
+            # Migration: the system is not multisite already.
+            if (ready_for_service(legacy=False) and
+                    not multisite.is_multisite_configured(config('zone'),
+                                                          config('zonegroup'))):
+                if multisite.check_cluster_has_buckets():
+                    zones, zonegroups = get_zones_zonegroups()
+                    status_msg = "Multiple zones or zonegroups configured, " \
+                                 "use action 'config-multisite-values' to " \
+                                 "resolve."
+                    if (len(zonegroups) > 1 and
+                            config('zonegroup') not in zonegroups):
+                        return ('blocked', status_msg)
+
+                    if len(zones) > 1 and config('zone') not in zones:
+                        return ('blocked', status_msg)
+
+                    if not all(master_configured):
+                        return ('blocked', "Failure in multisite migration, "
+                                           "refer to logs.")
+            # Non-migration scenario.
+            if not all(master_configured):
+                return ('waiting',
+                        'waiting for configuration of master zone')
+
+        # Secondary site status check
+        if secondary_rids:
+            # Migration: the system is not multisite already.
+            if (ready_for_service(legacy=False) and
+                    not multisite.is_multisite_configured(config('zone'),
+                                                          config('zonegroup'))):
+                if multisite.check_cluster_has_buckets():
+                    return ('blocked',
+                            "Non-pristine RGW site can't be used as secondary")
+
+            multisite_ready = False
+            for rid in secondary_rids:
+                for unit in related_units(rid):
+                    if relation_get('url', unit=unit, rid=rid):
+                        multisite_ready = True
+                        break
+            if not multisite_ready:
+                return ('waiting',
+                        'multi-site master relation incomplete')
+
+    # Check that the provided Ceph BlueStore configuration is valid.
+    try:
+        bluestore_compression = context.CephBlueStoreCompressionContext()
+        bluestore_compression.validate()
+    except ValueError as e:
+        return ('blocked', 'Invalid configuration: {}'.format(str(e)))
+
+    if (config('virtual-hosted-bucket-enabled') and
+            not config('os-public-hostname')):
+        return ('blocked', "os-public-hostname must have a value "
+                           "when virtual hosted bucket is enabled")
+
+    # Return 'unknown' as the lowest priority to not clobber an existing
+    # status.
+    return 'unknown', ''
+
+
+def setup_ipv6():
+    """Validate IPv6 support and install a haproxy able to serve it."""
+    ubuntu_rel = lsb_release()['DISTRIB_CODENAME'].lower()
+    if CompareHostReleases(ubuntu_rel) < "trusty":
+        raise Exception("IPv6 is not supported in the charms for Ubuntu "
+                        "versions less than Trusty 14.04")
+
+    # Need haproxy >= 1.5.3 for IPv6, so for Trusty if we are <= Kilo we need
+    # to use trusty-backports; otherwise we can use the UCA.
+    vc = apt_pkg.version_compare(get_pkg_version('haproxy'), '1.5.3')
+    if ubuntu_rel == 'trusty' and vc == -1:
+        add_source('deb http://archive.ubuntu.com/ubuntu trusty-backports '
+                   'main')
+        apt_update(fatal=True)
+        apt_install('haproxy/trusty-backports', fatal=True)
+
+
+def assess_status(configs):
+    """Assess status of current unit.
+
+    Decides what the state of the unit should be based on the current
+    configuration.
+    SIDE EFFECT: calls set_os_workload_status(...) which sets the workload
+    status of the unit.
+    Also calls status_set(...) directly if paused state isn't complete.
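+    As a further side effect, publishes the installed 'radosgw' package
+    version as the application version (see the call below).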
+ @param configs: a templating.OSConfigRenderer() object + @returns None - this function is executed for its side-effect + """ + assess_status_func(configs)() + application_version_set(get_upstream_version(VERSION_PACKAGE)) + + +def assess_status_func(configs): + """Helper function to create the function that will assess_status() for + the unit. + Uses charmhelpers.contrib.openstack.utils.make_assess_status_func() to + create the appropriate status function and then returns it. + Used directly by assess_status() and also for pausing and resuming + the unit. + + NOTE: REQUIRED_INTERFACES is augmented with the optional interfaces + depending on the current config before being passed to the + make_assess_status_func() function. + + NOTE(ajkavanagh) ports are not checked due to race hazards with services + that don't behave sychronously w.r.t their service scripts. e.g. + apache2. + @param configs: a templating.OSConfigRenderer() object + @return f() -> None : a function that assesses the unit's workload status + """ + required_interfaces = REQUIRED_INTERFACES.copy() + required_interfaces.update(get_optional_interfaces()) + return make_assess_status_func( + configs, required_interfaces, + charm_func=check_optional_config_and_relations, + services=services(), ports=None) + + +def pause_unit_helper(configs): + """Helper function to pause a unit, and then call assess_status(...) in + effect, so that the status is correctly updated. + Uses charmhelpers.contrib.openstack.utils.pause_unit() to do the work. + @param configs: a templating.OSConfigRenderer() object + @returns None - this function is executed for its side-effect + """ + _pause_resume_helper(pause_unit, configs) + + +def resume_unit_helper(configs): + """Helper function to resume a unit, and then call assess_status(...) in + effect, so that the status is correctly updated. + Uses charmhelpers.contrib.openstack.utils.resume_unit() to do the work. + @param configs: a templating.OSConfigRenderer() object + @returns None - this function is executed for its side-effect + """ + _pause_resume_helper(resume_unit, configs) + + +def _pause_resume_helper(f, configs): + """Helper function that uses the make_assess_status_func(...) from + charmhelpers.contrib.openstack.utils to create an assess_status(...) + function that can be used with the pause/resume of the unit + @param f: the function to be used with the assess_status(...) function + @returns None - this function is executed for its side-effect + """ + # TODO(ajkavanagh) - ports= has been left off because of the race hazard + # that exists due to service_start() + f(assess_status_func(configs), + services=services(), + ports=None) + + +def get_pkg_version(name): + pkg = apt_cache()[name] + version = None + if pkg.current_ver: + version = apt_pkg.upstream_version(pkg.current_ver.ver_str) + return version + + +def disable_unused_apache_sites(): + """Ensure that unused apache configurations are disabled to prevent them + from conflicting with the charm-provided version. 
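+
+    Each unused site is disabled with a2dissite where possible, falling
+    back to removing the site file, and ports.conf is truncated so Apache
+    does not claim ports on its own.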
+ """ + log('Disabling unused Apache sites') + for apache_site_file in UNUSED_APACHE_SITE_FILES: + apache_site = apache_site_file.split('/')[-1].split('.')[0] + if os.path.exists(apache_site_file): + try: + # Try it cleanly + subprocess.check_call(['a2dissite', apache_site]) + except subprocess.CalledProcessError: + # Remove the file + os.remove(apache_site_file) + + with open(APACHE_PORTS_FILE, 'w') as ports: + ports.write("") + + if service_running('apache2'): + log('Restarting Apache') + service('restart', 'apache2') + + +def systemd_based_radosgw(): + """Determine if install should use systemd based radosgw instances""" + host = socket.gethostname() + for rid in relation_ids('mon'): + for unit in related_units(rid): + if relation_get('rgw.{}_key'.format(host), rid=rid, unit=unit): + return True + return False + + +def request_per_unit_key(): + """Determine if a per-unit cephx key should be requested""" + return (cmp_pkgrevno('radosgw', '12.2.0') >= 0 and init_is_systemd()) + + +def service_name(): + """Determine the name of the RADOS Gateway service + + :return: service name to use + :rtype: str + """ + if systemd_based_radosgw(): + return 'ceph-radosgw@rgw.{}'.format(socket.gethostname()) + else: + return 'radosgw' + + +def ready_for_service(legacy=True): + """ + Determine when local unit is ready to service requests determined + by presentation of required cephx keys on the mon relation and + presence of the associated keyring in /etc/ceph. + + :param legacy: whether to check for legacy key support + :type legacy: boolean + :return: whether unit is ready + :rtype: boolean + """ + name = 'rgw.{}'.format(socket.gethostname()) + for rid in relation_ids('mon'): + for unit in related_units(rid): + if (relation_get('{}_key'.format(name), + rid=rid, unit=unit) and + os.path.exists( + os.path.join( + CEPH_DIR, + 'ceph.client.{}.keyring'.format(name) + ))): + return True + if (legacy and + relation_get('radosgw_key', + rid=rid, unit=unit) and + os.path.exists( + os.path.join( + CEPH_DIR, + 'keyring.rados.gateway' + ))): + return True + return False + + +def restart_nonce_changed(nonce): + """ + Determine whether the restart nonce provided has changed + since this function was last invoked. + + :param nonce: value to confirm has changed against the + remembered value for restart_nonce. 
+    :type nonce: str
+    :return: whether nonce has changed value
+    :rtype: boolean
+    """
+    db = unitdata.kv()
+    nonce_key = 'restart_nonce'
+    if nonce != db.get(nonce_key):
+        db.set(nonce_key, nonce)
+        db.flush()
+        return True
+    return False
+
+
+def multisite_deployment():
+    """Determine if deployment is multi-site
+
+    :returns: whether multi-site deployment is configured
+    :rtype: boolean
+    """
+    return all((config('zone'),
+                config('zonegroup'),
+                config('realm')))
+
+
+def boto_client(access_key, secret_key, endpoint):
+    return boto3.resource("s3",
+                          verify=False,
+                          endpoint_url=endpoint,
+                          aws_access_key_id=access_key,
+                          aws_secret_access_key=secret_key)
+
+
+def set_s3_app(app, bucket, access_key, secret_key):
+    """Store known s3 app info."""
+    apps = all_s3_apps()
+    if app not in apps:
+        apps[app] = {
+            "bucket": bucket,
+            "access-key": access_key,
+            "secret-key": secret_key,
+        }
+        leader_set({"s3-apps": json.dumps(apps)})
+
+
+def s3_app(app):
+    """Return s3 app info."""
+    apps = all_s3_apps()
+    return apps.get(app)
+
+
+def all_s3_apps():
+    """Return all s3 app info."""
+    apps = leader_get("s3-apps")
+    if not apps:
+        return {}
+    return json.loads(apps)
+
+
+def clear_s3_app(app):
+    """Delete s3 app info if present."""
+    apps = all_s3_apps()
+    if app in apps:
+        del apps[app]
+        leader_set({"s3-apps": json.dumps(apps)})
diff --git a/ceph-radosgw/icon.svg b/ceph-radosgw/icon.svg
new file mode 100644
index 00000000..e9383990
--- /dev/null
+++ b/ceph-radosgw/icon.svg
@@ -0,0 +1,311 @@
[icon.svg: 311 lines of SVG markup; the content was lost in extraction and only an 'image/svg+xml' media-type string survived]
diff --git a/ceph-radosgw/lib/charms_ceph/__init__.py b/ceph-radosgw/lib/charms_ceph/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/ceph-radosgw/lib/charms_ceph/broker.py b/ceph-radosgw/lib/charms_ceph/broker.py
new file mode 100644
index 00000000..7f453ec8
--- /dev/null
+++ b/ceph-radosgw/lib/charms_ceph/broker.py
@@ -0,0 +1,983 @@
+# Copyright 2016 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import collections
+import json
+import os
+
+from subprocess import check_call, check_output, CalledProcessError
+from tempfile import NamedTemporaryFile
+
+from charms_ceph.utils import (
+    get_cephfs,
+    get_osd_weight
+)
+from charms_ceph.crush_utils import Crushmap
+
+from charmhelpers.core.hookenv import (
+    log,
+    DEBUG,
+    INFO,
+    ERROR,
+)
+from charmhelpers.contrib.storage.linux.ceph import (
+    create_erasure_profile,
+    delete_pool,
+    erasure_profile_exists,
+    get_osds,
+    monitor_key_get,
+    monitor_key_set,
+    pool_exists,
+    pool_set,
+    remove_pool_snapshot,
+    rename_pool,
+    snapshot_pool,
+    validator,
+    ErasurePool,
+    BasePool,
+    ReplicatedPool,
+)
+
+# This comes from http://docs.ceph.com/docs/master/rados/operations/pools/
+# This should do a decent job of preventing people from passing in bad values.
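+# For example, "size" must parse as an int, while "compression_mode" must
+# additionally be one of the listed strings.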
+# It will give a useful error message + +POOL_KEYS = { + # "Ceph Key Name": [Python type, [Valid Range]] + "size": [int], + "min_size": [int], + "crash_replay_interval": [int], + "pgp_num": [int], # = or < pg_num + "crush_ruleset": [int], + "hashpspool": [bool], + "nodelete": [bool], + "nopgchange": [bool], + "nosizechange": [bool], + "write_fadvise_dontneed": [bool], + "noscrub": [bool], + "nodeep-scrub": [bool], + "hit_set_type": [str, ["bloom", "explicit_hash", + "explicit_object"]], + "hit_set_count": [int, [1, 1]], + "hit_set_period": [int], + "hit_set_fpp": [float, [0.0, 1.0]], + "cache_target_dirty_ratio": [float], + "cache_target_dirty_high_ratio": [float], + "cache_target_full_ratio": [float], + "target_max_bytes": [int], + "target_max_objects": [int], + "cache_min_flush_age": [int], + "cache_min_evict_age": [int], + "fast_read": [bool], + "allow_ec_overwrites": [bool], + "compression_mode": [str, ["none", "passive", "aggressive", "force"]], + "compression_algorithm": [str, ["lz4", "snappy", "zlib", "zstd"]], + "compression_required_ratio": [float, [0.0, 1.0]], + "crush_rule": [str], +} + +CEPH_BUCKET_TYPES = [ + 'osd', + 'host', + 'chassis', + 'rack', + 'row', + 'pdu', + 'pod', + 'room', + 'datacenter', + 'region', + 'root' +] + + +def decode_req_encode_rsp(f): + """Decorator to decode incoming requests and encode responses.""" + + def decode_inner(req): + if isinstance(req, bytes): + req = req.decode('utf-8') + return json.dumps(f(json.loads(req))) + + return decode_inner + + +@decode_req_encode_rsp +def process_requests(reqs): + """Process Ceph broker request(s). + + This is a versioned api. API version must be supplied by the client making + the request. + + :param reqs: dict of request parameters. + :returns: dict. exit-code and reason if not 0 + """ + request_id = reqs.get('request-id') + try: + version = reqs.get('api-version') + if version == 1: + log('Processing request {}'.format(request_id), level=DEBUG) + resp = process_requests_v1(reqs['ops']) + if request_id: + resp['request-id'] = request_id + + return resp + + except Exception as exc: + log(str(exc), level=ERROR) + msg = ("Unexpected error occurred while processing requests: %s" % + reqs) + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + msg = ("Missing or invalid api version ({})".format(version)) + resp = {'exit-code': 1, 'stderr': msg} + if request_id: + resp['request-id'] = request_id + + return resp + + +def handle_create_erasure_profile(request, service): + """Create an erasure profile. + + :param request: dict of request operations and params + :param service: The ceph client to run the command under. + :returns: dict. exit-code and reason if not 0 + """ + # "isa" | "lrc" | "shec" | "clay" or it defaults to "jerasure" + erasure_type = request.get('erasure-type') + # dependent on erasure coding type + erasure_technique = request.get('erasure-technique') + # "host" | "rack" | ... 
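+    # (any value from CEPH_BUCKET_TYPES above is accepted; anything else
+    # is rejected below)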
+    failure_domain = request.get('failure-domain')
+    name = request.get('name')
+    # Binary Distribution Matrix (BDM) parameters
+    bdm_k = request.get('k')
+    bdm_m = request.get('m')
+    # LRC parameters
+    bdm_l = request.get('l')
+    crush_locality = request.get('crush-locality')
+    # SHEC parameters
+    bdm_c = request.get('c')
+    # CLAY parameters
+    bdm_d = request.get('d')
+    scalar_mds = request.get('scalar-mds')
+    # Device Class
+    device_class = request.get('device-class')
+
+    if failure_domain and failure_domain not in CEPH_BUCKET_TYPES:
+        msg = "failure-domain must be one of {}".format(CEPH_BUCKET_TYPES)
+        log(msg, level=ERROR)
+        return {'exit-code': 1, 'stderr': msg}
+
+    create_erasure_profile(service=service,
+                           erasure_plugin_name=erasure_type,
+                           profile_name=name,
+                           failure_domain=failure_domain,
+                           data_chunks=bdm_k,
+                           coding_chunks=bdm_m,
+                           locality=bdm_l,
+                           durability_estimator=bdm_d,
+                           helper_chunks=bdm_c,
+                           scalar_mds=scalar_mds,
+                           crush_locality=crush_locality,
+                           device_class=device_class,
+                           erasure_plugin_technique=erasure_technique)
+
+    return {'exit-code': 0}
+
+
+def handle_add_permissions_to_key(request, service):
+    """Groups are defined by the key cephx.groups.(namespace-)?-(name). This
+    key will contain a dict serialized to JSON with data about the group,
+    including pools and members.
+
+    A group can optionally have a namespace defined that will be used to
+    further restrict pool access.
+    """
+    resp = {'exit-code': 0}
+
+    service_name = request.get('name')
+    group_name = request.get('group')
+    group_namespace = request.get('group-namespace')
+    if group_namespace:
+        group_name = "{}-{}".format(group_namespace, group_name)
+    group = get_group(group_name=group_name)
+    service_obj = get_service_groups(service=service_name,
+                                     namespace=group_namespace)
+    if request.get('object-prefix-permissions'):
+        service_obj['object_prefix_perms'] = request.get(
+            'object-prefix-permissions')
+    # A bare format(...) call here would be a no-op; log the object instead.
+    log("Service object: {}".format(service_obj), level=DEBUG)
+    permission = request.get('group-permission') or "rwx"
+    if service_name not in group['services']:
+        group['services'].append(service_name)
+    save_group(group=group, group_name=group_name)
+    if permission not in service_obj['group_names']:
+        service_obj['group_names'][permission] = []
+    if group_name not in service_obj['group_names'][permission]:
+        service_obj['group_names'][permission].append(group_name)
+    save_service(service=service_obj, service_name=service_name)
+    service_obj['groups'] = _build_service_groups(service_obj,
+                                                  group_namespace)
+    update_service_permissions(service_name, service_obj, group_namespace)
+
+    return resp
+
+
+def handle_set_key_permissions(request, service):
+    """Ensure the key has the requested permissions."""
+    permissions = request.get('permissions')
+    client = request.get('client')
+    call = ['ceph', '--id', service, 'auth', 'caps',
+            'client.{}'.format(client)] + permissions
+    try:
+        check_call(call)
+    except CalledProcessError as e:
+        log("Error updating key capabilities: {}".format(e), level=ERROR)
+
+
+def update_service_permissions(service, service_obj=None, namespace=None):
+    """Update the key permissions for the named client in Ceph"""
+    if not service_obj:
+        service_obj = get_service_groups(service=service, namespace=namespace)
+    permissions = pool_permission_list_for_service(service_obj)
+    call = ['ceph', 'auth', 'caps', 'client.{}'.format(service)] + permissions
+    try:
+        check_call(call)
+    except CalledProcessError as e:
+        log("Error updating key capabilities: {}".format(e))
+
+
+def add_pool_to_group(pool,
group, namespace=None): + """Add a named pool to a named group""" + group_name = group + if namespace: + group_name = "{}-{}".format(namespace, group_name) + group = get_group(group_name=group_name) + if pool not in group['pools']: + group["pools"].append(pool) + save_group(group, group_name=group_name) + for service in group['services']: + update_service_permissions(service, namespace=namespace) + + +def pool_permission_list_for_service(service): + """Build the permission string for Ceph for a given service""" + permissions = [] + permission_types = collections.OrderedDict() + for permission, group in sorted(service["group_names"].items()): + if permission not in permission_types: + permission_types[permission] = [] + for item in group: + permission_types[permission].append(item) + for permission, groups in permission_types.items(): + permission = "allow {}".format(permission) + for group in groups: + for pool in service['groups'][group].get('pools', []): + permissions.append("{} pool={}".format(permission, pool)) + for permission, prefixes in sorted( + service.get("object_prefix_perms", {}).items()): + for prefix in prefixes: + permissions.append("allow {} object_prefix {}".format(permission, + prefix)) + return ['mon', ('allow r, allow command "osd blacklist"' + ', allow command "osd blocklist"'), + 'osd', ', '.join(permissions)] + + +def get_service_groups(service, namespace=None): + """Services are objects stored with some metadata, they look like (for a + service named "nova"): + { + group_names: {'rwx': ['images']}, + groups: {} + } + After populating the group, it looks like: + { + group_names: {'rwx': ['images']}, + groups: { + 'images': { + pools: ['glance'], + services: ['nova'] + } + } + } + """ + service_json = monitor_key_get(service='admin', + key="cephx.services.{}".format(service)) + try: + service = json.loads(service_json) + except (TypeError, ValueError): + service = None + if service: + service['groups'] = _build_service_groups(service, namespace) + else: + service = {'group_names': {}, 'groups': {}} + return service + + +def _build_service_groups(service, namespace=None): + """Rebuild the 'groups' dict for a service group + + :returns: dict: dictionary keyed by group name of the following + format: + + { + 'images': { + pools: ['glance'], + services: ['nova', 'glance] + }, + 'vms':{ + pools: ['nova'], + services: ['nova'] + } + } + """ + all_groups = {} + for groups in service['group_names'].values(): + for group in groups: + name = group + if namespace: + name = "{}-{}".format(namespace, name) + all_groups[group] = get_group(group_name=name) + return all_groups + + +def get_group(group_name): + """A group is a structure to hold data about a named group, structured as: + { + pools: ['glance'], + services: ['nova'] + } + """ + group_key = get_group_key(group_name=group_name) + group_json = monitor_key_get(service='admin', key=group_key) + try: + group = json.loads(group_json) + except (TypeError, ValueError): + group = None + if not group: + group = { + 'pools': [], + 'services': [] + } + return group + + +def save_service(service_name, service): + """Persist a service in the monitor cluster""" + service['groups'] = {} + return monitor_key_set(service='admin', + key="cephx.services.{}".format(service_name), + value=json.dumps(service, sort_keys=True)) + + +def save_group(group, group_name): + """Persist a group in the monitor cluster""" + group_key = get_group_key(group_name=group_name) + return monitor_key_set(service='admin', + key=group_key, + 
value=json.dumps(group, sort_keys=True)) + + +def get_group_key(group_name): + """Build group key""" + return 'cephx.groups.{}'.format(group_name) + + +def handle_erasure_pool(request, service): + """Create a new erasure coded pool. + + :param request: dict of request operations and params. + :param service: The ceph client to run the command under. + :returns: dict. exit-code and reason if not 0. + """ + pool_name = request.get('name') + erasure_profile = request.get('erasure-profile') + group_name = request.get('group') + + if erasure_profile is None: + erasure_profile = "default-canonical" + + if group_name: + group_namespace = request.get('group-namespace') + # Add the pool to the group named "group_name" + add_pool_to_group(pool=pool_name, + group=group_name, + namespace=group_namespace) + + # TODO: Default to 3/2 erasure coding. I believe this requires min 5 osds + if not erasure_profile_exists(service=service, name=erasure_profile): + # TODO: Fail and tell them to create the profile or default + msg = ("erasure-profile {} does not exist. Please create it with: " + "create-erasure-profile".format(erasure_profile)) + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + try: + pool = ErasurePool(service=service, + op=request) + except KeyError: + msg = "Missing parameter." + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + # Ok make the erasure pool + if not pool_exists(service=service, name=pool_name): + log("Creating pool '{}' (erasure_profile={})" + .format(pool.name, erasure_profile), level=INFO) + pool.create() + + # Set/update properties that are allowed to change after pool creation. + pool.update() + + +def handle_replicated_pool(request, service): + """Create a new replicated pool. + + :param request: dict of request operations and params. + :param service: The ceph client to run the command under. + :returns: dict. exit-code and reason if not 0. + """ + pool_name = request.get('name') + group_name = request.get('group') + + # Optional params + # NOTE: Check this against the handling in the Pool classes, reconcile and + # remove. + pg_num = request.get('pg_num') + replicas = request.get('replicas') + if pg_num: + # Cap pg_num to max allowed just in case. + osds = get_osds(service) + if osds: + pg_num = min(pg_num, (len(osds) * 100 // replicas)) + request.update({'pg_num': pg_num}) + + if group_name: + group_namespace = request.get('group-namespace') + # Add the pool to the group named "group_name" + add_pool_to_group(pool=pool_name, + group=group_name, + namespace=group_namespace) + + try: + pool = ReplicatedPool(service=service, + op=request) + except KeyError: + msg = "Missing parameter." + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + if not pool_exists(service=service, name=pool_name): + log("Creating pool '{}' (replicas={})".format(pool.name, replicas), + level=INFO) + pool.create() + else: + log("Pool '{}' already exists - skipping create".format(pool.name), + level=DEBUG) + + # Set/update properties that are allowed to change after pool creation. + pool.update() + + +def handle_create_cache_tier(request, service): + """Create a cache tier on a cold pool. Modes supported are + "writeback" and "readonly". + + :param request: dict of request operations and params + :param service: The ceph client to run the command under. + :returns: dict. 
exit-code and reason if not 0 + """ + # mode = "writeback" | "readonly" + storage_pool = request.get('cold-pool') + cache_pool = request.get('hot-pool') + cache_mode = request.get('mode') + + if cache_mode is None: + cache_mode = "writeback" + + # cache and storage pool must exist first + if not pool_exists(service=service, name=storage_pool) or not pool_exists( + service=service, name=cache_pool): + msg = ("cold-pool: {} and hot-pool: {} must exist. Please create " + "them first".format(storage_pool, cache_pool)) + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + p = BasePool(service=service, name=storage_pool) + p.add_cache_tier(cache_pool=cache_pool, mode=cache_mode) + + +def handle_remove_cache_tier(request, service): + """Remove a cache tier from the cold pool. + + :param request: dict of request operations and params + :param service: The ceph client to run the command under. + :returns: dict. exit-code and reason if not 0 + """ + storage_pool = request.get('cold-pool') + cache_pool = request.get('hot-pool') + # cache and storage pool must exist first + if not pool_exists(service=service, name=storage_pool) or not pool_exists( + service=service, name=cache_pool): + msg = ("cold-pool: {} or hot-pool: {} doesn't exist. Not " + "deleting cache tier".format(storage_pool, cache_pool)) + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + pool = BasePool(name=storage_pool, service=service) + pool.remove_cache_tier(cache_pool=cache_pool) + + +def handle_set_pool_value(request, service, coerce=False): + """Sets an arbitrary pool value. + + :param request: dict of request operations and params + :param service: The ceph client to run the command under. + :param coerce: Try to parse/coerce the value into the correct type. + Used by the action code that only gets Str from Juju + :returns: dict. exit-code and reason if not 0 + """ + # Set arbitrary pool values + params = {'pool': request.get('name'), + 'key': request.get('key'), + 'value': request.get('value')} + if params['key'] not in POOL_KEYS: + msg = "Invalid key '{}'".format(params['key']) + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + # Get the validation method + validator_params = POOL_KEYS[params['key']] + # BUG: #1838650 - the function needs to try to coerce the value param to + # the type required for the validator to pass. Note, if this blows, then + # the param isn't parsable to the correct type. + if coerce: + try: + params['value'] = validator_params[0](params['value']) + except ValueError: + raise RuntimeError("Value {} isn't of type {}" + .format(params['value'], validator_params[0])) + # end of BUG: #1838650 + if len(validator_params) == 1: + # Validate that what the user passed is actually legal per Ceph's rules + validator(params['value'], validator_params[0]) + else: + # Validate that what the user passed is actually legal per Ceph's rules + validator(params['value'], validator_params[0], validator_params[1]) + + # Set the value + pool_set(service=service, pool_name=params['pool'], key=params['key'], + value=params['value']) + + +def handle_rgw_regionmap_update(request, service): + """Change the radosgw region map. + + :param request: dict of request operations and params + :param service: The ceph client to run the command under. + :returns: dict. 
exit-code and reason if not 0 + """ + name = request.get('client-name') + if not name: + msg = "Missing rgw-region or client-name params" + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + try: + check_output(['radosgw-admin', + '--id', service, + 'regionmap', 'update', '--name', name]) + except CalledProcessError as err: + log(err.output, level=ERROR) + return {'exit-code': 1, 'stderr': err.output} + + +def handle_rgw_regionmap_default(request, service): + """Create a radosgw region map. + + :param request: dict of request operations and params + :param service: The ceph client to run the command under. + :returns: dict. exit-code and reason if not 0 + """ + region = request.get('rgw-region') + name = request.get('client-name') + if not region or not name: + msg = "Missing rgw-region or client-name params" + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + try: + check_output( + [ + 'radosgw-admin', + '--id', service, + 'regionmap', + 'default', + '--rgw-region', region, + '--name', name]) + except CalledProcessError as err: + log(err.output, level=ERROR) + return {'exit-code': 1, 'stderr': err.output} + + +def handle_rgw_zone_set(request, service): + """Create a radosgw zone. + + :param request: dict of request operations and params + :param service: The ceph client to run the command under. + :returns: dict. exit-code and reason if not 0 + """ + json_file = request.get('zone-json') + name = request.get('client-name') + region_name = request.get('region-name') + zone_name = request.get('zone-name') + if not json_file or not name or not region_name or not zone_name: + msg = "Missing json-file or client-name params" + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + infile = NamedTemporaryFile(delete=False) + with open(infile.name, 'w') as infile_handle: + infile_handle.write(json_file) + try: + check_output( + [ + 'radosgw-admin', + '--id', service, + 'zone', + 'set', + '--rgw-zone', zone_name, + '--infile', infile.name, + '--name', name, + ] + ) + except CalledProcessError as err: + log(err.output, level=ERROR) + return {'exit-code': 1, 'stderr': err.output} + os.unlink(infile.name) + + +def handle_put_osd_in_bucket(request, service): + """Move an osd into a specified crush bucket. + + :param request: dict of request operations and params + :param service: The ceph client to run the command under. + :returns: dict. exit-code and reason if not 0 + """ + osd_id = request.get('osd') + target_bucket = request.get('bucket') + if not osd_id or not target_bucket: + msg = "Missing OSD ID or Bucket" + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + crushmap = Crushmap() + try: + crushmap.ensure_bucket_is_present(target_bucket) + check_output( + [ + 'ceph', + '--id', service, + 'osd', + 'crush', + 'set', + str(osd_id), + str(get_osd_weight(osd_id)), + "root={}".format(target_bucket) + ] + ) + + except Exception as exc: + msg = "Failed to move OSD " \ + "{} into Bucket {} :: {}".format(osd_id, target_bucket, exc) + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + +def handle_rgw_create_user(request, service): + """Create a new rados gateway user. + + :param request: dict of request operations and params + :param service: The ceph client to run the command under. + :returns: dict. 
exit-code and reason if not 0 + """ + user_id = request.get('rgw-uid') + display_name = request.get('display-name') + name = request.get('client-name') + if not name or not display_name or not user_id: + msg = "Missing client-name, display-name or rgw-uid" + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + try: + create_output = check_output( + [ + 'radosgw-admin', + '--id', service, + 'user', + 'create', + '--uid', user_id, + '--display-name', display_name, + '--name', name, + '--system' + ] + ) + try: + user_json = json.loads(str(create_output.decode('UTF-8'))) + return {'exit-code': 0, 'user': user_json} + except ValueError as err: + log(err, level=ERROR) + return {'exit-code': 1, 'stderr': err} + + except CalledProcessError as err: + log(err.output, level=ERROR) + return {'exit-code': 1, 'stderr': err.output} + + +def handle_create_cephfs(request, service): + """Create a new cephfs. + + :param request: The broker request + :param service: The ceph client to run the command under. + :returns: dict. exit-code and reason if not 0 + """ + cephfs_name = request.get('mds_name') + data_pool = request.get('data_pool') + extra_pools = request.get('extra_pools', None) or [] + metadata_pool = request.get('metadata_pool') + # Check if the user params were provided + if not cephfs_name or not data_pool or not metadata_pool: + msg = "Missing mds_name, data_pool or metadata_pool params" + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + # Sanity check that the required pools exist + for pool_name in [data_pool, metadata_pool] + extra_pools: + if not pool_exists(service=service, name=pool_name): + msg = "CephFS pool {} does not exist. Cannot create CephFS".format( + pool_name) + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + if get_cephfs(service=service): + # CephFS new has already been called + log("CephFS already created") + return + + # Finally create CephFS + try: + check_output(["ceph", + '--id', service, + "fs", "new", cephfs_name, + metadata_pool, + data_pool]) + except CalledProcessError as err: + if err.returncode == 22: + log("CephFS already created") + return + else: + log(err.output, level=ERROR) + return {'exit-code': 1, 'stderr': err.output} + for pool_name in extra_pools: + cmd = ["ceph", '--id', service, "fs", "add_data_pool", cephfs_name, + pool_name] + try: + check_output(cmd) + except CalledProcessError as err: + log(err.output, level=ERROR) + return {'exit-code': 1, 'stderr': err.output} + + +def handle_rgw_region_set(request, service): + # radosgw-admin region set --infile us.json --name client.radosgw.us-east-1 + """Set the rados gateway region. + + :param request: dict. The broker request. + :param service: The ceph client to run the command under. + :returns: dict. 
exit-code and reason if not 0
+    """
+    json_file = request.get('region-json')
+    name = request.get('client-name')
+    region_name = request.get('region-name')
+    zone_name = request.get('zone-name')
+    if not json_file or not name or not region_name or not zone_name:
+        msg = ("Missing region-json, client-name, region-name or "
+               "zone-name params")
+        log(msg, level=ERROR)
+        return {'exit-code': 1, 'stderr': msg}
+    infile = NamedTemporaryFile(delete=False)
+    with open(infile.name, 'w') as infile_handle:
+        infile_handle.write(json_file)
+    try:
+        check_output(
+            [
+                'radosgw-admin',
+                '--id', service,
+                'region',
+                'set',
+                '--rgw-zone', zone_name,
+                '--infile', infile.name,
+                '--name', name,
+            ]
+        )
+    except CalledProcessError as err:
+        log(err.output, level=ERROR)
+        return {'exit-code': 1, 'stderr': err.output}
+    os.unlink(infile.name)
+
+
+def handle_create_cephfs_client(request, service):
+    """Creates a new CephFS client for a filesystem.
+
+    :param request: The broker request
+    :param service: The ceph client to run the command under.
+    :returns: dict. exit-code and reason if not 0.
+    """
+    fs_name = request.get('fs_name')
+    client_id = request.get('client_id')
+    # TODO: fs allows setting write permissions for a list of paths.
+    path = request.get('path')
+    perms = request.get('perms')
+    # Need all parameters
+    if not fs_name or not client_id or not path or not perms:
+        msg = "Missing fs_name, client_id, path or perms params"
+        log(msg, level=ERROR)
+        return {'exit-code': 1, 'stderr': msg}
+
+    # Check that the provided fs_name exists
+    if fs_name not in get_cephfs(service=service):
+        msg = ("Ceph filesystem {} does not exist. "
+               "Cannot authorize client").format(fs_name)
+        log(msg, level=ERROR)
+        return {'exit-code': 1, 'stderr': msg}
+
+    # Check that the provided client does NOT exist.
+    try:
+        cmd = ["ceph", "--id", service, "auth", "ls", "-f", "json"]
+        auth_ls = json.loads(check_output(cmd, encoding="utf-8"))
+    except CalledProcessError as err:
+        log(err.output, level=ERROR)
+        return {'exit-code': 1, 'stderr': err.output}
+    except ValueError as err:
+        log(str(err), level=ERROR)
+        return {'exit-code': 1, 'stderr': str(err)}
+
+    client = "client.{}".format(client_id)
+    if client in (elem["entity"] for elem in auth_ls["auth_dump"]):
+        msg = "Client {} already exists".format(client)
+        log(msg, level=ERROR)
+        return {'exit-code': 1, 'stderr': msg}
+
+    # Try to authorize the client
+    try:
+        cmd = [
+            "ceph",
+            "--id", service,
+            "fs", "authorize",
+            fs_name,
+            client,
+            path,
+            perms,
+            "-f", "json"
+        ]
+        fs_auth = json.loads(check_output(cmd, encoding="utf-8"))
+    except CalledProcessError as err:
+        log(err.output, level=ERROR)
+        return {'exit-code': 1, 'stderr': err.output}
+    except ValueError as err:
+        log(str(err), level=ERROR)
+        return {'exit-code': 1, 'stderr': str(err)}
+
+    return {'exit-code': 0, 'key': fs_auth[0]["key"]}
+
+
+def process_requests_v1(reqs):
+    """Process v1 requests.
+
+    Takes a list of requests (dicts) and processes each one. If an error is
+    found, processing stops and the client is notified in the response.
+
+    Returns a response dict containing the exit code (non-zero if any
+    operation failed along with an explanation).
+    """
+    ret = None
+    log("Processing {} ceph broker requests".format(len(reqs)), level=INFO)
+    for req in reqs:
+        op = req.get('op')
+        log("Processing op='{}'".format(op), level=DEBUG)
+        # Use the admin client since we do not have other client key
+        # locations set up to use them for these operations.
+ svc = 'admin' + if op == "create-pool": + pool_type = req.get('pool-type') # "replicated" | "erasure" + + # Default to replicated if pool_type isn't given + if pool_type == 'erasure': + ret = handle_erasure_pool(request=req, service=svc) + else: + ret = handle_replicated_pool(request=req, service=svc) + elif op == "create-cephfs": + ret = handle_create_cephfs(request=req, service=svc) + elif op == "create-cache-tier": + ret = handle_create_cache_tier(request=req, service=svc) + elif op == "remove-cache-tier": + ret = handle_remove_cache_tier(request=req, service=svc) + elif op == "create-erasure-profile": + ret = handle_create_erasure_profile(request=req, service=svc) + elif op == "delete-pool": + pool = req.get('name') + ret = delete_pool(service=svc, name=pool) + elif op == "rename-pool": + old_name = req.get('name') + new_name = req.get('new-name') + ret = rename_pool(service=svc, old_name=old_name, + new_name=new_name) + elif op == "snapshot-pool": + pool = req.get('name') + snapshot_name = req.get('snapshot-name') + ret = snapshot_pool(service=svc, pool_name=pool, + snapshot_name=snapshot_name) + elif op == "remove-pool-snapshot": + pool = req.get('name') + snapshot_name = req.get('snapshot-name') + ret = remove_pool_snapshot(service=svc, pool_name=pool, + snapshot_name=snapshot_name) + elif op == "set-pool-value": + ret = handle_set_pool_value(request=req, service=svc) + elif op == "rgw-region-set": + ret = handle_rgw_region_set(request=req, service=svc) + elif op == "rgw-zone-set": + ret = handle_rgw_zone_set(request=req, service=svc) + elif op == "rgw-regionmap-update": + ret = handle_rgw_regionmap_update(request=req, service=svc) + elif op == "rgw-regionmap-default": + ret = handle_rgw_regionmap_default(request=req, service=svc) + elif op == "rgw-create-user": + ret = handle_rgw_create_user(request=req, service=svc) + elif op == "move-osd-to-bucket": + ret = handle_put_osd_in_bucket(request=req, service=svc) + elif op == "add-permissions-to-key": + ret = handle_add_permissions_to_key(request=req, service=svc) + elif op == 'set-key-permissions': + ret = handle_set_key_permissions(request=req, service=svc) + elif op == "create-cephfs-client": + ret = handle_create_cephfs_client(request=req, service=svc) + else: + msg = "Unknown operation '{}'".format(op) + log(msg, level=ERROR) + return {'exit-code': 1, 'stderr': msg} + + if isinstance(ret, dict) and 'exit-code' in ret: + return ret + + return {'exit-code': 0} diff --git a/ceph-radosgw/lib/charms_ceph/crush_utils.py b/ceph-radosgw/lib/charms_ceph/crush_utils.py new file mode 100644 index 00000000..37084bf1 --- /dev/null +++ b/ceph-radosgw/lib/charms_ceph/crush_utils.py @@ -0,0 +1,154 @@ +# Copyright 2014 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
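+
+# Typical (illustrative) use of the Crushmap helper defined below:
+#
+#     crushmap = Crushmap()
+#     crushmap.ensure_bucket_is_present('fast-ssd')
+#
+# This decompiles the live CRUSH map, appends a bucket/rule pair rendered
+# from CRUSH_BUCKET for any missing bucket, and recompiles the result back
+# into Ceph. The bucket name 'fast-ssd' is a hypothetical example.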
+
+import re
+
+from subprocess import check_output, CalledProcessError
+
+from charmhelpers.core.hookenv import (
+    log,
+    ERROR,
+)
+
+CRUSH_BUCKET = """root {name} {{
+    id {id}    # do not change unnecessarily
+    # weight 0.000
+    alg straw2
+    hash 0  # rjenkins1
+}}
+
+rule {name} {{
+    ruleset 0
+    type replicated
+    min_size 1
+    max_size 10
+    step take {name}
+    step chooseleaf firstn 0 type host
+    step emit
+}}"""
+
+# This regular expression looks for a string like:
+# root NAME {
+#     id NUMBER
+# so that we can extract NAME and ID from the crushmap
+CRUSHMAP_BUCKETS_RE = re.compile(r"root\s+(.+)\s+\{\s*id\s+(-?\d+)")
+
+# This regular expression looks for ID strings in the crushmap like:
+#     id NUMBER
+# so that we can extract the IDs from a crushmap
+CRUSHMAP_ID_RE = re.compile(r"id\s+(-?\d+)")
+
+
+class Crushmap(object):
+    """An object oriented approach to Ceph crushmap management."""
+
+    def __init__(self):
+        self._crushmap = self.load_crushmap()
+        roots = re.findall(CRUSHMAP_BUCKETS_RE, self._crushmap)
+        buckets = []
+        ids = sorted(int(x) for x in
+                     re.findall(CRUSHMAP_ID_RE, self._crushmap))
+        if roots != []:
+            for root in roots:
+                buckets.append(CRUSHBucket(root[0], root[1], True))
+
+        self._buckets = buckets
+        if ids != []:
+            self._ids = ids
+        else:
+            self._ids = [0]
+
+    def load_crushmap(self):
+        try:
+            # Fetch the compiled map and feed it to crushtool on stdin
+            # via input=; check_output() returns bytes, not a process.
+            crush = check_output(['ceph', 'osd', 'getcrushmap'])
+            return str(check_output(['crushtool', '-d', '-'],
+                                    input=crush)
+                       .decode('UTF-8'))
+        except CalledProcessError as e:
+            log("Error occurred while loading and decompiling CRUSH map:"
+                "{}".format(e), ERROR)
+            raise
+
+    def ensure_bucket_is_present(self, bucket_name):
+        if bucket_name not in [bucket.name for bucket in self.buckets()]:
+            self.add_bucket(bucket_name)
+            self.save()
+
+    def buckets(self):
+        """Return a list of buckets that are in the Crushmap."""
+        return self._buckets
+
+    def add_bucket(self, bucket_name):
+        """Add a named bucket to Ceph"""
+        new_id = min(self._ids) - 1
+        self._ids.append(new_id)
+        self._buckets.append(CRUSHBucket(bucket_name, new_id))
+
+    def save(self):
+        """Persist Crushmap to Ceph"""
+        try:
+            crushmap = self.build_crushmap()
+            # As in load_crushmap(), pass data on stdin with input=.
+            compiled = check_output(['crushtool', '-c', '/dev/stdin', '-o',
+                                     '/dev/stdout'],
+                                    input=crushmap.encode('UTF-8'))
+            ceph_output = str(check_output(['ceph', 'osd', 'setcrushmap',
+                                            '-i', '/dev/stdin'],
+                                           input=compiled)
+                              .decode('UTF-8'))
+            return ceph_output
+        except CalledProcessError as e:
+            log("save error: {}".format(e))
+            raise
+
+    def build_crushmap(self):
+        """Modifies the current CRUSH map to include the new buckets"""
+        tmp_crushmap = self._crushmap
+        for bucket in self._buckets:
+            if not bucket.default:
+                tmp_crushmap = "{}\n\n{}".format(
+                    tmp_crushmap,
+                    Crushmap.bucket_string(bucket.name, bucket.id))
+
+        return tmp_crushmap
+
+    @staticmethod
+    def bucket_string(name, id):
+        return CRUSH_BUCKET.format(name=name, id=id)
+
+
+class CRUSHBucket(object):
+    """CRUSH bucket description object."""
+
+    def __init__(self, name, id, default=False):
+        self.name = name
+        self.id = int(id)
+        self.default = default
+
+    def __repr__(self):
+        return "Bucket {{Name: {name}, ID: {id}}}".format(
+            name=self.name, id=self.id)
+
+    def __eq__(self, other):
+        """Override the default Equals behavior"""
+        if isinstance(other, self.__class__):
+            return self.__dict__ == other.__dict__
+        return NotImplemented
+
+    def __ne__(self, other):
+        """Define a non-equality test"""
+        if isinstance(other, self.__class__):
+            return not
self.__eq__(other) + return NotImplemented diff --git a/ceph-radosgw/lib/charms_ceph/utils.py b/ceph-radosgw/lib/charms_ceph/utils.py new file mode 100644 index 00000000..57cb1d7b --- /dev/null +++ b/ceph-radosgw/lib/charms_ceph/utils.py @@ -0,0 +1,3562 @@ +# Copyright 2017-2021 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import collections +import glob +import itertools +import json +import os +import pyudev +import random +import re +import socket +import subprocess +import sys +import time +import uuid +import functools + +from contextlib import contextmanager +from datetime import datetime + +from charmhelpers.core import hookenv +from charmhelpers.core import templating +from charmhelpers.core.host import ( + chownr, + cmp_pkgrevno, + lsb_release, + mkdir, + owner, + service_restart, + service_start, + service_stop, + CompareHostReleases, + write_file, + is_container, +) +from charmhelpers.core.hookenv import ( + cached, + config, + log, + status_set, + DEBUG, + ERROR, + WARNING, + storage_get, + storage_list, +) +from charmhelpers.fetch import ( + add_source, + apt_install, + apt_purge, + apt_update, + filter_missing_packages, + get_installed_version +) +from charmhelpers.contrib.storage.linux.ceph import ( + get_mon_map, + monitor_key_set, + monitor_key_exists, + monitor_key_get, +) +from charmhelpers.contrib.storage.linux.utils import ( + is_block_device, + is_device_mounted, +) +from charmhelpers.contrib.openstack.utils import ( + get_os_codename_install_source, +) +from charmhelpers.contrib.storage.linux import lvm +from charmhelpers.core.unitdata import kv + +CEPH_BASE_DIR = os.path.join(os.sep, 'var', 'lib', 'ceph') +OSD_BASE_DIR = os.path.join(CEPH_BASE_DIR, 'osd') +HDPARM_FILE = os.path.join(os.sep, 'etc', 'hdparm.conf') + +LEADER = 'leader' +PEON = 'peon' +QUORUM = [LEADER, PEON] + +PACKAGES = ['ceph', 'gdisk', + 'radosgw', 'xfsprogs', + 'lvm2', 'parted', 'smartmontools'] + +REMOVE_PACKAGES = [] +CHRONY_PACKAGE = 'chrony' + +CEPH_KEY_MANAGER = 'ceph' +VAULT_KEY_MANAGER = 'vault' +KEY_MANAGERS = [ + CEPH_KEY_MANAGER, + VAULT_KEY_MANAGER, +] + +LinkSpeed = { + "BASE_10": 10, + "BASE_100": 100, + "BASE_1000": 1000, + "GBASE_10": 10000, + "GBASE_40": 40000, + "GBASE_100": 100000, + "UNKNOWN": None +} + +# Mapping of adapter speed to sysctl settings +NETWORK_ADAPTER_SYSCTLS = { + # 10Gb + LinkSpeed["GBASE_10"]: { + 'net.core.rmem_default': 524287, + 'net.core.wmem_default': 524287, + 'net.core.rmem_max': 524287, + 'net.core.wmem_max': 524287, + 'net.core.optmem_max': 524287, + 'net.core.netdev_max_backlog': 300000, + 'net.ipv4.tcp_rmem': '10000000 10000000 10000000', + 'net.ipv4.tcp_wmem': '10000000 10000000 10000000', + 'net.ipv4.tcp_mem': '10000000 10000000 10000000' + }, + # Mellanox 10/40Gb + LinkSpeed["GBASE_40"]: { + 'net.ipv4.tcp_timestamps': 0, + 'net.ipv4.tcp_sack': 1, + 'net.core.netdev_max_backlog': 250000, + 'net.core.rmem_max': 4194304, + 'net.core.wmem_max': 4194304, + 'net.core.rmem_default': 4194304, + 'net.core.wmem_default': 4194304, + 
'net.core.optmem_max': 4194304,
+        'net.ipv4.tcp_rmem': '4096 87380 4194304',
+        'net.ipv4.tcp_wmem': '4096 65536 4194304',
+        'net.ipv4.tcp_low_latency': 1,
+        'net.ipv4.tcp_adv_win_scale': 1
+    }
+}
+
+
+class Partition(object):
+    def __init__(self, name, number, size, start, end, sectors, uuid):
+        """A block device partition.
+
+        :param name: Name of block device
+        :param number: Partition number
+        :param size: Capacity of the device
+        :param start: Starting block
+        :param end: Ending block
+        :param sectors: Number of blocks
+        :param uuid: UUID of the partition
+        """
+        self.name = name
+        self.number = number
+        self.size = size
+        self.start = start
+        self.end = end
+        self.sectors = sectors
+        self.uuid = uuid
+
+    def __str__(self):
+        return "number: {} start: {} end: {} sectors: {} size: {} " \
+               "name: {} uuid: {}".format(self.number, self.start,
+                                          self.end,
+                                          self.sectors, self.size,
+                                          self.name, self.uuid)
+
+    def __eq__(self, other):
+        if isinstance(other, self.__class__):
+            return self.__dict__ == other.__dict__
+        return False
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+
+def unmounted_disks():
+    """List of unmounted block devices on the current host."""
+    disks = []
+    context = pyudev.Context()
+    for device in context.list_devices(DEVTYPE='disk'):
+        if device['SUBSYSTEM'] == 'block':
+            if device.device_node is None:
+                continue
+
+            matched = False
+            for block_type in [u'dm-', u'loop', u'ram', u'nbd']:
+                if block_type in device.device_node:
+                    matched = True
+            if matched:
+                continue
+
+            disks.append(device.device_node)
+    log("Found disks: {}".format(disks))
+    return [disk for disk in disks if not is_device_mounted(disk)]
+
+
+def save_sysctls(sysctl_dict, save_location):
+    """Persist the sysctls to the hard drive.
+
+    :param sysctl_dict: dict
+    :param save_location: path to save the settings to
+    :raises: IOError if anything goes wrong with writing.
+    """
+    try:
+        # Persist the settings for reboots
+        with open(save_location, "w") as fd:
+            for key, value in sysctl_dict.items():
+                fd.write("{}={}\n".format(key, value))
+
+    except IOError as e:
+        log("Unable to persist sysctl settings to {}. Error {}".format(
+            save_location, e), level=ERROR)
+        raise
+
+
+def tune_nic(network_interface):
+    """This will set optimal sysctls for the particular network adapter.
+
+    :param network_interface: string The network adapter name.
+    """
+    speed = get_link_speed(network_interface)
+    if speed in NETWORK_ADAPTER_SYSCTLS:
+        status_set('maintenance', 'Tuning device {}'.format(
+            network_interface))
+        sysctl_file = os.path.join(
+            os.sep,
+            'etc',
+            'sysctl.d',
+            '51-ceph-osd-charm-{}.conf'.format(network_interface))
+        try:
+            log("Saving sysctl_file: {} values: {}".format(
+                sysctl_file, NETWORK_ADAPTER_SYSCTLS[speed]),
+                level=DEBUG)
+            save_sysctls(sysctl_dict=NETWORK_ADAPTER_SYSCTLS[speed],
+                         save_location=sysctl_file)
+        except IOError as e:
+            log("Write to /etc/sysctl.d/51-ceph-osd-charm-{} "
+                "failed. {}".format(network_interface, e),
+                level=ERROR)
+
+        try:
+            # Apply the settings
+            log("Applying sysctl settings", level=DEBUG)
+            subprocess.check_output(["sysctl", "-p", sysctl_file])
+        except subprocess.CalledProcessError as err:
+            log('sysctl -p {} failed with error {}'.format(sysctl_file,
+                                                           err.output),
+                level=ERROR)
+    else:
+        log("No settings found for network adapter: {}".format(
+            network_interface), level=DEBUG)
+
+
+def get_link_speed(network_interface):
+    """This will find the link speed for a given network device. Returns
+    None if an error occurs.
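+    The speed is read from /sys/class/net/<interface>/speed, which the
+    kernel reports in Mb/s.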
+    :param network_interface: string The network adapter interface.
+    :returns: LinkSpeed
+    """
+    speed_path = os.path.join(os.sep, 'sys', 'class', 'net',
+                              network_interface, 'speed')
+    # I'm not sure where else we'd check if this doesn't exist
+    if not os.path.exists(speed_path):
+        return LinkSpeed["UNKNOWN"]
+
+    try:
+        with open(speed_path, 'r') as sysfs:
+            nic_speed = sysfs.readlines()
+
+        # Did we actually read anything?
+        if not nic_speed:
+            return LinkSpeed["UNKNOWN"]
+
+        # Try to find a sysctl match for this particular speed
+        for name, speed in LinkSpeed.items():
+            if speed == int(nic_speed[0].strip()):
+                return speed
+        # Default to UNKNOWN if we can't find a match
+        return LinkSpeed["UNKNOWN"]
+    except IOError as e:
+        log("Unable to open {path} because of error: {error}".format(
+            path=speed_path,
+            error=e), level='error')
+        return LinkSpeed["UNKNOWN"]
+
+
+def persist_settings(settings_dict):
+    # Write all settings to /etc/hdparm.conf
+    """This will persist the hard drive settings to the /etc/hdparm.conf file
+
+    The settings_dict should be in the form of {"uuid": {"key":"value"}}
+
+    :param settings_dict: dict of settings to save
+    """
+    if not settings_dict:
+        return
+
+    try:
+        templating.render(source='hdparm.conf', target=HDPARM_FILE,
+                          context=settings_dict)
+    except IOError as err:
+        log("Unable to open {path} because of error: {error}".format(
+            path=HDPARM_FILE, error=err), level=ERROR)
+    except Exception as e:
+        # The templating.render can raise a jinja2 exception if the
+        # template is not found. Rather than polluting the import
+        # space of this charm, simply catch Exception
+        log('Unable to render {path} due to error: {error}'.format(
+            path=HDPARM_FILE, error=e), level=ERROR)
+
+
+def set_max_sectors_kb(dev_name, max_sectors_size):
+    """This function sets the max_sectors_kb size of a given block device.
+
+    :param dev_name: Name of the block device to query
+    :param max_sectors_size: int of the max_sectors_size to save
+    """
+    max_sectors_kb_path = os.path.join(os.sep, 'sys', 'block', dev_name,
+                                       'queue', 'max_sectors_kb')
+    try:
+        with open(max_sectors_kb_path, 'w') as f:
+            # sysfs expects text, so stringify the integer value
+            f.write(str(max_sectors_size))
+    except IOError as e:
+        log('Failed to write max_sectors_kb to {}. Error: {}'.format(
+            max_sectors_kb_path, e), level=ERROR)
+
+
+def get_max_sectors_kb(dev_name):
+    """This function gets the max_sectors_kb size of a given block device.
+
+    :param dev_name: Name of the block device to query
+    :returns: int which is either the max_sectors_kb or 0 on error.
+    """
+    max_sectors_kb_path = os.path.join(os.sep, 'sys', 'block', dev_name,
+                                       'queue', 'max_sectors_kb')
+
+    # Read in what Linux has set by default
+    if os.path.exists(max_sectors_kb_path):
+        try:
+            with open(max_sectors_kb_path, 'r') as f:
+                max_sectors_kb = f.read().strip()
+                return int(max_sectors_kb)
+        except IOError as e:
+            log('Failed to read max_sectors_kb from {}. Error: {}'.format(
+                max_sectors_kb_path, e), level=ERROR)
+            # Bail.
+            return 0
+    return 0
+
+
+def get_max_hw_sectors_kb(dev_name):
+    """This function gets the max_hw_sectors_kb for a given block device.
+
+    :param dev_name: Name of the block device to query
+    :returns: int which is either the max_hw_sectors_kb or 0 on error.
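+    The value comes from the device's max_hw_sectors_kb queue attribute
+    in sysfs, i.e. the largest I/O size the hardware allows.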
+ """ + max_hw_sectors_kb_path = os.path.join('sys', 'block', dev_name, 'queue', + 'max_hw_sectors_kb') + # Read in what the hardware supports + if os.path.exists(max_hw_sectors_kb_path): + try: + with open(max_hw_sectors_kb_path, 'r') as f: + max_hw_sectors_kb = f.read().strip() + return int(max_hw_sectors_kb) + except IOError as e: + log('Failed to read max_hw_sectors_kb to {}. Error: {}'.format( + max_hw_sectors_kb_path, e), level=ERROR) + return 0 + return 0 + + +def set_hdd_read_ahead(dev_name, read_ahead_sectors=256): + """This function sets the hard drive read ahead. + + :param dev_name: Name of the block device to set read ahead on. + :param read_ahead_sectors: int How many sectors to read ahead. + """ + try: + # Set the read ahead sectors to 256 + log('Setting read ahead to {} for device {}'.format( + read_ahead_sectors, + dev_name)) + subprocess.check_output(['hdparm', + '-a{}'.format(read_ahead_sectors), + dev_name]) + except subprocess.CalledProcessError as e: + log('hdparm failed with error: {}'.format(e.output), + level=ERROR) + + +def get_block_uuid(block_dev): + """This queries blkid to get the uuid for a block device. + + :param block_dev: Name of the block device to query. + :returns: The UUID of the device or None on Error. + """ + try: + block_info = str(subprocess + .check_output(['blkid', '-o', 'export', block_dev]) + .decode('UTF-8')) + for tag in block_info.split('\n'): + parts = tag.split('=') + if parts[0] == 'UUID': + return parts[1] + return None + except subprocess.CalledProcessError as err: + log('get_block_uuid failed with error: {}'.format(err.output), + level=ERROR) + return None + + +def check_max_sectors(save_settings_dict, + block_dev, + uuid): + """Tune the max_hw_sectors if needed. + + make sure that /sys/.../max_sectors_kb matches max_hw_sectors_kb or at + least 1MB for spinning disks + If the box has a RAID card with cache this could go much bigger. + + :param save_settings_dict: The dict used to persist settings + :param block_dev: A block device name: Example: /dev/sda + :param uuid: The uuid of the block device + """ + dev_name = None + path_parts = os.path.split(block_dev) + if len(path_parts) == 2: + dev_name = path_parts[1] + else: + log('Unable to determine the block device name from path: {}'.format( + block_dev)) + # Play it safe and bail + return + max_sectors_kb = get_max_sectors_kb(dev_name=dev_name) + max_hw_sectors_kb = get_max_hw_sectors_kb(dev_name=dev_name) + + if max_sectors_kb < max_hw_sectors_kb: + # OK we have a situation where the hardware supports more than Linux is + # currently requesting + config_max_sectors_kb = hookenv.config('max-sectors-kb') + if config_max_sectors_kb < max_hw_sectors_kb: + # Set the max_sectors_kb to the config.yaml value if it is less + # than the max_hw_sectors_kb + log('Setting max_sectors_kb for device {} to {}'.format( + dev_name, config_max_sectors_kb)) + save_settings_dict[ + "drive_settings"][uuid][ + "read_ahead_sect"] = config_max_sectors_kb + set_max_sectors_kb(dev_name=dev_name, + max_sectors_size=config_max_sectors_kb) + else: + # Set to the max_hw_sectors_kb + log('Setting max_sectors_kb for device {} to {}'.format( + dev_name, max_hw_sectors_kb)) + save_settings_dict[ + "drive_settings"][uuid]['read_ahead_sect'] = max_hw_sectors_kb + set_max_sectors_kb(dev_name=dev_name, + max_sectors_size=max_hw_sectors_kb) + else: + log('max_sectors_kb match max_hw_sectors_kb. 
No change needed for ' + 'device: {}'.format(block_dev)) + + +def tune_dev(block_dev): + """Try to make some intelligent decisions with HDD tuning. Future work will + include optimizing SSDs. + + This function will change the read ahead sectors and the max write + sectors for each block device. + + :param block_dev: A block device name: Example: /dev/sda + """ + uuid = get_block_uuid(block_dev) + if uuid is None: + log('block device {} uuid is None. Unable to save to ' + 'hdparm.conf'.format(block_dev), level=DEBUG) + return + save_settings_dict = {} + log('Tuning device {}'.format(block_dev)) + status_set('maintenance', 'Tuning device {}'.format(block_dev)) + set_hdd_read_ahead(block_dev) + save_settings_dict["drive_settings"] = {} + save_settings_dict["drive_settings"][uuid] = {} + save_settings_dict["drive_settings"][uuid]['read_ahead_sect'] = 256 + + check_max_sectors(block_dev=block_dev, + save_settings_dict=save_settings_dict, + uuid=uuid) + + persist_settings(settings_dict=save_settings_dict) + status_set('maintenance', 'Finished tuning device {}'.format(block_dev)) + + +def ceph_user(): + return 'ceph' + + +class CrushLocation(object): + def __init__(self, identifier, name, osd="", host="", chassis="", + rack="", row="", pdu="", pod="", room="", + datacenter="", zone="", region="", root=""): + self.identifier = identifier + self.name = name + self.osd = osd + self.host = host + self.chassis = chassis + self.rack = rack + self.row = row + self.pdu = pdu + self.pod = pod + self.room = room + self.datacenter = datacenter + self.zone = zone + self.region = region + self.root = root + + def __str__(self): + return "name: {} id: {} osd: {} host: {} chassis: {} rack: {} " \ + "row: {} pdu: {} pod: {} room: {} datacenter: {} zone: {} " \ + "region: {} root: {}".format(self.name, self.identifier, + self.osd, self.host, self.chassis, + self.rack, self.row, self.pdu, + self.pod, self.room, + self.datacenter, self.zone, + self.region, self.root) + + def __eq__(self, other): + return not self.name < other.name and not other.name < self.name + + def __ne__(self, other): + return self.name < other.name or other.name < self.name + + def __gt__(self, other): + return self.name > other.name + + def __ge__(self, other): + return not self.name < other.name + + def __le__(self, other): + return self.name < other.name + + +def get_osd_weight(osd_id): + """Returns the weight of the specified OSD. + + :returns: Float + :raises: ValueError if the monmap fails to parse. + :raises: CalledProcessError if our Ceph command fails. + """ + try: + tree = str(subprocess + .check_output(['ceph', 'osd', 'tree', '--format=json']) + .decode('UTF-8')) + try: + json_tree = json.loads(tree) + # Make sure children are present in the JSON + if not json_tree['nodes']: + return None + for device in json_tree['nodes']: + if device['type'] == 'osd' and device['name'] == osd_id: + return device['crush_weight'] + except ValueError as v: + log("Unable to parse ceph tree json: {}. Error: {}".format( + tree, v)) + raise + except subprocess.CalledProcessError as e: + log("ceph osd tree command failed with message: {}".format( + e)) + raise + + +def _filter_nodes_and_set_attributes(node, node_lookup_map, lookup_type): + """Get all nodes of the desired type, with all their attributes. + + These attributes can be direct or inherited from ancestors. 
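+    For example (illustrative), a 'host' node ends up carrying the rack,
+    row and root names of the buckets above it in the CRUSH tree.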
+ """ + attribute_dict = {node['type']: node['name']} + if node['type'] == lookup_type: + attribute_dict['name'] = node['name'] + attribute_dict['identifier'] = node['id'] + return [attribute_dict] + elif not node.get('children'): + return [attribute_dict] + else: + descendant_attribute_dicts = [ + _filter_nodes_and_set_attributes(node_lookup_map[node_id], + node_lookup_map, lookup_type) + for node_id in node.get('children', []) + ] + return [dict(attribute_dict, **descendant_attribute_dict) + for descendant_attribute_dict + in itertools.chain.from_iterable(descendant_attribute_dicts)] + + +def _flatten_roots(nodes, lookup_type='host'): + """Get a flattened list of nodes of the desired type. + + :param nodes: list of nodes defined as a dictionary of attributes and + children + :type nodes: List[Dict[int, Any]] + :param lookup_type: type of searched node + :type lookup_type: str + :returns: flattened list of nodes + :rtype: List[Dict[str, Any]] + """ + lookup_map = {node['id']: node for node in nodes} + root_attributes_dicts = [_filter_nodes_and_set_attributes(node, lookup_map, + lookup_type) + for node in nodes if node['type'] == 'root'] + # get a flattened list of roots. + return list(itertools.chain.from_iterable(root_attributes_dicts)) + + +def get_osd_tree(service): + """Returns the current OSD map in JSON. + + :returns: List. + :rtype: List[CrushLocation] + :raises: ValueError if the monmap fails to parse. + Also raises CalledProcessError if our Ceph command fails + """ + try: + tree = str(subprocess + .check_output(['ceph', '--id', service, + 'osd', 'tree', '--format=json']) + .decode('UTF-8')) + try: + json_tree = json.loads(tree) + roots = _flatten_roots(json_tree["nodes"]) + return [CrushLocation(**host) for host in roots] + except ValueError as v: + log("Unable to parse ceph tree json: {}. Error: {}".format( + tree, v)) + raise + except subprocess.CalledProcessError as e: + log("ceph osd tree command failed with message: {}".format(e)) + raise + + +def _get_child_dirs(path): + """Returns a list of directory names in the specified path. + + :param path: a full path listing of the parent directory to return child + directory names + :returns: list. A list of child directories under the parent directory + :raises: ValueError if the specified path does not exist or is not a + directory, + OSError if an error occurs reading the directory listing + """ + if not os.path.exists(path): + raise ValueError('Specified path "%s" does not exist' % path) + if not os.path.isdir(path): + raise ValueError('Specified path "%s" is not a directory' % path) + + files_in_dir = [os.path.join(path, f) for f in os.listdir(path)] + return list(filter(os.path.isdir, files_in_dir)) + + +def _get_osd_num_from_dirname(dirname): + """Parses the dirname and returns the OSD id. + + Parses a string in the form of 'ceph-{osd#}' and returns the OSD number + from the directory name. + + :param dirname: the directory name to return the OSD number from + :return int: the OSD number the directory name corresponds to + :raises ValueError: if the OSD number cannot be parsed from the provided + directory name. 
+ """ + match = re.search(r'ceph-(?P\d+)', dirname) + if not match: + raise ValueError("dirname not in correct format: {}".format(dirname)) + + return match.group('osd_id') + + +def get_crimson_osd_ids(): + """Return a set of the OSDs that are running with the Crimson backend.""" + rv = set() + try: + out = subprocess.check_output(['pgrep', 'crimson-osd', '-a']) + for line in out.decode('utf8').splitlines(): + rv.add(line.split()[-1]) + except Exception: + pass + + return rv + + +def get_local_osd_ids(): + """This will list the /var/lib/ceph/osd/* directories and try + to split the ID off of the directory name and return it in + a list. Excludes crimson OSD's from the returned list. + + :returns: list. A list of OSD identifiers + :raises: OSError if something goes wrong with listing the directory. + """ + osd_ids = [] + crimson_osds = get_crimson_osd_ids() + osd_path = os.path.join(os.sep, 'var', 'lib', 'ceph', 'osd') + if os.path.exists(osd_path): + try: + dirs = os.listdir(osd_path) + for osd_dir in dirs: + osd_id = osd_dir.split('-')[1] if '-' in osd_dir else '' + if (_is_int(osd_id) and + filesystem_mounted(os.path.join( + os.sep, osd_path, osd_dir)) and + osd_id not in crimson_osds): + osd_ids.append(osd_id) + except OSError: + raise + return osd_ids + + +def get_local_mon_ids(): + """This will list the /var/lib/ceph/mon/* directories and try + to split the ID off of the directory name and return it in + a list. + + :returns: list. A list of monitor identifiers + :raises: OSError if something goes wrong with listing the directory. + """ + mon_ids = [] + mon_path = os.path.join(os.sep, 'var', 'lib', 'ceph', 'mon') + if os.path.exists(mon_path): + try: + dirs = os.listdir(mon_path) + for mon_dir in dirs: + # Basically this takes everything after ceph- as the monitor ID + match = re.search('ceph-(?P.*)', mon_dir) + if match: + mon_ids.append(match.group('mon_id')) + except OSError: + raise + return mon_ids + + +def _is_int(v): + """Return True if the object v can be turned into an integer.""" + try: + int(v) + return True + except ValueError: + return False + + +def get_version(): + """Derive Ceph release from an installed package.""" + import apt_pkg as apt + + package = "ceph" + + current_ver = get_installed_version(package) + if not current_ver: + # package is known, but no version is currently installed. 
+ e = 'Could not determine version of uninstalled package: %s' % package + error_out(e) + + vers = apt.upstream_version(current_ver.ver_str) + + # x.y match only for 20XX.X + # and ignore patch level for other packages + match = re.match(r'^(\d+)\.(\d+)', vers) + + if match: + vers = match.group(0) + return float(vers) + + +def error_out(msg): + log("FATAL ERROR: {}".format(msg), + level=ERROR) + sys.exit(1) + + +def is_quorum(): + asok = "/var/run/ceph/ceph-mon.{}.asok".format(socket.gethostname()) + cmd = [ + "sudo", + "-u", + ceph_user(), + "ceph", + "--admin-daemon", + asok, + "mon_status" + ] + if os.path.exists(asok): + try: + result = json.loads(str(subprocess + .check_output(cmd) + .decode('UTF-8'))) + except subprocess.CalledProcessError: + return False + except ValueError: + # Non JSON response from mon_status + return False + if result['state'] in QUORUM: + return True + else: + return False + else: + return False + + +def is_leader(): + asok = "/var/run/ceph/ceph-mon.{}.asok".format(socket.gethostname()) + cmd = [ + "sudo", + "-u", + ceph_user(), + "ceph", + "--admin-daemon", + asok, + "mon_status" + ] + if os.path.exists(asok): + try: + result = json.loads(str(subprocess + .check_output(cmd) + .decode('UTF-8'))) + except subprocess.CalledProcessError: + return False + except ValueError: + # Non JSON response from mon_status + return False + if result['state'] == LEADER: + return True + else: + return False + else: + return False + + +def manager_available(): + # if manager daemon isn't on this release, just say it is Fine + if cmp_pkgrevno('ceph', '11.0.0') < 0: + return True + cmd = ["sudo", "-u", "ceph", "ceph", "mgr", "dump", "-f", "json"] + try: + result = json.loads(subprocess.check_output(cmd).decode('UTF-8')) + return result['available'] + except subprocess.CalledProcessError as e: + log("'{}' failed: {}".format(" ".join(cmd), str(e))) + return False + except Exception: + return False + + +def wait_for_quorum(): + while not is_quorum(): + log("Waiting for quorum to be reached") + time.sleep(3) + + +def wait_for_manager(): + while not manager_available(): + log("Waiting for manager to be available") + time.sleep(5) + + +def add_bootstrap_hint(peer): + asok = "/var/run/ceph/ceph-mon.{}.asok".format(socket.gethostname()) + cmd = [ + "sudo", + "-u", + ceph_user(), + "ceph", + "--admin-daemon", + asok, + "add_bootstrap_peer_hint", + peer + ] + if os.path.exists(asok): + # Ignore any errors for this call + subprocess.call(cmd) + + +DISK_FORMATS = [ + 'xfs', + 'ext4', + 'btrfs' +] + +CEPH_PARTITIONS = [ + '89C57F98-2FE5-4DC0-89C1-5EC00CEFF2BE', # Ceph encrypted disk in creation + '45B0969E-9B03-4F30-B4C6-5EC00CEFF106', # Ceph encrypted journal + '4FBD7E29-9D25-41B8-AFD0-5EC00CEFF05D', # Ceph encrypted OSD data + '4FBD7E29-9D25-41B8-AFD0-062C0CEFF05D', # Ceph OSD data + '45B0969E-9B03-4F30-B4C6-B4B80CEFF106', # Ceph OSD journal + '89C57F98-2FE5-4DC0-89C1-F3AD0CEFF2BE', # Ceph disk in creation +] + + +def get_partition_list(dev): + """Lists the partitions of a block device. + + :param dev: Path to a block device. ex: /dev/sda + :returns: Returns a list of Partition objects. 
+    :raises: subprocess.CalledProcessError if partx fails
+    """
+    partitions_list = []
+    try:
+        partitions = get_partitions(dev)
+        # For each line of output
+        for partition in partitions:
+            parts = partition.split()
+            try:
+                partitions_list.append(
+                    Partition(number=parts[0],
+                              start=parts[1],
+                              end=parts[2],
+                              sectors=parts[3],
+                              size=parts[4],
+                              name=parts[5],
+                              uuid=parts[6])
+                )
+            except IndexError:
+                partitions_list.append(
+                    Partition(number=parts[0],
+                              start=parts[1],
+                              end=parts[2],
+                              sectors=parts[3],
+                              size=parts[4],
+                              name="",
+                              uuid=parts[5])
+                )
+
+        return partitions_list
+    except subprocess.CalledProcessError:
+        raise
+
+
+def is_pristine_disk(dev):
+    """
+    Read the first 2048 bytes (LBA 0 - 3) of the block device to determine
+    whether it is actually all zeros and safe for us to use.
+
+    Existing partitioning tools do not discern between a failure to read from
+    a block device, a failure to understand a partition table, and a block
+    device that has no partition table. Since we need to be positive about
+    which is which we need to read the device directly and confirm ourselves.
+
+    :param dev: Path to block device
+    :type dev: str
+    :returns: True if all 2048 bytes == 0x0, False if not
+    :rtype: bool
+    """
+    want_bytes = 2048
+
+    try:
+        f = open(dev, 'rb')
+    except OSError as e:
+        log(e)
+        return False
+
+    with f:
+        data = f.read(want_bytes)
+    read_bytes = len(data)
+    if read_bytes != want_bytes:
+        log('{}: short read, got {} bytes expected {}.'
+            .format(dev, read_bytes, want_bytes), level=WARNING)
+        return False
+
+    return all(byte == 0x0 for byte in data)
+
+
+def is_osd_disk(dev):
+    db = kv()
+    osd_devices = db.get('osd-devices', [])
+    if dev in osd_devices:
+        log('Device {} already processed by charm,'
+            ' skipping'.format(dev))
+        return True
+
+    partitions = get_partition_list(dev)
+    for partition in partitions:
+        try:
+            info = str(subprocess
+                       .check_output(['sgdisk', '-i', partition.number, dev])
+                       .decode('UTF-8'))
+            info = info.split("\n")  # IGNORE:E1103
+            for line in info:
+                for ptype in CEPH_PARTITIONS:
+                    sig = 'Partition GUID code: {}'.format(ptype)
+                    if line.startswith(sig):
+                        return True
+        except subprocess.CalledProcessError as e:
+            log("sgdisk inspection of partition {} on {} failed with "
Skipping".format(partition.minor, dev, e), + level=ERROR) + return False + + +def start_osds(devices): + # Scan for Ceph block devices + rescan_osd_devices() + if (cmp_pkgrevno('ceph', '0.56.6') >= 0 and + cmp_pkgrevno('ceph', '14.2.0') < 0): + # Use ceph-disk activate for directory based OSD's + for dev_or_path in devices: + if os.path.exists(dev_or_path) and os.path.isdir(dev_or_path): + subprocess.check_call( + ['ceph-disk', 'activate', dev_or_path]) + + +def udevadm_settle(): + cmd = ['udevadm', 'settle'] + subprocess.call(cmd) + + +def rescan_osd_devices(): + cmd = [ + 'udevadm', 'trigger', + '--subsystem-match=block', '--action=add' + ] + + subprocess.call(cmd) + + udevadm_settle() + + +_client_admin_keyring = '/etc/ceph/ceph.client.admin.keyring' + + +def is_bootstrapped(): + return os.path.exists( + '/var/lib/ceph/mon/ceph-{}/done'.format(socket.gethostname())) + + +def wait_for_bootstrap(): + while not is_bootstrapped(): + time.sleep(3) + + +def generate_monitor_secret(): + cmd = [ + 'ceph-authtool', + '/dev/stdout', + '--name=mon.', + '--gen-key' + ] + res = str(subprocess.check_output(cmd).decode('UTF-8')) + + return "{}==".format(res.split('=')[1].strip()) + + +# OSD caps taken from ceph-create-keys +_osd_bootstrap_caps = { + 'mon': [ + 'allow command osd create ...', + 'allow command osd crush set ...', + r'allow command auth add * osd allow\ * mon allow\ rwx', + 'allow command mon getmap' + ] +} + +_osd_bootstrap_caps_profile = { + 'mon': [ + 'allow profile bootstrap-osd' + ] +} + + +def parse_key(raw_key): + # get-or-create appears to have different output depending + # on whether its 'get' or 'create' + # 'create' just returns the key, 'get' is more verbose and + # needs parsing + key = None + if len(raw_key.splitlines()) == 1: + key = raw_key + else: + for element in raw_key.splitlines(): + if 'key' in element: + return element.split(' = ')[1].strip() # IGNORE:E1103 + return key + + +def get_osd_bootstrap_key(): + try: + # Attempt to get/create a key using the OSD bootstrap profile first + key = get_named_key('bootstrap-osd', + _osd_bootstrap_caps_profile) + except Exception: + # If that fails try with the older style permissions + key = get_named_key('bootstrap-osd', + _osd_bootstrap_caps) + return key + + +_radosgw_keyring = "/etc/ceph/keyring.rados.gateway" + + +def import_radosgw_key(key): + if not os.path.exists(_radosgw_keyring): + cmd = [ + "sudo", + "-u", + ceph_user(), + 'ceph-authtool', + _radosgw_keyring, + '--create-keyring', + '--name=client.radosgw.gateway', + '--add-key={}'.format(key) + ] + subprocess.check_call(cmd) + + +# OSD caps taken from ceph-create-keys +_radosgw_caps = { + 'mon': ['allow rw'], + 'osd': ['allow rwx'] +} +_upgrade_caps = { + 'mon': ['allow rwx'] +} + + +def get_radosgw_key(pool_list=None, name=None): + return get_named_key(name=name or 'radosgw.gateway', + caps=_radosgw_caps, + pool_list=pool_list) + + +def get_mds_key(name): + return create_named_keyring(entity='mds', + name=name, + caps=mds_caps) + + +_mds_bootstrap_caps_profile = { + 'mon': [ + 'allow profile bootstrap-mds' + ] +} + + +def get_mds_bootstrap_key(): + return get_named_key('bootstrap-mds', + _mds_bootstrap_caps_profile) + + +_default_caps = collections.OrderedDict([ + ('mon', ['allow r', + 'allow command "osd blacklist"', + 'allow command "osd blocklist"']), + ('osd', ['allow rwx']), +]) + +admin_caps = collections.OrderedDict([ + ('mds', ['allow *']), + ('mgr', ['allow *']), + ('mon', ['allow *']), + ('osd', ['allow *']) +]) + +mds_caps = collections.OrderedDict([ + 
('osd', ['allow *']), + ('mds', ['allow']), + ('mon', ['allow rwx']), +]) + +osd_upgrade_caps = collections.OrderedDict([ + ('mon', ['allow command "config-key"', + 'allow command "osd tree"', + 'allow command "config-key list"', + 'allow command "config-key put"', + 'allow command "config-key get"', + 'allow command "config-key exists"', + 'allow command "osd out"', + 'allow command "osd in"', + 'allow command "osd rm"', + 'allow command "auth del"', + ]) +]) + +rbd_mirror_caps = collections.OrderedDict([ + ('mon', ['allow profile rbd-mirror-peer', + 'allow command "service dump"', + 'allow command "service status"' + ]), + ('osd', ['profile rbd']), + ('mgr', ['allow r']), +]) + + +def get_rbd_mirror_key(name): + return get_named_key(name=name, caps=rbd_mirror_caps) + + +def create_named_keyring(entity, name, caps=None): + caps = caps or _default_caps + cmd = [ + "sudo", + "-u", + ceph_user(), + 'ceph', + '--name', 'mon.', + '--keyring', + '/var/lib/ceph/mon/ceph-{}/keyring'.format( + socket.gethostname() + ), + 'auth', 'get-or-create', '{entity}.{name}'.format(entity=entity, + name=name), + ] + for subsystem, subcaps in caps.items(): + cmd.extend([subsystem, '; '.join(subcaps)]) + log("Calling check_output: {}".format(cmd), level=DEBUG) + return (parse_key(str(subprocess + .check_output(cmd) + .decode('UTF-8')) + .strip())) # IGNORE:E1103 + + +def get_upgrade_key(): + return get_named_key('upgrade-osd', _upgrade_caps) + + +def is_internal_client(name): + keys = ('osd-upgrade', 'osd-removal', 'admin', 'rbd-mirror', 'mds') + return any(name.startswith(key) for key in keys) + + +def get_named_key(name, caps=None, pool_list=None): + """Retrieve a specific named cephx key. + + :param name: String Name of key to get. + :param pool_list: The list of pools to give access to + :param caps: dict of cephx capabilities + :returns: Returns a cephx key + """ + caps = caps or _default_caps + key_name = 'client.{}'.format(name) + + key = ceph_auth_get(key_name) + if key: + if is_internal_client(name): + upgrade_key_caps(key_name, caps) + return key + + log("Creating new key for {}".format(name), level=DEBUG) + cmd = [ + "sudo", + "-u", + ceph_user(), + 'ceph', + '--name', 'mon.', + '--keyring', + '/var/lib/ceph/mon/ceph-{}/keyring'.format( + socket.gethostname() + ), + 'auth', 'get-or-create', key_name, + ] + # Add capabilities + for subsystem, subcaps in caps.items(): + if subsystem == 'osd': + if pool_list: + # This will output a string similar to: + # "pool=rgw pool=rbd pool=something" + pools = " ".join(['pool={0}'.format(i) for i in pool_list]) + subcaps[0] = subcaps[0] + " " + pools + cmd.extend([subsystem, '; '.join(subcaps)]) + ceph_auth_get.cache_clear() + + log("Calling check_output: {}".format(cmd), level=DEBUG) + return parse_key(str(subprocess + .check_output(cmd) + .decode('UTF-8')) + .strip()) # IGNORE:E1103 + + +@functools.lru_cache() +def ceph_auth_get(key_name): + try: + # Does the key already exist? 
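+        # Illustrative note: 'ceph auth get client.<name>' prints the
+        # keyring for that entity if it exists. The result is memoized by
+        # the lru_cache decorator above; get_named_key() calls
+        # ceph_auth_get.cache_clear() after creating a new key so the next
+        # lookup is not served a stale miss.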
+        output = str(subprocess.check_output(
+            [
+                'sudo',
+                '-u', ceph_user(),
+                'ceph',
+                '--name', 'mon.',
+                '--keyring',
+                '/var/lib/ceph/mon/ceph-{}/keyring'.format(
+                    socket.gethostname()
+                ),
+                'auth',
+                'get',
+                key_name,
+            ]).decode('UTF-8')).strip()
+        return parse_key(output)
+    except subprocess.CalledProcessError:
+        # Couldn't get the key
+        pass
+
+
+def upgrade_key_caps(key, caps, pool_list=None):
+    """Upgrade key to have capabilities caps"""
+    if not is_leader():
+        # Not the MON leader OR not clustered
+        return
+    cmd = [
+        "sudo", "-u", ceph_user(), 'ceph', 'auth', 'caps', key
+    ]
+    for subsystem, subcaps in caps.items():
+        if subsystem == 'osd':
+            if pool_list:
+                # This will output a string similar to:
+                # "pool=rgw pool=rbd pool=something"
+                pools = " ".join(['pool={0}'.format(i) for i in pool_list])
+                subcaps[0] = subcaps[0] + " " + pools
+        cmd.extend([subsystem, '; '.join(subcaps)])
+    subprocess.check_call(cmd)
+
+
+@cached
+def systemd():
+    return CompareHostReleases(lsb_release()['DISTRIB_CODENAME']) >= 'vivid'
+
+
+def bootstrap_monitor_cluster(secret):
+    """Bootstrap local Ceph mon into the Ceph cluster
+
+    :param secret: cephx secret to use for monitor authentication
+    :type secret: str
+    :raises: Exception if Ceph mon cannot be bootstrapped
+    """
+    hostname = socket.gethostname()
+    path = '/var/lib/ceph/mon/ceph-{}'.format(hostname)
+    done = '{}/done'.format(path)
+    if systemd():
+        init_marker = '{}/systemd'.format(path)
+    else:
+        init_marker = '{}/upstart'.format(path)
+
+    keyring = '/var/lib/ceph/tmp/{}.mon.keyring'.format(hostname)
+
+    if os.path.exists(done):
+        log('bootstrap_monitor_cluster: mon already initialized.')
+    else:
+        # Ceph >= 0.61.3 needs this for ceph-mon fs creation
+        mkdir('/var/run/ceph', owner=ceph_user(),
+              group=ceph_user(), perms=0o755)
+        mkdir(path, owner=ceph_user(), group=ceph_user(),
+              perms=0o755)
+        # end changes for Ceph >= 0.61.3
+        try:
+            _create_monitor(keyring,
+                            secret,
+                            hostname,
+                            path,
+                            done,
+                            init_marker)
+        finally:
+            os.unlink(keyring)
+
+
+def _create_monitor(keyring, secret, hostname, path, done, init_marker):
+    """Create monitor filesystem and enable and start ceph-mon process
+
+    :param keyring: path to temporary keyring on disk
+    :type keyring: str
+    :param secret: cephx secret to use for monitor authentication
+    :type secret: str
+    :param hostname: hostname of the local unit
+    :type hostname: str
+    :param path: full path to Ceph mon directory
+    :type path: str
+    :param done: full path to 'done' marker for Ceph mon
+    :type done: str
+    :param init_marker: full path to 'init' marker for Ceph mon
+    :type init_marker: str
+    """
+    subprocess.check_call(['ceph-authtool', keyring,
+                           '--create-keyring', '--name=mon.',
+                           '--add-key={}'.format(secret),
+                           '--cap', 'mon', 'allow *'])
+    subprocess.check_call(['ceph-mon', '--mkfs',
+                           '-i', hostname,
+                           '--keyring', keyring])
+    chownr('/var/log/ceph', ceph_user(), ceph_user())
+    chownr(path, ceph_user(), ceph_user())
+    with open(done, 'w'):
+        pass
+    with open(init_marker, 'w'):
+        pass
+
+    if systemd():
+        if cmp_pkgrevno('ceph', '14.0.0') >= 0:
+            systemd_unit = 'ceph-mon@{}'.format(socket.gethostname())
+        else:
+            systemd_unit = 'ceph-mon'
+        subprocess.check_call(['systemctl', 'enable', systemd_unit])
+        service_restart(systemd_unit)
+    else:
+        service_restart('ceph-mon-all')
+
+
+def create_keyrings():
+    """Create keyrings for operation of ceph-mon units
+
+    NOTE: The cluster must be in quorum before this function is executed.
+ + :raises: Exception if keyrings cannot be created + """ + if cmp_pkgrevno('ceph', '14.0.0') >= 0: + # NOTE(jamespage): At Nautilus, keys are created by the + # monitors automatically and just need + # exporting. + output = str(subprocess.check_output( + [ + 'sudo', + '-u', ceph_user(), + 'ceph', + '--name', 'mon.', + '--keyring', + '/var/lib/ceph/mon/ceph-{}/keyring'.format( + socket.gethostname() + ), + 'auth', 'get', 'client.admin', + ]).decode('UTF-8')).strip() + if not output: + # NOTE: key not yet created, raise exception and retry + raise Exception + # NOTE: octopus wants newline at end of file LP: #1864706 + output += '\n' + write_file(_client_admin_keyring, output, + owner=ceph_user(), group=ceph_user(), + perms=0o400) + else: + # NOTE(jamespage): Later Ceph releases require explicit + # call to ceph-create-keys to setup the + # admin keys for the cluster; this command + # will wait for quorum in the cluster before + # returning. + # NOTE(fnordahl): Explicitly run `ceph-create-keys` for older + # Ceph releases too. This improves bootstrap + # resilience as the charm will wait for + # presence of peer units before attempting + # to bootstrap. Note that charms deploying + # ceph-mon service should disable running of + # `ceph-create-keys` service in init system. + cmd = ['ceph-create-keys', '--id', socket.gethostname()] + if cmp_pkgrevno('ceph', '12.0.0') >= 0: + # NOTE(fnordahl): The default timeout in ceph-create-keys of 600 + # seconds is not adequate. Increase timeout when + # timeout parameter available. For older releases + # we rely on retry_on_exception decorator. + # LP#1719436 + cmd.extend(['--timeout', '1800']) + subprocess.check_call(cmd) + osstat = os.stat(_client_admin_keyring) + if not osstat.st_size: + # NOTE(fnordahl): Retry will fail as long as this file exists. + # LP#1719436 + os.remove(_client_admin_keyring) + raise Exception + + +def update_monfs(): + hostname = socket.gethostname() + monfs = '/var/lib/ceph/mon/ceph-{}'.format(hostname) + if systemd(): + init_marker = '{}/systemd'.format(monfs) + else: + init_marker = '{}/upstart'.format(monfs) + if os.path.exists(monfs) and not os.path.exists(init_marker): + # Mark mon as managed by upstart so that + # it gets start correctly on reboots + with open(init_marker, 'w'): + pass + + +def get_partitions(dev): + cmd = ['partx', '--raw', '--noheadings', dev] + try: + out = str(subprocess.check_output(cmd).decode('UTF-8')).splitlines() + log("get partitions: {}".format(out), level=DEBUG) + return out + except subprocess.CalledProcessError as e: + log("Can't get info for {0}: {1}".format(dev, e.output)) + return [] + + +def get_lvs(dev): + """ + List logical volumes for the provided block device + + :param: dev: Full path to block device. + :raises subprocess.CalledProcessError: in the event that any supporting + operation failed. + :returns: list: List of logical volumes provided by the block device + """ + if not lvm.is_lvm_physical_volume(dev): + return [] + vg_name = lvm.list_lvm_volume_group(dev) + return lvm.list_logical_volumes('vg_name={}'.format(vg_name)) + + +def find_least_used_utility_device(utility_devices, lvs=False): + """ + Find a utility device which has the smallest number of partitions + among other devices in the supplied list. + + :utility_devices: A list of devices to be used for filestore journal + or bluestore wal or db. 
+ :lvs: flag to indicate whether inspection should be based on LVM LV's + :return: string device name + """ + if lvs: + usages = map(lambda a: (len(get_lvs(a)), a), utility_devices) + else: + usages = map(lambda a: (len(get_partitions(a)), a), utility_devices) + least = min(usages, key=lambda t: t[0]) + return least[1] + + +def get_devices(name): + """Merge config and Juju storage based devices + + :name: The name of the device type, e.g.: wal, osd, journal + :returns: Set(device names), which are strings + """ + if config(name): + devices = [dev.strip() for dev in config(name).split(' ')] + else: + devices = [] + storage_ids = storage_list(name) + devices.extend((storage_get('location', sid) for sid in storage_ids)) + devices = filter(os.path.exists, devices) + + return set(devices) + + +def osdize(dev, osd_format, osd_journal, ignore_errors=False, encrypt=False, + key_manager=CEPH_KEY_MANAGER, osd_id=None): + if dev.startswith('/dev'): + osdize_dev(dev, osd_format, osd_journal, + ignore_errors, encrypt, + key_manager, osd_id) + else: + if cmp_pkgrevno('ceph', '14.0.0') >= 0: + log("Directory backed OSDs can not be created on Nautilus", + level=WARNING) + return + osdize_dir(dev, encrypt) + + +def osdize_dev(dev, osd_format, osd_journal, ignore_errors=False, + encrypt=False, key_manager=CEPH_KEY_MANAGER, + osd_id=None): + """ + Prepare a block device for use as a Ceph OSD + + A block device will only be prepared once during the lifetime + of the calling charm unit; future executions will be skipped. + + :param: dev: Full path to block device to use + :param: osd_format: Format for OSD filesystem + :param: osd_journal: List of block devices to use for OSD journals + :param: ignore_errors: Don't fail in the event of any errors during + processing + :param: encrypt: Encrypt block devices using 'key_manager' + :param: key_manager: Key management approach for encryption keys + :raises subprocess.CalledProcessError: in the event that any supporting + subprocess operation failed + :raises ValueError: if an invalid key_manager is provided + """ + if key_manager not in KEY_MANAGERS: + raise ValueError('Unsupported key manager: {}'.format(key_manager)) + + db = kv() + osd_devices = db.get('osd-devices', []) + try: + if dev in osd_devices: + log('Device {} already processed by charm,' + ' skipping'.format(dev)) + return + + if not os.path.exists(dev): + log('Path {} does not exist - bailing'.format(dev)) + return + + if not is_block_device(dev): + log('Path {} is not a block device - bailing'.format(dev)) + return + + if is_osd_disk(dev): + log('Looks like {} is already an' + ' OSD data or journal, skipping.'.format(dev)) + if is_device_mounted(dev): + osd_devices.append(dev) + return + + if is_device_mounted(dev): + log('Looks like {} is in use, skipping.'.format(dev)) + return + + if is_active_bluestore_device(dev): + log('{} is in use as an active bluestore block device,' + ' skipping.'.format(dev)) + osd_devices.append(dev) + return + + if is_mapped_luks_device(dev): + log('{} is a mapped LUKS device,' + ' skipping.'.format(dev)) + return + + if cmp_pkgrevno('ceph', '12.2.4') >= 0: + cmd = _ceph_volume(dev, + osd_journal, + encrypt, + key_manager, + osd_id) + else: + cmd = _ceph_disk(dev, + osd_format, + osd_journal, + encrypt) + + try: + status_set('maintenance', 'Initializing device {}'.format(dev)) + log("osdize cmd: {}".format(cmd)) + subprocess.check_call(cmd) + except subprocess.CalledProcessError: + try: + lsblk_output = subprocess.check_output( + ['lsblk', '-P']).decode('UTF-8') + except 
subprocess.CalledProcessError as e: + log("Couldn't get lsblk output: {}".format(e), ERROR) + if ignore_errors: + log('Unable to initialize device: {}'.format(dev), WARNING) + if lsblk_output: + log('lsblk output: {}'.format(lsblk_output), DEBUG) + else: + log('Unable to initialize device: {}'.format(dev), ERROR) + if lsblk_output: + log('lsblk output: {}'.format(lsblk_output), WARNING) + raise + + # NOTE: Record processing of device only on success to ensure that + # the charm only tries to initialize a device of OSD usage + # once during its lifetime. + osd_devices.append(dev) + finally: + db.set('osd-devices', osd_devices) + db.flush() + + +def _ceph_disk(dev, osd_format, osd_journal, encrypt=False): + """ + Prepare a device for usage as a Ceph OSD using ceph-disk + + :param: dev: Full path to use for OSD block device setup, + The function looks up realpath of the device + :param: osd_journal: List of block devices to use for OSD journals + :param: encrypt: Use block device encryption (unsupported) + :returns: list. 'ceph-disk' command and required parameters for + execution by check_call + """ + cmd = ['ceph-disk', 'prepare'] + + if encrypt: + cmd.append('--dmcrypt') + + cmd.append('--bluestore') + wal = get_devices('bluestore-wal') + if wal: + cmd.append('--block.wal') + least_used_wal = find_least_used_utility_device(wal) + cmd.append(least_used_wal) + db = get_devices('bluestore-db') + if db: + cmd.append('--block.db') + least_used_db = find_least_used_utility_device(db) + cmd.append(least_used_db) + + cmd.append(os.path.realpath(dev)) + + if osd_journal: + least_used = find_least_used_utility_device(osd_journal) + cmd.append(least_used) + + return cmd + + +def _ceph_volume(dev, osd_journal, encrypt=False, key_manager=CEPH_KEY_MANAGER, + osd_id=None): + """ + Prepare and activate a device for usage as a Ceph OSD using ceph-volume. + + This also includes creation of all PV's, VG's and LV's required to + support the initialization of the OSD. + + :param: dev: Full path to use for OSD block device setup + :param: osd_journal: List of block devices to use for OSD journals + :param: encrypt: Use block device encryption + :param: key_manager: dm-crypt Key Manager to use + :param: osd_id: The OSD-id to recycle, or None to create a new one + :raises subprocess.CalledProcessError: in the event that any supporting + LVM operation failed. + :returns: list. 
'ceph-volume' command and required parameters for + execution by check_call + """ + cmd = ['ceph-volume', 'lvm', 'create'] + + osd_fsid = str(uuid.uuid4()) + cmd.append('--osd-fsid') + cmd.append(osd_fsid) + cmd.append('--bluestore') + main_device_type = 'block' + + if encrypt and key_manager == CEPH_KEY_MANAGER: + cmd.append('--dmcrypt') + + if osd_id is not None: + cmd.extend(['--osd-id', str(osd_id)]) + + cmd.append('--data') + cmd.append(_allocate_logical_volume(dev=dev, + lv_type=main_device_type, + osd_fsid=osd_fsid, + encrypt=encrypt, + key_manager=key_manager)) + + for extra_volume in ('wal', 'db'): + devices = get_devices('bluestore-{}'.format(extra_volume)) + if devices: + cmd.append('--block.{}'.format(extra_volume)) + least_used = find_least_used_utility_device(devices, + lvs=True) + cmd.append(_allocate_logical_volume( + dev=least_used, + lv_type=extra_volume, + osd_fsid=osd_fsid, + size='{}M'.format(calculate_volume_size(extra_volume)), + shared=True, + encrypt=encrypt, + key_manager=key_manager) + ) + + return cmd + + +def _partition_name(dev): + """ + Derive the first partition name for a block device + + :param: dev: Full path to block device. + :returns: str: Full path to first partition on block device. + """ + if dev[-1].isdigit(): + return '{}p1'.format(dev) + else: + return '{}1'.format(dev) + + +def is_active_bluestore_device(dev): + """ + Determine whether provided device is part of an active + bluestore based OSD (as its block component). + + :param: dev: Full path to block device to check for Bluestore usage. + :returns: boolean: indicating whether device is in active use. + """ + if not lvm.is_lvm_physical_volume(dev): + return False + + vg_name = lvm.list_lvm_volume_group(dev) + try: + lv_name = lvm.list_logical_volumes('vg_name={}'.format(vg_name))[0] + except IndexError: + return False + + block_symlinks = glob.glob('/var/lib/ceph/osd/ceph-*/block') + for block_candidate in block_symlinks: + if os.path.islink(block_candidate): + target = os.readlink(block_candidate) + if target.endswith(lv_name): + return True + + return False + + +def is_luks_device(dev): + """ + Determine if dev is a LUKS-formatted block device. + + :param: dev: A full path to a block device to check for LUKS header + presence + :returns: boolean: indicates whether a device is used based on LUKS header. + """ + return True if _luks_uuid(dev) else False + + +def is_mapped_luks_device(dev): + """ + Determine if dev is a mapped LUKS device + :param: dev: A full path to a block device to be checked + :returns: boolean: indicates whether a device is mapped + """ + _, dirs, _ = next(os.walk( + '/sys/class/block/{}/holders/' + .format(os.path.basename(os.path.realpath(dev)))) + ) + is_held = len(dirs) > 0 + return is_held and is_luks_device(dev) + + +def get_conf(variable): + """ + Get the value of the given configuration variable from the + cluster. + + :param variable: Ceph configuration variable + :returns: str. configured value for provided variable + + """ + return subprocess.check_output([ + 'ceph-osd', + '--show-config-value={}'.format(variable), + '--no-mon-config', + ]).strip() + + +def calculate_volume_size(lv_type): + """ + Determine the configured size for Bluestore DB/WAL or + Filestore Journal devices + + :param lv_type: volume type (db, wal or journal) + :raises KeyError: if invalid lv_type is supplied + :returns: int. 
Configured size in megabytes for volume type + """ + # lv_type -> Ceph configuration option + _config_map = { + 'db': 'bluestore_block_db_size', + 'wal': 'bluestore_block_wal_size', + 'journal': 'osd_journal_size', + } + + # default sizes in MB + _default_size = { + 'db': 1024, + 'wal': 576, + 'journal': 1024, + } + + # conversion of Ceph config units to MB + _units = { + 'db': 1048576, # Bytes -> MB + 'wal': 1048576, # Bytes -> MB + 'journal': 1, # Already in MB + } + + configured_size = get_conf(_config_map[lv_type]) + + if configured_size is None or int(configured_size) == 0: + return _default_size[lv_type] + else: + return int(configured_size) / _units[lv_type] + + +def _luks_uuid(dev): + """ + Check to see if dev is a LUKS encrypted volume, returning the UUID + of volume if it is. + + :param: dev: path to block device to check. + :returns: str. UUID of LUKS device or None if not a LUKS device + """ + try: + cmd = ['cryptsetup', 'luksUUID', dev] + return subprocess.check_output(cmd).decode('UTF-8').strip() + except subprocess.CalledProcessError: + return None + + +def _initialize_disk(dev, dev_uuid, encrypt=False, + key_manager=CEPH_KEY_MANAGER): + """ + Initialize a raw block device consuming 100% of the available + disk space. + + Function assumes that block device has already been wiped. + + :param: dev: path to block device to initialize + :param: dev_uuid: UUID to use for any dm-crypt operations + :param: encrypt: Encrypt OSD devices using dm-crypt + :param: key_manager: Key management approach for dm-crypt keys + :raises: subprocess.CalledProcessError: if any parted calls fail + :returns: str: Full path to new partition. + """ + use_vaultlocker = encrypt and key_manager == VAULT_KEY_MANAGER + + if use_vaultlocker: + # NOTE(jamespage): Check to see if already initialized as a LUKS + # volume, which indicates this is a shared block + # device for journal, db or wal volumes. + luks_uuid = _luks_uuid(dev) + if luks_uuid: + return '/dev/mapper/crypt-{}'.format(luks_uuid) + + dm_crypt = '/dev/mapper/crypt-{}'.format(dev_uuid) + + if use_vaultlocker and not os.path.exists(dm_crypt): + subprocess.check_call([ + 'vaultlocker', + 'encrypt', + '--uuid', dev_uuid, + dev, + ]) + subprocess.check_call([ + 'dd', + 'if=/dev/zero', + 'of={}'.format(dm_crypt), + 'bs=512', + 'count=1', + ]) + + if use_vaultlocker: + return dm_crypt + else: + return dev + + +def _allocate_logical_volume(dev, lv_type, osd_fsid, + size=None, shared=False, + encrypt=False, + key_manager=CEPH_KEY_MANAGER): + """ + Allocate a logical volume from a block device, ensuring any + required initialization and setup of PV's and VG's to support + the LV. + + :param: dev: path to block device to allocate from. + :param: lv_type: logical volume type to create + (data, block, journal, wal, db) + :param: osd_fsid: UUID of the OSD associate with the LV + :param: size: Size in LVM format for the device; + if unset 100% of VG + :param: shared: Shared volume group (journal, wal, db) + :param: encrypt: Encrypt OSD devices using dm-crypt + :param: key_manager: dm-crypt Key Manager to use + :raises subprocess.CalledProcessError: in the event that any supporting + LVM or parted operation fails. + :returns: str: String in the format 'vg_name/lv_name'. 
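+
+    Illustrative example: with lv_type='block' and shared=False the return
+    value takes the form 'ceph-<osd_fsid>/osd-block-<osd_fsid>'.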
+ """ + lv_name = "osd-{}-{}".format(lv_type, osd_fsid) + current_volumes = lvm.list_logical_volumes() + if shared: + dev_uuid = str(uuid.uuid4()) + else: + dev_uuid = osd_fsid + pv_dev = _initialize_disk(dev, dev_uuid, encrypt, key_manager) + + vg_name = None + if not lvm.is_lvm_physical_volume(pv_dev): + lvm.create_lvm_physical_volume(pv_dev) + if not os.path.exists(pv_dev): + # NOTE: trigger rescan to work around bug 1878752 + rescan_osd_devices() + if shared: + vg_name = 'ceph-{}-{}'.format(lv_type, + str(uuid.uuid4())) + else: + vg_name = 'ceph-{}'.format(osd_fsid) + lvm.create_lvm_volume_group(vg_name, pv_dev) + else: + vg_name = lvm.list_lvm_volume_group(pv_dev) + + if lv_name not in current_volumes: + lvm.create_logical_volume(lv_name, vg_name, size) + + return "{}/{}".format(vg_name, lv_name) + + +def osdize_dir(path, encrypt=False): + """Ask ceph-disk to prepare a directory to become an OSD. + + :param path: str. The directory to osdize + :param encrypt: bool. Should the OSD directory be encrypted at rest + :returns: None + """ + + db = kv() + osd_devices = db.get('osd-devices', []) + if path in osd_devices: + log('Device {} already processed by charm,' + ' skipping'.format(path)) + return + + for t in ['upstart', 'systemd']: + if os.path.exists(os.path.join(path, t)): + log('Path {} is already used as an OSD dir - bailing'.format(path)) + return + + if cmp_pkgrevno('ceph', "0.56.6") < 0: + log('Unable to use directories for OSDs with ceph < 0.56.6', + level=ERROR) + return + + mkdir(path, owner=ceph_user(), group=ceph_user(), perms=0o755) + chownr('/var/lib/ceph', ceph_user(), ceph_user()) + cmd = [ + 'sudo', '-u', ceph_user(), + 'ceph-disk', + 'prepare', + '--data-dir', + path + ] + if cmp_pkgrevno('ceph', '0.60') >= 0: + if encrypt: + cmd.append('--dmcrypt') + cmd.append('--bluestore') + + log("osdize dir cmd: {}".format(cmd)) + subprocess.check_call(cmd) + + # NOTE: Record processing of device only on success to ensure that + # the charm only tries to initialize a device of OSD usage + # once during its lifetime. + osd_devices.append(path) + db.set('osd-devices', osd_devices) + db.flush() + + +def filesystem_mounted(fs): + return subprocess.call(['grep', '-wqs', fs, '/proc/mounts']) == 0 + + +def get_running_osds(): + """Returns a list of the pids of the current running OSD daemons""" + cmd = ['pgrep', 'ceph-osd|crimson-osd'] + try: + result = str(subprocess.check_output(cmd).decode('UTF-8')) + return result.split() + except subprocess.CalledProcessError: + return [] + + +def get_cephfs(service): + """List the Ceph Filesystems that exist. + + :param service: The service name to run the Ceph command under + :returns: list. Returns a list of the Ceph filesystems + """ + if get_version() < 0.86: + # This command wasn't introduced until 0.86 Ceph + return [] + try: + output = str(subprocess + .check_output(["ceph", '--id', service, "fs", "ls"]) + .decode('UTF-8')) + if not output: + return [] + """ + Example subprocess output: + 'name: ip-172-31-23-165, metadata pool: ip-172-31-23-165_metadata, + data pools: [ip-172-31-23-165_data ]\n' + output: filesystems: ['ip-172-31-23-165'] + """ + filesystems = [] + for line in output.splitlines(): + parts = line.split(',') + for part in parts: + if "name" in part: + filesystems.append(part.split(' ')[1]) + except subprocess.CalledProcessError: + return [] + + +def wait_for_all_monitors_to_upgrade(new_version, upgrade_key): + """Fairly self explanatory name. 
This function will wait + for all monitors in the cluster to upgrade or it will + return after a timeout period has expired. + + :param new_version: str of the version to watch + :param upgrade_key: the cephx key name to use + """ + done = False + start_time = time.time() + monitor_list = [] + + mon_map = get_mon_map('admin') + if mon_map['monmap']['mons']: + for mon in mon_map['monmap']['mons']: + monitor_list.append(mon['name']) + while not done: + try: + done = all(monitor_key_exists(upgrade_key, "{}_{}_{}_done".format( + "mon", mon, new_version + )) for mon in monitor_list) + current_time = time.time() + if current_time > (start_time + 10 * 60): + raise Exception + else: + # Wait 30 seconds and test again if all monitors are upgraded + time.sleep(30) + except subprocess.CalledProcessError: + raise + + +# Edge cases: +# 1. Previous node dies on upgrade, can we retry? +def roll_monitor_cluster(new_version, upgrade_key): + """This is tricky to get right so here's what we're going to do. + + There's 2 possible cases: Either I'm first in line or not. + If I'm not first in line I'll wait a random time between 5-30 seconds + and test to see if the previous monitor is upgraded yet. + + :param new_version: str of the version to upgrade to + :param upgrade_key: the cephx key name to use when upgrading + """ + log('roll_monitor_cluster called with {}'.format(new_version)) + my_name = socket.gethostname() + monitor_list = [] + mon_map = get_mon_map('admin') + if mon_map['monmap']['mons']: + for mon in mon_map['monmap']['mons']: + monitor_list.append(mon['name']) + else: + status_set('blocked', 'Unable to get monitor cluster information') + sys.exit(1) + log('monitor_list: {}'.format(monitor_list)) + + # A sorted list of OSD unit names + mon_sorted_list = sorted(monitor_list) + + # Install packages immediately but defer restarts to when it's our time. + upgrade_monitor(new_version, restart_daemons=False) + try: + position = mon_sorted_list.index(my_name) + log("upgrade position: {}".format(position)) + if position == 0: + # I'm first! Roll + # First set a key to inform others I'm about to roll + lock_and_roll(upgrade_key=upgrade_key, + service='mon', + my_name=my_name, + version=new_version) + else: + # Check if the previous node has finished + status_set('waiting', + 'Waiting on {} to finish upgrading'.format( + mon_sorted_list[position - 1])) + wait_on_previous_node(upgrade_key=upgrade_key, + service='mon', + previous_node=mon_sorted_list[position - 1], + version=new_version) + lock_and_roll(upgrade_key=upgrade_key, + service='mon', + my_name=my_name, + version=new_version) + # NOTE(jamespage): + # Wait until all monitors have upgraded before bootstrapping + # the ceph-mgr daemons due to use of new mgr keyring profiles + if new_version == 'luminous': + wait_for_all_monitors_to_upgrade(new_version=new_version, + upgrade_key=upgrade_key) + bootstrap_manager() + + # NOTE(jmcvaughn): + # Nautilus and later binaries use msgr2 by default, but existing + # clusters that have been upgraded from pre-Nautilus will not + # automatically have msgr2 enabled. Without this, Ceph will show + # a warning only (with no impact to operations), but newly added units + # will not be able to join the cluster. Therefore, we ensure it is + # enabled on upgrade for all versions including and after Nautilus + # (to cater for previous charm versions that will not have done this). 
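+            # (Illustrative note: enable_msgr2(), defined elsewhere in this
+            # module, is expected to issue the equivalent of
+            # 'ceph mon enable-msgr2'.)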
+ nautilus_or_later = cmp_pkgrevno('ceph-common', '14.0.0') >= 0 + if nautilus_or_later: + wait_for_all_monitors_to_upgrade(new_version=new_version, + upgrade_key=upgrade_key) + enable_msgr2() + except ValueError: + log("Failed to find {} in list {}.".format( + my_name, mon_sorted_list)) + status_set('blocked', 'failed to upgrade monitor') + + +# For E731 we can't assign a lambda, therefore, instead pass this. +def noop(): + pass + + +def upgrade_monitor(new_version, kick_function=None, restart_daemons=True): + """Upgrade the current Ceph monitor to the new version + + :param new_version: String version to upgrade to. + """ + if kick_function is None: + kick_function = noop + current_version = get_version() + status_set("maintenance", "Upgrading monitor") + log("Current Ceph version is {}".format(current_version)) + log("Upgrading to: {}".format(new_version)) + + # Needed to determine if whether to stop/start ceph-mgr + luminous_or_later = cmp_pkgrevno('ceph-common', '12.2.0') >= 0 + # Needed to differentiate between systemd unit names + nautilus_or_later = cmp_pkgrevno('ceph-common', '14.0.0') >= 0 + kick_function() + try: + add_source(config('source'), config('key')) + apt_update(fatal=True) + except subprocess.CalledProcessError as err: + log("Adding the Ceph source failed with message: {}".format( + err)) + status_set("blocked", "Upgrade to {} failed".format(new_version)) + sys.exit(1) + kick_function() + + try: + apt_install(packages=determine_packages(), fatal=True) + rm_packages = determine_packages_to_remove() + if rm_packages: + apt_purge(packages=rm_packages, fatal=True) + except subprocess.CalledProcessError as err: + log("Upgrading packages failed " + "with message: {}".format(err)) + status_set("blocked", "Upgrade to {} failed".format(new_version)) + sys.exit(1) + + if not restart_daemons: + log("Packages upgraded but not restarting daemons yet.") + return + + try: + if systemd(): + if nautilus_or_later: + systemd_unit = 'ceph-mon@{}'.format(socket.gethostname()) + else: + systemd_unit = 'ceph-mon' + service_stop(systemd_unit) + log("restarting ceph-mgr.target maybe: {}" + .format(luminous_or_later)) + if luminous_or_later: + service_stop('ceph-mgr.target') + else: + service_stop('ceph-mon-all') + + kick_function() + + owner = ceph_user() + + # Ensure the files and directories under /var/lib/ceph is chowned + # properly as part of the move to the Jewel release, which moved the + # ceph daemons to running as ceph:ceph instead of root:root. + if new_version == 'jewel': + # Ensure the ownership of Ceph's directories is correct + chownr(path=os.path.join(os.sep, "var", "lib", "ceph"), + owner=owner, + group=owner, + follow_links=True) + + kick_function() + + # Ensure that mon directory is user writable + hostname = socket.gethostname() + path = '/var/lib/ceph/mon/ceph-{}'.format(hostname) + mkdir(path, owner=ceph_user(), group=ceph_user(), + perms=0o755) + + if systemd(): + if nautilus_or_later: + systemd_unit = 'ceph-mon@{}'.format(socket.gethostname()) + else: + systemd_unit = 'ceph-mon' + service_restart(systemd_unit) + log("starting ceph-mgr.target maybe: {}".format(luminous_or_later)) + if luminous_or_later: + # due to BUG: #1849874 we have to force a restart to get it to + # drop the previous version of ceph-manager and start the new + # one. 
+ service_restart('ceph-mgr.target') + else: + service_start('ceph-mon-all') + except subprocess.CalledProcessError as err: + log("Stopping ceph and upgrading packages failed " + "with message: {}".format(err)) + status_set("blocked", "Upgrade to {} failed".format(new_version)) + sys.exit(1) + + +def lock_and_roll(upgrade_key, service, my_name, version): + """Create a lock on the Ceph monitor cluster and upgrade. + + :param upgrade_key: str. The cephx key to use + :param service: str. The cephx id to use + :param my_name: str. The current hostname + :param version: str. The version we are upgrading to + """ + start_timestamp = time.time() + + log('monitor_key_set {}_{}_{}_start {}'.format( + service, + my_name, + version, + start_timestamp)) + monitor_key_set(upgrade_key, "{}_{}_{}_start".format( + service, my_name, version), start_timestamp) + + # alive indication: + alive_function = ( + lambda: monitor_key_set( + upgrade_key, "{}_{}_{}_alive" + .format(service, my_name, version), time.time())) + dog = WatchDog(kick_interval=3 * 60, + kick_function=alive_function) + + log("Rolling") + + # This should be quick + if service == 'osd': + upgrade_osd(version, kick_function=dog.kick_the_dog) + elif service == 'mon': + upgrade_monitor(version, kick_function=dog.kick_the_dog) + else: + log("Unknown service {}. Unable to upgrade".format(service), + level=ERROR) + log("Done") + + stop_timestamp = time.time() + # Set a key to inform others I am finished + log('monitor_key_set {}_{}_{}_done {}'.format(service, + my_name, + version, + stop_timestamp)) + status_set('maintenance', 'Finishing upgrade') + monitor_key_set(upgrade_key, "{}_{}_{}_done".format(service, + my_name, + version), + stop_timestamp) + + +def wait_on_previous_node(upgrade_key, service, previous_node, version): + """A lock that sleeps the current thread while waiting for the previous + node to finish upgrading. + + :param upgrade_key: + :param service: str. the cephx id to use + :param previous_node: str. The name of the previous node to wait on + :param version: str. The version we are upgrading to + :returns: None + """ + log("Previous node is: {}".format(previous_node)) + + previous_node_started_f = ( + lambda: monitor_key_exists( + upgrade_key, + "{}_{}_{}_start".format(service, previous_node, version))) + previous_node_finished_f = ( + lambda: monitor_key_exists( + upgrade_key, + "{}_{}_{}_done".format(service, previous_node, version))) + previous_node_alive_time_f = ( + lambda: monitor_key_get( + upgrade_key, + "{}_{}_{}_alive".format(service, previous_node, version))) + + # wait for 30 minutes until the previous node starts. We don't proceed + # unless we get a start condition. + try: + WatchDog.wait_until(previous_node_started_f, timeout=30 * 60) + except WatchDog.WatchDogTimeoutException: + log("Waited for previous node to start for 30 minutes. " + "It didn't start, so may have a serious issue. Continuing with " + "upgrade of this node.", + level=WARNING) + return + + # keep the time it started from this nodes' perspective. + previous_node_started_at = time.time() + log("Detected that previous node {} has started. Time now: {}" + .format(previous_node, previous_node_started_at)) + + # Now wait for the node to complete. The node may optionally be kicking + # with the *_alive key, which allows this node to wait longer as it 'knows' + # the other node is proceeding. 
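+    # Illustrative timing with the values below: a peer that kicks its
+    # *_alive key at least every 5 minutes can hold this node for up to 30
+    # minutes; a peer that never kicks only holds it for 10 minutes
+    # (compatibility_wait_time).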
+    try:
+        WatchDog.timed_wait(kicked_at_function=previous_node_alive_time_f,
+                            complete_function=previous_node_finished_f,
+                            wait_time=30 * 60,
+                            compatibility_wait_time=10 * 60,
+                            max_kick_interval=5 * 60)
+    except WatchDog.WatchDogDeadException:
+        # previous node was kicking, but timed out; log this condition and
+        # move on.
+        now = time.time()
+        waited = int((now - previous_node_started_at) / 60)
+        log("Previous node started, but has now not ticked for 5 minutes. "
+            "Waited total of {} mins on node {}. current time: {} > "
+            "previous node start time: {}. "
+            "Continuing with upgrade of this node."
+            .format(waited, previous_node, now, previous_node_started_at),
+            level=WARNING)
+    except WatchDog.WatchDogTimeoutException:
+        # previous node never kicked, or simply took too long; log this
+        # condition and move on.
+        now = time.time()
+        waited = int((now - previous_node_started_at) / 60)
+        log("Previous node is taking too long; assuming it has died. "
+            "Waited {} mins on node {}. current time: {} > "
+            "previous node start time: {}. "
+            "Continuing with upgrade of this node."
+            .format(waited, previous_node, now, previous_node_started_at),
+            level=WARNING)
+
+
+class WatchDog(object):
+    """Watch a dog; basically a kickable timer with a timeout between two
+    async units.
+
+    The idea is that you have an overall timeout and then can kick that
+    timeout with intermediary hits, with a max time between those kicks
+    allowed.
+
+    Note that this watchdog doesn't rely on the clock of the other side; just
+    roughly when it detects when the other side started. All timings are
+    based on the local clock.
+
+    The kicker will not 'kick' more often than a set interval, regardless of
+    how often the kick_the_dog() function is called. The kicker provides a
+    function (lambda: -> None) that is called when the kick interval is
+    reached.
+
+    The waiter calls the static method with a check function
+    (lambda: -> Boolean) that indicates when the wait should be over and the
+    maximum interval to wait. e.g. 30 minutes with a 5 minute kick interval.
+
+    So the waiter calls wait(f, 30, 3) and the kicker sets up a 3 minute kick
+    interval, or however long it is expected for the key to propagate and to
+    allow for other delays.
+
+    There is a compatibility mode where if the other side never kicks, then
+    it simply waits for the compatibility timer.
+    """
+
+    class WatchDogDeadException(Exception):
+        pass
+
+    class WatchDogTimeoutException(Exception):
+        pass
+
+    def __init__(self, kick_interval=3 * 60, kick_function=None):
+        """Initialise a new WatchDog
+
+        :param kick_interval: the interval when this side kicks the other in
+            seconds.
+        :type kick_interval: Int
+        :param kick_function: The function to call that does the kick.
+        :type kick_function: Callable[[], None]
+        """
+        self.start_time = time.time()
+        self.last_run_func = None
+        self.last_kick_at = None
+        self.kick_interval = kick_interval
+        self.kick_f = kick_function
+
+    def kick_the_dog(self):
+        """Might call the kick_function if it's time.
+
+        This function can be called as frequently as needed, but will run the
+        self.kick_function after kick_interval seconds have passed.
+        """
+        now = time.time()
+        if (self.last_run_func is None or
+                (now - self.last_run_func > self.kick_interval)):
+            if self.kick_f is not None:
+                self.kick_f()
+            self.last_run_func = now
+        self.last_kick_at = now
+
+    @staticmethod
+    def wait_until(wait_f, timeout=10 * 60):
+        """Wait for timeout seconds until the passed function returns True.
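+
+        Illustrative example (done_file is hypothetical)::
+
+            WatchDog.wait_until(lambda: os.path.exists(done_file),
+                                timeout=10 * 60)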
+
+        :param wait_f: The function to call that will end the wait.
+        :type wait_f: Callable[[], Boolean]
+        :param timeout: The time to wait in seconds.
+        :type timeout: int
+        """
+        start_time = time.time()
+        while not wait_f():
+            now = time.time()
+            if now > start_time + timeout:
+                raise WatchDog.WatchDogTimeoutException()
+            wait_time = random.randrange(5, 30)
+            log('wait_until: waiting for {} seconds'.format(wait_time))
+            time.sleep(wait_time)
+
+    @staticmethod
+    def timed_wait(kicked_at_function,
+                   complete_function,
+                   wait_time=30 * 60,
+                   compatibility_wait_time=10 * 60,
+                   max_kick_interval=5 * 60):
+        """Wait a maximum time with an intermediate 'kick' time.
+
+        This function will wait for max_kick_interval seconds unless the
+        kicked_at_function() call returns a time that is not older than
+        max_kick_interval (in seconds). i.e. the other side can signal that
+        it is still doing things during the max_kick_interval as long as it
+        kicks at least every max_kick_interval seconds.
+
+        The maximum wait is "wait_time", but the other side must keep kicking
+        during this period.
+
+        The "compatibility_wait_time" is used if the other side never kicks
+        (i.e. the kicked_at_function() always returns None). In this case the
+        function waits up to "compatibility_wait_time".
+
+        Note that the type of the return from the kicked_at_function is an
+        Optional[str], not a Float. The function will coerce this to a float
+        for the comparison. This represents the return value of
+        time.time() at the "other side". It's a string to simplify the
+        function obtaining the time value from the other side.
+
+        The function raises WatchDogTimeoutException if either the
+        compatibility_wait_time or the wait_time are exceeded.
+
+        The function raises WatchDogDeadException if the max_kick_interval is
+        exceeded.
+
+        Note that it is possible that the first kick interval is extended to
+        compatibility_wait_time if the "other side" doesn't kick immediately.
+        The best solution is for the other side to kick early and often.
+
+        :param kicked_at_function: The function to call to retrieve the time
+            that the other side 'kicked' at. None if the other side hasn't
+            kicked.
+        :type kicked_at_function: Callable[[], Optional[str]]
+        :param complete_function: The callable that returns True when done.
+        :type complete_function: Callable[[], Boolean]
+        :param wait_time: the maximum time to wait, even with kicks, in
+            seconds.
+        :type wait_time: int
+        :param compatibility_wait_time: The time to wait if no kicks are
+            received, in seconds.
+        :type compatibility_wait_time: int
+        :param max_kick_interval: The maximum time allowed between kicks
+            before the wait is over, in seconds.
+        :type max_kick_interval: int
+        :raises: WatchDog.WatchDogTimeoutException,
+            WatchDog.WatchDogDeadException
+        """
+        start_time = time.time()
+        while True:
+            if complete_function():
+                break
+            # the time when the waiting-for unit last kicked.
+            kicked_at = kicked_at_function()
+            now = time.time()
+            if kicked_at is None:
+                # assume other end doesn't do alive kicks
+                if (now - start_time > compatibility_wait_time):
+                    raise WatchDog.WatchDogTimeoutException()
+            else:
+                # other side is participating in kicks; must kick at least
+                # every 'max_kick_interval' to stay alive.
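+                # Illustrative check (hypothetical numbers): with
+                # max_kick_interval=300, a kick seen 40s ago keeps the wait
+                # alive, while one seen 400s ago raises
+                # WatchDogDeadException below.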
+ if (now - float(kicked_at) > max_kick_interval): + raise WatchDog.WatchDogDeadException() + if (now - start_time > wait_time): + raise WatchDog.WatchDogTimeoutException() + delay_time = random.randrange(5, 30) + log('waiting for {} seconds'.format(delay_time)) + time.sleep(delay_time) + + +def get_upgrade_position(osd_sorted_list, match_name): + """Return the upgrade position for the given OSD. + + :param osd_sorted_list: OSDs sorted + :type osd_sorted_list: [str] + :param match_name: The OSD name to match + :type match_name: str + :returns: The position of the name + :rtype: int + :raises: ValueError if name is not found + """ + for index, item in enumerate(osd_sorted_list): + if item.name == match_name: + return index + raise ValueError("OSD name '{}' not found in get_upgrade_position list" + .format(match_name)) + + +# Edge cases: +# 1. Previous node dies on upgrade, can we retry? +# 2. This assumes that the OSD failure domain is not set to OSD. +# It rolls an entire server at a time. +def roll_osd_cluster(new_version, upgrade_key): + """This is tricky to get right so here's what we're going to do. + + There's 2 possible cases: Either I'm first in line or not. + If I'm not first in line I'll wait a random time between 5-30 seconds + and test to see if the previous OSD is upgraded yet. + + TODO: If you're not in the same failure domain it's safe to upgrade + 1. Examine all pools and adopt the most strict failure domain policy + Example: Pool 1: Failure domain = rack + Pool 2: Failure domain = host + Pool 3: Failure domain = row + + outcome: Failure domain = host + + :param new_version: str of the version to upgrade to + :param upgrade_key: the cephx key name to use when upgrading + """ + log('roll_osd_cluster called with {}'.format(new_version)) + my_name = socket.gethostname() + osd_tree = get_osd_tree(service=upgrade_key) + # A sorted list of OSD unit names + osd_sorted_list = sorted(osd_tree) + log("osd_sorted_list: {}".format(osd_sorted_list)) + + try: + position = get_upgrade_position(osd_sorted_list, my_name) + log("upgrade position: {}".format(position)) + if position == 0: + # I'm first! Roll + # First set a key to inform others I'm about to roll + lock_and_roll(upgrade_key=upgrade_key, + service='osd', + my_name=my_name, + version=new_version) + else: + # Check if the previous node has finished + status_set('waiting', + 'Waiting on {} to finish upgrading'.format( + osd_sorted_list[position - 1].name)) + wait_on_previous_node( + upgrade_key=upgrade_key, + service='osd', + previous_node=osd_sorted_list[position - 1].name, + version=new_version) + lock_and_roll(upgrade_key=upgrade_key, + service='osd', + my_name=my_name, + version=new_version) + except ValueError: + log("Failed to find name {} in list {}".format( + my_name, osd_sorted_list)) + status_set('blocked', 'failed to upgrade osd') + + +def upgrade_osd(new_version, kick_function=None): + """Upgrades the current OSD + + :param new_version: str. 
The new version to upgrade to + """ + if kick_function is None: + kick_function = noop + + current_version = get_version() + status_set("maintenance", "Upgrading OSD") + log("Current Ceph version is {}".format(current_version)) + log("Upgrading to: {}".format(new_version)) + + try: + add_source(config('source'), config('key')) + apt_update(fatal=True) + except subprocess.CalledProcessError as err: + log("Adding the Ceph sources failed with message: {}".format( + err)) + status_set("blocked", "Upgrade to {} failed".format(new_version)) + sys.exit(1) + + kick_function() + + try: + # Upgrade the packages before restarting the daemons. + status_set('maintenance', 'Upgrading packages to %s' % new_version) + apt_install(packages=determine_packages(), fatal=True) + kick_function() + + # If the upgrade does not need an ownership update of any of the + # directories in the OSD service directory, then simply restart + # all of the OSDs at the same time as this will be the fastest + # way to update the code on the node. + if not dirs_need_ownership_update('osd'): + log('Restarting all OSDs to load new binaries', DEBUG) + with maintain_all_osd_states(): + if systemd(): + service_restart('ceph-osd.target') + else: + service_restart('ceph-osd-all') + return + + # Need to change the ownership of all directories which are not OSD + # directories as well. + # TODO - this should probably be moved to the general upgrade function + # and done before mon/OSD. + update_owner(CEPH_BASE_DIR, recurse_dirs=False) + non_osd_dirs = filter(lambda x: not x == 'osd', + os.listdir(CEPH_BASE_DIR)) + non_osd_dirs = map(lambda x: os.path.join(CEPH_BASE_DIR, x), + non_osd_dirs) + for i, path in enumerate(non_osd_dirs): + if i % 100 == 0: + kick_function() + update_owner(path) + + # Fast service restart wasn't an option because each of the OSD + # directories need the ownership updated for all the files on + # the OSD. Walk through the OSDs one-by-one upgrading the OSD. + for osd_dir in _get_child_dirs(OSD_BASE_DIR): + kick_function() + try: + osd_num = _get_osd_num_from_dirname(osd_dir) + _upgrade_single_osd(osd_num, osd_dir) + except ValueError as ex: + # Directory could not be parsed - junk directory? + log('Could not parse OSD directory %s: %s' % (osd_dir, ex), + WARNING) + continue + + except (subprocess.CalledProcessError, IOError) as err: + log("Stopping Ceph and upgrading packages failed " + "with message: {}".format(err)) + status_set("blocked", "Upgrade to {} failed".format(new_version)) + sys.exit(1) + + +def _upgrade_single_osd(osd_num, osd_dir): + """Upgrades the single OSD directory. + + :param osd_num: the num of the OSD + :param osd_dir: the directory of the OSD to upgrade + :raises CalledProcessError: if an error occurs in a command issued as part + of the upgrade process + :raises IOError: if an error occurs reading/writing to a file as part + of the upgrade process + """ + with maintain_osd_state(osd_num): + stop_osd(osd_num) + disable_osd(osd_num) + update_owner(osd_dir) + enable_osd(osd_num) + start_osd(osd_num) + + +def stop_osd(osd_num): + """Stops the specified OSD number. + + :param osd_num: the OSD number to stop + """ + if systemd(): + service_stop('ceph-osd@{}'.format(osd_num)) + else: + service_stop('ceph-osd', id=osd_num) + + +def start_osd(osd_num): + """Starts the specified OSD number. + + :param osd_num: the OSD number to start. 
+ """ + if systemd(): + service_start('ceph-osd@{}'.format(osd_num)) + else: + service_start('ceph-osd', id=osd_num) + + +def disable_osd(osd_num): + """Disables the specified OSD number. + + Ensures that the specified OSD will not be automatically started at the + next reboot of the system. Due to differences between init systems, + this method cannot make any guarantees that the specified OSD cannot be + started manually. + + :param osd_num: the OSD id which should be disabled. + :raises CalledProcessError: if an error occurs invoking the systemd cmd + to disable the OSD + :raises IOError, OSError: if the attempt to read/remove the ready file in + an upstart enabled system fails + """ + if systemd(): + # When running under systemd, the individual ceph-osd daemons run as + # templated units and can be directly addressed by referring to the + # templated service name ceph-osd@. Additionally, systemd + # allows one to disable a specific templated unit by running the + # 'systemctl disable ceph-osd@' command. When disabled, the + # OSD should remain disabled until re-enabled via systemd. + # Note: disabling an already disabled service in systemd returns 0, so + # no need to check whether it is enabled or not. + cmd = ['systemctl', 'disable', 'ceph-osd@{}'.format(osd_num)] + subprocess.check_call(cmd) + else: + # Neither upstart nor the ceph-osd upstart script provides for + # disabling the starting of an OSD automatically. The specific OSD + # cannot be prevented from running manually, however it can be + # prevented from running automatically on reboot by removing the + # 'ready' file in the OSD's root directory. This is due to the + # ceph-osd-all upstart script checking for the presence of this file + # before starting the OSD. + ready_file = os.path.join(OSD_BASE_DIR, 'ceph-{}'.format(osd_num), + 'ready') + if os.path.exists(ready_file): + os.unlink(ready_file) + + +def enable_osd(osd_num): + """Enables the specified OSD number. + + Ensures that the specified osd_num will be enabled and ready to start + automatically in the event of a reboot. + + :param osd_num: the osd id which should be enabled. + :raises CalledProcessError: if the call to the systemd command issued + fails when enabling the service + :raises IOError: if the attempt to write the ready file in an upstart + enabled system fails + """ + if systemd(): + cmd = ['systemctl', 'enable', 'ceph-osd@{}'.format(osd_num)] + subprocess.check_call(cmd) + else: + # When running on upstart, the OSDs are started via the ceph-osd-all + # upstart script which will only start the OSD if it has a 'ready' + # file. Make sure that file exists. + ready_file = os.path.join(OSD_BASE_DIR, 'ceph-{}'.format(osd_num), + 'ready') + with open(ready_file, 'w') as f: + f.write('ready') + + # Make sure the correct user owns the file. It shouldn't be necessary + # as the upstart script should run with root privileges, but its better + # to have all the files matching ownership. + update_owner(ready_file) + + +def update_owner(path, recurse_dirs=True): + """Changes the ownership of the specified path. + + Changes the ownership of the specified path to the new ceph daemon user + using the system's native chown functionality. This may take awhile, + so this method will issue a set_status for any changes of ownership which + recurses into directory structures. 
+ + :param path: the path to recursively change ownership for + :param recurse_dirs: boolean indicating whether to recursively change the + ownership of all the files in a path's subtree or to + simply change the ownership of the path. + :raises CalledProcessError: if an error occurs issuing the chown system + command + """ + user = ceph_user() + user_group = '{ceph_user}:{ceph_user}'.format(ceph_user=user) + cmd = ['chown', user_group, path] + if os.path.isdir(path) and recurse_dirs: + status_set('maintenance', ('Updating ownership of %s to %s' % + (path, user))) + cmd.insert(1, '-R') + + log('Changing ownership of {path} to {user}'.format( + path=path, user=user_group), DEBUG) + start = datetime.now() + subprocess.check_call(cmd) + elapsed_time = (datetime.now() - start) + + log('Took {secs} seconds to change the ownership of path: {path}'.format( + secs=elapsed_time.total_seconds(), path=path), DEBUG) + + +def get_osd_state(osd_num, osd_goal_state=None): + """Get OSD state or loop until OSD state matches OSD goal state. + + If osd_goal_state is None, just return the current OSD state. + If osd_goal_state is not None, loop until the current OSD state matches + the OSD goal state. + + :param osd_num: the OSD id to get state for + :param osd_goal_state: (Optional) string indicating state to wait for + Defaults to None + :returns: Returns a str, the OSD state. + :rtype: str + """ + while True: + asok = "/var/run/ceph/ceph-osd.{}.asok".format(osd_num) + cmd = [ + 'ceph', + 'daemon', + asok, + 'status' + ] + try: + result = json.loads(str(subprocess + .check_output(cmd) + .decode('UTF-8'))) + except (subprocess.CalledProcessError, ValueError) as e: + log("{}".format(e), level=DEBUG) + continue + osd_state = result['state'] + log("OSD {} state: {}, goal state: {}".format( + osd_num, osd_state, osd_goal_state), level=DEBUG) + if not osd_goal_state: + return osd_state + if osd_state == osd_goal_state: + return osd_state + time.sleep(3) + + +def get_all_osd_states(osd_goal_states=None): + """Get all OSD states or loop until all OSD states match OSD goal states. + + If osd_goal_states is None, just return a dictionary of current OSD states. + If osd_goal_states is not None, loop until the current OSD states match + the OSD goal states. + + :param osd_goal_states: (Optional) dict indicating states to wait for + Defaults to None + :returns: Returns a dictionary of current OSD states. + :rtype: dict + """ + osd_states = {} + for osd_num in get_local_osd_ids(): + if not osd_goal_states: + osd_states[osd_num] = get_osd_state(osd_num) + else: + osd_states[osd_num] = get_osd_state( + osd_num, + osd_goal_state=osd_goal_states[osd_num]) + return osd_states + + +@contextmanager +def maintain_osd_state(osd_num): + """Ensure the state of an OSD is maintained. + + Ensures the state of an OSD is the same at the end of a block nested + in a with statement as it was at the beginning of the block. + + :param osd_num: the OSD id to maintain state for + """ + osd_state = get_osd_state(osd_num) + try: + yield + finally: + get_osd_state(osd_num, osd_goal_state=osd_state) + + +@contextmanager +def maintain_all_osd_states(): + """Ensure all local OSD states are maintained. + + Ensures the states of all local OSDs are the same at the end of a + block nested in a with statement as they were at the beginning of + the block. 
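+
+    A minimal usage sketch (this mirrors how upgrade_osd() uses it)::
+
+        with maintain_all_osd_states():
+            service_restart('ceph-osd.target')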
+ """ + osd_states = get_all_osd_states() + try: + yield + finally: + get_all_osd_states(osd_goal_states=osd_states) + + +def list_pools(client='admin'): + """This will list the current pools that Ceph has + + :param client: (Optional) client id for Ceph key to use + Defaults to ``admin`` + :type client: str + :returns: Returns a list of available pools. + :rtype: list + :raises: subprocess.CalledProcessError if the subprocess fails to run. + """ + try: + pool_list = [] + pools = subprocess.check_output(['rados', '--id', client, 'lspools'], + universal_newlines=True, + stderr=subprocess.STDOUT) + for pool in pools.splitlines(): + pool_list.append(pool) + return pool_list + except subprocess.CalledProcessError as err: + log("rados lspools failed with error: {}".format(err.output)) + raise + + +def get_pool_param(pool, param, client='admin'): + """Get parameter from pool. + + :param pool: Name of pool to get variable from + :type pool: str + :param param: Name of variable to get + :type param: str + :param client: (Optional) client id for Ceph key to use + Defaults to ``admin`` + :type client: str + :returns: Value of variable on pool or None + :rtype: str or None + :raises: subprocess.CalledProcessError + """ + try: + output = subprocess.check_output( + ['ceph', '--id', client, 'osd', 'pool', 'get', pool, param], + universal_newlines=True, stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as cp: + if cp.returncode == 2 and 'ENOENT: option' in cp.output: + return None + raise + if ':' in output: + return output.split(':')[1].lstrip().rstrip() + + +def get_pool_erasure_profile(pool, client='admin'): + """Get erasure code profile for pool. + + :param pool: Name of pool to get variable from + :type pool: str + :param client: (Optional) client id for Ceph key to use + Defaults to ``admin`` + :type client: str + :returns: Erasure code profile of pool or None + :rtype: str or None + :raises: subprocess.CalledProcessError + """ + try: + return get_pool_param(pool, 'erasure_code_profile', client=client) + except subprocess.CalledProcessError as cp: + if cp.returncode == 13 and 'EACCES: pool' in cp.output: + # Not a Erasure coded pool + return None + raise + + +def get_pool_quota(pool, client='admin'): + """Get pool quota. + + :param pool: Name of pool to get variable from + :type pool: str + :param client: (Optional) client id for Ceph key to use + Defaults to ``admin`` + :type client: str + :returns: Dictionary with quota variables + :rtype: dict + :raises: subprocess.CalledProcessError + """ + output = subprocess.check_output( + ['ceph', '--id', client, 'osd', 'pool', 'get-quota', pool], + universal_newlines=True, stderr=subprocess.STDOUT) + rc = re.compile(r'\s+max\s+(\S+)\s*:\s+(\d+)') + result = {} + for line in output.splitlines(): + m = rc.match(line) + if m: + result.update({'max_{}'.format(m.group(1)): m.group(2)}) + return result + + +def get_pool_applications(pool='', client='admin'): + """Get pool applications. 
+
+    :param pool: (Optional) Name of pool to get applications for
+                 Defaults to all pools
+    :type pool: str
+    :param client: (Optional) client id for Ceph key to use
+                   Defaults to ``admin``
+    :type client: str
+    :returns: Dictionary with pool name as key
+    :rtype: dict
+    :raises: subprocess.CalledProcessError
+    """
+
+    cmd = ['ceph', '--id', client, 'osd', 'pool', 'application', 'get']
+    if pool:
+        cmd.append(pool)
+    try:
+        output = subprocess.check_output(cmd,
+                                         universal_newlines=True,
+                                         stderr=subprocess.STDOUT)
+    except subprocess.CalledProcessError as cp:
+        if cp.returncode == 2 and 'ENOENT' in cp.output:
+            return {}
+        raise
+    return json.loads(output)
+
+
+def list_pools_detail():
+    """Get detailed information about pools.
+
+    Structure:
+    {'pool_name_1': {'applications': {'application': {}},
+                     'parameters': {'pg_num': '42', 'size': '42'},
+                     'quota': {'max_bytes': '1000',
+                               'max_objects': '10'},
+                     },
+     'pool_name_2': ...
+     }
+
+    :returns: Dictionary with detailed pool information.
+    :rtype: dict
+    :raises: subprocess.CalledProcessError
+    """
+    get_params = ['pg_num', 'size']
+    result = {}
+    applications = get_pool_applications()
+    for pool in list_pools():
+        result[pool] = {
+            'applications': applications.get(pool, {}),
+            'parameters': {},
+            'quota': get_pool_quota(pool),
+        }
+        for param in get_params:
+            result[pool]['parameters'].update({
+                param: get_pool_param(pool, param)})
+        erasure_profile = get_pool_erasure_profile(pool)
+        if erasure_profile:
+            result[pool]['parameters'].update({
+                'erasure_code_profile': erasure_profile})
+    return result
+
+
+def dirs_need_ownership_update(service):
+    """Determines if directories still need change of ownership.
+
+    Examines the set of directories under the /var/lib/ceph/{service}
+    directory and determines if they have the correct ownership or not. This
+    is necessary due to the upgrade from Hammer to Jewel where the daemon
+    user changes from root:root to ceph:ceph.
+
+    :param service: the name of the service folder to check (e.g. osd, mon)
+    :returns: boolean. True if the directories need a change of ownership,
+              False otherwise.
+    :raises IOError: if an error occurs reading the file stats from one of
+                     the child directories.
+    :raises OSError: if the specified path does not exist or some other error
+    """
+    expected_owner = expected_group = ceph_user()
+    path = os.path.join(CEPH_BASE_DIR, service)
+    for child in _get_child_dirs(path):
+        curr_owner, curr_group = owner(child)
+
+        if (curr_owner == expected_owner) and (curr_group == expected_group):
+            continue
+
+        # NOTE(lathiat): when config_changed runs on reboot, the OSD might not
+        # yet be mounted or started, and the underlying directory the OSD is
+        # mounted to is expected to be owned by root. So skip the check. This
+        # may also happen for OSD directories for OSDs that were removed.
+        if (service == 'osd' and
+                not os.path.exists(os.path.join(child, 'magic'))):
+            continue
+
+        log('Directory "%s" needs its ownership updated' % child, DEBUG)
+        return True
+
+    # All child directories had the expected ownership
+    return False
+
+
+# A dict of valid Ceph upgrade paths. Mapping is old -> new.
+UPGRADE_PATHS = collections.OrderedDict([
+    ('firefly', 'hammer'),
+    ('hammer', 'jewel'),
+    ('jewel', 'luminous'),
+    ('luminous', 'mimic'),
+    ('mimic', 'nautilus'),
+    ('nautilus', 'octopus'),
+    ('octopus', 'pacific'),
+    ('pacific', 'quincy'),
+    ('quincy', 'reef'),
+    ('reef', 'squid'),
+])
+
+# Map UCA codenames to Ceph codenames
+UCA_CODENAME_MAP = {
+    'icehouse': 'firefly',
+    'juno': 'firefly',
+    'kilo': 'hammer',
+    'liberty': 'hammer',
+    'mitaka': 'jewel',
+    'newton': 'jewel',
+    'ocata': 'jewel',
+    'pike': 'luminous',
+    'queens': 'luminous',
+    'rocky': 'mimic',
+    'stein': 'mimic',
+    'train': 'nautilus',
+    'ussuri': 'octopus',
+    'victoria': 'octopus',
+    'wallaby': 'pacific',
+    'xena': 'pacific',
+    'yoga': 'quincy',
+    'zed': 'quincy',
+    'antelope': 'quincy',
+    'bobcat': 'reef',
+    'caracal': 'squid',
+}
+
+
+def pretty_print_upgrade_paths():
+    """Pretty print supported upgrade paths for Ceph.
+
+    :returns: list of strings of the form 'old -> new'
+    :rtype: List[str]
+    """
+    return ["{} -> {}".format(key, value)
+            for key, value in UPGRADE_PATHS.items()]
+
+
+def resolve_ceph_version(source):
+    """Resolve a Ceph release codename from the charm's source
+    configuration, using Ubuntu Cloud Archive pockets.
+
+    :param source: source configuration option of charm
+    :returns: Ceph release codename or None if not resolvable
+    """
+    os_release = get_os_codename_install_source(source)
+    return UCA_CODENAME_MAP.get(os_release)
+
+
+def get_ceph_pg_stat():
+    """Returns the result of 'ceph pg stat'.
+
+    :returns: dict
+    """
+    try:
+        tree = str(subprocess
+                   .check_output(['ceph', 'pg', 'stat', '--format=json'])
+                   .decode('UTF-8'))
+        try:
+            json_tree = json.loads(tree)
+            if not json_tree['num_pg_by_state']:
+                return None
+            return json_tree
+        except ValueError as v:
+            log("Unable to parse ceph pg stat json: {}. Error: {}".format(
+                tree, v))
+            raise
+    except subprocess.CalledProcessError as e:
+        log("ceph pg stat command failed with message: {}".format(e))
+        raise
+
+
+def get_ceph_health():
+    """Returns the health of the cluster from a 'ceph status'.
+
+    To get the overall status, use get_ceph_health()['overall_status'].
+
+    :returns: dict tree of ceph status
+    :raises: CalledProcessError if the ceph status command fails
+    """
+    try:
+        tree = str(subprocess
+                   .check_output(['ceph', 'status', '--format=json'])
+                   .decode('UTF-8'))
+        try:
+            json_tree = json.loads(tree)
+            # Make sure children are present in the JSON
+            if not json_tree['overall_status']:
+                return None
+
+            return json_tree
+        except ValueError as v:
+            log("Unable to parse ceph tree json: {}. Error: {}".format(
+                tree, v))
+            raise
+    except subprocess.CalledProcessError as e:
+        log("ceph status command failed with message: {}".format(e))
+        raise
+
+
+def reweight_osd(osd_num, new_weight):
+    """Changes the crush weight of an OSD to the value specified.
+
+    :param osd_num: the OSD id which should be changed
+    :param new_weight: the new weight for the OSD
+    :returns: bool. True if output looks right, else False.
+    :raises CalledProcessError: if an error occurs invoking the ceph command
+    """
+    try:
+        cmd_result = str(subprocess
+                         .check_output(['ceph', 'osd', 'crush',
+                                        'reweight', "osd.{}".format(osd_num),
+                                        new_weight],
+                                       stderr=subprocess.STDOUT)
+                         .decode('UTF-8'))
+        expected_result = "reweighted item id {ID} name \'osd.{ID}\'".format(
+            ID=osd_num) + " to {}".format(new_weight)
+        log(cmd_result)
+        if expected_result in cmd_result:
+            return True
+        return False
+    except subprocess.CalledProcessError as e:
+        log("ceph osd crush reweight command failed"
+            " with message: {}".format(e))
+        raise
+
+
+def determine_packages():
+    """Determines packages for installation.
+
+    :returns: list of Ceph packages
+    """
+    packages = PACKAGES.copy()
+    if CompareHostReleases(lsb_release()['DISTRIB_CODENAME']) >= 'eoan':
+        btrfs_package = 'btrfs-progs'
+    else:
+        btrfs_package = 'btrfs-tools'
+    packages.append(btrfs_package)
+    return packages
+
+
+def determine_packages_to_remove():
+    """Determines packages for removal.
+
+    Note: if in a container, then the CHRONY_PACKAGE is removed.
+
+    :returns: list of packages to be removed
+    :rtype: List[str]
+    """
+    rm_packages = REMOVE_PACKAGES.copy()
+    if is_container():
+        rm_packages.extend(filter_missing_packages([CHRONY_PACKAGE]))
+    return rm_packages
+
+
+def bootstrap_manager():
+    """Create and start a ceph-mgr daemon for this host.
+
+    Creates the mgr keyring if it does not already exist, then enables and
+    restarts the ceph-mgr systemd unit for this hostname.
+    """
+    hostname = socket.gethostname()
+    path = '/var/lib/ceph/mgr/ceph-{}'.format(hostname)
+    keyring = os.path.join(path, 'keyring')
+
+    if os.path.exists(keyring):
+        log('bootstrap_manager: mgr already initialized.')
+    else:
+        mkdir(path, owner=ceph_user(), group=ceph_user())
+        subprocess.check_call(['ceph', 'auth', 'get-or-create',
+                               'mgr.{}'.format(hostname), 'mon',
+                               'allow profile mgr', 'osd', 'allow *',
+                               'mds', 'allow *', '--out-file',
+                               keyring])
+        chownr(path, ceph_user(), ceph_user())
+
+        unit = 'ceph-mgr@{}'.format(hostname)
+        subprocess.check_call(['systemctl', 'enable', unit])
+        service_restart(unit)
+
+
+def enable_msgr2():
+    """Enable msgr2.
+
+    :raises: subprocess.CalledProcessError if the command fails
+    """
+    cmd = ['ceph', 'mon', 'enable-msgr2']
+    subprocess.check_call(cmd)
+
+
+def osd_noout(enable):
+    """Set or unset the cluster-wide 'noout' flag.
+
+    :param enable: bool. True to set noout, False to unset.
+    :returns: bool. True if the command succeeded.
+    :raises CalledProcessError: if an error occurs invoking the ceph command
+    """
+    operation = {
+        True: 'set',
+        False: 'unset',
+    }
+    try:
+        subprocess.check_call(['ceph', '--id', 'admin',
+                               'osd', operation[enable],
+                               'noout'])
+        log('running ceph osd {} noout'.format(operation[enable]))
+        return True
+    except subprocess.CalledProcessError as e:
+        log(e)
+        raise
+
+
+class OSDConfigSetError(Exception):
+    """Error occurred applying OSD settings."""
+    pass
+
+
+def apply_osd_settings(settings):
+    """Apply the provided OSD settings.
+
+    Apply the provided settings to all local OSDs unless the settings are
+    already present. Settings stop being applied on encountering an error.
+
+    :param settings: dict. Dictionary of settings to apply.
+    :returns: bool. True if commands ran successfully.
+    :raises: OSDConfigSetError
+    """
+    current_settings = {}
+    base_cmd = 'ceph daemon osd.{osd_id} config --format=json'
+    get_cmd = base_cmd + ' get {key}'
+    set_cmd = base_cmd + ' set {key} {value}'
+
+    def _get_cli_key(key):
+        return key.replace(' ', '_')
+    # Retrieve the current values to check keys are correct and to make this
+    # a no-op if the settings are already applied.
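+    # For example (hypothetical settings):
+    #   apply_osd_settings({'osd max backfills': '1',
+    #                       'osd recovery max active': '1'})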
+    for osd_id in get_local_osd_ids():
+        for key, value in sorted(settings.items()):
+            cli_key = _get_cli_key(key)
+            cmd = get_cmd.format(osd_id=osd_id, key=cli_key)
+            out = json.loads(
+                subprocess.check_output(cmd.split()).decode('UTF-8'))
+            if 'error' in out:
+                log("Error retrieving OSD setting: {}".format(out['error']),
+                    level=ERROR)
+                return False
+            current_settings[key] = out[cli_key]
+        settings_diff = {
+            k: v
+            for k, v in settings.items()
+            if str(v) != str(current_settings[k])}
+        for key, value in sorted(settings_diff.items()):
+            log("Setting {} to {}".format(key, value), level=DEBUG)
+            cmd = set_cmd.format(
+                osd_id=osd_id,
+                key=_get_cli_key(key),
+                value=value)
+            out = json.loads(
+                subprocess.check_output(cmd.split()).decode('UTF-8'))
+            if 'error' in out:
+                log("Error applying OSD setting: {}".format(out['error']),
+                    level=ERROR)
+                raise OSDConfigSetError
+    return True
+
+
+def enabled_manager_modules():
+    """Return a list of enabled manager modules.
+
+    :rtype: List[str]
+    """
+    cmd = ['ceph', 'mgr', 'module', 'ls']
+    quincy_or_later = cmp_pkgrevno('ceph-common', '17.1.0') >= 0
+    if quincy_or_later:
+        cmd.append('--format=json')
+    try:
+        modules = subprocess.check_output(cmd).decode('UTF-8')
+    except subprocess.CalledProcessError as e:
+        log("Failed to list ceph modules: {}".format(e), WARNING)
+        return []
+    modules = json.loads(modules)
+    return modules['enabled_modules']
+
+
+def is_mgr_module_enabled(module):
+    """Is a given manager module enabled.
+
+    :param module: the module name to check
+    :type module: str
+    :returns: Whether the named module is enabled
+    :rtype: bool
+    """
+    return module in enabled_manager_modules()
+
+
+is_dashboard_enabled = functools.partial(is_mgr_module_enabled, 'dashboard')
+
+
+def mgr_enable_module(module):
+    """Enable a Ceph Manager Module.
+
+    :param module: The module name to enable
+    :type module: str
+    :returns: True if the module was enabled by this call, False if it was
+              already enabled
+    :rtype: bool
+    :raises: subprocess.CalledProcessError
+    """
+    if not is_mgr_module_enabled(module):
+        subprocess.check_call(['ceph', 'mgr', 'module', 'enable', module])
+        return True
+    return False
+
+
+mgr_enable_dashboard = functools.partial(mgr_enable_module, 'dashboard')
+
+
+def mgr_disable_module(module):
+    """Disable a Ceph Manager Module.
+
+    :param module: The module name to disable
+    :type module: str
+    :returns: True if the module was disabled by this call, False if it was
+              already disabled
+    :rtype: bool
+    :raises: subprocess.CalledProcessError
+    """
+    if is_mgr_module_enabled(module):
+        subprocess.check_call(['ceph', 'mgr', 'module', 'disable', module])
+        return True
+    return False
+
+
+mgr_disable_dashboard = functools.partial(mgr_disable_module, 'dashboard')
+
+
+def ceph_config_set(name, value, who):
+    """Set a Ceph config option.
+
+    :param name: key to set
+    :type name: str
+    :param value: value corresponding to key
+    :type value: str
+    :param who: Config area the key is associated with (e.g. 'dashboard')
+    :type who: str
+
+    :raises: subprocess.CalledProcessError
+    """
+    subprocess.check_call(['ceph', 'config', 'set', who, name, value])
+
+
+mgr_config_set = functools.partial(ceph_config_set, who='mgr')
+
+
+def ceph_config_get(name, who):
+    """Retrieve the value of a Ceph config option.
+
+    :param name: key to lookup
+    :type name: str
+    :param who: Config area the key is associated with (e.g.
'dashboard') + :type who: str + :returns: Value associated with key + :rtype: str + :raises: subprocess.CalledProcessError + """ + return subprocess.check_output( + ['ceph', 'config', 'get', who, name]).decode('UTF-8') + + +mgr_config_get = functools.partial(ceph_config_get, who='mgr') + + +def _dashboard_set_ssl_artifact(path, artifact_name, hostname=None): + """Set SSL dashboard config option. + + :param path: Path to file + :type path: str + :param artifact_name: Option name for setting the artifact + :type artifact_name: str + :param hostname: If hostname is set artifact will only be associated with + the dashboard on that host. + :type hostname: str + :raises: subprocess.CalledProcessError + """ + cmd = ['ceph', 'dashboard', artifact_name] + if hostname: + cmd.append(hostname) + cmd.extend(['-i', path]) + log(cmd, level=DEBUG) + subprocess.check_call(cmd) + + +dashboard_set_ssl_certificate = functools.partial( + _dashboard_set_ssl_artifact, + artifact_name='set-ssl-certificate') + + +dashboard_set_ssl_certificate_key = functools.partial( + _dashboard_set_ssl_artifact, + artifact_name='set-ssl-certificate-key') diff --git a/ceph-radosgw/metadata.yaml b/ceph-radosgw/metadata.yaml new file mode 100644 index 00000000..16fc00bb --- /dev/null +++ b/ceph-radosgw/metadata.yaml @@ -0,0 +1,55 @@ +name: ceph-radosgw +summary: Highly scalable distributed storage - RADOS HTTP Gateway +maintainer: OpenStack Charmers +description: | + Ceph is a distributed storage and network file system designed to provide + excellent performance, reliability, and scalability. + . + This charm provides the RADOS HTTP gateway supporting S3 and Swift protocols + for object storage. +docs: https://discourse.charmhub.io/t/ceph-radosgw-docs-index/11005 +tags: +- openstack +- storage +- file-servers +- misc +series: +- focal +- jammy +extra-bindings: + public: + admin: + internal: +requires: + mon: + interface: ceph-radosgw + identity-service: + interface: keystone + ha: + interface: hacluster + scope: container + certificates: + interface: tls-certificates + slave: + interface: radosgw-multisite + secondary: + interface: radosgw-multisite +provides: + nrpe-external-master: + interface: nrpe-external-master + scope: container + gateway: + interface: http + master: + interface: radosgw-multisite + primary: + interface: radosgw-multisite + object-store: + interface: swift-proxy + radosgw-user: + interface: radosgw-user + s3: + interface: s3 +peers: + cluster: + interface: swift-ha diff --git a/ceph-radosgw/osci.yaml b/ceph-radosgw/osci.yaml new file mode 100644 index 00000000..7173e4b2 --- /dev/null +++ b/ceph-radosgw/osci.yaml @@ -0,0 +1,118 @@ +- project: + templates: + - charm-unit-jobs-py38 + - charm-unit-jobs-py310 + check: + jobs: + - vault-focal-yoga_rgw + - vault-focal-yoga-namespaced + - focal-yoga-multisite + - jammy-antelope-multisite: + voting: false + - jammy-bobcat-multisite: + voting: false + - mantic-bobcat-multisite: + voting: false + - vault-jammy-antelope_rgw: + voting: false + - vault-jammy-antelope-namespaced: + voting: false + - vault-jammy-bobcat_rgw: + voting: false + - vault-jammy-bobcat-namespaced: + voting: false + - vault-mantic-bobcat_rgw: + voting: false + - vault-mantic-bobcat-namespaced: + voting: false + vars: + needs_charm_build: true + charm_build_name: ceph-radosgw + build_type: charmcraft + charmcraft_channel: 2.x/stable +- job: + name: focal-yoga-multisite + parent: func-target + dependencies: + - osci-lint + - charm-build + - tox-py38 + vars: + tox_extra_args: '-- 
focal-yoga-multisite' +- job: + name: jammy-antelope-multisite + parent: func-target + dependencies: + - focal-yoga-multisite + vars: + tox_extra_args: '-- jammy-antelope-multisite' +- job: + name: jammy-bobcat-multisite + parent: func-target + dependencies: + - jammy-antelope-multisite + vars: + tox_extra_args: '-- jammy-bobcat-multisite' +- job: + name: mantic-bobcat-multisite + parent: func-target + dependencies: + - jammy-antelope-multisite + vars: + tox_extra_args: '-- mantic-bobcat-multisite' +- job: + name: vault-focal-yoga_rgw + parent: func-target + dependencies: + - focal-yoga-multisite + vars: + tox_extra_args: '-- vault:focal-yoga' +- job: + name: vault-focal-yoga-namespaced + parent: func-target + dependencies: + - focal-yoga-multisite + vars: + tox_extra_args: '-- vault:focal-yoga-namespaced' +- job: + name: vault-jammy-antelope-namespaced + parent: func-target + dependencies: + - jammy-antelope-multisite + vars: + tox_extra_args: '-- vault:jammy-antelope-namespaced' +- job: + name: vault-jammy-bobcat-namespaced + parent: func-target + dependencies: + - jammy-antelope-multisite + vars: + tox_extra_args: '-- vault:jammy-bobcat-namespaced' +- job: + name: vault-jammy-antelope_rgw + parent: func-target + dependencies: + - jammy-antelope-multisite + vars: + tox_extra_args: '-- vault:jammy-antelope' +- job: + name: vault-jammy-bobcat_rgw + parent: func-target + dependencies: + - vault-jammy-antelope_rgw + vars: + tox_extra_args: '-- vault:jammy-bobcat' +- job: + name: vault-mantic-bobcat-namespaced + parent: func-target + dependencies: + - vault-jammy-antelope_rgw + vars: + tox_extra_args: '-- vault:mantic-bobcat-namespaced' +- job: + name: vault-mantic-bobcat_rgw + parent: func-target + dependencies: + - vault-jammy-antelope_rgw + vars: + tox_extra_args: '-- vault:mantic-bobcat' diff --git a/ceph-radosgw/rename.sh b/ceph-radosgw/rename.sh new file mode 100755 index 00000000..d0c35c97 --- /dev/null +++ b/ceph-radosgw/rename.sh @@ -0,0 +1,13 @@ +#!/bin/bash +charm=$(grep "charm_build_name" osci.yaml | awk '{print $2}') +echo "renaming ${charm}_*.charm to ${charm}.charm" +echo -n "pwd: " +pwd +ls -al +echo "Removing bad downloaded charm maybe?" +if [[ -e "${charm}.charm" ]]; +then + rm "${charm}.charm" +fi +echo "Renaming charm here." +mv ${charm}_*.charm ${charm}.charm diff --git a/ceph-radosgw/requirements.txt b/ceph-radosgw/requirements.txt new file mode 100644 index 00000000..3b1cb7b1 --- /dev/null +++ b/ceph-radosgw/requirements.txt @@ -0,0 +1,29 @@ +# This file is managed centrally by release-tools and should not be modified +# within individual charm repos. See the 'global' dir contents for available +# choices of *requirements.txt files for OpenStack Charms: +# https://github.com/openstack-charmers/release-tools +# +# TODO: Distill the func test requirements from the lint/unit test +# requirements. They are intertwined. Also, Zaza itself should specify +# all of its own requirements and if it doesn't, fix it there. 
+# +pbr==5.6.0 +simplejson>=2.2.0 +netifaces>=0.10.4 + +# NOTE: newer versions of cryptography require a Rust compiler to build, +# see +# * https://github.com/openstack-charmers/zaza/issues/421 +# * https://mail.python.org/pipermail/cryptography-dev/2021-January/001003.html +# +cryptography<3.4 + +# Strange import error with newer netaddr: +netaddr>0.7.16,<0.8.0 + +Jinja2>=2.6 # BSD License (3 clause) +six>=1.9.0 + +dnspython + +psutil>=1.1.1,<2.0.0 diff --git a/ceph-radosgw/revision b/ceph-radosgw/revision new file mode 100644 index 00000000..6f4247a6 --- /dev/null +++ b/ceph-radosgw/revision @@ -0,0 +1 @@ +26 diff --git a/ceph-radosgw/templates/ceph.conf b/ceph-radosgw/templates/ceph.conf new file mode 100644 index 00000000..738bbb14 --- /dev/null +++ b/ceph-radosgw/templates/ceph.conf @@ -0,0 +1,92 @@ +[global] +{% if old_auth %} +auth supported = {{ auth_supported }} +{% else %} +auth cluster required = {{ auth_supported }} +auth service required = {{ auth_supported }} +auth client required = {{ auth_supported }} +{% endif %} +mon host = {{ mon_hosts }} +log to syslog = {{ use_syslog }} +err to syslog = {{ use_syslog }} +clog to syslog = {{ use_syslog }} +debug rgw = {{ loglevel }}/5 +{% if ipv6 -%} +ms bind ipv6 = true +{% endif %} +rgw swift versioning enabled = {{ rgw_swift_versioning }} +rgw relaxed s3 bucket names = {{ relaxed_s3_bucket_names }} +{% if behind_https_proxy -%} +rgw trust forwarded https = true +{% endif %} +{% if global -%} +# The following are user-provided options provided via the config-flags charm option. +# User-provided [global] section config +{% for key in global -%} +{{ key }} = {{ global[key] }} +{% endfor %} +{% endif %} + +{% if systemd_rgw -%} +[client.rgw.{{ hostname }}] +host = {{ hostname }} +{% else -%} +[client.radosgw.gateway] +keyring = /etc/ceph/keyring.rados.gateway +host = {{ hostname }} +rgw socket path = /tmp/radosgw.sock +log file = /var/log/ceph/radosgw.log +{% endif %} + +{% if virtual_hosted_bucket_enabled -%} +rgw_dns_name = {{ public_hostname }} +{% endif %} + +{% if rgw_zone -%} +rgw_zone = {{ rgw_zone }} +{% endif %} + +{% if rgw_zonegroup -%} +rgw_zonegroup = {{ rgw_zonegroup }} +{% endif %} + +{% if rgw_realm -%} +rgw_realm = {{ rgw_realm }} +{% endif %} + +rgw init timeout = 1200 +rgw frontends = {{ frontend }} port={{ port }} +{% if auth_type == 'keystone' %} +rgw keystone url = {{ auth_protocol }}://{{ auth_host }}:{{ auth_port }}/ +rgw keystone admin user = {{ admin_user }} +rgw keystone admin password = {{ admin_password }} +{% if auth_keystone_v3_supported and api_version == '3' -%} +rgw keystone api version = 3 +rgw keystone admin domain = {{ admin_domain_name }} +rgw keystone admin project = {{ admin_tenant_name }} +{% else -%} +rgw keystone api version = 2 +rgw keystone admin tenant = {{ admin_tenant_name }} +{% endif -%} +rgw keystone accepted roles = {{ user_roles }} +rgw keystone accepted admin roles = {{ admin_roles }} +rgw keystone token cache size = {{ cache_size }} +{% if keystone_revocation_parameter_supported -%} +rgw keystone revocation interval = 0 +{% endif -%} +rgw s3 auth use keystone = true +rgw s3 auth order = external, local +{% if namespace_tenants %} +rgw swift account in url = true +rgw keystone implicit tenants = true +{% endif %} +{% else -%} +rgw swift url = http://{{ unit_public_ip }} +{% endif -%} +{% if client_radosgw_gateway -%} +# The following are user-provided options provided via the config-flags charm option. 
+# User-provided [client.radosgw.gateway] section config
+{% for key in client_radosgw_gateway -%}
+{{ key }} = {{ client_radosgw_gateway[key] }}
+{% endfor %}
+{% endif %}
diff --git a/ceph-radosgw/templates/openstack_https_frontend.conf b/ceph-radosgw/templates/openstack_https_frontend.conf
new file mode 100644
index 00000000..2f061654
--- /dev/null
+++ b/ceph-radosgw/templates/openstack_https_frontend.conf
@@ -0,0 +1,41 @@
+{% if endpoints -%}
+{% for ext_port in ext_ports -%}
+Listen {{ ext_port }}
+{% endfor -%}
+{% for address, endpoint, ext, int in endpoints -%}
+<VirtualHost {{ address }}:{{ ext }}>
+    ServerName {{ endpoint }}
+{% if virtual_hosted_bucket_enabled and address != endpoint %}
+    ServerAlias *.{{ endpoint }}
+{% endif %}
+    SSLEngine on
+
+    # This section is based on Mozilla's recommendation
+    # as the "intermediate" profile as of July 7th, 2020.
+    # https://wiki.mozilla.org/Security/Server_Side_TLS
+    SSLProtocol all -SSLv3 -TLSv1 -TLSv1.1
+    SSLCipherSuite ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384
+    SSLHonorCipherOrder off
+
+    SSLCertificateFile /etc/apache2/ssl/{{ namespace }}/cert_{{ endpoint }}
+    # See LP 1484489 - this is to support <= 2.4.7 and >= 2.4.8
+    SSLCertificateChainFile /etc/apache2/ssl/{{ namespace }}/cert_{{ endpoint }}
+    SSLCertificateKeyFile /etc/apache2/ssl/{{ namespace }}/key_{{ endpoint }}
+    AllowEncodedSlashes On
+    ProxyPass / http://localhost:{{ int }}/ nocanon
+    ProxyPassReverse / http://localhost:{{ int }}/
+    ProxyPreserveHost on
+    RequestHeader set X-Forwarded-Proto "https"
+    KeepAliveTimeout 75
+    MaxKeepAliveRequests 1000
+</VirtualHost>
+{% endfor -%}
+<Proxy *>
+    Order deny,allow
+    Allow from all
+</Proxy>
+<Location />
+    Order allow,deny
+    Allow from all
+</Location>
+{% endif -%}
diff --git a/ceph-radosgw/test-requirements.txt b/ceph-radosgw/test-requirements.txt
new file mode 100644
index 00000000..c2b3d818
--- /dev/null
+++ b/ceph-radosgw/test-requirements.txt
@@ -0,0 +1,35 @@
+# This file is managed centrally by release-tools and should not be modified
+# within individual charm repos. See the 'global' dir contents for available
+# choices of *requirements.txt files for OpenStack Charms:
+# https://github.com/openstack-charmers/release-tools
+#
+# TODO: Distill the func test requirements from the lint/unit test
+# requirements. They are intertwined. Also, Zaza itself should specify
+# all of its own requirements and if it doesn't, fix it there.
+#
+pyparsing<3.0.0  # aodhclient is pinned in zaza and needs pyparsing < 3.0.0, but cffi also needs it, so pin here.
+
+requests>=2.18.4
+
+stestr>=2.2.0
+
+# Dependency of stestr.
+# Workaround for https://github.com/mtreinish/stestr/issues/145
+cliff<3.0.0
+
+coverage>=4.5.2
+pyudev  # for ceph-* charm unit tests (need to fix the ceph-* charm unit tests/mocking)
+git+https://github.com/openstack-charmers/zaza.git#egg=zaza
+git+https://github.com/openstack-charmers/zaza-openstack-tests.git#egg=zaza.openstack
+
+# netaddr is pinned in requirements.txt, but tempest below sometimes pulls in a newer version
+netaddr>0.7.16,<0.8.0
+
+# Needed for charm-glance:
+git+https://opendev.org/openstack/tempest.git#egg=tempest
+
+croniter  # needed for charm-rabbitmq-server unit tests
+psutil
+
+# https://github.com/boto/boto3/issues/4392
+boto3<1.36.0
diff --git a/ceph-radosgw/tests/bundles/jammy-caracal.yaml b/ceph-radosgw/tests/bundles/jammy-caracal.yaml
new file mode 100644
index 00000000..4a0bc34b
--- /dev/null
+++ b/ceph-radosgw/tests/bundles/jammy-caracal.yaml
@@ -0,0 +1,61 @@
+options:
+  source: &source cloud:jammy-caracal
+
+series: jammy
+
+comment:
+- 'machines section to decide order of deployment. database sooner = faster'
+machines:
+  '0':
+  '1':
+    constraints: cores=2 mem=4G root-disk=25G virt-type=virtual-machine
+  '2':
+    constraints: cores=2 mem=4G root-disk=25G virt-type=virtual-machine
+  '3':
+    constraints: cores=2 mem=4G root-disk=25G virt-type=virtual-machine
+  '4':
+  '5':
+  '6':
+
+applications:
+  ceph-radosgw:
+    charm: ch:ceph-radosgw
+    channel: squid/edge
+    num_units: 1
+    options:
+      source: *source
+    to:
+    - '0'
+
+  ceph-osd:
+    charm: ch:ceph-osd
+    num_units: 3
+    storage:
+      osd-devices: 'loop,10G'
+    options:
+      source: *source
+      osd-devices: '/srv/ceph /dev/test-non-existent'
+    to:
+    - '1'
+    - '2'
+    - '3'
+    channel: squid/edge
+
+  ceph-mon:
+    charm: ch:ceph-mon
+    num_units: 3
+    options:
+      monitor-count: 3
+      source: *source
+    to:
+    - '4'
+    - '5'
+    - '6'
+    channel: squid/edge
+
+relations:
+  - - 'ceph-osd:mon'
+    - 'ceph-mon:osd'
+
+  - - 'ceph-radosgw:mon'
+    - 'ceph-mon:radosgw'
diff --git a/ceph-radosgw/tests/target.py b/ceph-radosgw/tests/target.py
new file mode 100644
index 00000000..f00d818d
--- /dev/null
+++ b/ceph-radosgw/tests/target.py
@@ -0,0 +1,1086 @@
+# Copyright 2018 Canonical Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Ceph Testing."""
+
+import unittest
+import json
+import logging
+import requests
+import boto3
+import botocore.exceptions
+import urllib3
+
+import tenacity
+
+import zaza.openstack.charm_tests.test_utils as test_utils
+import zaza.model as zaza_model
+import zaza.openstack.utilities.ceph as zaza_ceph
+import zaza.openstack.utilities.generic as zaza_utils
+import zaza.utilities.networking as network_utils
+import zaza.utilities.juju as juju_utils
+import zaza.openstack.utilities.openstack as zaza_openstack
+import zaza.openstack.utilities.generic as generic_utils
+
+# Disable warnings for ssl_verify=false
+urllib3.disable_warnings(
+    urllib3.exceptions.InsecureRequestWarning
+)
+
+
+class CephRGWTest(test_utils.BaseCharmTest):
+    """Ceph RADOS Gateway Daemons Test Class.
+
+    This test set is not idempotent, because we don't support scale down from
+    multisite to singlesite (yet). Tests can be performed independently.
+    However, if test_100 has completed migration, retriggering the test set
+    would cause a time-out in test_003.
+    """
+
+    # String Resources
+    primary_rgw_app = 'ceph-radosgw'
+    primary_rgw_unit = 'ceph-radosgw/0'
+    secondary_rgw_app = 'secondary-ceph-radosgw'
+    secondary_rgw_unit = 'secondary-ceph-radosgw/0'
+
+    @classmethod
+    def setUpClass(cls):
+        """Run class setup for running ceph low level tests."""
+        super(CephRGWTest, cls).setUpClass(application_name='ceph-radosgw')
+
+    @property
+    def expected_apps(self):
+        """Determine application names for ceph-radosgw apps."""
+        _apps = [
+            self.primary_rgw_app
+        ]
+        try:
+            zaza_model.get_application(self.secondary_rgw_app)
+            _apps.append(self.secondary_rgw_app)
+        except KeyError:
+            pass
+        return _apps
+
+    @property
+    def multisite(self):
+        """Determine whether deployment is multi-site."""
+        try:
+            zaza_model.get_application(self.secondary_rgw_app)
+            return True
+        except KeyError:
+            return False
+
+    def get_rgwadmin_cmd_skeleton(self, unit_name):
+        """
+        Get radosgw-admin cmd skeleton with rgw.hostname populated key.
+
+        :param unit_name: Unit on which the complete command would be run.
+        :type unit_name: str
+        :returns: hostname filled basic command skeleton
+        :rtype: str
+        """
+        app_name = unit_name.split('/')[0]
+        juju_units = zaza_model.get_units(app_name)
+        unit_hostnames = generic_utils.get_unit_hostnames(juju_units)
+        hostname = unit_hostnames[unit_name]
+        return 'radosgw-admin --id=rgw.{} '.format(hostname)
+
+    def purge_bucket(self, application, bucket_name):
+        """Remove a bucket and all of its objects.
+
+        :param application: RGW application name
+        :type application: str
+        :param bucket_name: Name for RGW bucket to be deleted
+        :type bucket_name: str
+        """
+        juju_units = zaza_model.get_units(application)
+        unit_hostnames = generic_utils.get_unit_hostnames(juju_units)
+        for unit_name, hostname in unit_hostnames.items():
+            key_name = "rgw.{}".format(hostname)
+            cmd = 'radosgw-admin --id={} bucket rm --bucket={}' \
+                  ' --purge-objects'.format(key_name, bucket_name)
+            zaza_model.run_on_unit(unit_name, cmd)
+
+    def wait_for_status(self, application,
+                        is_primary=False, sync_expected=True):
+        """Wait for required RGW endpoint to finish sync for data and metadata.
+
+        :param application: RGW application which has to be waited for
+        :type application: str
+        :param is_primary: whether RGW application is primary or secondary
+        :type is_primary: boolean
+        :param sync_expected: whether sync details should be expected in status
+        :type sync_expected: boolean
+        """
+        juju_units = zaza_model.get_units(application)
+        unit_hostnames = generic_utils.get_unit_hostnames(juju_units)
+        data_check = 'data is caught up with source'
+        meta_primary = 'metadata sync no sync (zone is master)'
+        meta_secondary = 'metadata is caught up with master'
+        meta_check = meta_primary if is_primary else meta_secondary
+
+        for attempt in tenacity.Retrying(
+                wait=tenacity.wait_exponential(multiplier=10, max=300),
+                reraise=True, stop=tenacity.stop_after_attempt(12),
+                retry=tenacity.retry_if_exception_type(AssertionError)
+        ):
+            with attempt:
+                for unit_name, hostname in unit_hostnames.items():
+                    key_name = "rgw.{}".format(hostname)
+                    cmd = 'radosgw-admin --id={} sync status'.format(key_name)
+                    stdout = zaza_model.run_on_unit(
+                        unit_name, cmd
+                    ).get('Stdout', '')
+                    if sync_expected:
+                        # Both data and meta sync.
+                        self.assertIn(data_check, stdout)
+                        self.assertIn(meta_check, stdout)
+                    else:
+                        # Expect the primary's meta status and no data sync
+                        # status.
+                        self.assertIn(meta_primary, stdout)
+                        self.assertNotIn(data_check, stdout)
+
+    def fetch_rgw_object(self, target_client, container_name, object_name):
+        """Fetch RGW object content.
+
+        :param target_client: boto3 resource object configured for an
+                              endpoint.
+        :type target_client: boto3.resources.base.ServiceResource
+        :param container_name: RGW bucket name for desired object.
+        :type container_name: str
+        :param object_name: Object name for desired object.
+        :type object_name: str
+        :returns: the object content, decoded as UTF-8.
+        :rtype: str
+        """
+        for attempt in tenacity.Retrying(
+                wait=tenacity.wait_exponential(multiplier=1, max=60),
+                reraise=True, stop=tenacity.stop_after_attempt(12)
+        ):
+            with attempt:
+                return target_client.Object(
+                    container_name, object_name
+                ).get()['Body'].read().decode('UTF-8')
+
+    def promote_rgw_to_primary(self, app_name: str):
+        """Promote provided app to Primary and update period at new secondary.
+
+        :param app_name: Secondary site rgw Application to be promoted.
+        :type app_name: str
+        """
+        if app_name == self.primary_rgw_app:
+            new_secondary = self.secondary_rgw_unit
+        else:
+            new_secondary = self.primary_rgw_unit
+
+        # Promote to Primary
+        zaza_model.run_action_on_leader(
+            app_name,
+            'promote',
+            action_params={},
+        )
+
+        # Period Update Commit new secondary.
+        cmd = self.get_rgwadmin_cmd_skeleton(new_secondary)
+        zaza_model.run_on_unit(
+            new_secondary, cmd + 'period update --commit'
+        )
+
+    def get_client_keys(self, rgw_app_name=None):
+        """Create access_key and secret_key for boto3 client.
+
+        :param rgw_app_name: RGW application for which keys are required.
+        :type rgw_app_name: str
+        :returns: tuple of (access_key, secret_key)
+        :rtype: Tuple[str, str]
+        """
+        unit_name = self.primary_rgw_unit
+        if rgw_app_name is not None:
+            unit_name = rgw_app_name + '/0'
+        user_name = 'botoclient'
+        cmd = self.get_rgwadmin_cmd_skeleton(unit_name)
+        users = json.loads(zaza_model.run_on_unit(
+            unit_name, cmd + 'user list'
+        ).get('Stdout', ''))
+        # Fetch boto3 user keys if user exists.
+        if user_name in users:
+            output = json.loads(zaza_model.run_on_unit(
+                unit_name, cmd + 'user info --uid={}'.format(user_name)
+            ).get('Stdout', ''))
+            keys = output['keys'][0]
+            return keys['access_key'], keys['secret_key']
+        # Create boto3 user if it does not exist.
+        create_cmd = cmd + 'user create --uid={} --display-name={}'.format(
+            user_name, user_name
+        )
+        output = json.loads(
+            zaza_model.run_on_unit(unit_name, create_cmd).get('Stdout', '')
+        )
+        keys = output['keys'][0]
+        return keys['access_key'], keys['secret_key']
+
+    @tenacity.retry(
+        retry=tenacity.retry_if_result(lambda ret: ret is None),
+        wait=tenacity.wait_fixed(10),
+        stop=tenacity.stop_after_attempt(5)
+    )
+    def get_rgw_endpoint(self, unit_name: str):
+        """Fetch Application endpoint for RGW unit.
+
+        :param unit_name: Unit name for which RGW endpoint is required.
+        :type unit_name: str
+        :returns: RGW endpoint URL, or None if it cannot be determined yet.
+        :rtype: Optional[str]
+        """
+        # Get the address of the "public" network binding.
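+        # 'network-get public --bind-address' is the Juju hook tool that
+        # reports the address this unit has bound for its 'public' binding.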
+        unit_address = zaza_model.run_on_unit(
+            unit_name, "network-get public --bind-address"
+        ).get('Stdout', '').strip()
+
+        logging.info("Unit: {}, Endpoint: {}".format(unit_name, unit_address))
+        if not unit_address:
+            # No address reported yet; returning None triggers the
+            # tenacity retry configured on this method.
+            return None
+        unit_address = network_utils.format_addr(unit_address)
+        # Evaluate port
+        try:
+            zaza_model.get_application("vault")
+            return "https://{}:443".format(unit_address)
+        except KeyError:
+            return "http://{}:80".format(unit_address)
+
+    def configure_rgw_apps_for_multisite(self):
+        """Configure Multisite values on primary and secondary apps."""
+        realm = 'zaza_realm'
+        zonegroup = 'zaza_zg'
+
+        zaza_model.set_application_config(
+            self.primary_rgw_app,
+            {
+                'realm': realm,
+                'zonegroup': zonegroup,
+                'zone': 'zaza_primary'
+            }
+        )
+        zaza_model.set_application_config(
+            self.secondary_rgw_app,
+            {
+                'realm': realm,
+                'zonegroup': zonegroup,
+                'zone': 'zaza_secondary'
+            }
+        )
+
+    def configure_rgw_multisite_relation(self):
+        """Configure multi-site relation between primary and secondary apps."""
+        multisite_relation = zaza_model.get_relation_id(
+            self.primary_rgw_app, self.secondary_rgw_app,
+            remote_interface_name='secondary'
+        )
+        if multisite_relation is None:
+            logging.info('Configuring Multisite')
+            self.configure_rgw_apps_for_multisite()
+            zaza_model.add_relation(
+                self.primary_rgw_app,
+                self.primary_rgw_app + ":primary",
+                self.secondary_rgw_app + ":secondary"
+            )
+            zaza_model.block_until_unit_wl_status(
+                self.secondary_rgw_unit, "waiting"
+            )
+
+        zaza_model.block_until_unit_wl_status(
+            self.secondary_rgw_unit, "active"
+        )
+        zaza_model.block_until_unit_wl_status(
+            self.primary_rgw_unit, "active"
+        )
+        zaza_model.wait_for_unit_idle(self.secondary_rgw_unit)
+        zaza_model.wait_for_unit_idle(self.primary_rgw_unit)
+
+    def clean_rgw_multisite_config(self, app_name):
+        """Clear Multisite Juju config values to default.
+
+        :param app_name: App for which config values are to be cleared
+        :type app_name: str
+        """
+        unit_name = app_name + "/0"
+        zaza_model.set_application_config(
+            app_name,
+            {
+                'realm': "",
+                'zonegroup': "",
+                'zone': "default"
+            }
+        )
+        # Commit changes to period.
+        cmd = self.get_rgwadmin_cmd_skeleton(unit_name)
+        zaza_model.run_on_unit(
+            unit_name, cmd + 'period update --commit --rgw-zone=default '
+            '--rgw-zonegroup=default'
+        )
+
+    def enable_virtual_hosted_bucket(self):
+        """Enable virtual hosted bucket on primary rgw app."""
+        zaza_model.set_application_config(
+            self.primary_rgw_app,
+            {
+                'virtual-hosted-bucket-enabled': "true"
+            }
+        )
+
+    def set_os_public_hostname(self):
+        """Set os-public-hostname on primary rgw app."""
+        zaza_model.set_application_config(
+            self.primary_rgw_app,
+            {
+                'os-public-hostname': "rgw.example.com",
+            }
+        )
+
+    def clean_virtual_hosted_bucket(self):
+        """Clear virtual hosted bucket config on primary app."""
+        zaza_model.set_application_config(
+            self.primary_rgw_app,
+            {
+                'os-public-hostname': "",
+                'virtual-hosted-bucket-enabled': "false"
+            }
+        )
+
+    def test_001_processes(self):
+        """Verify Ceph processes.
+
+        Verify that the expected service processes are running
+        on each ceph unit.
+ """ + logging.info('Checking radosgw processes...') + # Process name and quantity of processes to expect on each unit + ceph_radosgw_processes = { + 'radosgw': 1, + } + + # Units with process names and PID quantities expected + expected_processes = {} + for app in self.expected_apps: + for unit in zaza_model.get_units(app): + expected_processes[unit.entity_id] = ceph_radosgw_processes + + actual_pids = zaza_utils.get_unit_process_ids(expected_processes) + ret = zaza_utils.validate_unit_process_ids(expected_processes, + actual_pids) + self.assertTrue(ret) + + def test_002_services(self): + """Verify the ceph services. + + Verify the expected services are running on the service units. + """ + logging.info('Checking radosgw services...') + services = ['radosgw', 'haproxy'] + for app in self.expected_apps: + for unit in zaza_model.get_units(app): + zaza_model.block_until_service_status( + unit_name=unit.entity_id, + services=services, + target_status='running' + ) + + def test_003_object_storage_and_secondary_block(self): + """Verify Object Storage API and Secondary Migration block.""" + container_name = 'zaza-container' + obj_data = 'Test data from Zaza' + obj_name = 'prefile' + + logging.info('Checking Object Storage API for Primary Cluster') + # 1. Fetch Primary Endpoint Details + primary_endpoint = self.get_rgw_endpoint(self.primary_rgw_unit) + self.assertNotEqual(primary_endpoint, None) + + # 2. Create RGW Client and perform IO + access_key, secret_key = self.get_client_keys() + primary_client = boto3.resource("s3", + verify=False, + endpoint_url=primary_endpoint, + aws_access_key_id=access_key, + aws_secret_access_key=secret_key) + primary_client.Bucket(container_name).create() + primary_object_one = primary_client.Object( + container_name, + obj_name + ) + primary_object_one.put(Body=obj_data) + + # 3. Fetch Object and Perform Data Integrity check. + content = primary_object_one.get()['Body'].read().decode('UTF-8') + self.assertEqual(content, obj_data) + + # Skip multisite tests if not compatible with bundle. + if not self.multisite: + logging.info('Skipping Secondary Object gatewaty verification') + return + + logging.info('Checking Object Storage API for Secondary Cluster') + # 1. Fetch Secondary Endpoint Details + secondary_endpoint = self.get_rgw_endpoint(self.secondary_rgw_unit) + self.assertNotEqual(secondary_endpoint, None) + + # 2. Create RGW Client and perform IO + access_key, secret_key = self.get_client_keys(self.secondary_rgw_app) + secondary_client = boto3.resource("s3", + verify=False, + endpoint_url=secondary_endpoint, + aws_access_key_id=access_key, + aws_secret_access_key=secret_key) + secondary_client.Bucket(container_name).create() + secondary_object = secondary_client.Object( + container_name, + obj_name + ) + secondary_object.put(Body=obj_data) + + # 3. Fetch Object and Perform Data Integrity check. + content = secondary_object.get()['Body'].read().decode('UTF-8') + self.assertEqual(content, obj_data) + + logging.info('Checking Secondary Migration Block') + # 1. Migrate to multisite + if zaza_model.get_relation_id( + self.primary_rgw_app, self.secondary_rgw_app, + remote_interface_name='secondary' + ) is not None: + logging.info('Skipping Test, Multisite relation already present.') + return + + logging.info('Configuring Multisite') + self.configure_rgw_apps_for_multisite() + zaza_model.add_relation( + self.primary_rgw_app, + self.primary_rgw_app + ":primary", + self.secondary_rgw_app + ":secondary" + ) + + # 2. 
+        assert_state = {
+            self.secondary_rgw_app: {
+                "workload-status": "blocked",
+                "workload-status-message-prefix":
+                    "Non-Pristine RGW site can't be used as secondary"
+            }
+        }
+        zaza_model.wait_for_application_states(states=assert_state,
+                                               timeout=900)
+
+        # 3. Perform Secondary Cleanup
+        logging.info('Perform cleanup at secondary')
+        self.clean_rgw_multisite_config(self.secondary_rgw_app)
+        zaza_model.remove_relation(
+            self.primary_rgw_app,
+            self.primary_rgw_app + ":primary",
+            self.secondary_rgw_app + ":secondary"
+        )
+
+        # Make secondary pristine.
+        self.purge_bucket(self.secondary_rgw_app, container_name)
+
+        zaza_model.block_until_unit_wl_status(self.secondary_rgw_unit,
+                                              'active')
+
+    def test_004_multisite_directional_sync_policy(self):
+        """Verify Multisite Directional Sync Policy."""
+        # Skip multisite tests if not compatible with bundle.
+        if not self.multisite:
+            logging.info('Skipping multisite sync policy verification')
+            return
+
+        container_name = 'zaza-container'
+        primary_obj_name = 'primary-testfile'
+        primary_obj_data = 'Primary test data'
+        secondary_directional_obj_name = 'secondary-directional-testfile'
+        secondary_directional_obj_data = 'Secondary directional test data'
+        secondary_symmetrical_obj_name = 'secondary-symmetrical-testfile'
+        secondary_symmetrical_obj_data = 'Secondary symmetrical test data'
+
+        logging.info('Verifying multisite directional sync policy')
+
+        # Set default sync policy to "allowed", which allows buckets to sync,
+        # but the sync is disabled by default in the zone group. Also, set
+        # the secondary zone sync policy flow type to "directional".
+        zaza_model.set_application_config(
+            self.primary_rgw_app,
+            {
+                "sync-policy-state": "allowed",
+            }
+        )
+        zaza_model.set_application_config(
+            self.secondary_rgw_app,
+            {
+                "sync-policy-flow-type": "directional",
+            }
+        )
+        zaza_model.wait_for_unit_idle(self.secondary_rgw_unit)
+        zaza_model.wait_for_unit_idle(self.primary_rgw_unit)
+
+        # Setup multisite relation.
+        self.configure_rgw_multisite_relation()
+
+        logging.info('Waiting for Data and Metadata to Synchronize')
+        # NOTE: We only check the secondary zone, because the sync policy
+        # flow type is set to "directional" between the primary and
+        # secondary zones.
+        self.wait_for_status(self.secondary_rgw_app, is_primary=False)
+
+        # Create bucket on primary RGW zone.
+        logging.info('Creating bucket on primary zone')
+        primary_endpoint = self.get_rgw_endpoint(self.primary_rgw_unit)
+        self.assertNotEqual(primary_endpoint, None)
+
+        access_key, secret_key = self.get_client_keys()
+        primary_client = boto3.resource("s3",
+                                        verify=False,
+                                        endpoint_url=primary_endpoint,
+                                        aws_access_key_id=access_key,
+                                        aws_secret_access_key=secret_key)
+        primary_client.Bucket(container_name).create()
+
+        # Enable sync on the bucket.
+        logging.info('Enabling sync on the bucket from the primary zone')
+        zaza_model.run_action_on_leader(
+            self.primary_rgw_app,
+            'enable-buckets-sync',
+            action_params={
+                'buckets': container_name,
+            },
+            raise_on_failure=True,
+        )
+
+        # Check that sync cannot be enabled using the secondary Juju RGW app.
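+        # The charm only permits managing bucket sync policies from the
+        # primary site, so the same action run on the secondary must fail.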
+        with self.assertRaises(zaza_model.ActionFailed):
+            zaza_model.run_action_on_leader(
+                self.secondary_rgw_app,
+                'enable-buckets-sync',
+                action_params={
+                    'buckets': container_name,
+                },
+                raise_on_failure=True,
+            )
+
+        logging.info('Waiting for Data and Metadata to Synchronize')
+        self.wait_for_status(self.secondary_rgw_app, is_primary=False)
+
+        # Perform IO on primary zone bucket.
+        logging.info('Performing IO on primary zone bucket')
+        primary_object = primary_client.Object(
+            container_name,
+            primary_obj_name
+        )
+        primary_object.put(Body=primary_obj_data)
+
+        # Verify that the object is replicated to the secondary zone.
+        logging.info('Verifying that the object is replicated to the '
+                     'secondary zone')
+        secondary_endpoint = self.get_rgw_endpoint(self.secondary_rgw_unit)
+        self.assertNotEqual(secondary_endpoint, None)
+
+        secondary_client = boto3.resource("s3",
+                                          verify=False,
+                                          endpoint_url=secondary_endpoint,
+                                          aws_access_key_id=access_key,
+                                          aws_secret_access_key=secret_key)
+        secondary_data = self.fetch_rgw_object(
+            secondary_client,
+            container_name,
+            primary_obj_name
+        )
+        self.assertEqual(secondary_data, primary_obj_data)
+
+        # Write object to the secondary zone bucket, while the sync policy
+        # flow type is set to "directional" between the zones.
+        logging.info('Writing object to the secondary zone bucket, which '
+                     'should not be replicated to the primary zone')
+        secondary_object = secondary_client.Object(
+            container_name,
+            secondary_directional_obj_name
+        )
+        secondary_object.put(Body=secondary_directional_obj_data)
+
+        # Verify that the object is not replicated to the primary zone.
+        logging.info('Verifying that the object is not replicated to the '
+                     'primary zone')
+        with self.assertRaises(botocore.exceptions.ClientError):
+            self.fetch_rgw_object(
+                primary_client,
+                container_name,
+                secondary_directional_obj_name
+            )
+
+        logging.info('Setting sync policy flow to "symmetrical" on the '
+                     'secondary RGW zone')
+        zaza_model.set_application_config(
+            self.secondary_rgw_app,
+            {
+                "sync-policy-flow-type": "symmetrical",
+            }
+        )
+        zaza_model.wait_for_unit_idle(self.secondary_rgw_unit)
+        zaza_model.wait_for_unit_idle(self.primary_rgw_unit)
+
+        # Write another object to the secondary zone bucket.
+        logging.info('Writing another object to the secondary zone bucket.')
+        secondary_object = secondary_client.Object(
+            container_name,
+            secondary_symmetrical_obj_name
+        )
+        secondary_object.put(Body=secondary_symmetrical_obj_data)
+
+        logging.info('Waiting for Data and Metadata to Synchronize')
+        # NOTE: This time, we check both the primary and secondary zones,
+        # because the sync policy flow type is set to "symmetrical" between
+        # the zones.
+        self.wait_for_status(self.secondary_rgw_app, is_primary=False)
+        self.wait_for_status(self.primary_rgw_app, is_primary=True)
+
+        # Verify that all objects are replicated to the primary zone.
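+        # With the flow type now "symmetrical", sync is bidirectional, so
+        # even the object written while the flow was still "directional"
+        # should eventually appear on the primary zone.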
+        logging.info('Verifying that all objects are replicated to the '
+                     'primary zone (including older objects).')
+        test_cases = [
+            {
+                'obj_name': primary_obj_name,
+                'obj_data': primary_obj_data,
+            },
+            {
+                'obj_name': secondary_directional_obj_name,
+                'obj_data': secondary_directional_obj_data,
+            },
+            {
+                'obj_name': secondary_symmetrical_obj_name,
+                'obj_data': secondary_symmetrical_obj_data,
+            },
+        ]
+        for tc in test_cases:
+            logging.info('Verifying that object "{}" is replicated'.format(
+                tc['obj_name']))
+            primary_data = self.fetch_rgw_object(
+                primary_client,
+                container_name,
+                tc['obj_name']
+            )
+            self.assertEqual(primary_data, tc['obj_data'])
+
+        # Cleanup.
+        logging.info('Cleaning up buckets after test case')
+        self.purge_bucket(self.primary_rgw_app, container_name)
+        self.purge_bucket(self.secondary_rgw_app, container_name)
+
+        logging.info('Waiting for Data and Metadata to Synchronize')
+        self.wait_for_status(self.secondary_rgw_app, is_primary=False)
+        self.wait_for_status(self.primary_rgw_app, is_primary=True)
+
+        # Set multisite sync policy state to "enabled" on the primary RGW
+        # app. Paired with "symmetrical" sync policy flow on the secondary
+        # RGW app, this enables bidirectional sync between the zones (which
+        # is the default behaviour without multisite sync policies
+        # configured).
+        logging.info('Setting sync policy state to "enabled".')
+        zaza_model.set_application_config(
+            self.primary_rgw_app,
+            {
+                "sync-policy-state": "enabled",
+            }
+        )
+        zaza_model.wait_for_unit_idle(self.primary_rgw_unit)
+
+    def test_100_migration_and_multisite_failover(self):
+        """Perform multisite migration and verify failover."""
+        container_name = 'zaza-container'
+        obj_data = 'Test data from Zaza'
+        # Skip multisite tests if not compatible with bundle.
+        if not self.multisite:
+            raise unittest.SkipTest('Skipping Migration Test')
+
+        logging.info('Perform Pre-Migration IO')
+        # 1. Fetch Endpoint Details
+        primary_endpoint = self.get_rgw_endpoint(self.primary_rgw_unit)
+        self.assertNotEqual(primary_endpoint, None)
+
+        # 2. Create primary client and add pre-migration object.
+        access_key, secret_key = self.get_client_keys()
+        primary_client = boto3.resource("s3",
+                                        verify=False,
+                                        endpoint_url=primary_endpoint,
+                                        aws_access_key_id=access_key,
+                                        aws_secret_access_key=secret_key)
+        primary_client.Bucket(container_name).create()
+        primary_client.Object(
+            container_name,
+            'prefile'
+        ).put(Body=obj_data)
+
+        # If Primary/Secondary relation does not exist, add it.
+        self.configure_rgw_multisite_relation()
+
+        logging.info('Waiting for Data and Metadata to Synchronize')
+        self.wait_for_status(self.secondary_rgw_app, is_primary=False)
+        self.wait_for_status(self.primary_rgw_app, is_primary=True)
+
+        logging.info('Performing post migration IO tests.')
+        # Add another object at primary
+        primary_client.Object(
+            container_name,
+            'postfile'
+        ).put(Body=obj_data)
+
+        # 1. Fetch Endpoint Details
+        secondary_endpoint = self.get_rgw_endpoint(self.secondary_rgw_unit)
+        self.assertNotEqual(secondary_endpoint, None)
+
+        # 2. Create secondary client and fetch synchronised objects.
+        secondary_client = boto3.resource("s3",
+                                          verify=False,
+                                          endpoint_url=secondary_endpoint,
+                                          aws_access_key_id=access_key,
+                                          aws_secret_access_key=secret_key)
+
+        # 3. Verify Data Integrity
+        # fetch_rgw_object has internal retry so waiting for sync beforehand
+        # is not required for post migration object sync.
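+        # 'prefile' predates the multisite relation while 'postfile' was
+        # written after it, so this exercises both the initial full sync
+        # and the subsequent incremental sync.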
+        pre_migration_data = self.fetch_rgw_object(
+            secondary_client, container_name, 'prefile'
+        )
+        post_migration_data = self.fetch_rgw_object(
+            secondary_client, container_name, 'postfile'
+        )
+
+        # 4. Verify Synchronisation works and objects are replicated
+        self.assertEqual(pre_migration_data, obj_data)
+        self.assertEqual(post_migration_data, obj_data)
+
+        logging.info('Checking multisite failover/failback')
+        # Failover Scenario, Promote Secondary-Ceph-RadosGW to Primary
+        self.promote_rgw_to_primary(self.secondary_rgw_app)
+
+        # Wait for Sites to be synchronised.
+        self.wait_for_status(self.primary_rgw_app, is_primary=False)
+        self.wait_for_status(self.secondary_rgw_app, is_primary=True)
+
+        # IO Test
+        container = 'failover-container'
+        test_data = 'Test data from Zaza on Secondary'
+        secondary_client.Bucket(container).create()
+        secondary_object = secondary_client.Object(container, 'testfile')
+        secondary_object.put(
+            Body=test_data
+        )
+        secondary_content = secondary_object.get()[
+            'Body'
+        ].read().decode('UTF-8')
+
+        # Wait for Sites to be synchronised.
+        self.wait_for_status(self.primary_rgw_app, is_primary=False)
+        self.wait_for_status(self.secondary_rgw_app, is_primary=True)
+
+        # Recovery scenario, reset ceph-rgw as primary.
+        self.promote_rgw_to_primary(self.primary_rgw_app)
+        self.wait_for_status(self.primary_rgw_app, is_primary=True)
+        self.wait_for_status(self.secondary_rgw_app, is_primary=False)
+
+        # Fetch synchronised copy of testfile from primary site.
+        primary_content = self.fetch_rgw_object(
+            primary_client, container, 'testfile'
+        )
+
+        # Verify Data Integrity.
+        self.assertEqual(secondary_content, primary_content)
+
+        # Scale down and verify replication has stopped.
+        logging.info('Checking multisite scaledown')
+        zaza_model.remove_relation(
+            self.primary_rgw_app,
+            self.primary_rgw_app + ":primary",
+            self.secondary_rgw_app + ":secondary"
+        )
+
+        # Wait for sync to stop.
+        self.wait_for_status(self.primary_rgw_app, sync_expected=False)
+        self.wait_for_status(self.secondary_rgw_app, sync_expected=False)
+
+        # Refresh clients and verify objects are not replicating.
+        primary_client = boto3.resource("s3",
+                                        verify=False,
+                                        endpoint_url=primary_endpoint,
+                                        aws_access_key_id=access_key,
+                                        aws_secret_access_key=secret_key)
+        secondary_client = boto3.resource("s3",
+                                          verify=False,
+                                          endpoint_url=secondary_endpoint,
+                                          aws_access_key_id=access_key,
+                                          aws_secret_access_key=secret_key)
+
+        # IO Test
+        container = 'scaledown-container'
+        test_data = 'Scaledown Test data'
+        secondary_client.Bucket(container).create()
+        secondary_object = secondary_client.Object(container, 'scaledown')
+        secondary_object.put(
+            Body=test_data
+        )
+
+        # Since the bucket is not replicated, the fetch should fail.
+        with self.assertRaises(botocore.exceptions.ClientError):
+            primary_content = self.fetch_rgw_object(
+                primary_client, container, 'scaledown'
+            )
+
+        # Cleanup of scaledown resources and synced resources.
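+        # With the relation removed, replication has stopped, so clean up
+        # via the secondary site, where all three buckets exist (the ones
+        # written there directly plus the previously synchronised one).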
+        self.purge_bucket(self.secondary_rgw_app, container)
+        self.purge_bucket(self.secondary_rgw_app, 'zaza-container')
+        self.purge_bucket(self.secondary_rgw_app, 'failover-container')
+
+    def test_101_virtual_hosted_bucket(self):
+        """Test virtual hosted bucket."""
+        # Skip if quincy or older.
+        current_release = zaza_openstack.get_os_release(
+            application='ceph-mon')
+        reef = zaza_openstack.get_os_release('jammy_bobcat')
+        if current_release < reef:
+            raise unittest.SkipTest(
+                'Virtual hosted bucket not supported in quincy or older')
+
+        primary_rgw_unit = zaza_model.get_unit_from_name(
+            self.primary_rgw_unit)
+        if primary_rgw_unit.workload_status != "active":
+            logging.info('Skipping virtual hosted bucket test since '
+                         'primary rgw unit is not in active state')
+            return
+
+        logging.info('Testing virtual hosted bucket')
+
+        # 0. Configure virtual hosted bucket
+        self.enable_virtual_hosted_bucket()
+        zaza_model.block_until_wl_status_info_starts_with(
+            self.primary_rgw_app,
+            'os-public-hostname must have a value',
+            timeout=900
+        )
+        self.set_os_public_hostname()
+        zaza_model.block_until_all_units_idle(self.model_name)
+        container_name = 'zaza-bucket'
+        obj_data = 'Test content from Zaza'
+        obj_name = 'testfile'
+
+        # 1. Fetch Primary Endpoint Details
+        primary_endpoint = self.get_rgw_endpoint(self.primary_rgw_unit)
+        self.assertNotEqual(primary_endpoint, None)
+
+        # 2. Create RGW Client and perform IO
+        access_key, secret_key = self.get_client_keys()
+        primary_client = boto3.resource("s3",
+                                        verify=False,
+                                        endpoint_url=primary_endpoint,
+                                        aws_access_key_id=access_key,
+                                        aws_secret_access_key=secret_key)
+        # We may not have certs for the public hostname yet, so retry a few
+        # times.
+        for attempt in tenacity.Retrying(
+                stop=tenacity.stop_after_attempt(10),
+                wait=tenacity.wait_fixed(4),
+        ):
+            with attempt:
+                primary_client.Bucket(container_name).create()
+        primary_object_one = primary_client.Object(
+            container_name,
+            obj_name
+        )
+        primary_object_one.put(Body=obj_data)
+        primary_client.Bucket(container_name).Acl().put(ACL='public-read')
+        primary_client.Object(container_name, obj_name).Acl().put(
+            ACL='public-read'
+        )
+
+        # 3. Test if we can get content via virtual hosted bucket name
+        public_hostname = zaza_model.get_application_config(
+            self.primary_rgw_app
+        )["os-public-hostname"]["value"]
+        url = f"{primary_endpoint}/{obj_name}"
+        headers = {'host': f"{container_name}.{public_hostname}"}
+        f = requests.get(url, headers=headers, verify=False)
+        self.assertEqual(f.text, obj_data)
+
+        # 4. Cleanup and de-configure virtual hosted bucket
+        self.clean_virtual_hosted_bucket()
+        zaza_model.block_until_all_units_idle(self.model_name)
+        self.purge_bucket(self.primary_rgw_app, container_name)
+
+
+class BlueStoreCompressionCharmOperation(test_utils.BaseCharmTest):
+    """Test charm handling of bluestore compression configuration options."""
+
+    def _assert_pools_properties(self, pools, pools_detail,
+                                 expected_properties, log_func=logging.info):
+        """Check properties on a set of pools.
+
+        :param pools: List of pool names to check.
+        :type pools: List[str]
+        :param pools_detail: List of dictionaries with pool detail
+        :type pools_detail: List[Dict[str,any]]
+        :param expected_properties: Properties to check and their expected
+                                    values.
+        :type expected_properties: Dict[str,any]
+        :returns: Nothing
+        :raises: AssertionError
+        """
+        for pool in pools:
+            for pd in pools_detail:
+                if pd['pool_name'] == pool:
+                    if 'options' in expected_properties:
+                        for k, v in expected_properties['options'].items():
+                            self.assertEqual(pd['options'][k], v)
+                            log_func("['options']['{}'] == {}".format(k, v))
+                    for k, v in expected_properties.items():
+                        if k == 'options':
+                            continue
+                        self.assertEqual(pd[k], v)
+                        log_func("{} == {}".format(k, v))
+
+    def test_configure_compression(self):
+        """Enable compression and validate properties flush through to pool."""
+        # The Ceph RadosGW creates many lightweight pools to keep track of
+        # metadata; we only compress the pool containing the actual data.
+        app_pools = ['.rgw.buckets.data']
+
+        ceph_pools_detail = zaza_ceph.get_ceph_pool_details(
+            model_name=self.model_name)
+
+        logging.debug('BEFORE: {}'.format(ceph_pools_detail))
+        try:
+            logging.info('Checking Ceph pool compression_mode prior to change')
+            self._assert_pools_properties(
+                app_pools, ceph_pools_detail,
+                {'options': {'compression_mode': 'none'}})
+        except KeyError:
+            logging.info('property does not exist on pool, which is OK.')
+        logging.info('Changing "bluestore-compression-mode" to "force" on {}'
+                     .format(self.application_name))
+        with self.config_change(
+                {'bluestore-compression-mode': 'none'},
+                {'bluestore-compression-mode': 'force'}):
+            logging.info('Checking Ceph pool compression_mode after change')
+            self._check_pool_compression_mode(app_pools, 'force')
+
+        logging.info('Checking Ceph pool compression_mode after '
+                     'restoring config to previous value')
+        self._check_pool_compression_mode(app_pools, 'none')
+
+    @tenacity.retry(
+        wait=tenacity.wait_exponential(multiplier=1, min=2, max=10),
+        stop=tenacity.stop_after_attempt(10),
+        reraise=True,
+        retry=tenacity.retry_if_exception_type(AssertionError)
+    )
+    def _check_pool_compression_mode(self, app_pools, mode):
+        ceph_pools_detail = zaza_ceph.get_ceph_pool_details(
+            model_name=self.model_name)
+        logging.debug('ceph_pools_details: %s', ceph_pools_detail)
+        logging.debug(juju_utils.get_relation_from_unit(
+            'ceph-mon', self.application_name, None,
+            model_name=self.model_name))
+        self._assert_pools_properties(
+            app_pools, ceph_pools_detail,
+            {'options': {'compression_mode': mode}})
+
+    def test_invalid_compression_configuration(self):
+        """Set invalid configuration and validate charm response."""
+        stored_target_deploy_status = self.test_config.get(
+            'target_deploy_status', {})
+        new_target_deploy_status = stored_target_deploy_status.copy()
+        new_target_deploy_status[self.application_name] = {
+            'workload-status': 'blocked',
+            'workload-status-message': 'Invalid configuration',
+        }
+        if 'target_deploy_status' in self.test_config:
+            self.test_config['target_deploy_status'].update(
+                new_target_deploy_status)
+        else:
+            self.test_config['target_deploy_status'] = (
+                new_target_deploy_status)
+
+        with self.config_change(
+                {'bluestore-compression-mode': 'none'},
+                {'bluestore-compression-mode': 'PEBCAK'}):
+            logging.info('Charm went into blocked state as expected, restore '
+                         'configuration')
+            self.test_config[
+                'target_deploy_status'] = stored_target_deploy_status
+
+
+class CephKeyRotationTests(test_utils.BaseCharmTest):
+    """Tests for the rotate-key action."""
+
+    def _get_all_keys(self, unit, entity_filter):
+        cmd = 'sudo ceph auth ls'
+        result = zaza_model.run_on_unit(unit, cmd)
+        # Don't use json formatting, as it's buggy upstream.
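+        # Instead, split the plain-text output into whitespace-separated
+        # tokens and scan for 'key:' markers (see the loop below).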
+        data = result['Stdout'].split()
+        ret = set()
+
+        for ix, line in enumerate(data):
+            # Structure:
+            # $ENTITY
+            # key:
+            # key contents
+            # That's why we need to move one position ahead.
+            if 'key:' in line and entity_filter(data[ix - 1]):
+                ret.add((data[ix - 1], data[ix + 1]))
+        return ret
+
+    def _check_key_rotation(self, entity, unit):
+        def entity_filter(name):
+            return name.startswith(entity)
+
+        old_keys = self._get_all_keys(unit, entity_filter)
+        action_obj = zaza_model.run_action(
+            unit_name=unit,
+            action_name='rotate-key',
+            action_params={'entity': entity}
+        )
+        zaza_utils.assertActionRanOK(action_obj)
+        # NOTE(lmlg): There's a nasty race going on here. Essentially,
+        # since this action involves 2 different applications, what
+        # happens is as follows:
+        #          (1)          |  (2)   |           (3)            |  (4)
+        # ceph-mon rotates key  | (idle) | remote-unit rotates key  | (idle)
+        # Between (2) and (3), there's a window where all units are
+        # idle, _but_ the key hasn't been rotated in the other unit.
+        # As such, we retry a few times instead of using the
+        # `wait_for_application_states` interface.
+
+        for attempt in tenacity.Retrying(
+                wait=tenacity.wait_exponential(multiplier=2, max=32),
+                reraise=True, stop=tenacity.stop_after_attempt(20),
+                retry=tenacity.retry_if_exception_type(AssertionError)
+        ):
+            with attempt:
+                new_keys = self._get_all_keys(unit, entity_filter)
+                self.assertNotEqual(old_keys, new_keys)
+
+        diff = new_keys - old_keys
+        self.assertEqual(len(diff), 1)
+        first = next(iter(diff))
+        # Check that the entity matches. The 'entity_filter'
+        # callable will return a true-like value if it
+        # matches the type of entity we're after (e.g. 'mgr')
+        self.assertTrue(entity_filter(first[0]))
+
+    def _get_rgw_client(self, unit):
+        ret = self._get_all_keys(unit, lambda x: x.startswith('client.rgw'))
+        if not ret:
+            return None
+        return next(iter(ret))[0]
+
+    def test_key_rotate(self):
+        """Test that rotating the keys actually changes them."""
+        unit = 'ceph-mon/0'
+        rgw_client = self._get_rgw_client(unit)
+
+        if rgw_client:
+            self._check_key_rotation(rgw_client, unit)
+        else:
+            logging.info('ceph-radosgw units present, but no RGW service')
diff --git a/ceph-radosgw/tests/tests.yaml b/ceph-radosgw/tests/tests.yaml
new file mode 100644
index 00000000..32f1fedd
--- /dev/null
+++ b/ceph-radosgw/tests/tests.yaml
@@ -0,0 +1,17 @@
+charm_name: ceph-radosgw
+
+gate_bundles:
+  - jammy-caracal
+
+smoke_bundles:
+  - jammy-caracal
+
+dev_bundles:
+  - jammy-caracal
+
+tests:
+  - zaza.charm_tests.lifecycle.refresh.CharmRefreshAll
+  - tests.target.CephRGWTest
+  - tests.target.BlueStoreCompressionCharmOperation
+  - tests.target.CephKeyRotationTests
+
diff --git a/ceph-radosgw/tox.ini b/ceph-radosgw/tox.ini
new file mode 100644
index 00000000..f555757a
--- /dev/null
+++ b/ceph-radosgw/tox.ini
@@ -0,0 +1,139 @@
+# Classic charm (with zaza): ./tox.ini
+# This file is managed centrally by release-tools and should not be modified
+# within individual charm repos. See the 'global' dir contents for available
+# choices of tox.ini for OpenStack Charms:
+# https://github.com/openstack-charmers/release-tools
+#
+# TODO: Distill the func test requirements from the lint/unit test
+# requirements. They are intertwined. Also, Zaza itself should specify
+# all of its own requirements and if it doesn't, fix it there.
+[tox]
+envlist = pep8,py3
+# NOTE: Avoid build/test env pollution by not enabling sitepackages.
+sitepackages = False
+# NOTE: Avoid false positives by not skipping missing interpreters.
+skip_missing_interpreters = False
+
+[testenv]
+# We use tox mainly for virtual environment management for test requirements
+# and do not install the charm code as a Python package into that environment.
+# Ref: https://tox.wiki/en/latest/config.html#skip_install
+skip_install = True
+setenv = VIRTUAL_ENV={envdir}
+         PYTHONHASHSEED=0
+         TEST_JUJU3=1
+         CHARM_DIR={envdir}
+         CHARMS_ARTIFACT_DIR={toxinidir}/..
+
+commands = stestr run --slowest {posargs}
+allowlist_externals =
+    charmcraft
+    {toxinidir}/rename.sh
+passenv =
+    HOME
+    TERM
+    CS_*
+    OS_*
+    TEST_*
+deps =
+    -c {toxinidir}/../constraints/test-constraints.txt
+    -r{toxinidir}/test-requirements.txt
+
+[testenv:build]
+basepython = python3
+deps =
+# charmcraft clean is done to ensure that
+# `tox -e build` always performs a clean, repeatable build.
+# For faster rebuilds during development,
+# directly run `charmcraft -v pack && ./rename.sh`.
+commands =
+    charmcraft clean
+    charmcraft -v pack
+    {toxinidir}/rename.sh
+    charmcraft clean
+
+[testenv:py310]
+basepython = python3.10
+deps =
+    -c {env:TEST_CONSTRAINTS_FILE:https://raw.githubusercontent.com/openstack-charmers/zaza-openstack-tests/master/constraints/constraints-2024.1.txt}
+    -r{toxinidir}/requirements.txt
+    -r{toxinidir}/test-requirements.txt
+
+[testenv:py3]
+basepython = python3
+deps =
+    -c {env:TEST_CONSTRAINTS_FILE:https://raw.githubusercontent.com/openstack-charmers/zaza-openstack-tests/master/constraints/constraints-2024.1.txt}
+    -r{toxinidir}/requirements.txt
+    -r{toxinidir}/test-requirements.txt
+
+[testenv:pep8]
+basepython = python3
+deps =
+    -c {env:TEST_CONSTRAINTS_FILE:https://raw.githubusercontent.com/openstack-charmers/zaza-openstack-tests/master/constraints/constraints-2024.1.txt}
+    flake8
+    git+https://github.com/juju/charm-tools.git
+commands = flake8 {posargs} hooks unit_tests tests actions lib files
+           charm-proof
+
+[testenv:cover]
+# Technique based heavily upon
+# https://github.com/openstack/nova/blob/master/tox.ini
+basepython = python3
+deps =
+    -c {env:TEST_CONSTRAINTS_FILE:https://raw.githubusercontent.com/openstack-charmers/zaza-openstack-tests/master/constraints/constraints-2024.1.txt}
+    -r{toxinidir}/requirements.txt
+    -r{toxinidir}/test-requirements.txt
+setenv =
+    {[testenv]setenv}
+    PYTHON=coverage run
+commands =
+    coverage erase
+    stestr run --slowest {posargs}
+    coverage combine
+    coverage html -d cover
+    coverage xml -o cover/coverage.xml
+    coverage report
+
+[coverage:run]
+branch = True
+concurrency = multiprocessing
+parallel = True
+source =
+    .
+omit =
+    .tox/*
+    */charmhelpers/*
+    unit_tests/*
+
+[testenv:venv]
+basepython = python3
+commands = {posargs}
+
+[testenv:func-noop]
+basepython = python3
+commands =
+    functest-run-suite --help
+
+[testenv:func]
+basepython = python3
+commands =
+    functest-run-suite --keep-model
+
+[testenv:func-smoke]
+basepython = python3
+commands =
+    functest-run-suite --keep-model --smoke
+
+[testenv:func-dev]
+basepython = python3
+commands =
+    functest-run-suite --keep-model --dev
+
+[testenv:func-target]
+basepython = python3
+commands =
+    functest-run-suite --keep-model --bundle {posargs}
+
+[flake8]
+ignore = E402,E226,W503,W504
+exclude = */charmhelpers
diff --git a/ceph-radosgw/unit_tests/__init__.py b/ceph-radosgw/unit_tests/__init__.py
new file mode 100644
index 00000000..e9479e96
--- /dev/null
+++ b/ceph-radosgw/unit_tests/__init__.py
@@ -0,0 +1,33 @@
+# Copyright 2016 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+from unittest import mock
+
+sys.path.append('actions')
+sys.path.append('hooks')
+sys.path.append('lib')
+sys.path.append('unit_tests')
+
+# Patch out lsb_release() and get_platform() as unit tests should be fully
+# insulated from the underlying platform. Unit tests assume that the system is
+# ubuntu jammy.
+mock.patch(
+    'charmhelpers.osplatform.get_platform', return_value='ubuntu'
+).start()
+mock.patch(
+    'charmhelpers.core.host.lsb_release',
+    return_value={
+        'DISTRIB_CODENAME': 'jammy'
+    }).start()
diff --git a/ceph-radosgw/unit_tests/test_actions.py b/ceph-radosgw/unit_tests/test_actions.py
new file mode 100644
index 00000000..01d7407b
--- /dev/null
+++ b/ceph-radosgw/unit_tests/test_actions.py
@@ -0,0 +1,331 @@
+# Copyright 2016 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from unittest import mock
+from unittest.mock import patch
+
+from test_utils import CharmTestCase
+
+with patch('utils.register_configs') as configs:
+    configs.return_value = 'test-config'
+    import actions
+
+
+class PauseTestCase(CharmTestCase):
+
+    def setUp(self):
+        super(PauseTestCase, self).setUp(
+            actions, ["pause_unit_helper"])
+
+    def test_pauses_services(self):
+        actions.pause([])
+        self.pause_unit_helper.assert_called_once_with('test-config')
+
+
+class ResumeTestCase(CharmTestCase):
+
+    def setUp(self):
+        super(ResumeTestCase, self).setUp(
+            actions, ["resume_unit_helper"])
+
+    def test_resumes_services(self):
+        actions.resume([])
+        self.resume_unit_helper.assert_called_once_with('test-config')
+
+
+class MainTestCase(CharmTestCase):
+
+    def setUp(self):
+        super(MainTestCase, self).setUp(actions, ["action_fail"])
+
+    def test_invokes_action(self):
+        dummy_calls = []
+
+        def dummy_action(args):
+            dummy_calls.append(True)
+
+        with mock.patch.dict(actions.ACTIONS, {"foo": dummy_action}):
+            actions.main(["foo"])
+        self.assertEqual(dummy_calls, [True])
+
+    def test_unknown_action(self):
+        """Unknown actions do not cause a traceback."""
+        exit_string = actions.main(["foo"])
+        self.assertEqual("Action foo undefined", exit_string)
+
+    def test_failing_action(self):
+        """Actions which traceback trigger action_fail() calls."""
+        dummy_calls = []
+
+        self.action_fail.side_effect = dummy_calls.append
+
+        def dummy_action(args):
+            raise ValueError("uh oh")
+
+        with mock.patch.dict(actions.ACTIONS, {"foo": dummy_action}):
+            actions.main(["foo"])
+        self.assertEqual(dummy_calls, ["uh oh"])
+
+
+class MultisiteActionsTestCase(CharmTestCase):
+
+    TO_PATCH = [
+        'action_fail',
+        'action_get',
+        'action_set',
+        'multisite',
+        'config',
+        'is_leader',
+        'leader_set',
+        'service_name',
+        'service_restart',
+        'log',
+    ]
+
+    def setUp(self):
+        super(MultisiteActionsTestCase, self).setUp(actions,
+                                                    self.TO_PATCH)
+        self.config.side_effect = self.test_config.get
+
+    def test_promote(self):
+        self.is_leader.return_value = True
+        self.test_config.set('zone', 'testzone')
+        self.test_config.set('zonegroup', 'testzonegroup')
+        actions.promote([])
+        self.multisite.modify_zone.assert_called_once_with(
+            'testzone',
+            default=True,
+            master=True,
+        )
+        self.multisite.update_period.assert_called_once_with(
+            zonegroup='testzonegroup', zone='testzone'
+        )
+
+    def test_promote_unconfigured(self):
+        self.is_leader.return_value = True
+        self.test_config.set('zone', None)
+        self.test_config.set('zonegroup', None)
+        actions.promote([])
+        self.action_fail.assert_called_once()
+
+    def test_readonly(self):
+        self.test_config.set('zone', 'testzone')
+        actions.readonly([])
+        self.multisite.modify_zone.assert_called_once_with(
+            'testzone',
+            readonly=True,
+        )
+        self.multisite.update_period.assert_called_once_with()
+
+    def test_readonly_unconfigured(self):
+        self.test_config.set('zone', None)
+        actions.readonly([])
+        self.action_fail.assert_called_once()
+
+    def test_readwrite(self):
+        self.test_config.set('zone', 'testzone')
+        actions.readwrite([])
+        self.multisite.modify_zone.assert_called_once_with(
+            'testzone',
+            readonly=False,
+        )
+        self.multisite.update_period.assert_called_once_with()
+
+    def test_readwrite_unconfigured(self):
+        self.test_config.set('zone', None)
+        actions.readwrite([])
+        self.action_fail.assert_called_once()
+
+    def test_tidydefaults(self):
+        self.test_config.set('zone', 'testzone')
+        actions.tidydefaults([])
+        self.multisite.tidy_defaults.assert_called_once_with()
+
+    def test_tidydefaults_unconfigured(self):
+        self.test_config.set('zone', None)
+        actions.tidydefaults([])
+        self.action_fail.assert_called_once()
+
+    def test_enable_buckets_sync(self):
+        self.multisite.is_multisite_configured.return_value = True
+        self.multisite.get_zonegroup_info.return_value = {
+            'master_zone': 'test-zone-id',
+        }
+        self.multisite.get_zone_info.return_value = {
+            'id': 'test-zone-id',
+        }
+        self.is_leader.return_value = True
+        self.action_get.return_value = 'testbucket1,testbucket2,non-existent'
+        self.test_config.set('zone', 'testzone')
+        self.test_config.set('zonegroup', 'testzonegroup')
+        self.test_config.set('realm', 'testrealm')
+        self.multisite.list_buckets.return_value = ['testbucket1',
+                                                    'testbucket2']
+
+        actions.enable_buckets_sync([])
+
+        self.multisite.is_multisite_configured.assert_called_once()
+        self.multisite.get_zonegroup_info.assert_called_once_with(
+            'testzonegroup',
+        )
+        self.multisite.get_zone_info.assert_called_once_with(
+            'testzone',
+        )
+        self.action_get.assert_called_once_with('buckets')
+        self.multisite.list_buckets.assert_called_once_with(
+            zonegroup='testzonegroup', zone='testzone',
+        )
+        self.assertEqual(self.multisite.create_sync_group.call_count, 2)
+        self.multisite.create_sync_group.assert_has_calls([
+            mock.call(bucket='testbucket1',
+                      group_id='default',
+                      status=self.multisite.SYNC_POLICY_ENABLED),
+            mock.call(bucket='testbucket2',
+                      group_id='default',
+                      status=self.multisite.SYNC_POLICY_ENABLED),
+        ])
+        self.assertEqual(self.multisite.create_sync_group_pipe.call_count, 2)
+        self.multisite.create_sync_group_pipe.assert_has_calls([
+            mock.call(bucket='testbucket1',
+                      group_id='default',
+                      pipe_id='default',
+                      source_zones=['*'],
+                      dest_zones=['*']),
+            mock.call(bucket='testbucket2',
+                      group_id='default',
+                      pipe_id='default',
+                      source_zones=['*'],
+                      dest_zones=['*']),
+        ])
+        expected_messages = [
+            'Updated "testbucket1" bucket sync policy to "{}"'.format(
+                self.multisite.SYNC_POLICY_ENABLED),
+            'Updated "testbucket2" bucket sync policy to "{}"'.format(
+                self.multisite.SYNC_POLICY_ENABLED),
+            ('Bucket "non-existent" does not exist in the zonegroup '
+             '"testzonegroup" and zone "testzone"'),
+        ]
+        self.assertEqual(self.log.call_count, 3)
+        self.log.assert_has_calls([
+            mock.call(expected_messages[0]),
+            mock.call(expected_messages[1]),
+            mock.call(expected_messages[2]),
+        ])
+        self.action_set.assert_called_once_with(
+            values={
+                'message': '\n'.join(expected_messages),
+            })
+
+    def test_disable_buckets_sync(self):
+        self.multisite.is_multisite_configured.return_value = True
+        self.multisite.get_zonegroup_info.return_value = {
+            'master_zone': 'test-zone-id',
+        }
+        self.multisite.get_zone_info.return_value = {
+            'id': 'test-zone-id',
+        }
+        self.is_leader.return_value = True
+        self.action_get.return_value = 'testbucket1,non-existent'
+        self.test_config.set('zone', 'testzone')
+        self.test_config.set('zonegroup', 'testzonegroup')
+        self.test_config.set('realm', 'testrealm')
+        self.multisite.list_buckets.return_value = ['testbucket1']
+
+        actions.disable_buckets_sync([])
+
+        self.multisite.is_multisite_configured.assert_called_once()
+        self.multisite.get_zonegroup_info.assert_called_once_with(
+            'testzonegroup',
+        )
+        self.multisite.get_zone_info.assert_called_once_with(
+            'testzone',
+        )
+        self.action_get.assert_called_once_with('buckets')
+        self.multisite.list_buckets.assert_called_once_with(
+            zonegroup='testzonegroup', zone='testzone',
+        )
+        self.multisite.create_sync_group.assert_called_once_with(
+            bucket='testbucket1',
+            group_id='default',
+            status=self.multisite.SYNC_POLICY_FORBIDDEN,
+        )
+        self.multisite.create_sync_group_pipe.assert_called_once_with(
+            bucket='testbucket1',
+            group_id='default',
+            pipe_id='default',
+            source_zones=['*'],
+            dest_zones=['*'],
+        )
+        expected_messages = [
+            'Updated "testbucket1" bucket sync policy to "{}"'.format(
+                self.multisite.SYNC_POLICY_FORBIDDEN),
+            ('Bucket "non-existent" does not exist in the zonegroup '
+             '"testzonegroup" and zone "testzone"'),
+        ]
+        self.assertEqual(self.log.call_count, 2)
+        self.log.assert_has_calls([
+            mock.call(expected_messages[0]),
+            mock.call(expected_messages[1]),
+        ])
+        self.action_set.assert_called_once_with(
+            values={
+                'message': '\n'.join(expected_messages),
+            })
+
+    def test_reset_buckets_sync(self):
+        self.multisite.is_multisite_configured.return_value = True
+        self.multisite.get_zonegroup_info.return_value = {
+            'master_zone': 'test-zone-id',
+        }
+        self.multisite.get_zone_info.return_value = {
+            'id': 'test-zone-id',
+        }
+        self.is_leader.return_value = True
+        self.action_get.return_value = 'testbucket1,non-existent'
+        self.test_config.set('zone', 'testzone')
+        self.test_config.set('zonegroup', 'testzonegroup')
+        self.test_config.set('realm', 'testrealm')
+        self.multisite.list_buckets.return_value = ['testbucket1']
+
+        actions.reset_buckets_sync([])
+
+        self.multisite.is_multisite_configured.assert_called_once()
+        self.multisite.get_zonegroup_info.assert_called_once_with(
+            'testzonegroup',
+        )
+        self.multisite.get_zone_info.assert_called_once_with(
+            'testzone',
+        )
+        self.action_get.assert_called_once_with('buckets')
+        self.multisite.list_buckets.assert_called_once_with(
+            zonegroup='testzonegroup', zone='testzone',
+        )
+        self.multisite.remove_sync_group.assert_called_once_with(
+            bucket='testbucket1',
+            group_id='default',
+        )
+        expected_messages = [
+            'Reset "testbucket1" bucket sync policy',
+            ('Bucket "non-existent" does not exist in the zonegroup '
+             '"testzonegroup" and zone "testzone"'),
+        ]
+        self.assertEqual(self.log.call_count, 2)
+        self.log.assert_has_calls([
+            mock.call(expected_messages[0]),
+            mock.call(expected_messages[1]),
+        ])
+        self.action_set.assert_called_once_with(
+            values={
+                'message': '\n'.join(expected_messages),
+            })
diff --git a/ceph-radosgw/unit_tests/test_ceph.py b/ceph-radosgw/unit_tests/test_ceph.py
new file mode 100644
index 00000000..98e3d37a
--- /dev/null
+++ b/ceph-radosgw/unit_tests/test_ceph.py
@@ -0,0 +1,399 @@
+# Copyright 2016 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
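+
+# Unit tests covering the Ceph broker requests built by ceph_rgw and the
+# IPv6 setup helper in utils.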
+
+from unittest.mock import patch, call
+
+import ceph_rgw as ceph  # noqa
+import utils  # noqa
+
+from test_utils import CharmTestCase  # noqa
+
+TO_PATCH = [
+    'config',
+    'os',
+    'subprocess',
+    'mkdir',
+    'service_name',
+]
+
+
+class CephRadosGWCephTests(CharmTestCase):
+    def setUp(self):
+        super(CephRadosGWCephTests, self).setUp(ceph, TO_PATCH)
+        self.config.side_effect = self.test_config.get
+        self.service_name.return_value = 'ceph-radosgw'
+
+    def test_import_radosgw_key(self):
+        self.os.path.exists.return_value = False
+        self.os.path.join.return_value = '/etc/ceph/keyring.rados.gateway'
+        ceph.import_radosgw_key('mykey')
+        cmd = [
+            'ceph-authtool',
+            '/etc/ceph/keyring.rados.gateway',
+            '--create-keyring',
+            '--name=client.radosgw.gateway',
+            '--add-key=mykey'
+        ]
+        self.subprocess.check_call.assert_has_calls([
+            call(cmd),
+            call(['chown', 'root:root',
+                  '/etc/ceph/keyring.rados.gateway'])
+        ])
+
+    @patch.object(utils.context, 'CephBlueStoreCompressionContext')
+    @patch('charmhelpers.contrib.storage.linux.ceph.CephBrokerRq'
+           '.add_op_create_replicated_pool')
+    def test_create_rgw_pools_rq_with_prefix(
+            self,
+            mock_broker,
+            mock_bluestore_compression):
+        self.test_config.set('rgw-lightweight-pool-pg-num', 10)
+        self.test_config.set('ceph-osd-replication-count', 3)
+        self.test_config.set('rgw-buckets-pool-weight', 19)
+        ceph.get_create_rgw_pools_rq(prefix='us-east')
+        mock_broker.assert_has_calls([
+            call(name='us-east.rgw.buckets.data', replica_count=3, weight=19,
+                 group='objects', app_name='rgw'),
+            call('us-east.rgw.control', replica_count=3, pg_num=10,
+                 weight=None, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('us-east.rgw.data.root', replica_count=3, pg_num=10,
+                 weight=None, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('us-east.rgw.gc', replica_count=3, pg_num=10, weight=None,
+                 group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('us-east.rgw.log', replica_count=3, pg_num=10, weight=None,
+                 group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('us-east.rgw.intent-log', replica_count=3, pg_num=10,
+                 weight=None, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('us-east.rgw.meta', replica_count=3, pg_num=10, weight=None,
+                 group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('us-east.rgw.otp', replica_count=3, pg_num=10, weight=None,
+                 group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('us-east.rgw.usage', replica_count=3, pg_num=10, weight=None,
+                 group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('us-east.rgw.users.keys', replica_count=3, pg_num=10,
+                 weight=None, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('us-east.rgw.users.email', replica_count=3, pg_num=10,
+                 weight=None, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('us-east.rgw.users.swift', replica_count=3, pg_num=10,
+                 weight=None, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('us-east.rgw.users.uid', replica_count=3, pg_num=10,
+                 weight=None, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('us-east.rgw.buckets.extra', replica_count=3, pg_num=10,
+                 weight=None, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('us-east.rgw.buckets.index', replica_count=3, pg_num=10,
+                 weight=None, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+        ])
+
+        # confirm operation with bluestore compression
+        mock_broker.reset_mock()
+        mock_bluestore_compression().get_kwargs.return_value = {
+            'compression_mode': 'fake',
+        }
+        ceph.get_create_rgw_pools_rq(prefix='us-east')
+        mock_broker.assert_has_calls([
+            call(name='us-east.rgw.buckets.data', replica_count=3, weight=19,
+                 group='objects', app_name='rgw', compression_mode='fake'),
+            call('us-east.rgw.control', replica_count=3, pg_num=10,
+                 weight=None, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('us-east.rgw.data.root', replica_count=3, pg_num=10,
+                 weight=None, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('us-east.rgw.gc', replica_count=3, pg_num=10, weight=None,
+                 group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('us-east.rgw.log', replica_count=3, pg_num=10, weight=None,
+                 group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('us-east.rgw.intent-log', replica_count=3, pg_num=10,
+                 weight=None, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('us-east.rgw.meta', replica_count=3, pg_num=10, weight=None,
+                 group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('us-east.rgw.otp', replica_count=3, pg_num=10, weight=None,
+                 group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('us-east.rgw.usage', replica_count=3, pg_num=10, weight=None,
+                 group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('us-east.rgw.users.keys', replica_count=3, pg_num=10,
+                 weight=None, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('us-east.rgw.users.email', replica_count=3, pg_num=10,
+                 weight=None, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('us-east.rgw.users.swift', replica_count=3, pg_num=10,
+                 weight=None, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('us-east.rgw.users.uid', replica_count=3, pg_num=10,
+                 weight=None, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('us-east.rgw.buckets.extra', replica_count=3, pg_num=10,
+                 weight=None, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('us-east.rgw.buckets.index', replica_count=3, pg_num=10,
+                 weight=None, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+        ])
+
+    @patch.object(utils.context, 'CephBlueStoreCompressionContext')
+    @patch('charmhelpers.contrib.storage.linux.ceph.CephBrokerRq'
+           '.add_op_request_access_to_group')
+    @patch('charmhelpers.contrib.storage.linux.ceph.CephBrokerRq'
+           '.add_op_create_replicated_pool')
+    def test_create_rgw_pools_rq_no_prefix_post_jewel(
+            self,
+            mock_broker,
+            mock_request_access,
+            mock_bluestore_compression):
+        self.test_config.set('rgw-lightweight-pool-pg-num', -1)
+        self.test_config.set('ceph-osd-replication-count', 3)
+        self.test_config.set('rgw-buckets-pool-weight', 19)
+        self.test_config.set('restrict-ceph-pools', True)
+        ceph.get_create_rgw_pools_rq(prefix=None)
+        mock_broker.assert_has_calls([
+            call(name='default.rgw.buckets.data', replica_count=3, weight=19,
+                 group='objects', app_name='rgw'),
+            call('default.rgw.control', replica_count=3, pg_num=None,
+                 weight=0.1, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('default.rgw.data.root', replica_count=3, pg_num=None,
+                 weight=0.1, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('default.rgw.gc', replica_count=3, pg_num=None, weight=0.1,
+                 group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('default.rgw.log', replica_count=3, pg_num=None, weight=0.1,
+                 group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('default.rgw.intent-log', replica_count=3, pg_num=None,
+                 weight=0.1, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('default.rgw.meta', replica_count=3, pg_num=None, weight=0.1,
+                 group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('default.rgw.otp', replica_count=3, pg_num=None, weight=0.1,
+                 group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('default.rgw.usage', replica_count=3, pg_num=None, weight=0.1,
+                 group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('default.rgw.users.keys', replica_count=3, pg_num=None,
+                 weight=0.1, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('default.rgw.users.email', replica_count=3, pg_num=None,
+                 weight=0.1, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('default.rgw.users.swift', replica_count=3, pg_num=None,
+                 weight=0.1, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('default.rgw.users.uid', replica_count=3, pg_num=None,
+                 weight=0.1, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('default.rgw.buckets.extra', replica_count=3, pg_num=None,
+                 weight=1.0, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('default.rgw.buckets.index', replica_count=3, pg_num=None,
+                 weight=3.0, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+        ])
+        mock_request_access.assert_called_with(key_name='radosgw.gateway',
+                                               name='objects',
+                                               permission='rwx')
+
+        # confirm operation with bluestore compression
+        mock_broker.reset_mock()
+        mock_bluestore_compression().get_kwargs.return_value = {
+            'compression_mode': 'fake',
+        }
+        ceph.get_create_rgw_pools_rq(prefix=None)
+        mock_broker.assert_has_calls([
+            call(name='default.rgw.buckets.data', replica_count=3, weight=19,
+                 group='objects', app_name='rgw', compression_mode='fake'),
+            call('default.rgw.control', replica_count=3, pg_num=None,
+                 weight=0.1, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('default.rgw.data.root', replica_count=3, pg_num=None,
+                 weight=0.1, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('default.rgw.gc', replica_count=3, pg_num=None, weight=0.1,
+                 group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('default.rgw.log', replica_count=3, pg_num=None, weight=0.1,
+                 group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('default.rgw.intent-log', replica_count=3, pg_num=None,
+                 weight=0.1, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('default.rgw.meta', replica_count=3, pg_num=None, weight=0.1,
+                 group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('default.rgw.otp', replica_count=3, pg_num=None, weight=0.1,
+                 group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('default.rgw.usage', replica_count=3, pg_num=None, weight=0.1,
+                 group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('default.rgw.users.keys', replica_count=3, pg_num=None,
+                 weight=0.1, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('default.rgw.users.email', replica_count=3, pg_num=None,
+                 weight=0.1, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('default.rgw.users.swift', replica_count=3, pg_num=None,
+                 weight=0.1, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('default.rgw.users.uid', replica_count=3, pg_num=None,
+                 weight=0.1, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('default.rgw.buckets.extra', replica_count=3, pg_num=None,
+                 weight=1.0, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+            call('default.rgw.buckets.index', replica_count=3, pg_num=None,
+                 weight=3.0, group='objects', namespace=None, app_name='rgw',
+                 max_bytes=None, max_objects=None),
+        ])
+
+    @patch.object(utils.context, 'CephBlueStoreCompressionContext')
+    @patch('charmhelpers.contrib.storage.linux.ceph.CephBrokerRq'
+           '.add_op_create_erasure_profile')
+    @patch('charmhelpers.contrib.storage.linux.ceph.CephBrokerRq'
+           '.add_op_create_erasure_pool')
+    @patch('charmhelpers.contrib.storage.linux.ceph.CephBrokerRq'
+           '.add_op_request_access_to_group')
+    @patch('charmhelpers.contrib.storage.linux.ceph.CephBrokerRq'
+           '.add_op_create_pool')
+    def test_create_rgw_pools_rq_no_prefix_ec(self, mock_broker,
+                                              mock_request_access,
+                                              mock_request_create_ec_pool,
+                                              mock_request_create_ec_profile,
+                                              mock_bluestore_compression):
+        self.test_config.set('rgw-lightweight-pool-pg-num', -1)
+        self.test_config.set('ceph-osd-replication-count', 3)
+        self.test_config.set('rgw-buckets-pool-weight', 19)
+        self.test_config.set('restrict-ceph-pools', True)
+        self.test_config.set('pool-type', 'erasure-coded')
+        self.test_config.set('ec-profile-k', 3)
+        self.test_config.set('ec-profile-m', 9)
+        self.test_config.set('ec-profile-technique', 'cauchy_good')
+        ceph.get_create_rgw_pools_rq(prefix=None)
+        mock_request_create_ec_profile.assert_called_once_with(
+            name='ceph-radosgw-profile',
+            k=3, m=9,
+            lrc_locality=None,
+            lrc_crush_locality=None,
+            shec_durability_estimator=None,
+            clay_helper_chunks=None,
+            clay_scalar_mds=None,
+            device_class=None,
+            erasure_type='jerasure',
+            erasure_technique='cauchy_good'
+        )
+        mock_request_create_ec_pool.assert_has_calls([
+            call(name='default.rgw.buckets.data',
+                 erasure_profile='ceph-radosgw-profile',
+                 weight=19,
+                 group="objects",
+                 app_name='rgw')
+        ])
+        mock_broker.assert_has_calls([
+            call(weight=0.10, replica_count=3, name='default.rgw.control',
+                 group='objects', app_name='rgw'),
+            call(weight=0.10, replica_count=3, name='default.rgw.data.root',
+                 group='objects', app_name='rgw'),
+            call(weight=0.10, replica_count=3, name='default.rgw.gc',
+                 group='objects', app_name='rgw'),
+            call(weight=0.10, replica_count=3, name='default.rgw.log',
+                 group='objects', app_name='rgw'),
+            call(weight=0.10, replica_count=3, name='default.rgw.intent-log',
+                 group='objects', app_name='rgw'),
+            call(weight=0.10, replica_count=3, name='default.rgw.meta',
+                 group='objects', app_name='rgw'),
+            call(weight=0.10, replica_count=3, name='default.rgw.otp',
+                 group='objects', app_name='rgw'),
+            call(weight=0.10, replica_count=3, name='default.rgw.usage',
+                 group='objects', app_name='rgw'),
+            call(weight=0.10, replica_count=3, name='default.rgw.users.keys',
+                 group='objects', app_name='rgw'),
+            call(weight=0.10, replica_count=3, name='default.rgw.users.email',
+                 group='objects', app_name='rgw'),
+            call(weight=0.10, replica_count=3, name='default.rgw.users.swift',
+                 group='objects', app_name='rgw'),
+            call(weight=0.10, replica_count=3, name='default.rgw.users.uid',
+                 group='objects', app_name='rgw'),
+            call(weight=1.00, replica_count=3,
+                 name='default.rgw.buckets.extra',
+                 group='objects', app_name='rgw'),
+            call(weight=3.00, replica_count=3,
+                 name='default.rgw.buckets.index',
+                 group='objects', app_name='rgw'),
+        ])
+        mock_request_access.assert_called_with(key_name='radosgw.gateway',
+                                               name='objects',
+                                               permission='rwx')
+        # confirm operation with bluestore compression
+        mock_request_create_ec_pool.reset_mock()
+        mock_bluestore_compression().get_kwargs.return_value = {
+            'compression_mode': 'fake',
+        }
+        ceph.get_create_rgw_pools_rq(prefix=None)
+        mock_request_create_ec_pool.assert_has_calls([
+            call(name='default.rgw.buckets.data',
+                 erasure_profile='ceph-radosgw-profile',
+                 weight=19,
+                 group="objects",
+                 app_name='rgw',
+                 compression_mode='fake')
+        ])
+
+    @patch.object(utils.apt_pkg, 'version_compare', lambda *args: -1)
+    @patch.object(utils, 'lsb_release',
+                  lambda: {'DISTRIB_CODENAME': 'trusty'})
+    @patch.object(utils, 'add_source')
+    @patch.object(utils, 'apt_update')
+    @patch.object(utils, 'apt_install')
+    def test_setup_ipv6_install_backports(self, mock_apt_install,
+                                          mock_apt_update,
+                                          mock_add_source):
+        utils.setup_ipv6()
+        self.assertTrue(mock_apt_update.called)
+        self.assertTrue(mock_apt_install.called)
+
+    @patch.object(utils.apt_pkg, 'version_compare', lambda *args: 0)
+    @patch.object(utils, 'lsb_release',
+                  lambda: {'DISTRIB_CODENAME': 'trusty'})
+    @patch.object(utils, 'add_source')
+    @patch.object(utils, 'apt_update')
+    @patch.object(utils, 'apt_install')
+    def test_setup_ipv6_not_install_backports(self, mock_apt_install,
+                                              mock_apt_update,
+                                              mock_add_source):
+        utils.setup_ipv6()
+        self.assertFalse(mock_apt_update.called)
+        self.assertFalse(mock_apt_install.called)
diff --git a/ceph-radosgw/unit_tests/test_ceph_radosgw_context.py b/ceph-radosgw/unit_tests/test_ceph_radosgw_context.py
new file mode 100644
index 00000000..32d6a962
--- /dev/null
+++ b/ceph-radosgw/unit_tests/test_ceph_radosgw_context.py
@@ -0,0 +1,589 @@
+# Copyright 2016 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
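+
+# Unit tests for the charm's HAProxy and Ceph mon configuration contexts.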
+
+from unittest.mock import patch
+
+import ceph_radosgw_context as context
+import charmhelpers.contrib.storage.linux.ceph as ceph
+import charmhelpers.fetch as fetch
+
+from test_utils import CharmTestCase
+
+TO_PATCH = [
+    'config',
+    'log',
+    'relation_get',
+    'relation_ids',
+    'related_units',
+    'cmp_pkgrevno',
+    'arch',
+    'socket',
+    'unit_public_ip',
+    'determine_api_port',
+    'leader_get',
+    'multisite',
+    'utils',
+]
+
+
+class HAProxyContextTests(CharmTestCase):
+    def setUp(self):
+        super(HAProxyContextTests, self).setUp(context, TO_PATCH)
+        self.relation_get.side_effect = self.test_relation.get
+        self.config.side_effect = self.test_config.get
+        self.cmp_pkgrevno.return_value = 1
+        self.arch.return_value = 'amd64'
+
+    @patch('ceph_radosgw_context.https')
+    @patch('charmhelpers.contrib.openstack.context.is_ipv6_disabled')
+    @patch('charmhelpers.contrib.openstack.context.get_relation_ip')
+    @patch('charmhelpers.contrib.openstack.context.mkdir')
+    @patch('charmhelpers.contrib.openstack.context.local_unit')
+    @patch('charmhelpers.contrib.openstack.context.config')
+    @patch('charmhelpers.contrib.hahelpers.cluster.config_get')
+    @patch('charmhelpers.contrib.openstack.context.relation_ids')
+    @patch('charmhelpers.contrib.hahelpers.cluster.relation_ids')
+    def test_ctxt(self, _harelation_ids, _ctxtrelation_ids, _haconfig,
+                  _ctxtconfig, _local_unit, _mkdir, _get_relation_ip,
+                  _is_ipv6_disabled, _mock_https):
+        _mock_https.return_value = False
+        _get_relation_ip.return_value = '10.0.0.10'
+        _ctxtconfig.side_effect = self.test_config.get
+        _haconfig.side_effect = self.test_config.get
+        _harelation_ids.return_value = []
+        haproxy_context = context.HAProxyContext()
+        self.utils.listen_port.return_value = 80
+        self.determine_api_port.return_value = 70
+        expect = {
+            'cephradosgw_bind_port': 70,
+            'service_ports': {'cephradosgw-server': [80, 70]},
+            'backend_options': {'cephradosgw-server': [{
+                'option': 'httpchk GET /swift/healthcheck',
+            }]},
+            'https': False
+        }
+        self.assertEqual(expect, haproxy_context())
+        _is_ipv6_disabled.assert_called_once_with()
+
+
+class MonContextTest(CharmTestCase):
+    maxDiff = None
+
+    def setUp(self):
+        super(MonContextTest, self).setUp(context, TO_PATCH)
+        self.config.side_effect = self.test_config.get
+        self.unit_public_ip.return_value = '10.255.255.255'
+        self.cmp_pkgrevno.side_effect = lambda *args: 1
+        self.arch.return_value = 'amd64'
+        self.test_config.set('zonegroup', 'zonegroup1')
+        self.test_config.set('realm', 'realmX')
+
+    @staticmethod
+    def plain_list_stub(key):
+        if key == "zone":
+            return ["default"]
+        if key == "zonegroup":
+            return ["zonegroup1"]
+        if key == "realm":
+            return ["realmX"]
+        else:
+            return []
+
+    @patch('ceph_radosgw_context.https')
+    @patch('charmhelpers.contrib.hahelpers.cluster.relation_ids')
+    @patch('charmhelpers.contrib.hahelpers.cluster.config_get')
+    @patch.object(ceph, 'config', lambda *args:
+                  '{"client.radosgw.gateway": {"rgw init timeout": 60}}')
+    @patch.object(context, 'ensure_host_resolvable_v6')
+    def test_ctxt(
+            self, mock_ensure_rsv_v6, mock_config_get, mock_relation_ids,
+            mock_https,
+    ):
+        mock_https.return_value = False
+        mock_relation_ids.return_value = []
+        mock_config_get.side_effect = self.test_config.get
+        self.socket.gethostname.return_value = 'testhost'
+        mon_ctxt = context.MonContext()
+        addresses = ['10.5.4.1', '10.5.4.2', '10.5.4.3']
+
+        def _relation_get(attr, unit, rid):
+            if attr == 'ceph-public-address':
+                return addresses.pop()
+            elif attr == 'auth':
+                return 'cephx'
+            elif attr == 'rgw.testhost_key':
+                return 'testkey'
+            elif attr == 'fsid':
+                return 'testfsid'
+
+        self.relation_get.side_effect = _relation_get
+        self.relation_ids.return_value = ['mon:6']
+        self.related_units.return_value = ['ceph/0', 'ceph/1', 'ceph/2']
+        self.multisite.plain_list = self.plain_list_stub
+        self.determine_api_port.return_value = 70
+        expect = {
+            'auth_supported': 'cephx',
+            'hostname': 'testhost',
+            'mon_hosts': '10.5.4.1 10.5.4.2 10.5.4.3',
+            'old_auth': False,
+            'systemd_rgw': True,
+            'unit_public_ip': '10.255.255.255',
+            'use_syslog': 'false',
+            'loglevel': 1,
+            'port': 70,
+            'client_radosgw_gateway': {'rgw init timeout': 60},
+            'ipv6': False,
+            'rgw_zone': 'default',
+            'fsid': 'testfsid',
+            'rgw_swift_versioning': False,
+            'frontend': 'beast',
+            'relaxed_s3_bucket_names': False,
+            'rgw_zonegroup': 'zonegroup1',
+            'rgw_realm': 'realmX',
+            'behind_https_proxy': False,
+            'virtual_hosted_bucket_enabled': False,
+        }
+        self.assertEqual(expect, mon_ctxt())
+        self.assertFalse(mock_ensure_rsv_v6.called)
+
+        self.test_config.set('prefer-ipv6', True)
+        addresses = ['10.5.4.1', '10.5.4.2', '10.5.4.3']
+        expect['ipv6'] = True
+        expect['port'] = "[::]:%s" % (70)
+        self.assertEqual(expect, mon_ctxt())
+        self.assertTrue(mock_ensure_rsv_v6.called)
+
+    @patch('ceph_radosgw_context.https')
+    @patch.object(ceph, 'config', lambda *args:
+                  '{"client.radosgw.gateway": {"rgw init timeout": 60}}')
+    @patch.object(context, 'ensure_host_resolvable_v6')
+    def test_ctxt_with_https_proxy(self, mock_ensure_rsv_v6, mock_https):
+        mock_https.return_value = True
+        self.socket.gethostname.return_value = 'testhost'
+        mon_ctxt = context.MonContext()
+        addresses = ['10.5.4.1', '10.5.4.2', '10.5.4.3']
+
+        def _relation_get(attr, unit, rid):
+            if attr == 'ceph-public-address':
+                return addresses.pop()
+            elif attr == 'auth':
+                return 'cephx'
+            elif attr == 'rgw.testhost_key':
+                return 'testkey'
+            elif attr == 'fsid':
+                return 'testfsid'
+
+        self.relation_get.side_effect = _relation_get
+        self.relation_ids.return_value = ['mon:6']
+        self.related_units.return_value = ['ceph/0', 'ceph/1', 'ceph/2']
+        self.multisite.plain_list = self.plain_list_stub
+        self.determine_api_port.return_value = 70
+        expect = {
+            'auth_supported': 'cephx',
+            'hostname': 'testhost',
+            'mon_hosts': '10.5.4.1 10.5.4.2 10.5.4.3',
+            'old_auth': False,
+            'systemd_rgw': True,
+            'unit_public_ip': '10.255.255.255',
+            'use_syslog': 'false',
+            'loglevel': 1,
+            'port': 70,
+            'client_radosgw_gateway': {'rgw init timeout': 60},
+            'ipv6': False,
+            'rgw_zone': 'default',
+            'fsid': 'testfsid',
+            'rgw_swift_versioning': False,
+            'frontend': 'beast',
+            'relaxed_s3_bucket_names': False,
+            'rgw_zonegroup': 'zonegroup1',
+            'rgw_realm': 'realmX',
+            'behind_https_proxy': True,
+            'virtual_hosted_bucket_enabled': False,
+        }
+        self.assertEqual(expect, mon_ctxt())
+        self.assertFalse(mock_ensure_rsv_v6.called)
+
+        self.test_config.set('prefer-ipv6', True)
+        addresses = ['10.5.4.1', '10.5.4.2', '10.5.4.3']
+        expect['ipv6'] = True
+        expect['port'] = "[::]:%s" % (70)
+        self.assertEqual(expect, mon_ctxt())
+        self.assertTrue(mock_ensure_rsv_v6.called)
+
+    @patch('ceph_radosgw_context.https')
+    @patch('charmhelpers.contrib.hahelpers.cluster.relation_ids')
+    @patch('charmhelpers.contrib.hahelpers.cluster.config_get')
+    @patch.object(ceph, 'config', lambda *args:
+                  '{"client.radosgw.gateway": {"rgw init timeout": 60}}')
+    @patch.object(context, 'ensure_host_resolvable_v6')
+    def test_list_of_addresses_from_ceph_proxy(
+            self, mock_ensure_rsv_v6, mock_config_get, mock_relation_ids,
+            mock_https,
+    ):
+        mock_https.return_value = False
+        mock_relation_ids.return_value = []
+        mock_config_get.side_effect = self.test_config.get
+        self.socket.gethostname.return_value = 'testhost'
+        mon_ctxt = context.MonContext()
+        addresses = ['10.5.4.1 10.5.4.2 10.5.4.3']
+        self.cmp_pkgrevno.return_value = 1
+
+        def _relation_get(attr, unit, rid):
+            if attr == 'ceph-public-address':
+                return addresses.pop()
+            elif attr == 'auth':
+                return 'cephx'
+            elif attr == 'rgw.testhost_key':
+                return 'testkey'
+            elif attr == 'fsid':
+                return 'testfsid'
+
+        self.relation_get.side_effect = _relation_get
+        self.relation_ids.return_value = ['mon:6']
+        self.multisite.plain_list = self.plain_list_stub
+        self.related_units.return_value = ['ceph-proxy/0']
+        self.determine_api_port.return_value = 70
+        expect = {
+            'auth_supported': 'cephx',
+            'hostname': 'testhost',
+            'mon_hosts': '10.5.4.1 10.5.4.2 10.5.4.3',
+            'old_auth': False,
+            'systemd_rgw': True,
+            'unit_public_ip': '10.255.255.255',
+            'use_syslog': 'false',
+            'loglevel': 1,
+            'port': 70,
+            'client_radosgw_gateway': {'rgw init timeout': 60},
+            'ipv6': False,
+            'rgw_zone': 'default',
+            'fsid': 'testfsid',
+            'rgw_swift_versioning': False,
+            'frontend': 'beast',
+            'relaxed_s3_bucket_names': False,
+            'rgw_zonegroup': 'zonegroup1',
+            'rgw_realm': 'realmX',
+            'behind_https_proxy': False,
+            'virtual_hosted_bucket_enabled': False,
+        }
+        self.assertEqual(expect, mon_ctxt())
+        self.assertFalse(mock_ensure_rsv_v6.called)
+
+        self.test_config.set('prefer-ipv6', True)
+        addresses = ['10.5.4.1 10.5.4.2 10.5.4.3']
+        expect['ipv6'] = True
+        expect['port'] = "[::]:%s" % (70)
+        self.assertEqual(expect, mon_ctxt())
+        self.assertTrue(mock_ensure_rsv_v6.called)
+
+    @patch.object(context, 'format_ipv6_addr', lambda *_: None)
+    @patch('ceph_radosgw_context.https')
+    @patch('charmhelpers.contrib.hahelpers.cluster.relation_ids')
+    @patch('charmhelpers.contrib.hahelpers.cluster.config_get')
+    @patch.object(ceph, 'config', lambda *args:
+                  '{"client.radosgw.gateway": {"rgw init timeout": 60}}')
+    def test_ctxt_missing_data(self, mock_config_get, mock_relation_ids,
+                               mock_https):
+        mock_https.return_value = False
+        mock_relation_ids.return_value = []
+        mock_config_get.side_effect = self.test_config.get
+        self.socket.gethostname.return_value = 'testhost'
+        mon_ctxt = context.MonContext()
+        self.relation_get.return_value = None
+        self.relation_ids.return_value = ['mon:6']
+        self.related_units.return_value = ['ceph/0', 'ceph/1', 'ceph/2']
+        self.assertEqual({}, mon_ctxt())
+
+    @patch('ceph_radosgw_context.https')
+    @patch('charmhelpers.contrib.hahelpers.cluster.relation_ids')
+    @patch('charmhelpers.contrib.hahelpers.cluster.config_get')
+    @patch.object(ceph, 'config', lambda *args:
+                  '{"client.radosgw.gateway": {"rgw init timeout": 60}}')
+    def test_ctxt_inconsistent_auths(self, mock_config_get, mock_relation_ids,
+                                     mock_https):
+        mock_https.return_value = False
+        mock_relation_ids.return_value = []
+        mock_config_get.side_effect = self.test_config.get
+        self.socket.gethostname.return_value = 'testhost'
+        mon_ctxt = context.MonContext()
+        addresses = ['10.5.4.1', '10.5.4.2', '10.5.4.3']
+        auths = ['cephx', 'cephy', 'cephz']
+
+        def _relation_get(attr, unit, rid):
+            if attr == 'ceph-public-address':
+                return addresses.pop()
+            elif attr == 'auth':
+                return auths.pop()
+            elif attr == 'rgw.testhost_key':
+                return 'testkey'
+            elif attr == 'fsid':
+                return 'testfsid'
+
+        self.relation_get.side_effect = _relation_get
+        self.relation_ids.return_value = ['mon:6']
+        self.related_units.return_value = ['ceph/0', 'ceph/1', 'ceph/2']
+        self.multisite.plain_list = self.plain_list_stub
+        self.determine_api_port.return_value = 70
+        expect = {
+            'auth_supported': 'none',
+            'hostname': 'testhost',
+            'mon_hosts': '10.5.4.1 10.5.4.2 10.5.4.3',
+            'old_auth': False,
+            'systemd_rgw': True,
+            'unit_public_ip': '10.255.255.255',
+            'use_syslog': 'false',
+            'loglevel': 1,
+            'port': 70,
+            'client_radosgw_gateway': {'rgw init timeout': 60},
+            'ipv6': False,
+            'rgw_zone': 'default',
+            'fsid': 'testfsid',
+            'rgw_swift_versioning': False,
+            'frontend': 'beast',
+            'relaxed_s3_bucket_names': False,
+            'rgw_zonegroup': 'zonegroup1',
+            'rgw_realm': 'realmX',
+            'behind_https_proxy': False,
+            'virtual_hosted_bucket_enabled': False,
+        }
+        self.assertEqual(expect, mon_ctxt())
+
+    @patch('ceph_radosgw_context.https')
+    @patch('charmhelpers.contrib.hahelpers.cluster.relation_ids')
+    @patch('charmhelpers.contrib.hahelpers.cluster.config_get')
+    @patch.object(ceph, 'config', lambda *args:
+                  '{"client.radosgw.gateway": {"rgw init timeout": 60}}')
+    def test_ctxt_consistent_auths(self, mock_config_get, mock_relation_ids,
+                                   mock_https):
+        mock_https.return_value = False
+        mock_relation_ids.return_value = []
+        mock_config_get.side_effect = self.test_config.get
+        self.socket.gethostname.return_value = 'testhost'
+        mon_ctxt = context.MonContext()
+        addresses = ['10.5.4.1', '10.5.4.2', '10.5.4.3']
+        auths = ['cephx', 'cephx', 'cephx']
+
+        def _relation_get(attr, unit, rid):
+            if attr == 'ceph-public-address':
+                return addresses.pop()
+            elif attr == 'auth':
+                return auths.pop()
+            elif attr == 'rgw.testhost_key':
+                return 'testkey'
+            elif attr == 'fsid':
+                return 'testfsid'
+
+        self.relation_get.side_effect = _relation_get
+        self.relation_ids.return_value = ['mon:6']
+        self.related_units.return_value = ['ceph/0', 'ceph/1', 'ceph/2']
+        self.determine_api_port.return_value = 70
+        self.multisite.plain_list = self.plain_list_stub
+        expect = {
+            'auth_supported': 'cephx',
+            'hostname': 'testhost',
+            'mon_hosts': '10.5.4.1 10.5.4.2 10.5.4.3',
+            'old_auth': False,
+            'systemd_rgw': True,
+            'unit_public_ip': '10.255.255.255',
+            'use_syslog': 'false',
+            'loglevel': 1,
+            'port': 70,
+            'client_radosgw_gateway': {'rgw init timeout': 60},
+            'ipv6': False,
+            'rgw_zone': 'default',
+            'fsid': 'testfsid',
+            'rgw_swift_versioning': False,
+            'frontend': 'beast',
+            'relaxed_s3_bucket_names': False,
+            'rgw_zonegroup': 'zonegroup1',
+            'rgw_realm': 'realmX',
+            'behind_https_proxy': False,
+            'virtual_hosted_bucket_enabled': False,
+        }
+        self.assertEqual(expect, mon_ctxt())
+
+    def test_resolve_http_frontend(self):
+        _test_version = '12.2.0'
+
+        def _compare_version(package, version):
+            return fetch.apt_pkg.version_compare(
+                _test_version, version
+            )
+
+        # Older releases, default and invalid configuration
+        self.cmp_pkgrevno.side_effect = _compare_version
+        self.assertEqual('civetweb', context.resolve_http_frontend())
+
+        # Default for Octopus but not Pacific
+        _test_version = '15.2.0'
+        self.assertEqual('beast', context.resolve_http_frontend())
+
+        self.arch.return_value = 's390x'
+        self.assertEqual('civetweb', context.resolve_http_frontend())
+
+        # Default for Pacific and later
+        _test_version = '16.2.0'
+        self.assertEqual('beast', context.resolve_http_frontend())
+        self.arch.return_value = 'amd64'
+        self.assertEqual('beast', context.resolve_http_frontend())
+
+    def test_validate_http_frontend(self):
+        _test_version = '12.2.0'
+
+        def _compare_version(package, version):
+            return fetch.apt_pkg.version_compare(
+
_test_version, version + ) + + self.cmp_pkgrevno.side_effect = _compare_version + + # Invalid configuration option + with self.assertRaises(ValueError): + context.validate_http_frontend('foobar') + + # beast config but ceph pre mimic + with self.assertRaises(ValueError): + context.validate_http_frontend('beast') + + # Mimic with valid configuration + _test_version = '13.2.0' + context.validate_http_frontend('beast') + context.validate_http_frontend('civetweb') + + # beast config on unsupported s390x/octopus + _test_version = '15.2.0' + self.arch.return_value = 's390x' + with self.assertRaises(ValueError): + context.validate_http_frontend('beast') + + # beast config on s390x/pacific + _test_version = '16.2.0' + context.validate_http_frontend('beast') + + @patch('ceph_radosgw_context.https') + @patch('charmhelpers.contrib.hahelpers.cluster.relation_ids') + @patch('charmhelpers.contrib.hahelpers.cluster.config_get') + @patch.object(ceph, 'config', lambda *args: + '{"client.radosgw.gateway": {"rgw init timeout": 60}}') + def test_ctxt_inconsistent_fsids(self, mock_config_get, mock_relation_ids, + mock_https): + mock_https.return_value = False + mock_relation_ids.return_value = [] + mock_config_get.side_effect = self.test_config.get + self.socket.gethostname.return_value = 'testhost' + mon_ctxt = context.MonContext() + addresses = ['10.5.4.1', '10.5.4.2', '10.5.4.3'] + fsids = ['testfsid', 'testfsid', None] + + def _relation_get(attr, unit, rid): + if attr == 'ceph-public-address': + return addresses.pop() + elif attr == 'auth': + return 'cephx' + elif attr == 'rgw.testhost_key': + return 'testkey' + elif attr == 'fsid': + return fsids.pop() + + self.relation_get.side_effect = _relation_get + self.relation_ids.return_value = ['mon:6'] + self.related_units.return_value = ['ceph/0', 'ceph/1', 'ceph/2'] + self.multisite.plain_list = self.plain_list_stub + self.determine_api_port.return_value = 70 + expect = { + 'auth_supported': 'cephx', + 'hostname': 'testhost', + 'mon_hosts': '10.5.4.1 10.5.4.2 10.5.4.3', + 'old_auth': False, + 'systemd_rgw': True, + 'unit_public_ip': '10.255.255.255', + 'use_syslog': 'false', + 'loglevel': 1, + 'port': 70, + 'client_radosgw_gateway': {'rgw init timeout': 60}, + 'ipv6': False, + 'rgw_zone': 'default', + 'fsid': 'testfsid', + 'rgw_swift_versioning': False, + 'frontend': 'beast', + 'relaxed_s3_bucket_names': False, + 'rgw_zonegroup': 'zonegroup1', + 'rgw_realm': 'realmX', + 'behind_https_proxy': False, + 'virtual_hosted_bucket_enabled': False, + } + self.assertEqual(expect, mon_ctxt()) + + @patch('ceph_radosgw_context.https') + @patch('charmhelpers.contrib.hahelpers.cluster.relation_ids') + @patch('charmhelpers.contrib.hahelpers.cluster.config_get') + @patch.object(ceph, 'config', lambda *args: + '{"client.radosgw.gateway": {"rgw init timeout": 60}}') + def test_ctxt_virtual_hosted_bucket(self, mock_config_get, + mock_relation_ids, mock_https): + mock_https.return_value = False + mock_relation_ids.return_value = [] + self.test_config.set('virtual-hosted-bucket-enabled', True) + self.test_config.set('os-public-hostname', 'rgw.example.com') + mock_config_get.side_effect = self.test_config.get + self.socket.gethostname.return_value = 'testhost' + mon_ctxt = context.MonContext() + addresses = ['10.5.4.1', '10.5.4.2', '10.5.4.3'] + + def _relation_get(attr, unit, rid): + if attr == 'ceph-public-address': + return addresses.pop() + elif attr == 'auth': + return 'cephx' + elif attr == 'rgw.testhost_key': + return 'testkey' + elif attr == 'fsid': + return 'testfsid' + + 
self.relation_get.side_effect = _relation_get + self.relation_ids.return_value = ['mon:6'] + self.related_units.return_value = ['ceph/0', 'ceph/1', 'ceph/2'] + self.multisite.plain_list = self.plain_list_stub + self.determine_api_port.return_value = 70 + expect = { + 'auth_supported': 'cephx', + 'hostname': 'testhost', + 'mon_hosts': '10.5.4.1 10.5.4.2 10.5.4.3', + 'old_auth': False, + 'systemd_rgw': True, + 'unit_public_ip': '10.255.255.255', + 'use_syslog': 'false', + 'loglevel': 1, + 'port': 70, + 'client_radosgw_gateway': {'rgw init timeout': 60}, + 'ipv6': False, + 'rgw_zone': 'default', + 'fsid': 'testfsid', + 'rgw_swift_versioning': False, + 'frontend': 'beast', + 'relaxed_s3_bucket_names': False, + 'rgw_zonegroup': 'zonegroup1', + 'rgw_realm': 'realmX', + 'behind_https_proxy': False, + 'virtual_hosted_bucket_enabled': True, + 'public_hostname': 'rgw.example.com', + } + self.assertEqual(expect, mon_ctxt()) + + +class ApacheContextTest(CharmTestCase): + + def setUp(self): + super(ApacheContextTest, self).setUp(context, TO_PATCH) + self.config.side_effect = self.test_config.get diff --git a/ceph-radosgw/unit_tests/test_ceph_radosgw_utils.py b/ceph-radosgw/unit_tests/test_ceph_radosgw_utils.py new file mode 100644 index 00000000..ceaf761e --- /dev/null +++ b/ceph-radosgw/unit_tests/test_ceph_radosgw_utils.py @@ -0,0 +1,326 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
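+
+# Unit tests for the charm's utils module: workload status assessment, the
+# pause/resume helpers, keyring readiness checks, per-unit cephx key
+# handling, restart-nonce tracking and the s3-app leader storage helpers.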
+ +from unittest.mock import ( + patch, + MagicMock, +) + +import utils + +from test_utils import CharmTestCase + +TO_PATCH = [ + 'application_version_set', + 'get_upstream_version', + 'https', + 'relation_ids', + 'relation_get', + 'related_units', + 'socket', + 'cmp_pkgrevno', + 'init_is_systemd', + 'unitdata', + 'config', + 'leader_get', + 'leader_set', +] + + +class CephRadosGWUtilTests(CharmTestCase): + def setUp(self): + super(CephRadosGWUtilTests, self).setUp(utils, TO_PATCH) + self.get_upstream_version.return_value = '10.2.2' + self.socket.gethostname.return_value = 'testhost' + self.config.side_effect = self.test_config.get + + def test_assess_status(self): + with patch.object(utils, 'assess_status_func') as asf: + callee = MagicMock() + asf.return_value = callee + utils.assess_status('test-config') + asf.assert_called_once_with('test-config') + callee.assert_called_once_with() + self.get_upstream_version.assert_called_with( + utils.VERSION_PACKAGE + ) + self.application_version_set.assert_called_with('10.2.2') + + @patch.object(utils, 'get_optional_interfaces') + @patch.object(utils, 'check_optional_config_and_relations') + @patch.object(utils, 'REQUIRED_INTERFACES') + @patch.object(utils, 'services') + @patch.object(utils, 'make_assess_status_func') + def test_assess_status_func(self, + make_assess_status_func, + services, + REQUIRED_INTERFACES, + check_optional_relations, + get_optional_interfaces): + services.return_value = 's1' + REQUIRED_INTERFACES.copy.return_value = {'int': ['test 1']} + get_optional_interfaces.return_value = {'opt': ['test 2']} + utils.assess_status_func('test-config') + # ports=None whilst port checks are disabled. + make_assess_status_func.assert_called_once_with( + 'test-config', + {'int': ['test 1'], 'opt': ['test 2']}, + charm_func=check_optional_relations, + services='s1', ports=None) + + def test_pause_unit_helper(self): + with patch.object(utils, '_pause_resume_helper') as prh: + utils.pause_unit_helper('random-config') + prh.assert_called_once_with(utils.pause_unit, 'random-config') + with patch.object(utils, '_pause_resume_helper') as prh: + utils.resume_unit_helper('random-config') + prh.assert_called_once_with(utils.resume_unit, 'random-config') + + @patch.object(utils, 'services') + def test_pause_resume_helper(self, services): + f = MagicMock() + services.return_value = 's1' + with patch.object(utils, 'assess_status_func') as asf: + asf.return_value = 'assessor' + utils._pause_resume_helper(f, 'some-config') + asf.assert_called_once_with('some-config') + # ports=None whilst port checks are disabled. 
+ f.assert_called_once_with('assessor', services='s1', ports=None) + + def _setup_relation_data(self, data): + self.relation_ids.return_value = data.keys() + self.related_units.side_effect = ( + lambda rid: data[rid].keys() + ) + self.relation_get.side_effect = ( + lambda attr, rid, unit: data[rid][unit].get(attr) + ) + + def test_systemd_based_radosgw_old_style(self): + _relation_data = { + 'mon:1': { + 'ceph-mon/0': { + 'radosgw_key': 'testkey', + }, + 'ceph-mon/1': { + 'radosgw_key': 'testkey', + }, + 'ceph-mon/2': { + 'radosgw_key': 'testkey', + }, + } + } + self._setup_relation_data(_relation_data) + self.assertFalse(utils.systemd_based_radosgw()) + + def test_systemd_based_radosgw_new_style(self): + _relation_data = { + 'mon:1': { + 'ceph-mon/0': { + 'rgw.testhost_key': 'testkey', + }, + 'ceph-mon/1': { + 'rgw.testhost_key': 'testkey', + }, + 'ceph-mon/2': { + 'rgw.testhost_key': 'testkey', + }, + } + } + self._setup_relation_data(_relation_data) + self.assertTrue(utils.systemd_based_radosgw()) + + @patch.object(utils.os.path, 'exists') + def test_ready_for_service(self, mock_exists): + mock_exists.return_value = True + _relation_data = { + 'mon:1': { + 'ceph-mon/0': { + 'rgw.testhost_key': 'testkey', + }, + 'ceph-mon/1': { + 'rgw.testhost_key': 'testkey', + }, + 'ceph-mon/2': { + 'rgw.testhost_key': 'testkey', + }, + } + } + self._setup_relation_data(_relation_data) + self.assertTrue(utils.ready_for_service()) + mock_exists.assert_called_with( + '/etc/ceph/ceph.client.rgw.testhost.keyring' + ) + + @patch.object(utils.os.path, 'exists') + def test_ready_for_service_legacy(self, mock_exists): + mock_exists.return_value = True + _relation_data = { + 'mon:1': { + 'ceph-mon/0': { + 'radosgw_key': 'testkey', + }, + 'ceph-mon/1': { + 'radosgw_key': 'testkey', + }, + 'ceph-mon/2': { + 'radosgw_key': 'testkey', + }, + } + } + self._setup_relation_data(_relation_data) + self.assertTrue(utils.ready_for_service()) + mock_exists.assert_called_with( + '/etc/ceph/keyring.rados.gateway' + ) + + @patch.object(utils.os.path, 'exists') + def test_ready_for_service_legacy_skip(self, mock_exists): + mock_exists.return_value = True + _relation_data = { + 'mon:1': { + 'ceph-mon/0': { + 'radosgw_key': 'testkey', + }, + 'ceph-mon/1': { + 'radosgw_key': 'testkey', + }, + 'ceph-mon/2': { + 'radosgw_key': 'testkey', + }, + } + } + self._setup_relation_data(_relation_data) + self.assertFalse(utils.ready_for_service(legacy=False)) + + def test_not_ready_for_service(self): + _relation_data = { + 'mon:1': { + 'ceph-mon/0': { + }, + 'ceph-mon/1': { + }, + 'ceph-mon/2': { + }, + } + } + self._setup_relation_data(_relation_data) + self.assertFalse(utils.ready_for_service()) + + @patch.object(utils.os.path, 'exists') + def test_ready_for_service_no_keyring(self, mock_exists): + mock_exists.return_value = False + _relation_data = { + 'mon:1': { + 'ceph-mon/0': { + 'rgw.testhost_key': 'testkey', + }, + 'ceph-mon/1': { + 'rgw.testhost_key': 'testkey', + }, + 'ceph-mon/2': { + 'rgw.testhost_key': 'testkey', + }, + } + } + self._setup_relation_data(_relation_data) + self.assertFalse(utils.ready_for_service()) + mock_exists.assert_called_with( + '/etc/ceph/ceph.client.rgw.testhost.keyring' + ) + + def test_request_per_unit_key(self): + self.init_is_systemd.return_value = False + self.cmp_pkgrevno.return_value = -1 + self.assertFalse(utils.request_per_unit_key()) + self.init_is_systemd.return_value = True + self.cmp_pkgrevno.return_value = 1 + self.assertTrue(utils.request_per_unit_key()) + self.init_is_systemd.return_value 
= False + self.cmp_pkgrevno.return_value = 1 + self.assertFalse(utils.request_per_unit_key()) + + self.cmp_pkgrevno.assert_called_with('radosgw', '12.2.0') + + @patch.object(utils, 'systemd_based_radosgw') + def test_service_name(self, mock_systemd_based_radosgw): + mock_systemd_based_radosgw.return_value = True + self.assertEqual(utils.service_name(), + 'ceph-radosgw@rgw.testhost') + mock_systemd_based_radosgw.return_value = False + self.assertEqual(utils.service_name(), + 'radosgw') + + def test_restart_nonce_changed_new(self): + _db_data = {} + mock_db = MagicMock() + mock_db.get.side_effect = lambda key: _db_data.get(key) + self.unitdata.kv.return_value = mock_db + self.assertTrue(utils.restart_nonce_changed('foobar')) + mock_db.set.assert_called_once_with('restart_nonce', + 'foobar') + mock_db.flush.assert_called_once_with() + + def test_restart_nonce_changed_existing(self): + _db_data = { + 'restart_nonce': 'foobar' + } + mock_db = MagicMock() + mock_db.get.side_effect = lambda key: _db_data.get(key) + self.unitdata.kv.return_value = mock_db + self.assertFalse(utils.restart_nonce_changed('foobar')) + mock_db.set.assert_not_called() + mock_db.flush.assert_not_called() + + def test_restart_nonce_changed_changed(self): + _db_data = { + 'restart_nonce': 'foobar' + } + mock_db = MagicMock() + mock_db.get.side_effect = lambda key: _db_data.get(key) + self.unitdata.kv.return_value = mock_db + self.assertTrue(utils.restart_nonce_changed('soofar')) + mock_db.set.assert_called_once_with('restart_nonce', + 'soofar') + mock_db.flush.assert_called_once_with() + + def test_multisite_deployment(self): + self.test_config.set('zone', 'testzone') + self.test_config.set('zonegroup', 'testzonegroup') + self.test_config.set('realm', 'testrealm') + self.assertTrue(utils.multisite_deployment()) + self.test_config.set('realm', None) + self.assertFalse(utils.multisite_deployment()) + + def test_listen_port(self): + self.https.return_value = False + self.assertEqual(80, utils.listen_port()) + self.https.return_value = True + self.assertEqual(443, utils.listen_port()) + self.test_config.set('port', 42) + self.assertEqual(42, utils.listen_port()) + + def test_set_s3_app(self): + self.leader_get.return_value = None + utils.set_s3_app('myapp', 'b', 'a', 's') + self.leader_set.assert_called_once_with({ + 's3-apps': + '{"myapp": {"bucket": "b", "access-key": "a", "secret-key": "s"}}' + }) + + def test_s3_app(self): + self.leader_get.return_value = '{"myapp": "a"}' + s3_info = utils.s3_app('myapp') + self.assertEqual(s3_info, 'a') + self.leader_get.assert_called_once_with('s3-apps') diff --git a/ceph-radosgw/unit_tests/test_hooks.py b/ceph-radosgw/unit_tests/test_hooks.py new file mode 100644 index 00000000..a12dd0ac --- /dev/null +++ b/ceph-radosgw/unit_tests/test_hooks.py @@ -0,0 +1,1142 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
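+
+# Unit tests for the charm's hook handlers: package install/upgrade,
+# config-changed, the ceph-mon relation, identity-service registration,
+# clustering, certificates, and the multisite primary/secondary relations.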
+import base64 +import json +import os +from unittest.mock import ( + patch, call, MagicMock, ANY +) + +from test_utils import ( + CharmTestCase, +) +from charmhelpers.contrib.openstack.ip import PUBLIC + +with patch('charmhelpers.contrib.hardening.harden.harden') as mock_dec: + mock_dec.side_effect = (lambda *dargs, **dkwargs: lambda f: + lambda *args, **kwargs: f(*args, **kwargs)) + with patch('charmhelpers.fetch.apt_install'): + with patch('utils.register_configs'): + import hooks as ceph_hooks + +TO_PATCH = [ + 'CONFIGS', + 'add_source', + 'apt_update', + 'apt_install', + 'apt_purge', + 'boto_client', + 'config', + 'cmp_pkgrevno', + 'execd_preinstall', + 'listen_port', + 'log', + 'open_port', + 'opened_ports', + 'os', + 'relation_ids', + 'relation_set', + 'relation_get', + 'related_units', + 'remote_service_name', + 'status_set', + 'subprocess', + 'sys', + 'generate_ha_relation_data', + 'get_relation_ip', + 'disable_unused_apache_sites', + 'service_reload', + 'service_stop', + 'service_restart', + 'service_pause', + 'service_resume', + 'service', + 'service_name', + 'socket', + 'restart_map', + 'systemd_based_radosgw', + 'request_per_unit_key', + 'get_certificate_request', + 'process_certificates', + 'filter_installed_packages', + 'filter_missing_packages', + 'ceph_utils', + 'multisite_deployment', + 'multisite', + 'ready_for_service', + 'utils', +] + + +# Stub Methods +def get_zonegroup_stub(): + # populate dummy zones info + zone_one = {} + zone_one['id'] = "test_zone_id_one" + zone_one['name'] = "testzone" + + zone_two = {} + zone_two['id'] = "test_zone_id_two" + zone_two['name'] = "testzone_two" + + # populate dummy zonegroup info + zonegroup = {} + zonegroup['name'] = "testzonegroup" + zonegroup['master_zone'] = "test_zone_id_one" + zonegroup['zones'] = [zone_one, zone_two] + return zonegroup + + +class CephRadosGWTests(CharmTestCase): + + def setUp(self): + super(CephRadosGWTests, self).setUp(ceph_hooks, TO_PATCH) + self.config.side_effect = self.test_config.get + self.test_config.set('source', 'distro') + self.test_config.set('key', 'secretkey') + self.test_config.set('use-syslog', False) + self.cmp_pkgrevno.return_value = 0 + self.service_name.return_value = 'radosgw' + self.request_per_unit_key.return_value = False + self.systemd_based_radosgw.return_value = False + self.filter_installed_packages.side_effect = lambda pkgs: pkgs + self.filter_missing_packages.side_effect = lambda pkgs: pkgs + self.multisite_deployment.return_value = False + + def test_upgrade_available(self): + _vers = { + 'distro': 'luminous', + 'cloud:bionic-rocky': 'mimic', + } + mock_config = MagicMock() + self.test_config.set('source', 'cloud:bionic-rocky') + mock_config.get.side_effect = self.test_config.get + mock_config.previous.return_value = 'distro' + self.config.side_effect = None + self.config.return_value = mock_config + self.ceph_utils.UPGRADE_PATHS = { + 'luminous': 'mimic', + } + self.ceph_utils.resolve_ceph_version.side_effect = ( + lambda v: _vers.get(v) + ) + self.assertTrue(ceph_hooks.upgrade_available()) + + @patch.object(ceph_hooks, 'upgrade_available') + def test_install_packages(self, upgrade_available): + mock_config = MagicMock() + mock_config.get.side_effect = self.test_config.get + mock_config.changed.return_value = True + self.config.side_effect = None + self.config.return_value = mock_config + upgrade_available.return_value = False + ceph_hooks.install_packages() + self.add_source.assert_called_with('distro', 'secretkey') + self.apt_update.assert_called_with(fatal=True) + 
self.apt_purge.assert_called_with(ceph_hooks.APACHE_PACKAGES)
+        self.apt_install.assert_called_with(ceph_hooks.PACKAGES,
+                                            fatal=True)
+        mock_config.changed.assert_called_with('source')
+        self.filter_installed_packages.assert_called_with(
+            ceph_hooks.PACKAGES
+        )
+        self.filter_missing_packages.assert_called_with(
+            ceph_hooks.APACHE_PACKAGES
+        )
+
+    @patch.object(ceph_hooks, 'upgrade_available')
+    def test_install_packages_upgrades(self, upgrade_available):
+        mock_config = MagicMock()
+        mock_config.get.side_effect = self.test_config.get
+        mock_config.changed.return_value = True
+        self.config.side_effect = None
+        self.config.return_value = mock_config
+        upgrade_available.return_value = True
+        ceph_hooks.install_packages()
+        self.add_source.assert_called_with('distro', 'secretkey')
+        self.apt_update.assert_called_with(fatal=True)
+        self.apt_purge.assert_called_with(ceph_hooks.APACHE_PACKAGES)
+        self.apt_install.assert_called_with(ceph_hooks.PACKAGES,
+                                            fatal=True)
+        mock_config.changed.assert_called_with('source')
+        self.filter_installed_packages.assert_not_called()
+        self.filter_missing_packages.assert_called_with(
+            ceph_hooks.APACHE_PACKAGES
+        )
+
+    @patch.object(ceph_hooks, 'leader_set')
+    @patch.object(ceph_hooks, 'is_leader')
+    def test_install(self, is_leader, leader_set):
+        _install_packages = self.patch('install_packages')
+        is_leader.return_value = True
+        ceph_hooks.install()
+        self.assertTrue(self.execd_preinstall.called)
+        self.assertTrue(_install_packages.called)
+        is_leader.assert_called_once()
+        leader_set.assert_called_once_with(namespace_tenants=False)
+        self.service_pause.assert_called_once_with('radosgw')
+
+    @patch.object(ceph_hooks, 'leader_set')
+    @patch.object(ceph_hooks, 'is_leader')
+    def test_install_with_namespacing(self, is_leader, leader_set):
+        _install_packages = self.patch('install_packages')
+        is_leader.return_value = True
+        self.test_config.set('namespace-tenants', True)
+        ceph_hooks.install()
+        self.assertTrue(self.execd_preinstall.called)
+        self.assertTrue(_install_packages.called)
+        is_leader.assert_called_once()
+        leader_set.assert_called_once_with(namespace_tenants=True)
+        self.service_pause.assert_called_once_with('radosgw')
+
+    @patch.object(ceph_hooks, 'certs_joined')
+    @patch.object(ceph_hooks, 'update_nrpe_config')
+    def test_config_changed(self, update_nrpe_config, mock_certs_joined):
+        _install_packages = self.patch('install_packages')
+        _relations = {
+            'certificates': ['certificates:1']
+        }
+        self.relation_ids.side_effect = lambda name: _relations.get(name, [])
+        ceph_hooks.config_changed()
+        self.assertTrue(_install_packages.called)
+        self.CONFIGS.write_all.assert_called_with()
+        update_nrpe_config.assert_called_with()
+        mock_certs_joined.assert_called_once_with('certificates:1')
+
+    @patch.object(ceph_hooks, 'service_name')
+    @patch.object(ceph_hooks, 'service_restart')
+    @patch.object(ceph_hooks, 'certs_joined')
+    @patch.object(ceph_hooks, 'update_nrpe_config')
+    def test_config_changed_upgrade(self, update_nrpe_config,
+                                    mock_certs_joined, mock_service_restart,
+                                    mock_service_name):
+        _install_packages = self.patch('install_packages')
+        _install_packages.return_value = True
+        mock_service_name.return_value = 'radosgw@localhost'
+        _relations = {
+            'certificates': ['certificates:1']
+        }
+        self.relation_ids.side_effect = lambda name: _relations.get(name, [])
+        ceph_hooks.config_changed()
+        self.assertTrue(_install_packages.called)
+        self.CONFIGS.write_all.assert_called_with()
+        update_nrpe_config.assert_called_with()
+
mock_certs_joined.assert_called_once_with('certificates:1') + mock_service_restart.assert_called_once_with('radosgw@localhost') + + @patch.object(ceph_hooks, 'is_request_complete', + lambda *args, **kwargs: True) + @patch.object(ceph_hooks, 'is_leader') + @patch('charmhelpers.contrib.openstack.ip.resolve_address') + @patch('charmhelpers.contrib.openstack.ip.config') + def test_mon_relation(self, _config, _resolve_address, is_leader): + _ceph = self.patch('ceph') + _ceph.import_radosgw_key.return_value = True + is_leader.return_value = True + self.relation_get.return_value = 'seckey' + self.multisite.list_zones.side_effect = [ + [], # at first the default zone doesn't exist, then... + ['default'], # ... it got created + ] + self.socket.gethostname.return_value = 'testinghostname' + ceph_hooks.mon_relation() + self.relation_set.assert_not_called() + self.service_resume.assert_called_once_with('radosgw') + _ceph.import_radosgw_key.assert_called_with('seckey', + name='rgw.testinghostname') + self.CONFIGS.write_all.assert_called_with() + + @patch.object(ceph_hooks, 'is_request_complete', + lambda *args, **kwargs: True) + @patch.object(ceph_hooks, 'is_leader') + @patch('charmhelpers.contrib.openstack.ip.resolve_address') + @patch('charmhelpers.contrib.openstack.ip.config') + def test_mon_relation_request_key(self, _config, + _resolve_address, is_leader): + _ceph = self.patch('ceph') + _ceph.import_radosgw_key.return_value = True + is_leader.return_value = True + self.relation_get.return_value = 'seckey' + self.multisite.list_zones.side_effect = [ + [], # at first the default zone doesn't exist, then... + ['default'], # ... it got created + ] + self.socket.gethostname.return_value = 'testinghostname' + self.request_per_unit_key.return_value = True + ceph_hooks.mon_relation() + self.relation_set.assert_called_with( + relation_id=None, + key_name='rgw.testinghostname' + ) + self.service_resume.assert_called_once_with('radosgw') + _ceph.import_radosgw_key.assert_called_with('seckey', + name='rgw.testinghostname') + self.CONFIGS.write_all.assert_called_with() + + @patch.object(ceph_hooks, 'is_request_complete', + lambda *args, **kwargs: True) + @patch.object(ceph_hooks, 'is_leader') + @patch('charmhelpers.contrib.openstack.ip.resolve_address') + @patch('charmhelpers.contrib.openstack.ip.config') + def test_mon_relation_nokey(self, _config, + _resolve_address, is_leader): + _ceph = self.patch('ceph') + _ceph.import_radosgw_key.return_value = False + self.relation_get.return_value = None + is_leader.return_value = True + self.multisite.list_zones.side_effect = [ + [], # at first the default zone doesn't exist, then... + ['default'], # ... 
it got created + ] + ceph_hooks.mon_relation() + self.assertFalse(_ceph.import_radosgw_key.called) + self.service_resume.assert_not_called() + self.CONFIGS.write_all.assert_called_with() + + @patch.object(ceph_hooks, 'send_request_if_needed') + @patch.object(ceph_hooks, 'is_request_complete', + lambda *args, **kwargs: False) + def test_mon_relation_send_broker_request(self, + mock_send_request_if_needed): + _ceph = self.patch('ceph') + _ceph.import_radosgw_key.return_value = False + self.relation_get.return_value = 'seckey' + ceph_hooks.mon_relation() + self.service_resume.assert_not_called() + self.assertFalse(_ceph.import_radosgw_key.called) + self.assertFalse(self.CONFIGS.called) + self.assertTrue(mock_send_request_if_needed.called) + + def test_gateway_relation(self): + self.get_relation_ip.return_value = '10.0.0.1' + self.listen_port.return_value = 80 + ceph_hooks.gateway_relation() + self.relation_set.assert_called_with(hostname='10.0.0.1', port=80) + + @patch.object(ceph_hooks, "canonical_url") + def test_object_store_relation(self, _canonical_url): + relation_data = { + "swift-url": "http://radosgw:80" + } + self.listen_port.return_value = 80 + _canonical_url.return_value = "http://radosgw" + ceph_hooks.object_store_joined() + self.relation_set.assert_called_with( + relation_id=None, + relation_settings=relation_data) + + @patch.object(ceph_hooks, 'leader_get') + @patch('charmhelpers.contrib.openstack.ip.service_name', + lambda *args: 'ceph-radosgw') + @patch('charmhelpers.contrib.openstack.ip.resolve_address', + lambda *args: 'myserv') + @patch('charmhelpers.contrib.openstack.ip.config') + def test_identity_joined_early_version(self, _config, _leader_get): + self.cmp_pkgrevno.return_value = -1 + _leader_get.return_value = 'False' + self.listen_port.return_value = 80 + ceph_hooks.identity_joined() + self.sys.exit.assert_called_with(1) + + @patch.object(ceph_hooks, 'leader_get') + @patch('charmhelpers.contrib.openstack.ip.service_name', + lambda *args: 'ceph-radosgw') + @patch('charmhelpers.contrib.openstack.ip.resolve_address') + @patch('charmhelpers.contrib.openstack.ip.config') + def test_identity_joined(self, _config, _resolve_address, _leader_get): + + self.listen_port.return_value = 80 + + def _test_identify_joined(expected): + self.related_units = ['unit/0'] + self.cmp_pkgrevno.return_value = 1 + _resolve_address.return_value = 'myserv' + _config.side_effect = self.test_config.get + self.test_config.set('region', 'region1') + _leader_get.return_value = 'False' + ceph_hooks.identity_joined(relid='rid') + self.relation_set.assert_has_calls([ + call(swift_service='swift', + swift_region='region1', + swift_public_url='http://myserv:80/swift/v1', + swift_internal_url='http://myserv:80/swift/v1', + swift_admin_url='http://myserv:80/swift', + requested_roles=expected, + relation_id='rid'), + call(s3_service='s3', + s3_region='region1', + s3_public_url='http://myserv:80/', + s3_internal_url='http://myserv:80/', + s3_admin_url='http://myserv:80/', + relation_id='rid') + ]) + + inputs = [{'operator': 'foo', 'admin': 'bar', 'expected': 'foo,bar'}, + {'operator': 'foo', 'expected': 'foo'}, + {'admin': 'bar', 'expected': 'bar'}, + {'expected': ''}] + for input in inputs: + self.test_config.set('operator-roles', input.get('operator', '')) + self.test_config.set('admin-roles', input.get('admin', '')) + _test_identify_joined(input['expected']) + + @patch.object(ceph_hooks, 'leader_get') + @patch('charmhelpers.contrib.openstack.ip.service_name', + lambda *args: 'ceph-radosgw') + 
@patch('charmhelpers.contrib.openstack.ip.resolve_address') + @patch('charmhelpers.contrib.openstack.ip.config') + def test_identity_joined_namespaced(self, _config, + _resolve_address, _leader_get): + _leader_get.return_value = True + + def _test_identify_joined(expected): + self.related_units = ['unit/0'] + self.cmp_pkgrevno.return_value = 1 + self.listen_port.return_value = 80 + _resolve_address.return_value = 'myserv' + _config.side_effect = self.test_config.get + self.test_config.set('region', 'region1') + _leader_get.return_value = 'True' + ceph_hooks.identity_joined(relid='rid') + self.relation_set.assert_has_calls([ + call(swift_service='swift', + swift_region='region1', + swift_public_url=( + 'http://myserv:80/swift/v1/AUTH_$(project_id)s'), + swift_internal_url=( + 'http://myserv:80/swift/v1/AUTH_$(project_id)s'), + swift_admin_url='http://myserv:80/swift', + requested_roles=expected, + relation_id='rid'), + call(s3_service='s3', + s3_region='region1', + s3_public_url='http://myserv:80/', + s3_internal_url='http://myserv:80/', + s3_admin_url='http://myserv:80/', + relation_id='rid') + ]) + + inputs = [{'operator': 'foo', 'admin': 'bar', 'expected': 'foo,bar'}, + {'operator': 'foo', 'expected': 'foo'}, + {'admin': 'bar', 'expected': 'bar'}, + {'expected': ''}] + for input in inputs: + self.test_config.set('operator-roles', input.get('operator', '')) + self.test_config.set('admin-roles', input.get('admin', '')) + _test_identify_joined(input['expected']) + + @patch.object(ceph_hooks, 'leader_get') + @patch('charmhelpers.contrib.openstack.ip.service_name', + lambda *args: 'ceph-radosgw') + @patch('charmhelpers.contrib.openstack.ip.is_clustered') + @patch('charmhelpers.contrib.openstack.ip.unit_get') + @patch('charmhelpers.contrib.openstack.ip.config') + def test_identity_joined_public_name(self, _config, _unit_get, + _is_clustered, _leader_get): + self.related_units = ['unit/0'] + _config.side_effect = self.test_config.get + self.test_config.set('os-public-hostname', 'files.example.com') + _unit_get.return_value = 'myserv' + _is_clustered.return_value = False + _leader_get.return_value = 'False' + self.listen_port.return_value = 80 + ceph_hooks.identity_joined(relid='rid') + self.relation_set.assert_has_calls([ + call(swift_service='swift', + swift_region='RegionOne', + swift_public_url='http://files.example.com:80/swift/v1', + swift_internal_url='http://myserv:80/swift/v1', + swift_admin_url='http://myserv:80/swift', + requested_roles='Member,member,Admin', + relation_id='rid'), + call(s3_service='s3', + s3_region='RegionOne', + s3_public_url='http://files.example.com:80/', + s3_internal_url='http://myserv:80/', + s3_admin_url='http://myserv:80/', + relation_id='rid') + ]) + + @patch.object(ceph_hooks, 'identity_joined') + def test_identity_changed(self, mock_identity_joined): + ceph_hooks.identity_changed() + self.CONFIGS.write_all.assert_called_with() + self.assertTrue(mock_identity_joined.called) + + @patch('charmhelpers.contrib.openstack.ip.is_clustered') + @patch('charmhelpers.contrib.openstack.ip.unit_get') + @patch('charmhelpers.contrib.openstack.ip.config') + def test_canonical_url_ipv6(self, _config, _unit_get, _is_clustered): + ipv6_addr = '2001:db8:85a3:8d3:1319:8a2e:370:7348' + _config.side_effect = self.test_config.get + _unit_get.return_value = ipv6_addr + _is_clustered.return_value = False + self.assertEqual(ceph_hooks.canonical_url({}, PUBLIC), + 'http://[%s]' % ipv6_addr) + + def test_cluster_joined(self): + self.get_relation_ip.side_effect = ['10.0.0.1', + 
'10.0.1.1', + '10.0.2.1', + '10.0.3.1'] + self.test_config.set('os-public-network', '10.0.0.0/24') + self.test_config.set('os-admin-network', '10.0.1.0/24') + self.test_config.set('os-internal-network', '10.0.2.0/24') + + ceph_hooks.cluster_joined() + self.relation_set.assert_has_calls( + [call(relation_id=None, + relation_settings={ + 'admin-address': '10.0.0.1', + 'public-address': '10.0.2.1', + 'internal-address': '10.0.1.1', + 'private-address': '10.0.3.1'})]) + + @patch.object(ceph_hooks, 'certs_changed') + def test_cluster_changed(self, mock_certs_changed): + _id_joined = self.patch('identity_joined') + _relations = { + 'identity-service': ['rid'], + 'certificates': ['certificates:1'], + } + self.relation_ids.side_effect = lambda name: _relations.get(name) + self.related_units.return_value = ['vault/0', 'vault/1'] + ceph_hooks.cluster_changed() + self.CONFIGS.write_all.assert_called_with() + _id_joined.assert_called_with(relid='rid') + mock_certs_changed.assert_has_calls([ + call('certificates:1', 'vault/0'), + call('certificates:1', 'vault/1') + ]) + + def test_ha_relation_joined(self): + self.generate_ha_relation_data.return_value = { + 'test': 'data' + } + ceph_hooks.ha_relation_joined(relation_id='ha:1') + self.relation_set.assert_called_with( + relation_id='ha:1', + test='data' + ) + + def test_ha_relation_changed(self): + _id_joined = self.patch('identity_joined') + self.relation_get.return_value = True + self.relation_ids.return_value = ['rid'] + ceph_hooks.ha_relation_changed() + _id_joined.assert_called_with(relid='rid') + + def test_certs_joined(self): + self.get_certificate_request.return_value = {'foo': 'baa'} + ceph_hooks.certs_joined('certificates:1') + self.relation_set.assert_called_once_with( + relation_id='certificates:1', + relation_settings={'foo': 'baa'} + ) + self.get_certificate_request.assert_called_once_with() + + @patch.object(ceph_hooks, 'configure_https') + def test_certs_changed(self, mock_configure_https): + ceph_hooks.certs_changed('certificates:1', 'vault/0') + self.process_certificates.assert_called_once_with( + 'ceph-radosgw', + 'certificates:1', + 'vault/0' + ) + mock_configure_https.assert_called_once_with() + + @patch.object(ceph_hooks, 'canonical_url') + @patch.object(ceph_hooks, 'is_leader') + def test_radosgw_user_changed(self, is_leader, canonical_url): + relation_data = { + 'radosgw-user:3': {'system-role': 'false'}, + 'radosgw-user:5': {'system-role': 'true'}} + user = { + 'juju-radosgw-user-3': ('access1', 'key1'), + 'juju-radosgw-user-5-system': ('access2', 'key2')} + self.ready_for_service.return_value = True + is_leader.return_value = True + self.remote_service_name.return_value = 'ceph-dashboard' + canonical_url.return_value = 'http://radosgw' + self.listen_port.return_value = 80 + self.socket.gethostname.return_value = 'testinghostname' + self.relation_ids.return_value = relation_data.keys() + self.relation_get.side_effect = lambda rid, app: relation_data[rid] + self.multisite.list_users.return_value = ['juju-radosgw-user-3'] + self.multisite.get_user_creds.side_effect = lambda u: user[u] + self.multisite.create_user.side_effect = lambda u, system_user: user[u] + ceph_hooks.radosgw_user_changed() + expected = [ + call( + app='ceph-dashboard', + relation_id='radosgw-user:3', + relation_settings={ + 'uid': 'juju-radosgw-user-3', + 'access-key': 'access1', + 'secret-key': 'key1'}), + call( + app='ceph-dashboard', + relation_id='radosgw-user:5', + relation_settings={ + 'uid': 'juju-radosgw-user-5-system', + 'access-key': 'access2', + 
'secret-key': 'key2'}), + call( + relation_id='radosgw-user:3', + relation_settings={ + 'internal-url': 'http://radosgw:80', + 'daemon-id': 'testinghostname'}), + call( + relation_id='radosgw-user:5', + relation_settings={ + 'internal-url': 'http://radosgw:80', + 'daemon-id': 'testinghostname'})] + self.relation_set.assert_has_calls( + expected, + any_order=True) + + @patch.object(ceph_hooks, 'canonical_url') + @patch.object(ceph_hooks, 'is_leader') + @patch.object(ceph_hooks, 's3_app') + @patch.object(ceph_hooks, 'set_s3_app') + def test_s3_relation_changed( + self, set_s3_app, s3_app, is_leader, canonical_url + ): + self.ready_for_service.return_value = True + self.remote_service_name.return_value = 'mys3app' + is_leader.return_value = True + canonical_url.return_value = 'http://radosgw' + self.multisite.create_user.return_value = ('access1', 'secret1') + s3_app.return_value = None + ceph_hooks.s3_relation_changed('mys3app:1') + + self.relation_set.assert_called_once_with( + app='mys3app', relation_settings=ANY + ) + self.boto_client.return_value.create_bucket.assert_called_once_with( + Bucket=ANY + ) + + def test_cert_rel_ca_app(self): + """Test getting back ca material from the certificates relation. + + This tests the case that the certificates relation has an app + databag with ca material. + """ + relation_data = { + 'certificates/0': { + 'ca': 'ca material', + 'chain': 'chain material', + } + } + self.relation_ids.return_value = relation_data.keys() + self.relation_get.side_effect = lambda rid, app: relation_data[rid] + ca_chain = ceph_hooks.cert_rel_ca() + self.assertEqual(len(ca_chain), 2) + self.assertEqual(base64.b64decode(ca_chain[0]), b'chain material') + self.assertEqual(base64.b64decode(ca_chain[1]), b'ca material') + + def test_cert_rel_ca_unit(self): + """Test getting back ca material from the certificates relation. + + This tests the case that the certificates relation has its ca + material in the unit relation, and only sets ca but not chain. 
+ """ + relation_data = { + 'certificates/0': { + 'ca': 'ca material', + } + } + self.relation_ids.return_value = relation_data.keys() + self.related_units.return_value = ['certificates/0', 'certificates/1'] + self.relation_get.side_effect = [None, relation_data['certificates/0']] + ca_chain = ceph_hooks.cert_rel_ca() + self.assertEqual(len(ca_chain), 1) + self.assertEqual(base64.b64decode(ca_chain[0]), b'ca material') + + def test_update_s3_ca_info(self): + """Test updating the ca info for the s3 relation.""" + self.utils.all_s3_apps.return_value = { + 's3app': {'bucket': 'bucketname'}, + } + self.relation_ids.return_value = ['s3/0'] + ceph_hooks.update_s3_ca_info('foo_ca') + self.relation_set.assert_called_once_with( + rid='s3/0', app='s3app', + relation_settings={ + 'bucket': 'bucketname', + 'tls-ca-chain': 'foo_ca' + } + ) + + +class MiscMultisiteTests(CharmTestCase): + + TO_PATCH = [ + 'restart_nonce_changed', + 'relation_ids', + 'related_units', + 'leader_get', + 'is_leader', + 'primary_relation_joined', + 'primary_relation_changed', + 'secondary_relation_changed', + 'service_restart', + 'service_name', + 'multisite' + ] + + _relation_ids = { + 'primary': ['primary:1'], + 'secondary': ['secondary:1'], + } + + _related_units = { + 'primary:1': ['rgw/0', 'rgw/1'], + 'secondary:1': ['rgw-s/0', 'rgw-s/1'], + } + + def setUp(self): + super(MiscMultisiteTests, self).setUp(ceph_hooks, + self.TO_PATCH) + self.relation_ids.side_effect = ( + lambda endpoint: self._relation_ids.get(endpoint) or [] + ) + self.related_units.side_effect = ( + lambda rid: self._related_units.get(rid) or [] + ) + self.service_name.return_value = 'rgw@hostname' + + def test_leader_settings_changed(self): + self.restart_nonce_changed.return_value = True + self.is_leader.return_value = False + ceph_hooks.leader_settings_changed() + self.service_restart.assert_called_once_with('rgw@hostname') + self.primary_relation_joined.assert_called_once_with('primary:1') + + def test_process_multisite_relations(self): + ceph_hooks.process_multisite_relations() + self.primary_relation_joined.assert_called_once_with('primary:1') + self.assertEqual(self.primary_relation_changed.call_count, 2) + self.primary_relation_changed.assert_has_calls([ + call('primary:1', 'rgw/0'), + call('primary:1', 'rgw/1'), + ]) + self.assertEqual(self.secondary_relation_changed.call_count, 2) + self.secondary_relation_changed.assert_has_calls([ + call('secondary:1', 'rgw-s/0'), + call('secondary:1', 'rgw-s/1'), + ]) + + +class CephRadosMultisiteTests(CharmTestCase): + + TO_PATCH = [ + 'ready_for_service', + 'canonical_url', + 'relation_set', + 'relation_get', + 'leader_get', + 'listen_port', + 'config', + 'is_leader', + 'multisite', + 'leader_set', + 'service_restart', + 'service_name', + 'log', + 'multisite_deployment', + 'systemd_based_radosgw', + ] + + def setUp(self): + super(CephRadosMultisiteTests, self).setUp(ceph_hooks, + self.TO_PATCH) + self.config.side_effect = self.test_config.get + self.ready_for_service.return_value = True + self.canonical_url.return_value = 'http://rgw' + self.service_name.return_value = 'rgw@hostname' + self.multisite_deployment.return_value = True + self.systemd_based_radosgw.return_value = True + + +class PrimaryMultisiteTests(CephRadosMultisiteTests): + + _complete_config = { + 'realm': 'testrealm', + 'zonegroup': 'testzonegroup', + 'zone': 'testzone', + } + + _leader_data = { + 'access_key': 'mykey', + 'secret': 'mysecret', + } + + _leader_data_done = { + 'access_key': 'mykey', + 'secret': 'mysecret', + 'restart_nonce': 
'foobar', + } + + def test_primary_relation_joined_missing_config(self): + ceph_hooks.primary_relation_joined('primary:1') + self.config.assert_has_calls([ + call('realm'), + call('zonegroup'), + call('zone'), + ]) + self.relation_set.assert_not_called() + + def test_primary_relation_joined_create_everything(self): + for k, v in self._complete_config.items(): + self.test_config.set(k, v) + self.listen_port.return_value = 80 + self.is_leader.return_value = True + self.leader_get.side_effect = lambda attr: self._leader_data.get(attr) + self.multisite.list_realms.return_value = [] + self.multisite.list_zonegroups.return_value = [] + self.multisite.list_zones.return_value = [] + self.multisite.list_users.return_value = [] + self.multisite.create_system_user.return_value = ( + 'mykey', 'mysecret', + ) + ceph_hooks.primary_relation_joined('primary:1') + self.config.assert_has_calls([ + call('realm'), + call('zonegroup'), + call('zone'), + ]) + self.multisite.create_realm.assert_called_once_with( + 'testrealm', + default=True, + ) + self.multisite.create_zonegroup.assert_called_once_with( + 'testzonegroup', + endpoints=['http://rgw:80'], + default=True, + master=True, + realm='testrealm', + ) + self.multisite.create_zone.assert_called_once_with( + 'testzone', + endpoints=['http://rgw:80'], + default=True, + master=True, + zonegroup='testzonegroup', + ) + self.multisite.create_system_user.assert_called_once_with( + ceph_hooks.MULTISITE_SYSTEM_USER + ) + self.multisite.modify_zone.assert_called_once_with( + 'testzone', + access_key='mykey', + secret='mysecret', + ) + self.multisite.update_period.assert_has_calls([ + call(fatal=False), + call(zonegroup='testzonegroup', zone='testzone'), + ]) + self.service_restart.assert_called_once_with('rgw@hostname') + self.leader_set.assert_has_calls([ + call(access_key='mykey', + secret='mysecret'), + call(restart_nonce=ANY), + ]) + self.relation_set.assert_called_with( + relation_id='primary:1', + access_key='mykey', + secret='mysecret', + ) + + def test_primary_relation_joined_create_nothing(self): + for k, v in self._complete_config.items(): + self.test_config.set(k, v) + self.is_leader.return_value = True + self.leader_get.side_effect = ( + lambda attr: self._leader_data_done.get(attr) + ) + self.multisite.list_realms.return_value = ['testrealm'] + self.multisite.list_zonegroups.return_value = ['testzonegroup'] + self.multisite.list_zones.return_value = ['testzone'] + self.multisite.list_users.return_value = [ + ceph_hooks.MULTISITE_SYSTEM_USER + ] + ceph_hooks.primary_relation_joined('primary:1') + self.multisite.create_realm.assert_not_called() + self.multisite.create_zonegroup.assert_not_called() + self.multisite.create_zone.assert_not_called() + self.multisite.create_system_user.assert_not_called() + self.multisite.update_period.assert_not_called() + self.service_restart.assert_not_called() + self.leader_set.assert_not_called() + + def test_primary_relation_joined_not_leader(self): + for k, v in self._complete_config.items(): + self.test_config.set(k, v) + self.listen_port.return_value = 80 + self.is_leader.return_value = False + self.leader_get.side_effect = lambda attr: self._leader_data.get(attr) + ceph_hooks.primary_relation_joined('primary:1') + self.relation_set.assert_called_once_with( + relation_id='primary:1', + realm='testrealm', + zonegroup='testzonegroup', + url='http://rgw:80', + access_key='mykey', + secret='mysecret', + ) + self.multisite.list_realms.assert_not_called() + + def test_primary_relation_changed_sync_policy_state_unset(self): 
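+        # An empty sync-policy-state should make the handler return early,
+        # before any relation data or sync groups are touched.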
+ self.is_leader.return_value = True + self.test_config.set('sync-policy-state', '') + + ceph_hooks.primary_relation_changed('primary:1') + + self.is_leader.assert_called_once() + self.ready_for_service.assert_called_once_with(legacy=False) + self.config.assert_called_once_with('sync-policy-state') + + def test_primary_relation_changed_sync_rel_data_incomplete(self): + self.is_leader.return_value = True + self.test_config.set('sync-policy-state', 'allowed') + self.relation_get.return_value = {'zone': 'secondary'} + + ceph_hooks.primary_relation_changed('primary:1', 'rgw/0') + + self.is_leader.assert_called_once() + self.ready_for_service.assert_called_once_with(legacy=False) + self.config.assert_called_once_with('sync-policy-state') + self.relation_get.assert_called_once_with(rid='primary:1', + unit='rgw/0') + + def test_primary_relation_changed(self): + self.is_leader.return_value = True + configs = { + 'sync-policy-state': 'allowed', + 'zonegroup': 'testzonegroup', + 'zone': 'zone_a', + } + for k, v in configs.items(): + self.test_config.set(k, v) + self.relation_get.return_value = { + 'zone': 'zone_b', + 'sync_policy_flow_type': 'symmetrical', + # this should force flow type to directional, and ignore the value + # from the relation data. + 'zone_tier_type': 'cloud', + } + self.multisite.is_sync_group_update_needed.return_value = True + group_test_data_file = os.path.join( + os.path.dirname(__file__), 'testdata', 'test_get_sync_group.json') + with open(group_test_data_file, 'r') as f: + self.multisite.get_sync_group.return_value = json.loads(f.read()) + + ceph_hooks.primary_relation_changed('primary:1', 'rgw/0') + + self.is_leader.assert_called_once() + self.ready_for_service.assert_called_once_with(legacy=False) + self.config.assert_has_calls([ + call('sync-policy-state'), + call('zonegroup'), + call('zone'), + ]) + self.relation_get.assert_called_once_with(rid='primary:1', + unit='rgw/0') + self.multisite.is_sync_group_update_needed.assert_called_once_with( + group_id=ceph_hooks.MULTISITE_DEFAULT_SYNC_GROUP_ID, + flow_id='zone_a-zone_b', + pipe_id='zone_a-zone_b', + source_zone='zone_a', + dest_zone='zone_b', + desired_status='allowed', + desired_flow_type=self.multisite.SYNC_FLOW_DIRECTIONAL) + self.multisite.create_sync_group.assert_called_once_with( + group_id=ceph_hooks.MULTISITE_DEFAULT_SYNC_GROUP_ID, + status='allowed') + self.multisite.create_sync_group_flow.assert_called_once_with( + group_id=ceph_hooks.MULTISITE_DEFAULT_SYNC_GROUP_ID, + flow_id='zone_a-zone_b', + flow_type=self.multisite.SYNC_FLOW_DIRECTIONAL, + source_zone='zone_a', dest_zone='zone_b') + self.multisite.create_sync_group_pipe.assert_called_once_with( + group_id=ceph_hooks.MULTISITE_DEFAULT_SYNC_GROUP_ID, + pipe_id='zone_a-zone_b', + source_zones=['zone_a'], dest_zones=['zone_b']) + self.multisite.update_period.assert_called_once_with( + zonegroup='testzonegroup', zone='zone_a') + self.service_restart.assert_called_once_with('rgw@hostname') + self.leader_set.assert_called_once_with(restart_nonce=ANY) + + @patch.object(json, 'loads') + def test_multisite_relation_departed(self, json_loads): + for k, v in self._complete_config.items(): + self.test_config.set(k, v) + self.is_leader.return_value = True + # Multisite is configured at first but then disabled. + self.multisite.is_multisite_configured.side_effect = [True, False] + self.multisite.get_zonegroup_info.return_value = get_zonegroup_stub() + # json.loads() raises TypeError for mock objects. 
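+        # Returning an empty list keeps the departed handler deterministic.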
+        json_loads.return_value = []
+        ceph_hooks.multisite_relation_departed()
+
+        self.multisite.modify_zone.assert_called_once_with(
+            'testzone', default=True, master=True, zonegroup='testzonegroup'
+        )
+        self.multisite.update_period.assert_called_once_with(
+            fatal=True, zonegroup='testzonegroup',
+            zone='testzone', realm='testrealm'
+        )
+
+
+class SecondaryMultisiteTests(CephRadosMultisiteTests):
+
+    _complete_config = {
+        'realm': 'testrealm',
+        'zonegroup': 'testzonegroup',
+        'zone': 'testzone2',
+        'sync-policy-flow-type': 'symmetrical',
+    }
+
+    _test_relation = {
+        'realm': 'testrealm',
+        'zonegroup': 'testzonegroup',
+        'access_key': 'anotherkey',
+        'secret': 'anothersecret',
+        'url': 'http://primary:80'
+    }
+
+    _test_bad_relation = {
+        'realm': 'anotherrealm',
+        'zonegroup': 'anotherzg',
+        'access_key': 'anotherkey',
+        'secret': 'anothersecret',
+        'url': 'http://primary:80'
+    }
+
+    def test_secondary_relation_changed(self):
+        for k, v in self._complete_config.items():
+            self.test_config.set(k, v)
+        self.is_leader.return_value = True
+        self.listen_port.return_value = 80
+        self.leader_get.return_value = None
+        self.relation_get.return_value = self._test_relation
+        self.multisite.list_realms.return_value = []
+        self.multisite.list_zones.return_value = []
+        self.multisite.check_cluster_has_buckets.return_value = False
+        ceph_hooks.secondary_relation_changed('secondary:1', 'rgw/0')
+        self.config.assert_has_calls([
+            call('realm'),
+            call('zonegroup'),
+            call('zone'),
+        ])
+        self.multisite.pull_realm.assert_called_once_with(
+            url=self._test_relation['url'],
+            access_key=self._test_relation['access_key'],
+            secret=self._test_relation['secret'],
+        )
+        self.multisite.pull_period.assert_called_with(
+            url=self._test_relation['url'],
+            access_key=self._test_relation['access_key'],
+            secret=self._test_relation['secret'],
+        )
+        self.multisite.set_default_realm.assert_called_once_with(
+            'testrealm'
+        )
+        self.multisite.create_zone.assert_called_once_with(
+            'testzone2',
+            endpoints=['http://rgw:80'],
+            default=False,
+            master=False,
+            zonegroup='testzonegroup',
+            access_key=self._test_relation['access_key'],
+            secret=self._test_relation['secret'],
+        )
+        self.multisite.update_period.assert_has_calls([
+            call(fatal=False),
+            call(zonegroup='testzonegroup', zone='testzone2'),
+        ])
+        self.service_restart.assert_called_once()
+        self.leader_set.assert_called_once_with(restart_nonce=ANY)
+        self.relation_set.assert_has_calls([
+            call(
+                relation_id='secondary:1',
+                sync_policy_flow_type='symmetrical',
+            ),
+            call(
+                relation_id='secondary:1',
+                zone='testzone2',
+            ),
+        ])
+
+    def test_secondary_relation_changed_incomplete_relation(self):
+        for k, v in self._complete_config.items():
+            self.test_config.set(k, v)
+        self.is_leader.return_value = True
+        self.relation_get.return_value = {}
+        ceph_hooks.secondary_relation_changed('secondary:1', 'rgw/0')
+        self.config.assert_not_called()
+        self.relation_set.assert_not_called()
+
+    def test_secondary_relation_changed_mismatching_config(self):
+        for k, v in self._complete_config.items():
+            self.test_config.set(k, v)
+        self.is_leader.return_value = True
+        self.relation_get.return_value = self._test_bad_relation
+        ceph_hooks.secondary_relation_changed('secondary:1', 'rgw/0')
+        self.config.assert_has_calls([
+            call('realm'),
+            call('zonegroup'),
+            call('zone'),
+        ])
+        self.multisite.list_realms.assert_not_called()
+        self.relation_set.assert_not_called()
+
+    def test_secondary_relation_changed_not_leader(self):
+        self.is_leader.return_value = False
+
ceph_hooks.secondary_relation_changed('secondary:1', 'rgw/0') + self.relation_get.assert_not_called() + self.relation_set.assert_not_called() + + @patch.object(ceph_hooks, 'apt_install') + @patch.object(ceph_hooks, 'services') + @patch.object(ceph_hooks, 'nrpe') + def test_update_nrpe_config(self, nrpe, services, apt_install): + # Setup Mocks + nrpe.get_nagios_hostname.return_value = 'foo' + nrpe.get_nagios_unit_name.return_value = 'bar' + nrpe_setup = MagicMock() + nrpe.NRPE.return_value = nrpe_setup + services.return_value = ['baz', 'qux'] + + # Call the routine + ceph_hooks.update_nrpe_config() + + # Verify calls + apt_install.assert_called() + nrpe.get_nagios_hostname.assert_called() + nrpe.get_nagios_unit_name.assert_called() + nrpe.copy_nrpe_checks.assert_called() + nrpe.remove_check.assert_not_called() + nrpe.add_init_service_checks.assert_called_with(nrpe_setup, + ['baz', 'qux'], 'bar') + nrpe.add_haproxy_checks.assert_called_with(nrpe_setup, 'bar') + nrpe_setup.write.assert_called() + + # Verify that remove_check is called appropriately if we pass + # checks_to_remove + ceph_hooks.update_nrpe_config(checks_to_remove=['quux', 'quuux']) + nrpe_setup.remove_check.assert_has_calls([call(shortname='quux'), + call(shortname='quuux')]) diff --git a/ceph-radosgw/unit_tests/test_multisite.py b/ceph-radosgw/unit_tests/test_multisite.py new file mode 100644 index 00000000..afc7756f --- /dev/null +++ b/ceph-radosgw/unit_tests/test_multisite.py @@ -0,0 +1,716 @@ +# Copyright 2019 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
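+#
+# Note: the tests below patch out 'subprocess' and assert on the exact
+# radosgw-admin command lines, so no live Ceph cluster is required.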
+
+import inspect
+import os
+import json
+from unittest import mock
+
+import multisite
+
+from test_utils import CharmTestCase
+
+
+def whoami():
+    return inspect.stack()[1][3]
+
+
+def get_zonegroup_stub():
+    # populate dummy zone info
+    zone = {}
+    zone['id'] = "test_zone_id"
+    zone['name'] = "test_zone"
+
+    # populate dummy zonegroup info
+    zonegroup = {}
+    zonegroup['name'] = "test_zonegroup"
+    zonegroup['master_zone'] = "test_zone_id"
+    zonegroup['zones'] = [zone]
+    return zonegroup
+
+
+class TestMultisiteHelpers(CharmTestCase):
+
+    TO_PATCH = [
+        'subprocess',
+        'socket',
+        'hookenv',
+        'utils',
+    ]
+
+    def setUp(self):
+        super(TestMultisiteHelpers, self).setUp(multisite, self.TO_PATCH)
+        self.socket.gethostname.return_value = 'testhost'
+        self.utils.request_per_unit_key.return_value = True
+
+    def _testdata(self, funcname):
+        return os.path.join(os.path.dirname(__file__),
+                            'testdata',
+                            '{}.json'.format(funcname))
+
+    def test___key_name(self):
+        self.assertEqual(
+            multisite._key_name(),
+            'rgw.testhost')
+        self.utils.request_per_unit_key.return_value = False
+        self.assertEqual(
+            multisite._key_name(),
+            'radosgw.gateway')
+
+    def test_create_realm(self):
+        with open(self._testdata(whoami()), 'rb') as f:
+            self.subprocess.check_output.return_value = f.read()
+        result = multisite.create_realm('beedata', default=True)
+        self.assertEqual(result['name'], 'beedata')
+        self.subprocess.check_output.assert_called_with([
+            'radosgw-admin', '--id=rgw.testhost',
+            'realm', 'create',
+            '--rgw-realm=beedata', '--default'
+        ])
+
+    def test_list_realms(self):
+        with open(self._testdata(whoami()), 'rb') as f:
+            self.subprocess.check_output.return_value = f.read()
+        result = multisite.list_realms()
+        self.assertTrue('beedata' in result)
+
+    def test_set_default_realm(self):
+        multisite.set_default_realm('newrealm')
+        self.subprocess.check_call.assert_called_with([
+            'radosgw-admin', '--id=rgw.testhost',
+            'realm', 'default',
+            '--rgw-realm=newrealm'
+        ])
+
+    def test_create_user(self):
+        with open(self._testdata(whoami()), 'rb') as f:
+            self.subprocess.check_output.return_value = f.read()
+        access_key, secret = multisite.create_user(
+            'mrbees',
+        )
+        self.assertEqual(
+            access_key,
+            '41JJQK1HN2NAE5DEZUF9')
+        self.assertEqual(
+            secret,
+            '1qhCgxmUDAJI9saFAVdvUTG5MzMjlpMxr5agaaa4')
+        self.subprocess.check_output.assert_called_with([
+            'radosgw-admin', '--id=rgw.testhost',
+            'user', 'create',
+            '--uid=mrbees',
+            '--display-name=Synchronization User',
+        ])
+
+    def test_create_system_user(self):
+        with open(self._testdata(whoami()), 'rb') as f:
+            self.subprocess.check_output.return_value = f.read()
+        access_key, secret = multisite.create_system_user(
+            'mrbees',
+        )
+        self.assertEqual(
+            access_key,
+            '41JJQK1HN2NAE5DEZUF9')
+        self.assertEqual(
+            secret,
+            '1qhCgxmUDAJI9saFAVdvUTG5MzMjlpMxr5agaaa4')
+        self.subprocess.check_output.assert_called_with([
+            'radosgw-admin', '--id=rgw.testhost',
+            'user', 'create',
+            '--uid=mrbees',
+            '--display-name=Synchronization User',
+            '--system'
+        ])
+
+    def test_create_zonegroup(self):
+        with open(self._testdata(whoami()), 'rb') as f:
+            self.subprocess.check_output.return_value = f.read()
+        result = multisite.create_zonegroup(
+            'brundall',
+            endpoints=['http://localhost:80'],
+            master=True,
+            default=True,
+            realm='beedata',
+        )
+        self.assertEqual(result['name'], 'brundall')
+        self.subprocess.check_output.assert_called_with([
+            'radosgw-admin', '--id=rgw.testhost',
+            'zonegroup', 'create',
+            '--rgw-zonegroup=brundall',
+            '--endpoints=http://localhost:80',
'--rgw-realm=beedata', + '--default', + '--master' + ]) + + def test_list_zonegroups(self): + with open(self._testdata(whoami()), 'rb') as f: + self.subprocess.check_output.return_value = f.read() + result = multisite.list_zonegroups() + self.assertTrue('brundall' in result) + + def test_create_zone(self): + with open(self._testdata(whoami()), 'rb') as f: + self.subprocess.check_output.return_value = f.read() + result = multisite.create_zone( + 'brundall-east', + endpoints=['http://localhost:80'], + master=True, + default=True, + zonegroup='brundall', + access_key='mykey', + secret='mypassword', + ) + self.assertEqual(result['name'], 'brundall-east') + self.subprocess.check_output.assert_called_with([ + 'radosgw-admin', '--id=rgw.testhost', + 'zone', 'create', + '--rgw-zone=brundall-east', + '--endpoints=http://localhost:80', + '--rgw-zonegroup=brundall', + '--default', '--master', + '--access-key=mykey', + '--secret=mypassword', + '--read-only=0', + ]) + + def test_modify_zone(self): + multisite.modify_zone( + 'brundall-east', + endpoints=['http://localhost:80', 'https://localhost:443'], + access_key='mykey', + secret='secret', + readonly=True + ) + self.subprocess.check_output.assert_called_with([ + 'radosgw-admin', '--id=rgw.testhost', + 'zone', 'modify', + '--rgw-zone=brundall-east', + '--endpoints=http://localhost:80,https://localhost:443', + '--access-key=mykey', '--secret=secret', + '--read-only=1', + ]) + + def test_modify_zone_promote_master(self): + multisite.modify_zone( + 'brundall-east', + default=True, + master=True, + ) + self.subprocess.check_output.assert_called_with([ + 'radosgw-admin', '--id=rgw.testhost', + 'zone', 'modify', + '--rgw-zone=brundall-east', + '--master', + '--default', + '--read-only=0', + ]) + + def test_modify_zone_partial_credentials(self): + multisite.modify_zone( + 'brundall-east', + endpoints=['http://localhost:80', 'https://localhost:443'], + access_key='mykey', + ) + self.subprocess.check_output.assert_called_with([ + 'radosgw-admin', '--id=rgw.testhost', + 'zone', 'modify', + '--rgw-zone=brundall-east', + '--endpoints=http://localhost:80,https://localhost:443', + '--read-only=0', + ]) + + def test_list_zones(self): + with open(self._testdata(whoami()), 'rb') as f: + self.subprocess.check_output.return_value = f.read() + result = multisite.list_zones() + self.assertTrue('brundall-east' in result) + + def test_update_period(self): + multisite.update_period() + self.subprocess.check_call.assert_called_once_with([ + 'radosgw-admin', '--id=rgw.testhost', + 'period', 'update', '--commit' + ]) + + @mock.patch.object(multisite, 'list_zonegroups') + @mock.patch.object(multisite, 'list_zones') + @mock.patch.object(multisite, 'update_period') + def test_tidy_defaults(self, + mock_update_period, + mock_list_zones, + mock_list_zonegroups): + mock_list_zones.return_value = ['default'] + mock_list_zonegroups.return_value = ['default'] + multisite.tidy_defaults() + self.subprocess.call.assert_has_calls([ + mock.call(['radosgw-admin', '--id=rgw.testhost', + 'zonegroup', 'remove', + '--rgw-zonegroup=default', '--rgw-zone=default']), + mock.call(['radosgw-admin', '--id=rgw.testhost', + 'zone', 'delete', + '--rgw-zone=default']), + mock.call(['radosgw-admin', '--id=rgw.testhost', + 'zonegroup', 'delete', + '--rgw-zonegroup=default']) + ]) + mock_update_period.assert_called_with() + + @mock.patch.object(multisite, 'list_zonegroups') + @mock.patch.object(multisite, 'list_zones') + @mock.patch.object(multisite, 'update_period') + def test_tidy_defaults_noop(self, + 
mock_update_period, + mock_list_zones, + mock_list_zonegroups): + mock_list_zones.return_value = ['brundall-east'] + mock_list_zonegroups.return_value = ['brundall'] + multisite.tidy_defaults() + self.subprocess.call.assert_not_called() + mock_update_period.assert_not_called() + + def test_pull_realm(self): + multisite.pull_realm(url='http://master:80', + access_key='testkey', + secret='testsecret') + self.subprocess.check_output.assert_called_once_with([ + 'radosgw-admin', '--id=rgw.testhost', + 'realm', 'pull', + '--url=http://master:80', + '--access-key=testkey', '--secret=testsecret', + ]) + + def test_pull_period(self): + multisite.pull_period(url='http://master:80', + access_key='testkey', + secret='testsecret') + self.subprocess.check_output.assert_called_once_with([ + 'radosgw-admin', '--id=rgw.testhost', + 'period', 'pull', + '--url=http://master:80', + '--access-key=testkey', '--secret=testsecret', + ]) + + def test_list_buckets(self): + self.subprocess.CalledProcessError = BaseException + multisite.list_buckets('default', 'default') + self.subprocess.check_output.assert_called_once_with([ + 'radosgw-admin', '--id=rgw.testhost', + 'bucket', 'list', '--rgw-zone=default', + '--rgw-zonegroup=default' + ]) + + def test_rename_zonegroup(self): + multisite.rename_zonegroup('default', 'test_zone_group') + self.subprocess.call.assert_called_once_with([ + 'radosgw-admin', '--id=rgw.testhost', + 'zonegroup', 'rename', '--rgw-zonegroup=default', + '--zonegroup-new-name=test_zone_group' + ]) + + def test_rename_zone(self): + multisite.rename_zone('default', 'test_zone', 'test_zone_group') + self.subprocess.call.assert_called_once_with([ + 'radosgw-admin', '--id=rgw.testhost', + 'zone', 'rename', '--rgw-zone=default', + '--zone-new-name=test_zone', + '--rgw-zonegroup=test_zone_group' + ]) + + def test_get_zonegroup(self): + multisite.get_zonegroup_info('test_zone') + self.subprocess.check_output.assert_called_once_with([ + 'radosgw-admin', '--id=rgw.testhost', + 'zonegroup', 'get', '--rgw-zonegroup=test_zone' + ]) + + def test_modify_zonegroup_migrate(self): + multisite.modify_zonegroup('test_zonegroup', + endpoints=['http://localhost:80'], + default=True, master=True, + realm='test_realm') + self.subprocess.check_output.assert_called_once_with([ + 'radosgw-admin', '--id=rgw.testhost', + 'zonegroup', 'modify', + '--rgw-zonegroup=test_zonegroup', '--rgw-realm=test_realm', + '--endpoints=http://localhost:80', '--default', '--master', + ]) + + def test_modify_zone_migrate(self): + multisite.modify_zone('test_zone', default=True, master=True, + endpoints=['http://localhost:80'], + zonegroup='test_zonegroup', realm='test_realm') + self.subprocess.check_output.assert_called_with([ + 'radosgw-admin', '--id=rgw.testhost', + 'zone', 'modify', + '--rgw-zone=test_zone', '--rgw-realm=test_realm', + '--rgw-zonegroup=test_zonegroup', + '--endpoints=http://localhost:80', + '--master', '--default', '--read-only=0', + ]) + + @mock.patch.object(multisite, 'list_zones') + @mock.patch.object(multisite, 'get_zonegroup_info') + def test_get_local_zone(self, mock_get_zonegroup_info, mock_list_zones): + mock_get_zonegroup_info.return_value = get_zonegroup_stub() + mock_list_zones.return_value = ['test_zone'] + zone, _zonegroup = multisite.get_local_zone('test_zonegroup') + self.assertEqual( + zone, + 'test_zone' + ) + + def test_rename_multisite_config_zonegroup_fail(self): + self.assertEqual( + multisite.rename_multisite_config( + ['default'], 'test_zonegroup', + ['default'], 'test_zone' + ), + None + ) + + 
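+        # The helper returns None, but the zonegroup rename command must
+        # still have been issued: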
+        self.subprocess.call.assert_called_with([
+            'radosgw-admin', '--id=rgw.testhost',
+            'zonegroup', 'rename', '--rgw-zonegroup=default',
+            '--zonegroup-new-name=test_zonegroup'
+        ])
+
+    def test_modify_multisite_config_zonegroup_fail(self):
+        self.assertEqual(
+            multisite.modify_multisite_config(
+                'test_zone', 'test_zonegroup',
+                endpoints=['http://localhost:80'],
+                realm='test_realm'
+            ),
+            None
+        )
+
+        self.subprocess.check_output.assert_called_with([
+            'radosgw-admin', '--id=rgw.testhost',
+            'zonegroup', 'modify', '--rgw-zonegroup=test_zonegroup',
+            '--rgw-realm=test_realm',
+            '--endpoints=http://localhost:80', '--default',
+            '--master',
+        ])
+
+    @mock.patch.object(multisite, 'modify_zonegroup')
+    def test_modify_multisite_config_zone_fail(self, mock_modify_zonegroup):
+        mock_modify_zonegroup.return_value = True
+        self.assertEqual(
+            multisite.modify_multisite_config(
+                'test_zone', 'test_zonegroup',
+                endpoints=['http://localhost:80'],
+                realm='test_realm'
+            ),
+            None
+        )
+
+        self.subprocess.check_output.assert_called_with([
+            'radosgw-admin', '--id=rgw.testhost',
+            'zone', 'modify',
+            '--rgw-zone=test_zone',
+            '--rgw-realm=test_realm',
+            '--rgw-zonegroup=test_zonegroup',
+            '--endpoints=http://localhost:80',
+            '--master', '--default', '--read-only=0',
+        ])
+
+    @mock.patch.object(multisite, 'rename_zonegroup')
+    def test_rename_multisite_config_zone_fail(self, mock_rename_zonegroup):
+        mock_rename_zonegroup.return_value = True
+        self.assertEqual(
+            multisite.rename_multisite_config(
+                ['default'], 'test_zonegroup',
+                ['default'], 'test_zone'
+            ),
+            None
+        )
+
+        self.subprocess.call.assert_called_with([
+            'radosgw-admin', '--id=rgw.testhost',
+            'zone', 'rename', '--rgw-zone=default',
+            '--zone-new-name=test_zone',
+            '--rgw-zonegroup=test_zonegroup',
+        ])
+
+    @mock.patch.object(json, 'loads')
+    def test_remove_zone_from_zonegroup(self, json_loads):
+        # json.loads() raises TypeError for mock objects.
+        json_loads.return_value = []
+        multisite.remove_zone_from_zonegroup(
+            'test_zone', 'test_zonegroup',
+        )
+
+        self.subprocess.check_output.assert_called_with([
+            'radosgw-admin', '--id=rgw.testhost',
+            'zonegroup', 'remove', '--rgw-zonegroup=test_zonegroup',
+            '--rgw-zone=test_zone',
+        ])
+
+    @mock.patch.object(json, 'loads')
+    def test_add_zone_to_zonegroup(self, json_loads):
+        # json.loads() raises TypeError for mock objects.
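+        # As above, return an empty list so the zone parsing is a no-op.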
+        json_loads.return_value = []
+        multisite.add_zone_to_zonegroup(
+            'test_zone', 'test_zonegroup',
+        )
+
+        self.subprocess.check_output.assert_called_with([
+            'radosgw-admin', '--id=rgw.testhost',
+            'zonegroup', 'add', '--rgw-zonegroup=test_zonegroup',
+            '--rgw-zone=test_zone',
+        ])
+
+    @mock.patch.object(multisite, 'list_zonegroups')
+    @mock.patch.object(multisite, 'get_local_zone')
+    @mock.patch.object(multisite, 'list_buckets')
+    def test_check_zone_has_buckets(self, mock_list_buckets,
+                                    mock_get_local_zone,
+                                    mock_list_zonegroups):
+        mock_list_zonegroups.return_value = ['test_zonegroup']
+        mock_get_local_zone.return_value = 'test_zone', 'test_zonegroup'
+        mock_list_buckets.return_value = ['test_bucket_1', 'test_bucket_2']
+        self.assertEqual(
+            multisite.check_cluster_has_buckets(),
+            True
+        )
+
+    def test_get_zone_info(self):
+        multisite.get_zone_info('test_zone', 'test_zonegroup')
+        self.subprocess.check_output.assert_called_with([
+            'radosgw-admin', '--id=rgw.testhost',
+            'zone', 'get',
+            '--rgw-zone=test_zone', '--rgw-zonegroup=test_zonegroup',
+        ])
+
+    def test_sync_group_exists(self):
+        groups = [
+            {'key': 'group1'},
+            {'key': 'group2'},
+        ]
+        self.subprocess.check_output.return_value = json.dumps(groups).encode()
+        self.assertTrue(multisite.sync_group_exists('group1'))
+        self.subprocess.check_output.assert_called_with([
+            'radosgw-admin', '--id=rgw.testhost',
+            'sync', 'group', 'get',
+        ])
+
+    def test_bucket_sync_group_exists(self):
+        with open(self._testdata('test_list_sync_groups'), 'rb') as f:
+            self.subprocess.check_output.return_value = f.read()
+        self.assertTrue(multisite.sync_group_exists('default',
+                                                    bucket='test'))
+        self.subprocess.check_output.assert_called_with([
+            'radosgw-admin', '--id=rgw.testhost',
+            'sync', 'group', 'get',
+            '--bucket=test',
+        ])
+
+    def test_sync_group_does_not_exist(self):
+        with open(self._testdata('test_list_sync_groups'), 'rb') as f:
+            self.subprocess.check_output.return_value = f.read()
+        self.assertFalse(multisite.sync_group_exists('group-non-existent'))
+        self.subprocess.check_output.assert_called_with([
+            'radosgw-admin', '--id=rgw.testhost',
+            'sync', 'group', 'get',
+        ])
+
+    def test_get_sync_group(self):
+        with open(self._testdata(whoami()), 'rb') as f:
+            self.subprocess.check_output.return_value = f.read()
+        result = multisite.get_sync_group('default')
+        self.assertEqual(result['id'], 'default')
+        self.subprocess.check_output.assert_called_with([
+            'radosgw-admin', '--id=rgw.testhost',
+            'sync', 'group', 'get',
+            '--group-id=default',
+        ])
+
+    def test_create_sync_group(self):
+        test_group_json = json.dumps({"id": "default"}).encode()
+        self.subprocess.check_output.return_value = test_group_json
+        result = multisite.create_sync_group(
+            group_id='default',
+            status=multisite.SYNC_POLICY_ENABLED,
+        )
+        self.assertEqual(result['id'], 'default')
+        self.subprocess.check_output.assert_called_with([
+            'radosgw-admin', '--id=rgw.testhost',
+            'sync', 'group', 'create',
+            '--group-id=default',
+            '--status={}'.format(multisite.SYNC_POLICY_ENABLED),
+        ])
+
+    def test_create_sync_group_wrong_status(self):
+        self.assertRaises(
+            multisite.UnknownSyncPolicyState,
+            multisite.create_sync_group, "default", "wrong_status",
+        )
+
+    def test_remove_sync_group(self):
+        multisite.remove_sync_group('default')
+        self.subprocess.check_output.assert_called_with([
+            'radosgw-admin', '--id=rgw.testhost',
+            'sync', 'group', 'remove',
+            '--group-id=default',
+        ])
+
+    @mock.patch.object(multisite, 'get_sync_group')
+    @mock.patch.object(multisite,
'sync_group_exists') + def test_is_sync_group_update_needed(self, mock_sync_group_exists, + mock_get_sync_group): + mock_sync_group_exists.return_value = True + with open(self._testdata('test_get_sync_group'), 'r') as f: + mock_get_sync_group.return_value = json.loads(f.read()) + + result = multisite.is_sync_group_update_needed( + group_id='default', + flow_id='zone_a-zone_b', + pipe_id='zone_a-zone_b', + source_zone='zone_a', + dest_zone='zone_b', + desired_status=multisite.SYNC_POLICY_ALLOWED, + desired_flow_type=multisite.SYNC_FLOW_SYMMETRICAL, + ) + + mock_sync_group_exists.assert_called_with('default') + mock_get_sync_group.assert_called_with('default') + self.assertFalse(result) + + def test_is_sync_group_flow_update_needed(self): + with open(self._testdata('test_get_sync_group'), 'r') as f: + sync_group = json.loads(f.read()) + result = multisite.is_sync_group_flow_update_needed( + sync_group, + flow_id='zone_a-zone_b', + source_zone='zone_a', dest_zone='zone_b', + desired_flow_type=multisite.SYNC_FLOW_SYMMETRICAL, + ) + self.assertFalse(result) + + @mock.patch.object(multisite, 'remove_sync_group_flow') + def test_is_sync_group_flow_update_needed_flow_type_change( + self, mock_remove_sync_group_flow): + with open(self._testdata('test_get_sync_group'), 'r') as f: + sync_group = json.loads(f.read()) + result = multisite.is_sync_group_flow_update_needed( + sync_group, + flow_id='zone_a-zone_b', + source_zone='zone_a', dest_zone='zone_b', + desired_flow_type=multisite.SYNC_FLOW_DIRECTIONAL, + ) + mock_remove_sync_group_flow.assert_called_with( + group_id='default', + flow_id='zone_a-zone_b', + flow_type=multisite.SYNC_FLOW_SYMMETRICAL, + source_zone='zone_a', dest_zone='zone_b', + ) + self.assertTrue(result) + + def test_create_sync_group_flow_symmetrical(self): + with open(self._testdata('test_create_sync_group_flow'), 'rb') as f: + self.subprocess.check_output.return_value = f.read() + result = multisite.create_sync_group_flow( + group_id='default', + flow_id='flow_id', + flow_type=multisite.SYNC_FLOW_SYMMETRICAL, + source_zone='zone_a', + dest_zone='zone_b', + ) + self.assertEqual(result['groups'][0]['id'], 'default') + self.subprocess.check_output.assert_called_with([ + 'radosgw-admin', '--id=rgw.testhost', + 'sync', 'group', 'flow', 'create', + '--group-id=default', + '--flow-id=flow_id', + '--flow-type=symmetrical', + '--zones=zone_a,zone_b', + ]) + + def test_create_sync_group_flow_directional(self): + with open(self._testdata('test_create_sync_group_flow'), 'rb') as f: + self.subprocess.check_output.return_value = f.read() + result = multisite.create_sync_group_flow( + group_id='default', + flow_id='flow_id', + flow_type=multisite.SYNC_FLOW_DIRECTIONAL, + source_zone='zone_a', + dest_zone='zone_b', + ) + self.assertEqual(result['groups'][0]['id'], 'default') + self.subprocess.check_output.assert_called_with([ + 'radosgw-admin', '--id=rgw.testhost', + 'sync', 'group', 'flow', 'create', + '--group-id=default', + '--flow-id=flow_id', + '--flow-type=directional', + '--source-zone=zone_a', '--dest-zone=zone_b', + ]) + + def test_create_sync_group_flow_wrong_type(self): + self.assertRaises( + multisite.UnknownSyncFlowType, + multisite.create_sync_group_flow, + group_id='default', flow_id='flow_id', flow_type='wrong_type', + source_zone='zone_a', dest_zone='zone_b', + ) + + def test_remove_sync_group_flow_symmetrical(self): + multisite.remove_sync_group_flow( + group_id='default', + flow_id='flow_id', + flow_type=multisite.SYNC_FLOW_SYMMETRICAL, + ) + 
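+        # The symmetrical variant takes no --source-zone/--dest-zone flags: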
self.subprocess.check_output.assert_called_with([ + 'radosgw-admin', '--id=rgw.testhost', + 'sync', 'group', 'flow', 'remove', + '--group-id=default', + '--flow-id=flow_id', + '--flow-type=symmetrical', + ]) + + def test_remove_sync_group_flow_directional(self): + multisite.remove_sync_group_flow( + group_id='default', + flow_id='flow_id', + flow_type=multisite.SYNC_FLOW_DIRECTIONAL, + source_zone='zone_a', + dest_zone='zone_b', + ) + self.subprocess.check_output.assert_called_with([ + 'radosgw-admin', '--id=rgw.testhost', + 'sync', 'group', 'flow', 'remove', + '--group-id=default', + '--flow-id=flow_id', + '--flow-type=directional', + '--source-zone=zone_a', '--dest-zone=zone_b', + ]) + + def test_create_sync_group_pipe(self): + with open(self._testdata(whoami()), 'rb') as f: + self.subprocess.check_output.return_value = f.read() + result = multisite.create_sync_group_pipe( + group_id='default', + pipe_id='pipe_id', + source_zones=['zone_a', 'zone_b'], + dest_zones=['zone_c', 'zone_d'], + ) + self.assertEqual(result['groups'][0]['id'], 'default') + self.subprocess.check_output.assert_called_with([ + 'radosgw-admin', '--id=rgw.testhost', + 'sync', 'group', 'pipe', 'create', + '--group-id=default', + '--pipe-id=pipe_id', + '--source-zones=zone_a,zone_b', '--source-bucket=*', + '--dest-zones=zone_c,zone_d', '--dest-bucket=*', + ]) diff --git a/ceph-radosgw/unit_tests/test_utils.py b/ceph-radosgw/unit_tests/test_utils.py new file mode 100644 index 00000000..a3feee41 --- /dev/null +++ b/ceph-radosgw/unit_tests/test_utils.py @@ -0,0 +1,116 @@ +# Copyright 2016 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +import unittest +import yaml + +from unittest.mock import patch + + +def load_config(): + """Walk backwards from __file__ looking for config.yaml. + + Load and return the 'options' section. + """ + config = None + f = __file__ + while config is None: + d = os.path.dirname(f) + if os.path.isfile(os.path.join(d, 'config.yaml')): + config = os.path.join(d, 'config.yaml') + break + f = d + + if not config: + logging.error('Could not find config.yaml in any parent directory ' + 'of %s. ' % f) + raise Exception + + return yaml.safe_load(open(config).read())['options'] + + +def get_default_config(): + """Load default charm config from config.yaml return as a dict. + + If no default is set in config.yaml, its value is None. 
+ """ + default_config = {} + config = load_config() + for k, v in config.items(): + if 'default' in v: + default_config[k] = v['default'] + else: + default_config[k] = None + return default_config + + +class CharmTestCase(unittest.TestCase): + + def setUp(self, obj, patches): + super(CharmTestCase, self).setUp() + self.patches = patches + self.obj = obj + self.test_config = TestConfig() + self.test_relation = TestRelation() + self.patch_all() + + def patch(self, method): + _m = patch.object(self.obj, method) + mock = _m.start() + self.addCleanup(_m.stop) + return mock + + def patch_all(self): + for method in self.patches: + setattr(self, method, self.patch(method)) + + +class TestConfig(object): + + def __init__(self): + self.config = get_default_config() + + def get(self, attr=None): + if not attr: + return self.get_all() + try: + return self.config[attr] + except KeyError: + return None + + def get_all(self): + return self.config + + def set(self, attr, value): + if attr not in self.config: + raise KeyError + self.config[attr] = value + + +class TestRelation(object): + + def __init__(self, relation_data={}): + self.relation_data = relation_data + + def set(self, relation_data): + self.relation_data = relation_data + + def get(self, attr=None, unit=None, rid=None): + if attr is None: + return self.relation_data + elif attr in self.relation_data: + return self.relation_data[attr] + return None diff --git a/ceph-radosgw/unit_tests/testdata/test_create_realm.json b/ceph-radosgw/unit_tests/testdata/test_create_realm.json new file mode 100644 index 00000000..343233ff --- /dev/null +++ b/ceph-radosgw/unit_tests/testdata/test_create_realm.json @@ -0,0 +1,7 @@ +{ + "id": "793a0176-ef7d-4d97-b544-a921e19a52e7", + "name": "beedata", + "current_period": "1f30e5fa-2c24-471d-b17d-61135c9f9510", + "epoch": 3 +} + diff --git a/ceph-radosgw/unit_tests/testdata/test_create_sync_group_flow.json b/ceph-radosgw/unit_tests/testdata/test_create_sync_group_flow.json new file mode 100644 index 00000000..363ecf60 --- /dev/null +++ b/ceph-radosgw/unit_tests/testdata/test_create_sync_group_flow.json @@ -0,0 +1,20 @@ +{ + "groups": [ + { + "id": "default", + "data_flow": { + "symmetrical": [ + { + "id": "zone_a-zone_b", + "zones": [ + "zone_a", + "zone_b" + ] + } + ] + }, + "pipes": [], + "status": "allowed" + } + ] +} diff --git a/ceph-radosgw/unit_tests/testdata/test_create_sync_group_pipe.json b/ceph-radosgw/unit_tests/testdata/test_create_sync_group_pipe.json new file mode 100644 index 00000000..6d2b6630 --- /dev/null +++ b/ceph-radosgw/unit_tests/testdata/test_create_sync_group_pipe.json @@ -0,0 +1,49 @@ +{ + "groups": [ + { + "id": "default", + "data_flow": { + "symmetrical": [ + { + "id": "zone_a-zone_b", + "zones": [ + "zone_a", + "zone_b" + ] + } + ] + }, + "pipes": [ + { + "id": "zone_a-zone_b", + "source": { + "bucket": "*", + "zones": [ + "zone_a", + "zone_b" + ] + }, + "dest": { + "bucket": "*", + "zones": [ + "zone_a", + "zone_b" + ] + }, + "params": { + "source": { + "filter": { + "tags": [] + } + }, + "dest": {}, + "priority": 0, + "mode": "system", + "user": "" + } + } + ], + "status": "allowed" + } + ] +} diff --git a/ceph-radosgw/unit_tests/testdata/test_create_system_user.json b/ceph-radosgw/unit_tests/testdata/test_create_system_user.json new file mode 100644 index 00000000..83be1d99 --- /dev/null +++ b/ceph-radosgw/unit_tests/testdata/test_create_system_user.json @@ -0,0 +1,38 @@ +{ + "auid": 0, + "bucket_quota": { + "check_on_raw": false, + "enabled": false, + "max_objects": -1, + "max_size": 
-1, + "max_size_kb": 0 + }, + "caps": [], + "default_placement": "", + "display_name": "Synchronization User", + "email": "", + "keys": [ + { + "access_key": "41JJQK1HN2NAE5DEZUF9", + "secret_key": "1qhCgxmUDAJI9saFAVdvUTG5MzMjlpMxr5agaaa4", + "user": "mrbees" + } + ], + "max_buckets": 1000, + "op_mask": "read, write, delete", + "placement_tags": [], + "subusers": [], + "suspended": 0, + "swift_keys": [], + "system": "true", + "temp_url_keys": [], + "type": "rgw", + "user_id": "mrbees", + "user_quota": { + "check_on_raw": false, + "enabled": false, + "max_objects": -1, + "max_size": -1, + "max_size_kb": 0 + } +} diff --git a/ceph-radosgw/unit_tests/testdata/test_create_user.json b/ceph-radosgw/unit_tests/testdata/test_create_user.json new file mode 100644 index 00000000..83be1d99 --- /dev/null +++ b/ceph-radosgw/unit_tests/testdata/test_create_user.json @@ -0,0 +1,38 @@ +{ + "auid": 0, + "bucket_quota": { + "check_on_raw": false, + "enabled": false, + "max_objects": -1, + "max_size": -1, + "max_size_kb": 0 + }, + "caps": [], + "default_placement": "", + "display_name": "Synchronization User", + "email": "", + "keys": [ + { + "access_key": "41JJQK1HN2NAE5DEZUF9", + "secret_key": "1qhCgxmUDAJI9saFAVdvUTG5MzMjlpMxr5agaaa4", + "user": "mrbees" + } + ], + "max_buckets": 1000, + "op_mask": "read, write, delete", + "placement_tags": [], + "subusers": [], + "suspended": 0, + "swift_keys": [], + "system": "true", + "temp_url_keys": [], + "type": "rgw", + "user_id": "mrbees", + "user_quota": { + "check_on_raw": false, + "enabled": false, + "max_objects": -1, + "max_size": -1, + "max_size_kb": 0 + } +} diff --git a/ceph-radosgw/unit_tests/testdata/test_create_zone.json b/ceph-radosgw/unit_tests/testdata/test_create_zone.json new file mode 100644 index 00000000..9530d229 --- /dev/null +++ b/ceph-radosgw/unit_tests/testdata/test_create_zone.json @@ -0,0 +1,36 @@ +{ + "id": "a69d4cd8-1881-4040-ad7c-914ca35af3b2", + "name": "brundall-east", + "domain_root": "brundall-east.rgw.meta:root", + "control_pool": "brundall-east.rgw.control", + "gc_pool": "brundall-east.rgw.log:gc", + "lc_pool": "brundall-east.rgw.log:lc", + "log_pool": "brundall-east.rgw.log", + "intent_log_pool": "brundall-east.rgw.log:intent", + "usage_log_pool": "brundall-east.rgw.log:usage", + "reshard_pool": "brundall-east.rgw.log:reshard", + "user_keys_pool": "brundall-east.rgw.meta:users.keys", + "user_email_pool": "brundall-east.rgw.meta:users.email", + "user_swift_pool": "brundall-east.rgw.meta:users.swift", + "user_uid_pool": "brundall-east.rgw.meta:users.uid", + "system_key": { + "access_key": "90FM6V8B44BSN1MVKYW6", + "secret_key": "bFHSPN3PB4QZqHfTiNIn11ey8kA8OA6Php6kGpdH" + }, + "placement_pools": [ + { + "key": "default-placement", + "val": { + "index_pool": "brundall-east.rgw.buckets.index", + "data_pool": "brundall-east.rgw.buckets.data", + "data_extra_pool": "brundall-east.rgw.buckets.non-ec", + "index_type": 0, + "compression": "" + } + } + ], + "metadata_heap": "", + "tier_config": [], + "realm_id": "793a0176-ef7d-4d97-b544-a921e19a52e7" +} + diff --git a/ceph-radosgw/unit_tests/testdata/test_create_zonegroup.json b/ceph-radosgw/unit_tests/testdata/test_create_zonegroup.json new file mode 100644 index 00000000..688d85c7 --- /dev/null +++ b/ceph-radosgw/unit_tests/testdata/test_create_zonegroup.json @@ -0,0 +1,51 @@ +{ + "id": "3f41f138-5669-4b63-bf61-278f28fc9306", + "name": "brundall", + "api_name": "brundall", + "is_master": "true", + "endpoints": [ + "http://10.5.100.2:80" + ], + "hostnames": [], + "hostnames_s3website": 
[], + "master_zone": "a69d4cd8-1881-4040-ad7c-914ca35af3b2", + "zones": [ + { + "id": "8be215da-5316-4d12-a584-44b246285a3f", + "name": "brundall-west", + "endpoints": [ + "http://10.5.100.2:80" + ], + "log_meta": "false", + "log_data": "true", + "bucket_index_max_shards": 0, + "read_only": "false", + "tier_type": "", + "sync_from_all": "true", + "sync_from": [] + }, + { + "id": "a69d4cd8-1881-4040-ad7c-914ca35af3b2", + "name": "brundall-east", + "endpoints": [ + "http://10.5.100.1:80" + ], + "log_meta": "false", + "log_data": "true", + "bucket_index_max_shards": 0, + "read_only": "false", + "tier_type": "", + "sync_from_all": "true", + "sync_from": [] + } + ], + "placement_targets": [ + { + "name": "default-placement", + "tags": [] + } + ], + "default_placement": "default-placement", + "realm_id": "793a0176-ef7d-4d97-b544-a921e19a52e7" +} + diff --git a/ceph-radosgw/unit_tests/testdata/test_get_sync_group.json b/ceph-radosgw/unit_tests/testdata/test_get_sync_group.json new file mode 100644 index 00000000..0a3f43d8 --- /dev/null +++ b/ceph-radosgw/unit_tests/testdata/test_get_sync_group.json @@ -0,0 +1,45 @@ +{ + "id": "default", + "data_flow": { + "symmetrical": [ + { + "id": "zone_a-zone_b", + "zones": [ + "zone_a", + "zone_b" + ] + } + ] + }, + "pipes": [ + { + "id": "zone_a-zone_b", + "source": { + "bucket": "*", + "zones": [ + "zone_a", + "zone_b" + ] + }, + "dest": { + "bucket": "*", + "zones": [ + "zone_a", + "zone_b" + ] + }, + "params": { + "source": { + "filter": { + "tags": [] + } + }, + "dest": {}, + "priority": 0, + "mode": "system", + "user": "" + } + } + ], + "status": "allowed" +} diff --git a/ceph-radosgw/unit_tests/testdata/test_list_realms.json b/ceph-radosgw/unit_tests/testdata/test_list_realms.json new file mode 100644 index 00000000..ce4462aa --- /dev/null +++ b/ceph-radosgw/unit_tests/testdata/test_list_realms.json @@ -0,0 +1,6 @@ +{ + "default_info": "793a0176-ef7d-4d97-b544-a921e19a52e7", + "realms": [ + "beedata" + ] +} diff --git a/ceph-radosgw/unit_tests/testdata/test_list_sync_groups.json b/ceph-radosgw/unit_tests/testdata/test_list_sync_groups.json new file mode 100644 index 00000000..b80c2999 --- /dev/null +++ b/ceph-radosgw/unit_tests/testdata/test_list_sync_groups.json @@ -0,0 +1,45 @@ +[ + { + "key": "default", + "val": { + "id": "default", + "data_flow": { + "directional": [ + { + "source_zone": "zone_a", + "dest_zone": "zone_b" + } + ] + }, + "pipes": [ + { + "id": "zone_a-zone_b", + "source": { + "bucket": "*", + "zones": [ + "zone_a" + ] + }, + "dest": { + "bucket": "*", + "zones": [ + "zone_b" + ] + }, + "params": { + "source": { + "filter": { + "tags": [] + } + }, + "dest": {}, + "priority": 0, + "mode": "system", + "user": "" + } + } + ], + "status": "allowed" + } + } +] diff --git a/ceph-radosgw/unit_tests/testdata/test_list_users.json b/ceph-radosgw/unit_tests/testdata/test_list_users.json new file mode 100644 index 00000000..ab56bb93 --- /dev/null +++ b/ceph-radosgw/unit_tests/testdata/test_list_users.json @@ -0,0 +1,5 @@ +[ + "testuser", + "multisite-sync" +] + diff --git a/ceph-radosgw/unit_tests/testdata/test_list_zonegroups.json b/ceph-radosgw/unit_tests/testdata/test_list_zonegroups.json new file mode 100644 index 00000000..b4a52e9a --- /dev/null +++ b/ceph-radosgw/unit_tests/testdata/test_list_zonegroups.json @@ -0,0 +1,6 @@ +{ + "default_info": "3f41f138-5669-4b63-bf61-278f28fc9306", + "zonegroups": [ + "brundall" + ] +} diff --git a/ceph-radosgw/unit_tests/testdata/test_list_zones.json 
b/ceph-radosgw/unit_tests/testdata/test_list_zones.json new file mode 100644 index 00000000..ea27dd90 --- /dev/null +++ b/ceph-radosgw/unit_tests/testdata/test_list_zones.json @@ -0,0 +1,6 @@ +{ + "default_info": "a69d4cd8-1881-4040-ad7c-914ca35af3b2", + "zones": [ + "brundall-east" + ] +} diff --git a/ceph-rbd-mirror/.gitignore b/ceph-rbd-mirror/.gitignore new file mode 100644 index 00000000..a7eb42f3 --- /dev/null +++ b/ceph-rbd-mirror/.gitignore @@ -0,0 +1,11 @@ +.tox +.stestr +*__pycache__* +*.pyc +build +.coverage +cover/ +layers/ +interfaces/ +*.swp +*.charm diff --git a/ceph-rbd-mirror/.gitreview b/ceph-rbd-mirror/.gitreview new file mode 100644 index 00000000..66ebbc9a --- /dev/null +++ b/ceph-rbd-mirror/.gitreview @@ -0,0 +1,6 @@ +[gerrit] +host=review.opendev.org +port=29418 +project=openstack/charm-ceph-rbd-mirror.git + +defaultbranch=stable/squid-jammy diff --git a/ceph-rbd-mirror/.stestr.conf b/ceph-rbd-mirror/.stestr.conf new file mode 100644 index 00000000..5fcccaca --- /dev/null +++ b/ceph-rbd-mirror/.stestr.conf @@ -0,0 +1,3 @@ +[DEFAULT] +test_path=./unit_tests +top_dir=./ diff --git a/ceph-rbd-mirror/.zuul.yaml b/ceph-rbd-mirror/.zuul.yaml new file mode 100644 index 00000000..77259668 --- /dev/null +++ b/ceph-rbd-mirror/.zuul.yaml @@ -0,0 +1,4 @@ +- project: + templates: + - openstack-python3-charm-zed-jobs + - openstack-python3-charm-jobs diff --git a/ceph-rbd-mirror/LICENSE b/ceph-rbd-mirror/LICENSE new file mode 100644 index 00000000..d6456956 --- /dev/null +++ b/ceph-rbd-mirror/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/ceph-rbd-mirror/bindep.txt b/ceph-rbd-mirror/bindep.txt new file mode 100644 index 00000000..17575d9f --- /dev/null +++ b/ceph-rbd-mirror/bindep.txt @@ -0,0 +1,4 @@ +libffi-dev [platform:dpkg] +libpq-dev [platform:dpkg] +libxml2-dev [platform:dpkg] +libxslt1-dev [platform:dpkg] diff --git a/ceph-rbd-mirror/charmcraft.yaml b/ceph-rbd-mirror/charmcraft.yaml new file mode 100644 index 00000000..6abcaad5 --- /dev/null +++ b/ceph-rbd-mirror/charmcraft.yaml @@ -0,0 +1,30 @@ +type: charm + +parts: + charm: + source: src/ + plugin: reactive + build-snaps: + - charm + build-packages: + - tox + - git + - python3-dev + build-environment: + - CHARM_INTERFACES_DIR: /root/project/interfaces/ + - CHARM_LAYERS_DIR: /root/project/layers/ + +base: ubuntu@22.04 +platforms: + amd64: + build-on: amd64 + build-for: amd64 + arm64: + build-on: arm64 + build-for: arm64 + ppc64el: + build-on: ppc64el + build-for: ppc64el + s390x: + build-on: s390x + build-for: s390x diff --git a/ceph-rbd-mirror/metadata.yaml b/ceph-rbd-mirror/metadata.yaml new file mode 120000 index 00000000..07686838 --- /dev/null +++ b/ceph-rbd-mirror/metadata.yaml @@ -0,0 +1 @@ +src/metadata.yaml \ No newline at end of file diff --git a/ceph-rbd-mirror/osci.yaml b/ceph-rbd-mirror/osci.yaml new file mode 100644 index 00000000..a2050c2b --- /dev/null +++ b/ceph-rbd-mirror/osci.yaml @@ -0,0 +1,10 @@ +- project: + templates: + - charm-unit-jobs-py38 + - charm-unit-jobs-py310 + - charm-functional-jobs + vars: + needs_charm_build: true + charm_build_name: ceph-rbd-mirror + build_type: charmcraft + charmcraft_channel: 2.x/stable diff --git a/ceph-rbd-mirror/rebuild b/ceph-rbd-mirror/rebuild new file mode 100644 index 00000000..cccb7641 --- /dev/null +++ b/ceph-rbd-mirror/rebuild @@ -0,0 +1,5 @@ +# This file is used to trigger rebuilds +# when dependencies of the charm change, +# but nothing in the charm needs to. +# simply change the uuid to something new +f9d3918c-b3eb-11eb-a947-c3dd9a34b317 diff --git a/ceph-rbd-mirror/rename.sh b/ceph-rbd-mirror/rename.sh new file mode 100755 index 00000000..d0c35c97 --- /dev/null +++ b/ceph-rbd-mirror/rename.sh @@ -0,0 +1,13 @@ +#!/bin/bash +charm=$(grep "charm_build_name" osci.yaml | awk '{print $2}') +echo "renaming ${charm}_*.charm to ${charm}.charm" +echo -n "pwd: " +pwd +ls -al +echo "Removing bad downloaded charm maybe?" +if [[ -e "${charm}.charm" ]]; +then + rm "${charm}.charm" +fi +echo "Renaming charm here." +mv ${charm}_*.charm ${charm}.charm diff --git a/ceph-rbd-mirror/requirements.txt b/ceph-rbd-mirror/requirements.txt new file mode 100644 index 00000000..b3dc23f7 --- /dev/null +++ b/ceph-rbd-mirror/requirements.txt @@ -0,0 +1,20 @@ +# This file is managed centrally by release-tools and should not be modified +# within individual charm repos. 
See the 'global' dir contents for available +# choices of *requirements.txt files for OpenStack Charms: +# https://github.com/openstack-charmers/release-tools +# +# NOTE(lourot): This might look like a duplication of test-requirements.txt but +# some tox targets use only test-requirements.txt whereas charm-build uses only +# requirements.txt +setuptools<50.0.0 # https://github.com/pypa/setuptools/commit/04e3df22df840c6bb244e9b27bc56750c44b7c85 + +# NOTE: newer versions of cryptography require a Rust compiler to build, +# see +# * https://github.com/openstack-charmers/zaza/issues/421 +# * https://mail.python.org/pipermail/cryptography-dev/2021-January/001003.html +# +cryptography<3.4 + +git+https://github.com/juju/charm-tools.git + +simplejson diff --git a/ceph-rbd-mirror/src/HACKING.md b/ceph-rbd-mirror/src/HACKING.md new file mode 100644 index 00000000..ab232f8f --- /dev/null +++ b/ceph-rbd-mirror/src/HACKING.md @@ -0,0 +1,10 @@ +# Overview + +This charm is developed as part of the OpenStack Charms project, and as such you +should refer to the [OpenStack Charm Development Guide](https://github.com/openstack/charm-guide) for details on how +to contribute to this charm. + +You can find its source code here: . + + + diff --git a/ceph-rbd-mirror/src/README.md b/ceph-rbd-mirror/src/README.md new file mode 100644 index 00000000..672783c9 --- /dev/null +++ b/ceph-rbd-mirror/src/README.md @@ -0,0 +1,106 @@ +# Overview + +[Ceph][ceph-upstream] is a unified, distributed storage system designed for +excellent performance, reliability, and scalability. + +The ceph-rbd-mirror charm deploys the Ceph `rbd-mirror` daemon and helps +automate remote creation and configuration of mirroring for Ceph pools used for +hosting RBD images. + +> **Note**: RBD mirroring is only one aspect of datacentre redundancy. Refer to + [Ceph RADOS Gateway Multisite Replication][ceph-multisite-replication] and + other work to arrive at a complete solution. + +## Functionality + +The charm has the following major features: + +* Support for a maximum of two Ceph clusters. The clusters may reside within a + single model or be contained within two separate models. + +* Specifically written for two-way replication. This provides the ability to + fail over and fall back to/from a single secondary site. Ceph does have + support for mirroring to any number of clusters but the charm does not + support this. + +* Automatically creates and configures (for mirroring) pools in the remote + cluster based on any pools in the local cluster that are labelled with the + 'rbd' tag. + +* Mirroring of whole pools only. Ceph itself has support for the mirroring of + individual images but the charm does not support this. + +* Network space aware. The mirror daemon can be informed about network + configuration by binding the `public` and `cluster` endpoints. The daemon + will use the network associated with the `cluster` endpoint for mirroring + traffic. + +Other notes on RBD mirroring: + +* Supports multiple running instances of the mirror daemon in each cluster. + Doing so allows for the dynamic re-distribution of the mirroring load amongst + the daemons. This addresses both high availability and performance concerns. + Leverage this feature by scaling out the ceph-rbd-mirror application (i.e. + add more units). + +* Requires that every RBD image within each pool is created with the + `journaling` and `exclusive-lock` image features enabled. 
The charm enables + these features by default and the ceph-mon charm will announce them over the + `client` relation when it has units connected to its `rbd-mirror` endpoint. + +* The feature first appeared in Ceph Luminous (OpenStack Queens). + +# Usage + +## Configuration + +See file `config.yaml` of the built charm (or see the charm in the [Charm +Store][cs-ceph-rbd-mirror]) for the full list of configuration options, along +with their descriptions and default values. See the [Juju +documentation][juju-docs-config-apps] for details on configuring applications. + +## Deployment + +A standard topology consists of two Ceph clusters with each cluster residing in +a separate Juju model. The deployment steps are a fairly involved and are +therefore covered under [Ceph RBD Mirroring][cdg-rbd-mirroring] in the +[OpenStack Charms Deployment Guide][cdg]. + +## Actions + +This section lists Juju [actions][juju-docs-actions] supported by the charm. +Actions allow specific operations to be performed on a per-unit basis. To +display action descriptions run `juju actions ceph-rbd-mirror`. If the charm is +not deployed then see file `actions.yaml`. + +* `copy-pool` +* `demote` +* `promote` +* `refresh-pools` +* `resync-pools` +* `status` + +## Operations + +Operational procedures touch upon pool creation, failover & fallback, and +recovering from an abrupt shutdown. These topics are also covered under [Ceph +RBD Mirroring][cdg-rbd-mirroring] in the [OpenStack Charms Deployment +Guide][cdg]. + +# Bugs + +Please report bugs on [Launchpad][lp-bugs-charm-ceph-rbd-mirror]. + +For general charm questions refer to the [OpenStack Charm Guide][cg]. + + + +[cg]: https://docs.openstack.org/charm-guide +[cdg]: https://docs.openstack.org/project-deploy-guide/charm-deployment-guide/latest/index.html +[ceph-upstream]: https://ceph.io +[ceph-multisite-replication]: https://docs.openstack.org/project-deploy-guide/charm-deployment-guide/latest/app-rgw-multisite.html +[cdg-rbd-mirroring]: https://docs.openstack.org/project-deploy-guide/charm-deployment-guide/latest/app-ceph-rbd-mirror.html +[lp-bugs-charm-ceph-rbd-mirror]: https://bugs.launchpad.net/charm-ceph-rbd-mirror/+filebug +[juju-docs-actions]: https://jaas.ai/docs/actions +[juju-docs-config-apps]: https://juju.is/docs/configuring-applications +[cs-ceph-rbd-mirror]: https://jaas.ai/ceph-rbd-mirror diff --git a/ceph-rbd-mirror/src/actions.yaml b/ceph-rbd-mirror/src/actions.yaml new file mode 100644 index 00000000..dccfc81e --- /dev/null +++ b/ceph-rbd-mirror/src/actions.yaml @@ -0,0 +1,64 @@ +demote: + description: | + Demote all primary images within given pools to non-primary. + params: + force: + type: boolean + pools: + type: string + description: | + Comma-separated list of pools to demote. If this is not set, all the + pools will be demoted. +promote: + description: | + Promote all non-primary images within given pools to primary. + params: + force: + type: boolean + pools: + type: string + description: | + Comma-separated list of pools to promote. If this is not set, all the + pools will be promoted. +refresh-pools: + description: | + \ + Refresh list of pools from local and remote Ceph endpoint. + As a side effect, mirroring will be configured for any manually created + pools that the charm currently does not know about. +resync-pools: + description: | + \ + USE WITH CAUTION - Force image resync for all images in the given + pools on local Ceph endpoint. 
+ params: + i-really-mean-it: + type: boolean + description: | + This must be set to true to perform the action + pools: + type: string + description: | + Comma-separated list of pools to resync from the local Ceph endpoint. + If this is not set, all the pools from the local Ceph endpoint will + be resynced. + required: + - i-really-mean-it +status: + description: | + Get mirror pool status + params: + verbose: + type: boolean + format: + type: string + default: plain + enum: + - plain + - json + - xml + pools: + type: string + description: | + Comma-separated list of pools to include in the status. If this is + not set, all the pools will be included. diff --git a/ceph-rbd-mirror/src/actions/actions.py b/ceph-rbd-mirror/src/actions/actions.py new file mode 100755 index 00000000..92465f77 --- /dev/null +++ b/ceph-rbd-mirror/src/actions/actions.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 +# Copyright 2019 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import collections +import json +import os +import subprocess +import sys + +# Load basic layer module from $CHARM_DIR/lib +sys.path.append('lib') +from charms.layer import basic + +# setup module loading from charm venv +basic.bootstrap_charm_deps() + +import charms.reactive as reactive +import charmhelpers.core as ch_core +import charms_openstack.bus +import charms_openstack.charm + +# load reactive interfaces +reactive.bus.discover() +# load Endpoint based interface data +ch_core.hookenv._run_atstart() + +# load charm class +charms_openstack.bus.discover() + + +def get_pools(): + """Get the list of pools given as parameter to perform the actions on.""" + pools = ch_core.hookenv.action_get('pools') + if pools: + return [p.strip() for p in pools.split(',')] + return None + + +def rbd_mirror_action(args): + """Perform RBD command on pools in local Ceph endpoint.""" + action_name = os.path.basename(args[0]) + with charms_openstack.charm.provide_charm_instance() as charm: + ceph_local = reactive.endpoint_from_name('ceph-local') + pools = get_pools() + if not pools: + pools = charm.eligible_pools(ceph_local.pools) + result = {} + cmd = ['rbd', '--id', charm.ceph_id, 'mirror', 'pool', action_name] + if ch_core.hookenv.action_get('force'): + cmd += ['--force'] + if ch_core.hookenv.action_get('verbose'): + cmd += ['--verbose'] + output_format = ch_core.hookenv.action_get('format') + if output_format: + cmd += ['--format', output_format] + for pool in pools: + output = subprocess.check_output(cmd + [pool], + stderr=subprocess.STDOUT, + universal_newlines=True) + if output_format == 'json': + result[pool] = json.loads(output) + else: + result[pool] = output.rstrip() + if output_format == 'json': + ch_core.hookenv.action_set({'output': json.dumps(result)}) + else: + output_str = '' + for pool, output in result.items(): + if output_str: + output_str += '\n' + output_str += '{}: {}'.format(pool, output) + ch_core.hookenv.action_set({'output': output_str}) + + +def refresh_pools(args): + """Refresh list of pools from Ceph. 
+ + This is done by updating data on relations to ceph-mons, which leads to + them updating the relation data they have with us in response. + + Due to how the reactive framework handles publishing of relation data we + must do this by setting a flag and running the reactive handlers, emulating + a full hook execution. + """ + if not reactive.is_flag_set('leadership.is_leader'): + ch_core.hookenv.action_fail('run action on the leader unit') + return + + # set and flush flag to disk + reactive.set_flag('refresh.pools') + ch_core.unitdata._KV.flush() + + # run reactive handlers to deal with flag + return reactive.main() + + +def resync_pools(args): + """Force image resync on pools in local Ceph endpoint.""" + if not ch_core.hookenv.action_get('i-really-mean-it'): + ch_core.hookenv.action_fail('Required parameter not set') + return + with charms_openstack.charm.provide_charm_instance() as charm: + ceph_local = reactive.endpoint_from_name('ceph-local') + pools = get_pools() + if not pools: + pools = charm.eligible_pools(ceph_local.pools) + result = collections.defaultdict(dict) + for pool in pools: + # list images in pool + output = subprocess.check_output( + ['rbd', '--id', charm.ceph_id, '--format', 'json', + '-p', pool, 'ls'], universal_newlines=True) + images = json.loads(output) + for image in images: + output = subprocess.check_output( + ['rbd', '--id', charm.ceph_id, '--format', 'json', 'info', + '{}/{}'.format(pool, image)], universal_newlines=True) + image_info = json.loads(output) + if image_info['mirroring']['state'] == 'disabled': + continue + output = subprocess.check_output( + ['rbd', '--id', charm.ceph_id, 'mirror', 'image', 'resync', + '{}/{}'.format(pool, image)], universal_newlines=True) + result[pool][image] = output.rstrip() + output_str = '' + for pool in result: + for image in result[pool]: + if output_str: + output_str += '\n' + output_str += '{}/{}: {}'.format(pool, image, + result[pool][image]) + ch_core.hookenv.action_set({'output': output_str}) + + +ACTIONS = { + 'demote': rbd_mirror_action, + 'promote': rbd_mirror_action, + 'refresh-pools': refresh_pools, + 'resync-pools': resync_pools, + 'status': rbd_mirror_action, +} + + +def main(args): + action_name = os.path.basename(args[0]) + try: + action = ACTIONS[action_name] + except KeyError: + return 'Action {} is undefined'.format(action_name) + + try: + action(args) + except Exception as e: + ch_core.hookenv.action_fail(str(e)) + + +if __name__ == '__main__': + sys.exit(main(sys.argv)) diff --git a/ceph-rbd-mirror/src/actions/demote b/ceph-rbd-mirror/src/actions/demote new file mode 120000 index 00000000..405a394e --- /dev/null +++ b/ceph-rbd-mirror/src/actions/demote @@ -0,0 +1 @@ +actions.py \ No newline at end of file diff --git a/ceph-rbd-mirror/src/actions/promote b/ceph-rbd-mirror/src/actions/promote new file mode 120000 index 00000000..405a394e --- /dev/null +++ b/ceph-rbd-mirror/src/actions/promote @@ -0,0 +1 @@ +actions.py \ No newline at end of file diff --git a/ceph-rbd-mirror/src/actions/refresh-pools b/ceph-rbd-mirror/src/actions/refresh-pools new file mode 120000 index 00000000..405a394e --- /dev/null +++ b/ceph-rbd-mirror/src/actions/refresh-pools @@ -0,0 +1 @@ +actions.py \ No newline at end of file diff --git a/ceph-rbd-mirror/src/actions/resync-pools b/ceph-rbd-mirror/src/actions/resync-pools new file mode 120000 index 00000000..405a394e --- /dev/null +++ b/ceph-rbd-mirror/src/actions/resync-pools @@ -0,0 +1 @@ +actions.py \ No newline at end of file diff --git a/ceph-rbd-mirror/src/actions/status
b/ceph-rbd-mirror/src/actions/status new file mode 120000 index 00000000..405a394e --- /dev/null +++ b/ceph-rbd-mirror/src/actions/status @@ -0,0 +1 @@ +actions.py \ No newline at end of file diff --git a/ceph-rbd-mirror/src/config.yaml b/ceph-rbd-mirror/src/config.yaml new file mode 100644 index 00000000..a985d462 --- /dev/null +++ b/ceph-rbd-mirror/src/config.yaml @@ -0,0 +1 @@ +options: {} diff --git a/ceph-rbd-mirror/src/copyright b/ceph-rbd-mirror/src/copyright new file mode 100644 index 00000000..27b9a9ec --- /dev/null +++ b/ceph-rbd-mirror/src/copyright @@ -0,0 +1,6 @@ +Format: http://dep.debian.net/deps/dep5/ + +Files: * +Copyright: Copyright 2018, Canonical Ltd +License: Apache-2.0 + diff --git a/ceph-rbd-mirror/src/files/.gitkeep b/ceph-rbd-mirror/src/files/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/ceph-rbd-mirror/src/icon.svg b/ceph-rbd-mirror/src/icon.svg new file mode 100644 index 00000000..45b29930 --- /dev/null +++ b/ceph-rbd-mirror/src/icon.svg @@ -0,0 +1,316 @@ [316 lines of SVG vector markup for the charm icon; not reproduced here] diff --git a/ceph-rbd-mirror/src/layer.yaml b/ceph-rbd-mirror/src/layer.yaml new file mode 100644 index 00000000..757f2093 --- /dev/null +++ b/ceph-rbd-mirror/src/layer.yaml @@ -0,0 +1,17 @@ +includes: + - layer:leadership + - layer:ceph + - interface:ceph-rbd-mirror + - interface:nrpe-external-master +options: + basic: + use_venv: True +repo: https://github.com/openstack/charm-ceph-rbd-mirror +config: + deletes: + - debug + - ssl_ca + - ssl_cert + - ssl_key + - use-internal-endpoints + - verbose diff --git a/ceph-rbd-mirror/src/lib/__init__.py b/ceph-rbd-mirror/src/lib/__init__.py new file mode 100644 index 00000000..68451dd0 --- /dev/null +++ b/ceph-rbd-mirror/src/lib/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2018 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-rbd-mirror/src/lib/charm/__init__.py b/ceph-rbd-mirror/src/lib/charm/__init__.py new file mode 100644 index 00000000..68451dd0 --- /dev/null +++ b/ceph-rbd-mirror/src/lib/charm/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2018 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
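The per-action files added above (`demote`, `promote`, `refresh-pools`, `resync-pools`, `status`) are all symlinks to the same `actions.py`, which dispatches on the basename of the invoked file. A minimal, self-contained sketch of that dispatch pattern (the handlers here are placeholders, not the charm's real ones):

```python
#!/usr/bin/env python3
"""Sketch of the symlink-dispatch pattern used by actions.py above."""
import os
import sys


def demote(args):
    # Placeholder: the real handler shells out to `rbd mirror pool demote`.
    print('would demote pools')


def promote(args):
    # Placeholder: the real handler shells out to `rbd mirror pool promote`.
    print('would promote pools')


# Map the symlink's basename to its handler; one file serves every action.
ACTIONS = {'demote': demote, 'promote': promote}


def main(args):
    action_name = os.path.basename(args[0])
    try:
        action = ACTIONS[action_name]
    except KeyError:
        return 'Action {} is undefined'.format(action_name)
    return action(args)


if __name__ == '__main__':
    sys.exit(main(sys.argv))
```

Keeping the logic in one module means the action metadata lives in `actions.yaml` while a single entry point handles bootstrap and dispatch.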
diff --git a/ceph-rbd-mirror/src/lib/charm/openstack/__init__.py b/ceph-rbd-mirror/src/lib/charm/openstack/__init__.py new file mode 100644 index 00000000..68451dd0 --- /dev/null +++ b/ceph-rbd-mirror/src/lib/charm/openstack/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2018 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-rbd-mirror/src/lib/charm/openstack/ceph_rbd_mirror.py b/ceph-rbd-mirror/src/lib/charm/openstack/ceph_rbd_mirror.py new file mode 100644 index 00000000..3446c619 --- /dev/null +++ b/ceph-rbd-mirror/src/lib/charm/openstack/ceph_rbd_mirror.py @@ -0,0 +1,234 @@ +# Copyright 2018 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import collections +import json +import socket +import subprocess + +import charms.reactive as reactive + +import charms_openstack.charm +import charms_openstack.adapters +import charms_openstack.plugins + +import charmhelpers.core as ch_core +import charmhelpers.contrib.storage.linux.ceph as ch_ceph + + +class CephRBDMirrorCharmRelationAdapters( + charms_openstack.adapters.OpenStackRelationAdapters): + relation_adapters = { + 'ceph_local': charms_openstack.plugins.CephRelationAdapter, + 'ceph_remote': charms_openstack.plugins.CephRelationAdapter, + } + + +class CephRBDMirrorCharm(charms_openstack.plugins.CephCharm): + # We require Ceph 12.2 Luminous or later for HA support in the Ceph + # rbd-mirror daemon. Luminous appears in UCA at pike. + release = 'pike' + name = 'ceph-rbd-mirror' + python_version = 3 + packages = ['rbd-mirror'] + required_relations = ['ceph-local', 'ceph-remote'] + user = 'ceph' + group = 'ceph' + adapters_class = CephRBDMirrorCharmRelationAdapters + ceph_service_name_override = 'rbd-mirror' + ceph_key_per_unit_name = True + + def __init__(self, **kwargs): + self.ceph_id = 'rbd-mirror.{}'.format(socket.gethostname()) + self.services = [ + 'ceph-rbd-mirror@{}'.format(self.ceph_id), + ] + self.restart_map = { + '/etc/ceph/ceph.conf': self.services, + '/etc/ceph/remote.conf': self.services, + } + super().__init__(**kwargs) + + def eligible_pools(self, pools): + """Filter eligible pools. + + :param pools: Dictionary with detailed pool information as provided + over the ``ceph-rbd-mirror`` interface provided by the + ``ceph-mon`` charm. + :type pools: dict + :returns: Dictionary with detailed pool information for pools eligible + for mirroring. 
+ :rtype: dict + """ + return {pool: attrs for pool, attrs in pools.items() + if 'rbd' in attrs['applications']} + + def custom_assess_status_check(self): + """Provide mirrored pool statistics through juju status.""" + if (reactive.is_flag_set('config.rendered') and + reactive.is_flag_set('ceph-local.available') and + reactive.is_flag_set('ceph-remote.available')): + endpoint = reactive.endpoint_from_flag('ceph-local.available') + try: + stats = self.mirror_pools_summary( + self.eligible_pools(endpoint.pools)) + except subprocess.CalledProcessError as e: + ch_core.hookenv.log('Unable to retrieve mirror pool status: ' + '"{}"'.format(e)) + return None, None + ch_core.hookenv.log('mirror_pools_summary = "{}"' + .format(stats), + level=ch_core.hookenv.DEBUG) + status = 'active' + pool_msg = '' + image_msg = '' + for health, count in stats['pool_health'].items(): + if not pool_msg: + pool_msg = 'Pools ' + pool_msg += '{} ({}) '.format(health, count) + + # Disabling blocked state until + # https://bugs.launchpad.net/charm-ceph-rbd-mirror/+bug/1879749 + # is resolved + # if health != 'OK': + # status = 'blocked' + for state, count in stats['image_states'].items(): + if not image_msg: + image_msg = 'Images ' + if state == 'stopped': + state_name = 'Primary' + elif state == 'replaying': + state_name = 'Secondary' + else: + state_name = state + image_msg += '{} ({}) '.format(state_name, count) + msg = '' + if pool_msg: + msg = 'Unit is ready ({})'.format( + pool_msg + image_msg.rstrip()) + else: + status = 'waiting' + msg = 'Waiting for pools to be created' + return status, msg + return None, None + + def _mirror_pool_info(self, pool): + output = subprocess.check_output(['rbd', '--id', self.ceph_id, + 'mirror', 'pool', 'info', '--format', + 'json', pool], + universal_newlines=True) + return json.loads(output) + + def mirror_pool_enabled(self, pool, mode='pool'): + return self._mirror_pool_info(pool).get('mode', None) == mode + + def mirror_pool_has_peers(self, pool): + return len(self._mirror_pool_info(pool).get('peers', [])) > 0 + + def mirror_pool_status(self, pool): + output = subprocess.check_output(['rbd', '--id', self.ceph_id, + 'mirror', 'pool', 'status', + '--format', 'json', '--verbose', + pool], + universal_newlines=True) + return json.loads(output) + + def mirror_pools_summary(self, pools): + stats = {} + stats['pool_health'] = collections.defaultdict(int) + stats['image_states'] = collections.defaultdict(int) + for pool in pools: + pool_stat = self.mirror_pool_status(pool) + stats['pool_health'][pool_stat['summary']['health']] += 1 + for state, value in pool_stat['summary']['states'].items(): + stats['image_states'][state] += value + return stats + + def mirror_pool_enable(self, pool, mode='pool'): + base_cmd = ['rbd', '--id', self.ceph_id, 'mirror', 'pool'] + subprocess.check_call(base_cmd + ['enable', pool, mode]) + subprocess.check_call(base_cmd + ['peer', 'add', pool, + 'client.{}@remote' + .format(self.ceph_id)]) + + def pools_in_broker_request(self, rq, ops_to_check=None): + """Extract pool names touched by a broker request. 
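+ + A broker request carries a list of ``ops``; an illustrative shape for a + create-pool op (matching how this charm reads it elsewhere) is + ``{'op': 'create-pool', 'name': 'mypool', 'app-name': 'rbd'}``, and only + the ``name`` of ops listed in ``ops_to_check`` is collected.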
+ + :param rq: Ceph Broker Request Object + :type rq: ch_ceph.CephBrokerRq + :param ops_to_check: Set providing which ops to check + :type ops_to_check: Optional[Set[str]] + :returns: Set of pool names + :rtype: Set[str] + """ + assert rq.api_version == 1 + ops_to_check = ops_to_check or set(('create-pool',)) + result_set = set() + for op in rq.ops: + if op['op'] in ops_to_check: + result_set.add(op['name']) + return result_set + + def pool_mirroring_mode(self, pool, broker_requests=[]): + """Get the Ceph RBD mirroring mode for the pool. + + Checks whether the pool's RBD mirroring mode was explicitly set as + part of the 'create-pool' operation in any of the given broker + requests. If so, that value is returned; otherwise the default 'pool' + mirroring mode is used. + + :param pool: Pool name + :type pool: str + :param broker_requests: List of broker requests + :type broker_requests: List[ch_ceph.CephBrokerRq] + :returns: Ceph RBD mirroring mode + :rtype: str + """ + default_mirroring_mode = 'pool' + for rq in broker_requests: + if not rq: + continue + assert rq.api_version == 1 + for op in rq.ops: + if op['op'] == 'create-pool' and op['name'] == pool: + return op.get( + 'rbd-mirroring-mode', default_mirroring_mode) + return default_mirroring_mode + + def collapse_and_filter_broker_requests(self, broker_requests, + allowed_ops, require_vp=None): + """Extract allowed ops from broker requests into one collapsed request. + + :param broker_requests: List of broker requests + :type broker_requests: List[ch_ceph.CephBrokerRq] + :param allowed_ops: Set of ops to allow + :type allowed_ops: Set + :param require_vp: Map of required key-value pairs in op + :type require_vp: Optional[Dict[str,any]] + :returns: Collapsed broker request + :rtype: Optional[ch_ceph.CephBrokerRq] + """ + require_vp = require_vp or {} + new_rq = ch_ceph.CephBrokerRq() + for rq in broker_requests: + assert rq['api-version'] == 1 + for op in rq['ops']: + if op['op'] in allowed_ops: + for k, v in require_vp.items(): + if k not in op or op[k] != v: + break + else: + new_rq.add_op(op) + if len(new_rq.ops): + return new_rq diff --git a/ceph-rbd-mirror/src/metadata.yaml b/ceph-rbd-mirror/src/metadata.yaml new file mode 100644 index 00000000..1ed8ab6f --- /dev/null +++ b/ceph-rbd-mirror/src/metadata.yaml @@ -0,0 +1,33 @@ +name: ceph-rbd-mirror +summary: Highly scalable distributed storage - Ceph RBD Mirroring +maintainer: OpenStack Charmers +description: | + RBD images can be asynchronously mirrored between two Ceph clusters. This + capability uses the RBD journaling image feature to ensure crash-consistent + replication between clusters. The charm automatically creates pools used for + RBD images on the remote cluster and configures mirroring. Pools tagged with + the ``rbd`` application are selected. + + NOTE: The charm requires Ceph Luminous or later.
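+# NOTE: the 'ceph-local' and 'ceph-remote' relations declared under +# 'requires' below share the ceph-rbd-mirror interface; 'ceph-remote' should +# be related to the ceph-mon application in the other cluster.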
+docs: https://discourse.charmhub.io/t/ceph-rbd-mirror-docs-index/11006 +tags: +- openstack +- storage +- file-servers +- misc +series: +- focal +- jammy +extra-bindings: + public: + cluster: +subordinate: false +provides: + nrpe-external-master: + interface: nrpe-external-master + scope: container +requires: + ceph-local: + interface: ceph-rbd-mirror + ceph-remote: + interface: ceph-rbd-mirror diff --git a/ceph-rbd-mirror/src/reactive/__init__.py b/ceph-rbd-mirror/src/reactive/__init__.py new file mode 100644 index 00000000..68451dd0 --- /dev/null +++ b/ceph-rbd-mirror/src/reactive/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2018 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ceph-rbd-mirror/src/reactive/ceph_rbd_mirror_handlers.py b/ceph-rbd-mirror/src/reactive/ceph_rbd_mirror_handlers.py new file mode 100644 index 00000000..dcf24cc8 --- /dev/null +++ b/ceph-rbd-mirror/src/reactive/ceph_rbd_mirror_handlers.py @@ -0,0 +1,159 @@ +# Copyright 2018 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
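+ +# The handlers below implement the charm's flow: request cephx keys from both +# ceph-mon endpoints, render ceph.conf/remote.conf once the keys arrive, then +# enable mirroring (and forward pool creation) for eligible pools.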
+ +import charms.reactive as reactive + +import charms_openstack.bus +import charms_openstack.charm as charm + +import charmhelpers.core as ch_core +import charmhelpers.contrib.storage.linux.ceph as ch_ceph + + +charms_openstack.bus.discover() + +# Use the charms.openstack defaults for common states and hooks +charm.use_defaults( + 'charm.installed', + 'config.rendered', + 'update-status', + 'upgrade-charm') + + +@reactive.when_none('is-update-status-hook', + 'ceph-local.available', + 'ceph-remote.available') +@reactive.when('ceph-local.connected', + 'ceph-remote.connected') +def request_keys(): + with charm.provide_charm_instance() as charm_instance: + for flag in ('ceph-local.connected', 'ceph-remote.connected'): + endpoint = reactive.endpoint_from_flag(flag) + ch_core.hookenv.log('Ceph endpoint "{}" connected, requesting key' + .format(endpoint.endpoint_name), + level=ch_core.hookenv.INFO) + endpoint.request_key() + charm_instance.assess_status() + + +@reactive.when_none('is-update-status-hook') +@reactive.when('config.changed', + 'ceph-local.available', + 'ceph-remote.available') +def config_changed(): + with charm.provide_charm_instance() as charm_instance: + charm_instance.upgrade_if_available([ + reactive.endpoint_from_flag('ceph-local.available'), + reactive.endpoint_from_flag('ceph-remote.available'), + ]) + charm_instance.assess_status() + + +@reactive.when_none('is-update-status-hook') +@reactive.when('ceph-local.available', + 'ceph-remote.available') +def render_stuff(*args): + with charm.provide_charm_instance() as charm_instance: + for endpoint in args: + if not endpoint.key: + ch_core.hookenv.log('Ceph endpoint "{}" flagged available yet ' + 'has no key. Relation is probably departing.' + .format(endpoint.endpoint_name), + level=ch_core.hookenv.INFO) + return + ch_core.hookenv.log('Ceph endpoint "{}" available, configuring ' + 'keyring'.format(endpoint.endpoint_name), + level=ch_core.hookenv.INFO) + ch_core.hookenv.log('Pools: "{}"'.format(endpoint.pools), + level=ch_core.hookenv.INFO) + + cluster_name = ( + 'remote') if endpoint.endpoint_name == 'ceph-remote' else None + charm_instance.configure_ceph_keyring(endpoint.key, + cluster_name=cluster_name) + charm_instance.render_with_interfaces(args) + reactive.set_flag('config.rendered') + + +@reactive.when_none('is-update-status-hook') +@reactive.when('leadership.is_leader', + 'refresh.pools', + 'ceph-local.available', + 'ceph-remote.available') +def refresh_pools(): + for endpoint in 'ceph-local', 'ceph-remote': + endpoint = reactive.endpoint_from_name(endpoint) + endpoint.refresh_pools() + reactive.clear_flag('refresh.pools') + + +@reactive.when_none('is-update-status-hook') +@reactive.when('leadership.is_leader', + 'config.rendered', + 'ceph-local.available', + 'ceph-remote.available') +def configure_pools(): + local = reactive.endpoint_from_flag('ceph-local.available') + remote = reactive.endpoint_from_flag('ceph-remote.available') + with charm.provide_charm_instance() as charm_instance: + rq = charm_instance.collapse_and_filter_broker_requests( + local.broker_requests, set(('create-pool',)), + require_vp={'app-name': 'rbd'}) + remote_rq = charm_instance.collapse_and_filter_broker_requests( + remote.broker_requests, set(('create-pool',)), + require_vp={'app-name': 'rbd'}) + pools_in_rq = charm_instance.pools_in_broker_request( + rq) if rq else set() + pools_in_rq |= charm_instance.pools_in_broker_request( + remote_rq) if remote_rq else set() + for pool, attrs in charm_instance.eligible_pools(local.pools).items(): + pool_mirroring_mode =
charm_instance.pool_mirroring_mode( + pool, [rq, remote_rq]) + mirroring_enabled = charm_instance.mirror_pool_enabled( + pool, pool_mirroring_mode) + has_peers = charm_instance.mirror_pool_has_peers(pool) + if not (mirroring_enabled and has_peers): + ch_core.hookenv.log('Enabling mirroring for pool "{}"' + .format(pool), + level=ch_core.hookenv.INFO) + charm_instance.mirror_pool_enable(pool, pool_mirroring_mode) + if (pool not in pools_in_rq and + 'erasure_code_profile' not in attrs['parameters']): + # A pool exists for which there is no broker request, which + # means it was created manually. We forward creation of + # replicated pools, but forwarding of manually created + # Erasure Coded pools is not supported. + pg_num = attrs['parameters'].get('pg_num') + max_bytes = attrs['quota'].get('max_bytes') + max_objects = attrs['quota'].get('max_objects') + size = attrs['parameters'].get('size') + ch_core.hookenv.log('Adding manually created pool "{}" to ' + 'request.' + .format(pool), + level=ch_core.hookenv.INFO) + if not rq: + rq = ch_ceph.CephBrokerRq() + rq.add_op_create_replicated_pool( + pool, + replica_count=size if not size else int(size), + pg_num=pg_num if not pg_num else int(pg_num), + app_name='rbd', + max_bytes=max_bytes if not max_bytes else int(max_bytes), + max_objects=max_objects if not max_objects else int( + max_objects), + ) + ch_core.hookenv.log('Request for evaluation: "{}"' + .format(rq), + level=ch_core.hookenv.DEBUG) + if rq: + remote.maybe_send_rq(rq) diff --git a/ceph-rbd-mirror/src/templates/ceph.conf b/ceph-rbd-mirror/src/templates/ceph.conf new file mode 100644 index 00000000..fb7a2847 --- /dev/null +++ b/ceph-rbd-mirror/src/templates/ceph.conf @@ -0,0 +1,19 @@ +############################################################################### +# [ WARNING ] +# ceph configuration file maintained by Juju +# local changes may be overwritten. +############################################################################### +[global] +{% if ceph_local.auth -%} +auth_supported = {{ ceph_local.auth }} +keyring = /etc/ceph/$cluster.$name.keyring +mon host = {{ ceph_local.monitors }} +{% endif -%} +log to syslog = {{ options.use_syslog }} +err to syslog = {{ options.use_syslog }} +clog to syslog = {{ options.use_syslog }} + +public network = {{ ceph_local.public_network }} +{% if ceph_local.cluster_network %} +cluster network = {{ ceph_local.cluster_network }} +{% endif -%} diff --git a/ceph-rbd-mirror/src/templates/remote.conf b/ceph-rbd-mirror/src/templates/remote.conf new file mode 100644 index 00000000..64ab359f --- /dev/null +++ b/ceph-rbd-mirror/src/templates/remote.conf @@ -0,0 +1,19 @@ +############################################################################### +# [ WARNING ] +# ceph configuration file maintained by Juju +# local changes may be overwritten.
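+# This file points the rbd-mirror daemon at the remote cluster; it is +# rendered from data provided over the ceph-remote relation.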
+############################################################################### +[global] +{% if ceph_remote.auth -%} +auth_supported = {{ ceph_remote.auth }} +keyring = /etc/ceph/$cluster.$name.keyring +mon host = {{ ceph_remote.monitors }} +{% endif -%} +log to syslog = {{ options.use_syslog }} +err to syslog = {{ options.use_syslog }} +clog to syslog = {{ options.use_syslog }} + +public network = {{ ceph_remote.public_network }} +{% if ceph_remote.cluster_network %} +cluster network = {{ ceph_remote.cluster_network }} +{% endif -%} diff --git a/ceph-rbd-mirror/src/test-requirements.txt b/ceph-rbd-mirror/src/test-requirements.txt new file mode 100644 index 00000000..e7710236 --- /dev/null +++ b/ceph-rbd-mirror/src/test-requirements.txt @@ -0,0 +1,9 @@ +# This file is managed centrally by release-tools and should not be modified +# within individual charm repos. See the 'global' dir contents for available +# choices of *requirements.txt files for OpenStack Charms: +# https://github.com/openstack-charmers/release-tools +# + +# Functional Test Requirements (let Zaza's dependencies solve all dependencies here!) +git+https://github.com/openstack-charmers/zaza.git#egg=zaza +git+https://github.com/openstack-charmers/zaza-openstack-tests.git#egg=zaza.openstack diff --git a/ceph-rbd-mirror/src/tests/bundles/focal-yoga.yaml b/ceph-rbd-mirror/src/tests/bundles/focal-yoga.yaml new file mode 100644 index 00000000..9b8f51b6 --- /dev/null +++ b/ceph-rbd-mirror/src/tests/bundles/focal-yoga.yaml @@ -0,0 +1,187 @@ +variables: + openstack-origin: &openstack-origin cloud:focal-yoga + +local_overlay_enabled: False + +series: &series focal + +machines: + '0': + constraints: "mem=3072M" + '1': + constraints: "mem=3072M" + '2': + constraints: "mem=3072M" + +applications: + + keystone-mysql-router: + charm: ch:mysql-router + channel: latest/edge + glance-mysql-router: + charm: ch:mysql-router + channel: latest/edge + cinder-mysql-router: + charm: ch:mysql-router + channel: latest/edge + + mysql-innodb-cluster: + charm: ch:mysql-innodb-cluster + num_units: 3 + options: + source: *openstack-origin + to: + - '0' + - '1' + - '2' + channel: latest/edge + + keystone: + charm: ch:keystone + num_units: 1 + options: + openstack-origin: *openstack-origin + channel: yoga/edge + + rabbitmq-server: + charm: ch:rabbitmq-server + num_units: 1 + options: + source: *openstack-origin + channel: latest/edge + + cinder: + charm: ch:cinder + num_units: 1 + options: + block-device: None + glance-api-version: 2 + openstack-origin: *openstack-origin + channel: yoga/edge + + cinder-ceph: + charm: ch:cinder-ceph + num_units: 0 + channel: yoga/edge + + glance: + charm: ch:glance + num_units: 1 + options: + openstack-origin: *openstack-origin + channel: yoga/edge + + nova-compute: + charm: ch:nova-compute + num_units: 1 + options: + openstack-origin: *openstack-origin + channel: yoga/edge + + ceph-mon: + charm: ch:ceph-mon + num_units: 3 + options: + expected-osd-count: 3 + source: *openstack-origin + channel: quincy/edge + + ceph-osd: + charm: ch:ceph-osd + num_units: 3 + storage: + osd-devices: 'cinder,10G' + options: + source: *openstack-origin + bluestore: False + use-direct-io: False + osd-devices: '/dev/test-non-existent' + channel: quincy/edge + + ceph-rbd-mirror: + series: *series + charm: ../../../ceph-rbd-mirror.charm + num_units: 1 + options: + source: *openstack-origin + + ceph-mon-b: + charm: ch:ceph-mon + num_units: 3 + options: + expected-osd-count: 3 + source: *openstack-origin + channel: quincy/edge + + ceph-osd-b:
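+ # OSDs for the second (site-b) cluster; topology mirrors ceph-osd above.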
charm: ch:ceph-osd + num_units: 3 + storage: + osd-devices: 'cinder,10G' + options: + source: *openstack-origin + bluestore: False + use-direct-io: False + osd-devices: '/dev/test-non-existent' + channel: quincy/edge + + ceph-rbd-mirror-b: + series: *series + charm: ../../../ceph-rbd-mirror.charm + num_units: 1 + options: + source: *openstack-origin + +relations: + +- - keystone:shared-db + - keystone-mysql-router:shared-db +- - keystone-mysql-router:db-router + - mysql-innodb-cluster:db-router + +- - glance:shared-db + - glance-mysql-router:shared-db +- - glance-mysql-router:db-router + - mysql-innodb-cluster:db-router + +- - cinder:shared-db + - cinder-mysql-router:shared-db +- - cinder-mysql-router:db-router + - mysql-innodb-cluster:db-router + +- - rabbitmq-server + - cinder + +- - 'keystone:identity-service' + - cinder +- - 'keystone:identity-service' + - glance + +- - cinder + - cinder-ceph +- - cinder-ceph:ceph + - ceph-mon:client + +- - nova-compute:ceph-access + - cinder-ceph:ceph-access +- - nova-compute:amqp + - rabbitmq-server:amqp + +- - glance:image-service + - nova-compute:image-service +- - glance + - ceph-mon + +- - ceph-mon:osd + - ceph-osd:mon +- - ceph-mon + - ceph-rbd-mirror:ceph-local +- - ceph-mon + - ceph-rbd-mirror-b:ceph-remote + +- - ceph-mon-b:osd + - ceph-osd-b:mon +- - ceph-mon-b + - ceph-rbd-mirror-b:ceph-local +- - ceph-mon-b + - ceph-rbd-mirror:ceph-remote diff --git a/ceph-rbd-mirror/src/tests/bundles/jammy-antelope.yaml b/ceph-rbd-mirror/src/tests/bundles/jammy-antelope.yaml new file mode 100644 index 00000000..f8a08dea --- /dev/null +++ b/ceph-rbd-mirror/src/tests/bundles/jammy-antelope.yaml @@ -0,0 +1,184 @@ +variables: + openstack-origin: &openstack-origin cloud:jammy-antelope + series: &series jammy + +local_overlay_enabled: False + +series: *series + +machines: + '0': + constraints: "mem=3072M" + '1': + constraints: "mem=3072M" + '2': + constraints: "mem=3072M" + +applications: + + keystone-mysql-router: + charm: ch:mysql-router + channel: latest/edge + glance-mysql-router: + charm: ch:mysql-router + channel: latest/edge + cinder-mysql-router: + charm: ch:mysql-router + channel: latest/edge + + mysql-innodb-cluster: + charm: ch:mysql-innodb-cluster + num_units: 3 + to: + - '0' + - '1' + - '2' + channel: 8.0.19/edge + + keystone: + charm: ch:keystone + num_units: 1 + options: + openstack-origin: *openstack-origin + channel: 2023.1/edge + + rabbitmq-server: + charm: ch:rabbitmq-server + num_units: 1 + channel: 3.9/edge + + cinder: + charm: ch:cinder + num_units: 1 + options: + block-device: None + glance-api-version: 2 + openstack-origin: *openstack-origin + channel: 2023.1/edge + + cinder-ceph: + charm: ch:cinder-ceph + num_units: 0 + channel: 2023.1/edge + + glance: + charm: ch:glance + num_units: 1 + options: + openstack-origin: *openstack-origin + channel: 2023.1/edge + + nova-compute: + charm: ch:nova-compute + num_units: 1 + options: + openstack-origin: *openstack-origin + channel: 2023.1/edge + + ceph-mon: + charm: ch:ceph-mon + num_units: 3 + options: + expected-osd-count: 3 + source: *openstack-origin + channel: quincy/edge + + ceph-osd: + charm: ch:ceph-osd + num_units: 3 + storage: + osd-devices: 'cinder,10G' + options: + source: *openstack-origin + bluestore: False + use-direct-io: False + osd-devices: '/dev/test-non-existent' + channel: quincy/edge + + ceph-rbd-mirror: + series: *series + charm: ../../../ceph-rbd-mirror.charm + num_units: 1 + options: + source: *openstack-origin + + ceph-mon-b: + charm: ch:ceph-mon + num_units: 3 + 
options: + expected-osd-count: 3 + source: *openstack-origin + channel: quincy/edge + + ceph-osd-b: + charm: ch:ceph-osd + num_units: 3 + storage: + osd-devices: 'cinder,10G' + options: + source: *openstack-origin + bluestore: False + use-direct-io: False + osd-devices: '/dev/test-non-existent' + channel: quincy/edge + + ceph-rbd-mirror-b: + series: *series + charm: ../../../ceph-rbd-mirror.charm + num_units: 1 + options: + source: *openstack-origin + +relations: + +- - keystone:shared-db + - keystone-mysql-router:shared-db +- - keystone-mysql-router:db-router + - mysql-innodb-cluster:db-router + +- - glance:shared-db + - glance-mysql-router:shared-db +- - glance-mysql-router:db-router + - mysql-innodb-cluster:db-router + +- - cinder:shared-db + - cinder-mysql-router:shared-db +- - cinder-mysql-router:db-router + - mysql-innodb-cluster:db-router + +- - rabbitmq-server + - cinder + +- - 'keystone:identity-service' + - cinder +- - 'keystone:identity-service' + - glance + +- - cinder + - cinder-ceph +- - cinder-ceph:ceph + - ceph-mon:client + +- - nova-compute:ceph-access + - cinder-ceph:ceph-access +- - nova-compute:amqp + - rabbitmq-server:amqp + +- - glance:image-service + - nova-compute:image-service +- - glance + - ceph-mon + +- - ceph-mon:osd + - ceph-osd:mon +- - ceph-mon + - ceph-rbd-mirror:ceph-local +- - ceph-mon + - ceph-rbd-mirror-b:ceph-remote + +- - ceph-mon-b:osd + - ceph-osd-b:mon +- - ceph-mon-b + - ceph-rbd-mirror-b:ceph-local +- - ceph-mon-b + - ceph-rbd-mirror:ceph-remote diff --git a/ceph-rbd-mirror/src/tests/bundles/jammy-bobcat.yaml b/ceph-rbd-mirror/src/tests/bundles/jammy-bobcat.yaml new file mode 100644 index 00000000..a6a37e0b --- /dev/null +++ b/ceph-rbd-mirror/src/tests/bundles/jammy-bobcat.yaml @@ -0,0 +1,180 @@ +variables: + openstack-origin: &openstack-origin cloud:jammy-bobcat + series: &series jammy + +local_overlay_enabled: False + +series: *series + +machines: + '0': + constraints: "mem=3072M" + '1': + constraints: "mem=3072M" + '2': + constraints: "mem=3072M" + +applications: + + keystone-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + glance-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + cinder-mysql-router: + charm: ch:mysql-router + channel: 8.0/edge + + mysql-innodb-cluster: + charm: ch:mysql-innodb-cluster + num_units: 3 + to: + - '0' + - '1' + - '2' + channel: 8.0/edge + + keystone: + charm: ch:keystone + num_units: 1 + options: + openstack-origin: *openstack-origin + channel: 2023.2/edge + + rabbitmq-server: + charm: ch:rabbitmq-server + num_units: 1 + channel: 3.9/edge + + cinder: + charm: ch:cinder + num_units: 1 + options: + block-device: None + glance-api-version: 2 + openstack-origin: *openstack-origin + channel: 2023.2/edge + + cinder-ceph: + charm: ch:cinder-ceph + num_units: 0 + channel: 2023.2/edge + + glance: + charm: ch:glance + num_units: 1 + options: + openstack-origin: *openstack-origin + channel: 2023.2/edge + + nova-compute: + charm: ch:nova-compute + num_units: 1 + options: + openstack-origin: *openstack-origin + channel: 2023.2/edge + + ceph-mon: + charm: ch:ceph-mon + num_units: 3 + options: + expected-osd-count: 3 + source: *openstack-origin + channel: reef/edge + + ceph-osd: + charm: ch:ceph-osd + num_units: 3 + storage: + osd-devices: 'cinder,10G' + options: + source: *openstack-origin + osd-devices: '/dev/test-non-existent' + channel: reef/edge + + ceph-rbd-mirror: + series: *series + charm: ../../../ceph-rbd-mirror.charm + num_units: 1 + options: + source: *openstack-origin + + ceph-mon-b: + charm: 
ch:ceph-mon + num_units: 3 + options: + expected-osd-count: 3 + source: *openstack-origin + channel: reef/edge + + ceph-osd-b: + charm: ch:ceph-osd + num_units: 3 + storage: + osd-devices: 'cinder,10G' + options: + source: *openstack-origin + osd-devices: '/dev/test-non-existent' + channel: reef/edge + + ceph-rbd-mirror-b: + series: *series + charm: ../../../ceph-rbd-mirror.charm + num_units: 1 + options: + source: *openstack-origin + +relations: + +- - keystone:shared-db + - keystone-mysql-router:shared-db +- - keystone-mysql-router:db-router + - mysql-innodb-cluster:db-router + +- - glance:shared-db + - glance-mysql-router:shared-db +- - glance-mysql-router:db-router + - mysql-innodb-cluster:db-router + +- - cinder:shared-db + - cinder-mysql-router:shared-db +- - cinder-mysql-router:db-router + - mysql-innodb-cluster:db-router + +- - rabbitmq-server + - cinder + +- - 'keystone:identity-service' + - cinder +- - 'keystone:identity-service' + - glance + +- - cinder + - cinder-ceph +- - cinder-ceph:ceph + - ceph-mon:client + +- - nova-compute:ceph-access + - cinder-ceph:ceph-access +- - nova-compute:amqp + - rabbitmq-server:amqp + +- - glance:image-service + - nova-compute:image-service +- - glance + - ceph-mon + +- - ceph-mon:osd + - ceph-osd:mon +- - ceph-mon + - ceph-rbd-mirror:ceph-local +- - ceph-mon + - ceph-rbd-mirror-b:ceph-remote + +- - ceph-mon-b:osd + - ceph-osd-b:mon +- - ceph-mon-b + - ceph-rbd-mirror-b:ceph-local +- - ceph-mon-b + - ceph-rbd-mirror:ceph-remote diff --git a/ceph-rbd-mirror/src/tests/bundles/jammy-caracal.yaml b/ceph-rbd-mirror/src/tests/bundles/jammy-caracal.yaml new file mode 100644 index 00000000..0009b692 --- /dev/null +++ b/ceph-rbd-mirror/src/tests/bundles/jammy-caracal.yaml @@ -0,0 +1,114 @@ +variables: + openstack-origin: &openstack-origin cloud:jammy-caracal + series: &series jammy + +series: *series + +machines: + '0': + constraints: cores=2 mem=4G root-disk=16G virt-type=virtual-machine + '1': + constraints: cores=2 mem=4G root-disk=16G virt-type=virtual-machine + '2': + constraints: cores=2 mem=4G root-disk=16G virt-type=virtual-machine + '3': + constraints: cores=2 mem=4G root-disk=16G virt-type=virtual-machine + '4': + constraints: cores=2 mem=4G root-disk=16G virt-type=virtual-machine + '5': + constraints: cores=2 mem=4G root-disk=16G virt-type=virtual-machine + '6': + '7': + '8': + '9': + '10': + '11': + +applications: + ceph-mon: + charm: ch:ceph-mon + num_units: 3 + options: + expected-osd-count: 3 + source: *openstack-origin + channel: latest/edge + to: + - '6' + - '7' + - '8' + + ceph-osd: + charm: ch:ceph-osd + num_units: 3 + storage: + osd-devices: 'loop,4G' + options: + source: *openstack-origin + osd-devices: '/dev/test-non-existent' + channel: latest/edge + to: + - '0' + - '1' + - '2' + + ceph-rbd-mirror: + series: *series + charm: ch:ceph-rbd-mirror + channel: latest/edge + num_units: 1 + options: + source: *openstack-origin + to: + - '0' + + ceph-mon-b: + charm: ch:ceph-mon + num_units: 3 + options: + expected-osd-count: 3 + source: *openstack-origin + channel: latest/edge + to: + - '9' + - '10' + - '11' + + ceph-osd-b: + charm: ch:ceph-osd + num_units: 3 + storage: + osd-devices: 'loop,4G' + options: + source: *openstack-origin + osd-devices: '/dev/test-non-existent' + channel: latest/edge + to: + - '3' + - '4' + - '5' + + ceph-rbd-mirror-b: + series: *series + charm: ch:ceph-rbd-mirror + channel: latest/edge + num_units: 1 + options: + source: *openstack-origin + to: + - '3' + +relations: + +- - ceph-mon:osd + - ceph-osd:mon +- - 
ceph-mon + - ceph-rbd-mirror:ceph-local +- - ceph-mon + - ceph-rbd-mirror-b:ceph-remote + +- - ceph-mon-b:osd + - ceph-osd-b:mon +- - ceph-mon-b + - ceph-rbd-mirror-b:ceph-local +- - ceph-mon-b + - ceph-rbd-mirror:ceph-remote diff --git a/ceph-rbd-mirror/src/tests/bundles/jammy-yoga.yaml b/ceph-rbd-mirror/src/tests/bundles/jammy-yoga.yaml new file mode 100644 index 00000000..8e85d4aa --- /dev/null +++ b/ceph-rbd-mirror/src/tests/bundles/jammy-yoga.yaml @@ -0,0 +1,186 @@ +variables: + openstack-origin: &openstack-origin distro + +local_overlay_enabled: False + +series: &series jammy + +machines: + '0': + constraints: "mem=3072M" + '1': + constraints: "mem=3072M" + '2': + constraints: "mem=3072M" + +applications: + + keystone-mysql-router: + charm: ch:mysql-router + channel: latest/edge + glance-mysql-router: + charm: ch:mysql-router + channel: latest/edge + cinder-mysql-router: + charm: ch:mysql-router + channel: latest/edge + + mysql-innodb-cluster: + charm: ch:mysql-innodb-cluster + num_units: 3 + options: + source: *openstack-origin + to: + - '0' + - '1' + - '2' + channel: latest/edge + + keystone: + charm: ch:keystone + num_units: 1 + options: + openstack-origin: *openstack-origin + channel: yoga/edge + + rabbitmq-server: + charm: ch:rabbitmq-server + num_units: 1 + options: + source: *openstack-origin + channel: latest/edge + + cinder: + charm: ch:cinder + num_units: 1 + options: + block-device: None + glance-api-version: 2 + channel: yoga/edge + + cinder-ceph: + charm: ch:cinder-ceph + num_units: 0 + channel: yoga/edge + + glance: + charm: ch:glance + num_units: 1 + options: + openstack-origin: *openstack-origin + channel: yoga/edge + + nova-compute: + charm: ch:nova-compute + num_units: 1 + options: + openstack-origin: *openstack-origin + channel: yoga/edge + + ceph-mon: + charm: ch:ceph-mon + num_units: 3 + options: + expected-osd-count: 3 + source: *openstack-origin + channel: quincy/edge + + ceph-osd: + charm: ch:ceph-osd + num_units: 3 + storage: + osd-devices: 'cinder,10G' + options: + source: *openstack-origin + bluestore: False + use-direct-io: False + osd-devices: '/dev/test-non-existent' + channel: quincy/edge + + ceph-rbd-mirror: + series: *series + charm: ../../../ceph-rbd-mirror.charm + num_units: 1 + options: + source: *openstack-origin + + ceph-mon-b: + charm: ch:ceph-mon + num_units: 3 + options: + expected-osd-count: 3 + source: *openstack-origin + channel: quincy/edge + + ceph-osd-b: + charm: ch:ceph-osd + num_units: 3 + storage: + osd-devices: 'cinder,10G' + options: + source: *openstack-origin + bluestore: False + use-direct-io: False + osd-devices: '/dev/test-non-existent' + channel: quincy/edge + + ceph-rbd-mirror-b: + series: *series + charm: ../../../ceph-rbd-mirror.charm + num_units: 1 + options: + source: *openstack-origin + +relations: + +- - keystone:shared-db + - keystone-mysql-router:shared-db +- - keystone-mysql-router:db-router + - mysql-innodb-cluster:db-router + +- - glance:shared-db + - glance-mysql-router:shared-db +- - glance-mysql-router:db-router + - mysql-innodb-cluster:db-router + +- - cinder:shared-db + - cinder-mysql-router:shared-db +- - cinder-mysql-router:db-router + - mysql-innodb-cluster:db-router + +- - rabbitmq-server + - cinder + +- - 'keystone:identity-service' + - cinder +- - 'keystone:identity-service' + - glance + +- - cinder + - cinder-ceph +- - cinder-ceph:ceph + - ceph-mon:client + +- - nova-compute:ceph-access + - cinder-ceph:ceph-access +- - nova-compute:amqp + - rabbitmq-server:amqp + +- - glance:image-service + - 
nova-compute:image-service +- - glance + - ceph-mon + +- - ceph-mon:osd + - ceph-osd:mon +- - ceph-mon + - ceph-rbd-mirror:ceph-local +- - ceph-mon + - ceph-rbd-mirror-b:ceph-remote + +- - ceph-mon-b:osd + - ceph-osd-b:mon +- - ceph-mon-b + - ceph-rbd-mirror-b:ceph-local +- - ceph-mon-b + - ceph-rbd-mirror:ceph-remote diff --git a/ceph-rbd-mirror/src/tests/target.py b/ceph-rbd-mirror/src/tests/target.py new file mode 100644 index 00000000..c8aece63 --- /dev/null +++ b/ceph-rbd-mirror/src/tests/target.py @@ -0,0 +1,859 @@ +# Copyright 2019 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Encapsulate ``ceph-rbd-mirror`` testing.""" +import json +import logging +import re +import time +import unittest + +import cinderclient.exceptions as cinder_exceptions + +import zaza.openstack.charm_tests.test_utils as test_utils + +import zaza.model +import zaza.openstack.utilities.ceph +import zaza.openstack.utilities.openstack as openstack +import zaza.openstack.utilities.generic as zaza_utils + +from zaza.openstack.charm_tests.glance.setup import ( + LTS_IMAGE_NAME, + CIRROS_IMAGE_NAME) + + +DEFAULT_CINDER_RBD_MIRRORING_MODE = 'pool' + + +def get_cinder_rbd_mirroring_mode(cinder_ceph_app_name='cinder-ceph'): + """Get the RBD mirroring mode for the Cinder Ceph pool. + + :param cinder_ceph_app_name: Cinder Ceph Juju application name. + :type cinder_ceph_app_name: str + :returns: A string representing the RBD mirroring mode. It can be + either 'pool' or 'image'. + :rtype: str + """ + rbd_mirroring_mode_config = zaza.model.get_application_config( + cinder_ceph_app_name).get('rbd-mirroring-mode') + if rbd_mirroring_mode_config: + rbd_mirroring_mode = rbd_mirroring_mode_config.get( + 'value', DEFAULT_CINDER_RBD_MIRRORING_MODE).lower() + else: + rbd_mirroring_mode = DEFAULT_CINDER_RBD_MIRRORING_MODE + + return rbd_mirroring_mode + + +def get_glance_image(glance): + """Get the Glance image object to be used by the Ceph tests. + + It looks for the Cirros Glance image, and it's returned if it's found. + If the Cirros image is not found, it will try and find the Ubuntu + LTS image. + + :param glance: Authenticated glanceclient + :type glance: glanceclient.Client + :returns: Glance image object + :rtype: glanceclient.image + """ + images = openstack.get_images_by_name(glance, CIRROS_IMAGE_NAME) + if images: + return images[0] + logging.info("Failed to find {} image, falling back to {}".format( + CIRROS_IMAGE_NAME, + LTS_IMAGE_NAME)) + return openstack.get_images_by_name(glance, LTS_IMAGE_NAME)[0] + + +def setup_cinder_repl_volume_type(cinder, type_name='repl', + backend_name='cinder-ceph'): + """Set up the Cinder volume replication type. + + :param cinder: Authenticated cinderclient + :type cinder: cinder.Client + :param type_name: Cinder volume type name + :type type_name: str + :param backend_name: Cinder volume backend name with replication enabled. 
+ :type backend_name: str + :returns: Cinder volume type object + :rtype: cinderclient.VolumeType + """ + try: + vol_type = cinder.volume_types.find(name=type_name) + except cinder_exceptions.NotFound: + vol_type = cinder.volume_types.create(type_name) + + vol_type.set_keys(metadata={ + 'volume_backend_name': backend_name, + 'replication_enabled': ' True', + }) + return vol_type + + +# TODO: This function should be incorporated into +# 'zaza.openstack.utilities.openstack.create_volume' helper, once the below +# flakiness comments are addressed. +def create_cinder_volume(cinder, name='zaza', image_id=None, type_id=None): + """Create a new Cinder volume. + + :param cinder: Authenticated cinderclient. + :type cinder: cinder.Client + :param name: Volume name. + :type name: str + :param image_id: Glance image id, if the volume is created from an image. + :type image_id: str + :param type_id: Cinder Volume type id, if the volume needs to use an + explicit volume type. + :type type_id: str + :returns: Cinder volume + :rtype: :class:`Volume`. + """ + # NOTE(fnordahl): for some reason create volume from image often fails + # when run just after deployment is finished. We should figure out + # why, resolve the underlying issue and then remove this. + # + # We do not use tenacity here as it will interfere with tenacity used + # in ``resource_reaches_status`` + def create_volume(cinder, volume_params, retry=20): + if retry < 1: + return + volume = cinder.volumes.create(**volume_params) + try: + # Note(coreycb): stop_after_attempt is increased because using + # juju storage for ceph-osd backed by cinder on undercloud + # takes longer than the prior method of directory-backed OSD + # devices. + openstack.resource_reaches_status( + cinder.volumes, volume.id, msg='volume', + stop_after_attempt=20) + return volume + except AssertionError: + logging.info('retrying') + volume.delete() + return create_volume(cinder, volume_params, retry=retry - 1) + + volume_params = { + 'size': 8, + 'name': name, + } + if image_id: + volume_params['imageRef'] = image_id + if type_id: + volume_params['volume_type'] = type_id + + return create_volume(cinder, volume_params) + + +def setup_rbd_mirror(): + """Set up an RBD pool in case Cinder isn't present.""" + def setup(suffix): + zaza.model.run_action_on_leader( + 'ceph-mon' + suffix, + 'create-pool', + action_params={ + 'name': 'zaza-boot', + 'app-name': 'rbd', + } + ) + zaza.model.run_action_on_leader( + 'ceph-rbd-mirror' + suffix, + 'refresh-pools', + action_params={} + ) + + setup('') + setup('-b') + + +class CephRBDMirrorBase(test_utils.BaseCharmTest): + """Base class for ``ceph-rbd-mirror`` tests.""" + + @classmethod + def setUpClass(cls): + """Run setup for ``ceph-rbd-mirror`` tests.""" + super().setUpClass() + cls.cinder_ceph_app_name = 'cinder-ceph' + cls.test_cinder_volume_name = 'test-cinder-ceph-volume' + # get ready for multi-model Zaza + cls.site_a_model = cls.site_b_model = zaza.model.get_juju_model() + cls.site_b_app_suffix = '-b' + + def test_if_cinder_present(self): + """Test if the cinder-ceph application is present.""" + try: + zaza.model.get_application(self.cinder_ceph_app_name) + return True + except KeyError: + return False + + def skip_test_if_cinder_not_present(self, caller): + """Skip a test if Cinder isn't present.""" + if not self.test_if_cinder_present(): + raise unittest.SkipTest('Skipping %s due to lack of Cinder' + % caller) + + def run_status_action(self, application_name=None, model_name=None, + pools=[]): + """Run status action, decode and
return response.""" + action_params = { + 'verbose': True, + 'format': 'json', + } + if len(pools) > 0: + action_params['pools'] = ','.join(pools) + result = zaza.model.run_action_on_leader( + application_name or self.application_name, + 'status', + model_name=model_name, + action_params=action_params) + if result.status == "failed": + logging.error("status action failed: %s", result.message) + return + return json.loads(result.results['output']) + + def get_pools(self): + """Retrieve list of pools from both sites. + + :returns: Tuple with list of pools on each side. + :rtype: tuple + """ + site_a_pools = zaza.openstack.utilities.ceph.get_ceph_pools( + zaza.model.get_lead_unit_name( + 'ceph-mon', model_name=self.site_a_model), + model_name=self.site_a_model) + site_b_pools = zaza.openstack.utilities.ceph.get_ceph_pools( + zaza.model.get_lead_unit_name( + 'ceph-mon' + self.site_b_app_suffix, + model_name=self.site_b_model), + model_name=self.site_b_model) + return sorted(site_a_pools.keys()), sorted(site_b_pools.keys()) + + def get_failover_pools(self): + """Get the failover Ceph pools' names, from both sites. + + If the Cinder RBD mirroring mode is 'image', the 'cinder-ceph' pool + needs to be excluded, since Cinder orchestrates the failover then. + + Also remove .mgr pools as they're not failed over. + + :returns: Tuple with site-a pools and site-b pools. + :rtype: Tuple[List[str], List[str]] + """ + site_a_pools, site_b_pools = self.get_pools() + if (self.test_if_cinder_present() and + get_cinder_rbd_mirroring_mode(self.cinder_ceph_app_name) == + 'image'): + site_a_pools.remove(self.cinder_ceph_app_name) + site_b_pools.remove(self.cinder_ceph_app_name) + + site_a_pools.remove(".mgr") + site_b_pools.remove(".mgr") + + return site_a_pools, site_b_pools + + def wait_for_mirror_state(self, state, application_name=None, + model_name=None, + check_entries_behind_master=False, + require_images_in=[], + pools=[]): + """Wait until all images reach requested state. + + This function runs the ``status`` action and examines the data it + returns. + + :param state: State to expect all images to be in + :type state: str + :param application_name: Application to run action on + :type application_name: str + :param model_name: Model to run in + :type model_name: str + :param check_entries_behind_master: Wait for ``entries_behind_master`` + to become '0'. Only makes sense + when used with state + ``up+replaying``. + :type check_entries_behind_master: bool + :param require_images_in: List of pools to require images in + :type require_images_in: list of str + :param pools: List of pools to run status on. If this is empty, the + status action will run on all the pools.
+ :type pools: list of str + :returns: True on success, never returns on failure + """ + rep = re.compile(r'.*"entries_behind_primary":(\d+),') + while True: + pool_status = self.run_status_action( + application_name=application_name, model_name=model_name, + pools=pools) + if pool_status is None: + logging.debug("status action failed, retrying") + time.sleep(5) # don't spam juju run-action + continue + for pool, status in pool_status.items(): + images = status.get('images', []) + logging.debug("checking pool %s, images: %s", pool, images) + if not len(images) and pool in require_images_in: + break + for image in images: + if image['state'] and image['state'] != state: + break + if check_entries_behind_master: + m = rep.match(image['description']) + # NOTE(fnordahl): Tactical fix for upstream Ceph + # Luminous bug https://tracker.ceph.com/issues/23516 + if m and int(m.group(1)) > 42: + logging.info('entries_behind_primary:{}' + .format(m.group(1))) + break + else: + # not found here, check next pool + continue + # found here, pass on to outer loop + break + else: + # all images with a state have the expected state + return True + time.sleep(5) # don't spam juju run-action + + def setup_test_cinder_volume(self): + """Set up the test Cinder volume into the Ceph RBD mirror environment. + + If the volume already exists, then it's returned. + + Also, if the Cinder RBD mirroring mode is 'image', the volume will + use an explicit volume type with the appropriate replication flags. + Otherwise, it is just a simple Cinder volume using the default backend. + + :returns: Cinder volume + :rtype: :class:`Volume`. + """ + session = openstack.get_overcloud_keystone_session() + cinder = openstack.get_cinder_session_client(session, version=3) + + try: + return cinder.volumes.find(name=self.test_cinder_volume_name) + except cinder_exceptions.NotFound: + logging.info("Test Cinder volume doesn't exist. Creating it") + + glance = openstack.get_glance_session_client(session) + image = get_glance_image(glance) + kwargs = { + 'cinder': cinder, + 'name': self.test_cinder_volume_name, + 'image_id': image.id, + } + if get_cinder_rbd_mirroring_mode(self.cinder_ceph_app_name) == 'image': + volume_type = setup_cinder_repl_volume_type( + cinder, + backend_name=self.cinder_ceph_app_name) + kwargs['type_id'] = volume_type.id + + return create_cinder_volume(**kwargs) + + +class CephRBDMirrorTest(CephRBDMirrorBase): + """Encapsulate ``ceph-rbd-mirror`` tests.""" + + def test_pause_resume(self): + """Run pause and resume tests.""" + self.pause_resume(['rbd-mirror']) + + def test_pool_broker_synced(self): + """Validate that pools created with broker protocol are synced. + + The functional test bundle includes the ``cinder``, ``cinder-ceph`` and + ``glance`` charms. The ``cinder-ceph`` and ``glance`` charms will + create pools using the ceph charms broker protocol at deploy time. + """ + site_a_pools, site_b_pools = self.get_pools() + self.assertEqual(site_a_pools, site_b_pools) + + def test_pool_manual_synced(self): + """Validate that manually created pools are synced after refresh. + + The ``ceph-rbd-mirror`` charm does not get notified when the operator + creates a pool manually without using the ceph charms broker protocol. + + To alleviate this the charm has a ``refresh-pools`` action the operator + can call to have it discover such pools. Validate its operation.
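+ + The flow below mirrors what an operator would do by hand: create a + pool directly on the cluster, then run ``refresh-pools`` so the new + pool is discovered and mirrored to the other site.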
+ """ + # use action on ceph-mon to create a pool directly in the Ceph cluster + # without using the broker protocol + zaza.model.run_action_on_leader( + 'ceph-mon', + 'create-pool', + model_name=self.site_a_model, + action_params={ + 'name': 'zaza', + 'app-name': 'rbd', + }) + # tell ceph-rbd-mirror unit on site_a to refresh list of pools + zaza.model.run_action_on_leader( + 'ceph-rbd-mirror', + 'refresh-pools', + model_name=self.site_a_model, + action_params={ + }) + # wait for execution to start + zaza.model.wait_for_agent_status(model_name=self.site_a_model) + zaza.model.wait_for_agent_status(model_name=self.site_b_model) + # wait for execution to finish + zaza.model.wait_for_application_states(model_name=self.site_a_model) + zaza.model.wait_for_application_states(model_name=self.site_b_model) + # make sure everything is idle before we test + zaza.model.block_until_all_units_idle(model_name=self.site_a_model) + zaza.model.block_until_all_units_idle(model_name=self.site_b_model) + # validate result + site_a_pools, site_b_pools = self.get_pools() + self.assertEqual(site_a_pools, site_b_pools) + + def test_cinder_volume_mirrored(self): + """Validate that a volume created through Cinder is mirrored. + + For RBD Mirroring to work clients must enable the correct set of + features when creating images. + + The RBD image feature settings are announced by the ``ceph-mon`` charm + over the client relation when it has units related on its + ``rbd-mirror`` endpoint. + + By creating a volume through cinder on site A, checking for presence on + site B and subsequently comparing the contents we get a full end to end + test. + """ + self.skip_test_if_cinder_not_present('test_cinder_volume_mirrored') + volume = self.setup_test_cinder_volume() + site_a_hash = zaza.openstack.utilities.ceph.get_rbd_hash( + zaza.model.get_lead_unit_name('ceph-mon', + model_name=self.site_a_model), + 'cinder-ceph', + 'volume-{}'.format(volume.id), + model_name=self.site_a_model) + self.wait_for_mirror_state( + 'up+replaying', + check_entries_behind_master=True, + application_name=self.application_name + self.site_b_app_suffix, + model_name=self.site_b_model) + logging.info('Checking the Ceph RBD hashes of the primary and ' + 'the secondary Ceph images') + site_b_hash = zaza.openstack.utilities.ceph.get_rbd_hash( + zaza.model.get_lead_unit_name('ceph-mon' + self.site_b_app_suffix, + model_name=self.site_b_model), + 'cinder-ceph', + 'volume-{}'.format(volume.id), + model_name=self.site_b_model) + logging.info(site_a_hash) + logging.info(site_b_hash) + self.assertEqual(site_a_hash, site_b_hash) + + +class CephRBDMirrorControlledFailoverTest(CephRBDMirrorBase): + """Encapsulate ``ceph-rbd-mirror`` controlled failover tests.""" + + def execute_failover_juju_actions(self, + primary_site_app_name, + primary_site_model, + primary_site_pools, + secondary_site_app_name, + secondary_site_model, + secondary_site_pools): + """Execute the failover Juju actions. + + The failover / failback via Juju actions shares the same workflow. The + failback is just a failover with sites in reversed order. + + This function encapsulates the tasks to failover a primary site to + a secondary site: + 1. Demote primary site + 2. Validation of the primary site demotion + 3. Promote secondary site + 4. Validation of the secondary site promotion + + :param primary_site_app_name: Primary site Ceph RBD mirror app name. + :type primary_site_app_name: str + :param primary_site_model: Primary site Juju model name. 
+        :type primary_site_model: str
+        :param primary_site_pools: Primary site pools.
+        :type primary_site_pools: List[str]
+        :param secondary_site_app_name: Secondary site Ceph RBD mirror
+                                        app name.
+        :type secondary_site_app_name: str
+        :param secondary_site_model: Secondary site Juju model name.
+        :type secondary_site_model: str
+        :param secondary_site_pools: Secondary site pools.
+        :type secondary_site_pools: List[str]
+        """
+        # Check that the primary and secondary pool counts are the same.
+        self.assertEqual(len(primary_site_pools), len(secondary_site_pools))
+
+        # Run the 'demote' Juju action against the primary site pools.
+        logging.info('Demoting {} from model {}.'.format(
+            primary_site_app_name, primary_site_model))
+        result = zaza.model.run_action_on_leader(
+            primary_site_app_name,
+            'demote',
+            model_name=primary_site_model,
+            action_params={
+                'pools': ','.join(primary_site_pools)
+            })
+        logging.info(result)
+        zaza_utils.assertActionRanOK(result)
+
+        # Validate that the demoted pools count matches the total primary
+        # site pools count.
+        n_pools_demoted = len(result.results.get('output').split('\n'))
+        self.assertEqual(len(primary_site_pools), n_pools_demoted)
+
+        # At this point, both primary and secondary sites are demoted.
+        # Validate that the Ceph images, from both sites, report
+        # 'up+unknown', since there isn't a primary site at the moment.
+        logging.info('Waiting until {} is demoted.'.format(
+            primary_site_app_name))
+        self.wait_for_mirror_state(
+            'up+unknown',
+            application_name=primary_site_app_name,
+            model_name=primary_site_model,
+            pools=primary_site_pools)
+        self.wait_for_mirror_state(
+            'up+unknown',
+            application_name=secondary_site_app_name,
+            model_name=secondary_site_model,
+            pools=secondary_site_pools)
+
+        # Run the 'promote' Juju action against the secondary site.
+        logging.info('Promoting {} from model {}.'.format(
+            secondary_site_app_name, secondary_site_model))
+        result = zaza.model.run_action_on_leader(
+            secondary_site_app_name,
+            'promote',
+            model_name=secondary_site_model,
+            action_params={
+                'pools': ','.join(secondary_site_pools)
+            })
+        zaza_utils.assertActionRanOK(result)
+
+        # Validate that the promoted pools count matches the total secondary
+        # site pools count.
+        n_pools_promoted = len(result.results.get('output').split('\n'))
+        self.assertEqual(len(secondary_site_pools), n_pools_promoted)
+
+        # Validate that the Ceph images from the newly promoted site report
+        # the 'up+stopped' state (which is reported by primary Ceph images).
+        logging.info('Waiting until {} is promoted.'.format(
+            secondary_site_app_name))
+        self.wait_for_mirror_state(
+            'up+stopped',
+            application_name=secondary_site_app_name,
+            model_name=secondary_site_model,
+            pools=secondary_site_pools)
+
+        # Validate that the Ceph images from the demoted site report
+        # 'up+replaying' (which is reported by secondary Ceph images).
+        self.wait_for_mirror_state(
+            'up+replaying',
+            check_entries_behind_master=True,
+            application_name=primary_site_app_name,
+            model_name=primary_site_model,
+            pools=primary_site_pools)
+
+    def test_100_cinder_failover(self):
+        """Validate controlled failover via the Cinder API.
+
+        This test only makes sense if the Cinder RBD mirroring mode is
+        'image'. It returns early if this is not the case.
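+
+        The failover performed here is roughly equivalent to the following
+        CLI invocation (illustration only, assuming the default
+        'cinder-ceph' backend name):
+
+            cinder failover-host cinder@cinder-ceph --backend_id ceph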
+ """ + self.skip_test_if_cinder_not_present('test_100_cinder_failover') + cinder_rbd_mirroring_mode = get_cinder_rbd_mirroring_mode( + self.cinder_ceph_app_name) + if cinder_rbd_mirroring_mode != 'image': + logging.warning( + "Skipping 'test_100_cinder_failover' since Cinder RBD " + "mirroring mode is {}.".format(cinder_rbd_mirroring_mode)) + return + + session = openstack.get_overcloud_keystone_session() + cinder = openstack.get_cinder_session_client(session, version=3) + + # Check if the Cinder volume host is available with replication + # enabled. + host = 'cinder@{}'.format(self.cinder_ceph_app_name) + svc = cinder.services.list(host=host, binary='cinder-volume')[0] + self.assertEqual(svc.replication_status, 'enabled') + self.assertEqual(svc.status, 'enabled') + + # Setup the test Cinder volume + volume = self.setup_test_cinder_volume() + + # Check if the volume is properly mirrored + self.wait_for_mirror_state( + 'up+replaying', + check_entries_behind_master=True, + application_name=self.application_name + self.site_b_app_suffix, + model_name=self.site_b_model, + pools=[self.cinder_ceph_app_name]) + + # Execute the Cinder volume failover + openstack.failover_cinder_volume_host( + cinder=cinder, + backend_name=self.cinder_ceph_app_name, + target_backend_id='ceph', + target_status='disabled', + target_replication_status='failed-over') + + # Check if the test volume is still available after failover + self.assertEqual(cinder.volumes.get(volume.id).status, 'available') + + def test_101_cinder_failback(self): + """Validate controlled failback via the Cinder API. + + This test only makes sense if Cinder RBD mirroring mode is 'image'. + It will return early, if this is not the case. + + The test needs to be executed when the Cinder volume host is already + failed-over with the test volume on it. + """ + self.skip_test_if_cinder_not_present('test_101_cinder_failback') + cinder_rbd_mirroring_mode = get_cinder_rbd_mirroring_mode( + self.cinder_ceph_app_name) + if cinder_rbd_mirroring_mode != 'image': + logging.warning( + "Skipping 'test_101_cinder_failback' since Cinder RBD " + "mirroring mode is {}.".format(cinder_rbd_mirroring_mode)) + return + + session = openstack.get_overcloud_keystone_session() + cinder = openstack.get_cinder_session_client(session, version=3) + + # Check if the Cinder volume host is already failed-over + host = 'cinder@{}'.format(self.cinder_ceph_app_name) + svc = cinder.services.list(host=host, binary='cinder-volume')[0] + self.assertEqual(svc.replication_status, 'failed-over') + self.assertEqual(svc.status, 'disabled') + + # Check if the test Cinder volume is already present. The method + # 'cinder.volumes.find' raises 404 if the volume is not found. + volume = cinder.volumes.find(name=self.test_cinder_volume_name) + + # Execute the Cinder volume failback + openstack.failover_cinder_volume_host( + cinder=cinder, + backend_name=self.cinder_ceph_app_name, + target_backend_id='default', + target_status='enabled', + target_replication_status='enabled') + + # Check if the test volume is still available after failback + self.assertEqual(cinder.volumes.get(volume.id).status, 'available') + + def test_200_juju_failover(self): + """Validate controlled failover via Juju actions.""" + # Get the Ceph pools needed to failover + site_a_pools, site_b_pools = self.get_failover_pools() + + # Execute the failover Juju actions with the appropriate parameters. 
+        site_b_app_name = self.application_name + self.site_b_app_suffix
+        self.execute_failover_juju_actions(
+            primary_site_app_name=self.application_name,
+            primary_site_model=self.site_a_model,
+            primary_site_pools=site_a_pools,
+            secondary_site_app_name=site_b_app_name,
+            secondary_site_model=self.site_b_model,
+            secondary_site_pools=site_b_pools)
+
+    def test_201_juju_failback(self):
+        """Validate controlled failback via Juju actions."""
+        # Get the Ceph pools needed to fail back
+        site_a_pools, site_b_pools = self.get_failover_pools()
+
+        # Execute the failover Juju actions with the appropriate parameters.
+        # The failback operation is just a failover with the sites in
+        # reverse order.
+        site_b_app_name = self.application_name + self.site_b_app_suffix
+        self.execute_failover_juju_actions(
+            primary_site_app_name=site_b_app_name,
+            primary_site_model=self.site_b_model,
+            primary_site_pools=site_b_pools,
+            secondary_site_app_name=self.application_name,
+            secondary_site_model=self.site_a_model,
+            secondary_site_pools=site_a_pools)
+
+    def test_203_juju_resync(self):
+        """Validate the 'resync-pools' Juju action.
+
+        The 'resync-pools' Juju action is meant to flag Ceph images from the
+        secondary site to re-sync against the Ceph images from the primary
+        site.
+
+        This is useful when the secondary Ceph images are out of sync.
+        """
+        # Get the Ceph pools needed to fail back
+        _, site_b_pools = self.get_failover_pools()
+
+        # Run the 'resync-pools' Juju action against the pools from site-b.
+        # This will make sure that the Ceph images from site-b are properly
+        # synced with the primary images from site-a.
+        site_b_app_name = self.application_name + self.site_b_app_suffix
+        logging.info('Re-syncing {} from model {}'.format(
+            site_b_app_name, self.site_b_model))
+        result = zaza.model.run_action_on_leader(
+            site_b_app_name,
+            'resync-pools',
+            model_name=self.site_b_model,
+            action_params={
+                'pools': ','.join(site_b_pools),
+                'i-really-mean-it': True,
+            })
+        zaza_utils.assertActionRanOK(result)
+
+        # Validate that the Ceph images from site-b report 'up+replaying'
+        # (which is reported by secondary Ceph images), and check that
+        # images exist in the Cinder and Glance pools.
+        self.wait_for_mirror_state(
+            'up+replaying',
+            check_entries_behind_master=True,
+            application_name=site_b_app_name,
+            model_name=self.site_b_model,
+            require_images_in=[self.cinder_ceph_app_name, 'glance'],
+            pools=site_b_pools)
+
+
+class CephRBDMirrorDisasterFailoverTest(CephRBDMirrorBase):
+    """Encapsulate ``ceph-rbd-mirror`` destructive tests."""
+
+    def apply_cinder_ceph_workaround(self):
+        """Set minimal timeouts / retries on the Cinder Ceph backend.
+
+        This is needed because a failover via the Cinder API will try to
+        demote site-a. When site-a is down, and with the default timeouts /
+        retries, that operation takes an unreasonable amount of time (or
+        sometimes never finishes).
+        """
+        # These config options need to be set under the Cinder Ceph backend
+        # section in the main Cinder config file.
+        # At the moment, there is no Juju config option to set them, and it
+        # is not good practice to carry them in production.
+        # They should be set only to do the Ceph failover via the Cinder
+        # API, and they need to be removed afterwards.
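+        # For illustration, with the default 'cinder-ceph' backend name the
+        # resulting cinder.conf stanza would look roughly like:
+        #
+        #   [cinder-ceph]
+        #   rados_connect_timeout = 1
+        #   rados_connection_retries = 1
+        #   rados_connection_interval = 0
+        #   replication_connect_timeout = 1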
+        configs = {
+            'rados_connect_timeout': '1',
+            'rados_connection_retries': '1',
+            'rados_connection_interval': '0',
+            'replication_connect_timeout': '1',
+        }
+
+        # Small Python script that will be executed via Juju run to update
+        # the Cinder config file.
+        update_cinder_conf_script = (
+            "import configparser; "
+            "config = configparser.ConfigParser(); "
+            "config.read('/etc/cinder/cinder.conf'); "
+            "{}"
+            "f = open('/etc/cinder/cinder.conf', 'w'); "
+            "config.write(f); "
+            "f.close()")
+        set_cmd = ''
+        for cfg_name in configs:
+            set_cmd += "config.set('{0}', '{1}', '{2}'); ".format(
+                self.cinder_ceph_app_name, cfg_name, configs[cfg_name])
+        script = update_cinder_conf_script.format(set_cmd)
+
+        # Run the workaround script via Juju run
+        zaza.model.run_on_leader(
+            self.cinder_ceph_app_name,
+            'python3 -c "{}"; systemctl restart cinder-volume'.format(script))
+
+    def kill_primary_site(self):
+        """Simulate an unexpected primary site shutdown."""
+        logging.info('Killing the Ceph primary site')
+        for application in ['ceph-rbd-mirror', 'ceph-mon', 'ceph-osd']:
+            zaza.model.remove_application(
+                application,
+                model_name=self.site_a_model,
+                forcefully_remove_machines=True)
+
+    def test_100_forced_juju_failover(self):
+        """Validate Ceph failover via Juju when the primary site is down.
+
+        * Kill the primary site
+        * Execute the forced failover via Juju actions
+        """
+        # Get the site-b Ceph pools that need to be promoted
+        _, site_b_pools = self.get_failover_pools()
+        site_b_app_name = self.application_name + self.site_b_app_suffix
+
+        # Simulate an unexpected shutdown of the primary site
+        self.kill_primary_site()
+
+        # Try to promote site-b to primary.
+        result = zaza.model.run_action_on_leader(
+            site_b_app_name,
+            'promote',
+            model_name=self.site_b_model,
+            action_params={
+                'pools': ','.join(site_b_pools),
+            })
+        zaza_utils.assertActionRanOK(result)
+
+        # The action may not show up as 'failed' if there are no pools that
+        # need to be promoted.
+        # self.assertEqual(result.status, 'failed')
+
+        # Retry promoting site-b using the 'force' Juju action parameter.
+        result = zaza.model.run_action_on_leader(
+            site_b_app_name,
+            'promote',
+            model_name=self.site_b_model,
+            action_params={
+                'force': True,
+                'pools': ','.join(site_b_pools),
+            })
+
+        # Validate successful Juju action execution
+        self.assertEqual(result.status, 'completed')
+
+    def test_200_forced_cinder_failover(self):
+        """Validate Ceph failover via Cinder when the primary site is down.
+
+        This test only makes sense if the Cinder RBD mirroring mode is
+        'image'. It returns early if this is not the case.
+
+        This assumes that the primary site is already killed.
+        """
+        self.skip_test_if_cinder_not_present('test_200_forced_cinder_failover')
+        cinder_rbd_mirroring_mode = get_cinder_rbd_mirroring_mode(
+            self.cinder_ceph_app_name)
+        if cinder_rbd_mirroring_mode != 'image':
+            logging.warning(
+                "Skipping 'test_200_forced_cinder_failover' since Cinder "
+                "RBD mirroring mode is {}.".format(
+                    cinder_rbd_mirroring_mode))
+            return
+
+        # Make sure that the Cinder Ceph backend workaround is applied.
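+        # Without it, demoting volumes against the dead site-a would hang
+        # on RADOS connection timeouts (see apply_cinder_ceph_workaround
+        # above).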
+ self.apply_cinder_ceph_workaround() + + session = openstack.get_overcloud_keystone_session() + cinder = openstack.get_cinder_session_client(session, version=3) + openstack.failover_cinder_volume_host( + cinder=cinder, + backend_name=self.cinder_ceph_app_name, + target_backend_id='ceph', + target_status='disabled', + target_replication_status='failed-over') + + # Check that the Cinder volumes are still available after forced + # failover. + for volume in cinder.volumes.list(): + self.assertEqual(volume.status, 'available') diff --git a/ceph-rbd-mirror/src/tests/tests.yaml b/ceph-rbd-mirror/src/tests/tests.yaml new file mode 100644 index 00000000..46adf75b --- /dev/null +++ b/ceph-rbd-mirror/src/tests/tests.yaml @@ -0,0 +1,27 @@ +charm_name: ceph-rbd-mirror + +gate_bundles: + - jammy-caracal + +smoke_bundles: + - jammy-caracal + +dev_bundles: + - jammy-caracal + +configure: + - tests.target.setup_rbd_mirror +tests: +- zaza.charm_tests.lifecycle.refresh.CharmRefreshAll +- tests.target.CephRBDMirrorTest +- tests.target.CephRBDMirrorControlledFailoverTest +- tests.target.CephRBDMirrorDisasterFailoverTest + +target_deploy_status: + ceph-rbd-mirror: + workload-status: waiting + workload-status-message: 'Waiting for pools to be created' + + ceph-rbd-mirror-b: + workload-status: waiting + workload-status-message: 'Waiting for pools to be created' diff --git a/ceph-rbd-mirror/src/tox.ini b/ceph-rbd-mirror/src/tox.ini new file mode 100644 index 00000000..cf00338c --- /dev/null +++ b/ceph-rbd-mirror/src/tox.ini @@ -0,0 +1,63 @@ +# Source charm (with zaza): ./src/tox.ini +# This file is managed centrally by release-tools and should not be modified +# within individual charm repos. See the 'global' dir contents for available +# choices of tox.ini for OpenStack Charms: +# https://github.com/openstack-charmers/release-tools + +[tox] +envlist = pep8 +# NOTE: Avoid build/test env pollution by not enabling sitepackages. +sitepackages = False +# NOTE: Avoid false positives by not skipping missing interpreters. +skip_missing_interpreters = False + +[testenv] +# We use tox mainly for virtual environment management for test requirements +# and do not install the charm code as a Python package into that environment. +# Ref: https://tox.wiki/en/latest/config.html#skip_install +skip_install = True +setenv = VIRTUAL_ENV={envdir} + PYTHONHASHSEED=0 + TEST_JUJU3=1 + CHARMS_ARTIFACT_DIR={toxinidir}/../.. 
+ +allowlist_externals = juju +passenv = + HOME + TERM + CS_* + OS_* + TEST_* +deps = -r{toxinidir}/test-requirements.txt + +[testenv:pep8] +basepython = python3 +commands = charm-proof + +[testenv:func-noop] +basepython = python3 +commands = + functest-run-suite --help + +[testenv:func] +basepython = python3 +commands = + functest-run-suite --keep-model + +[testenv:func-smoke] +basepython = python3 +commands = + functest-run-suite --keep-model --smoke + +[testenv:func-dev] +basepython = python3 +commands = + functest-run-suite --keep-model --dev + +[testenv:func-target] +basepython = python3 +commands = + functest-run-suite --keep-model --bundle {posargs} + +[testenv:venv] +commands = {posargs} diff --git a/ceph-rbd-mirror/src/wheelhouse.txt b/ceph-rbd-mirror/src/wheelhouse.txt new file mode 100644 index 00000000..04ab38c3 --- /dev/null +++ b/ceph-rbd-mirror/src/wheelhouse.txt @@ -0,0 +1,4 @@ +git+https://github.com/juju/charm-helpers.git#egg=charmhelpers +psutil +poetry-core +git+https://github.com/openstack/charms.openstack.git#egg=charms.openstack diff --git a/ceph-rbd-mirror/test-requirements.txt b/ceph-rbd-mirror/test-requirements.txt new file mode 100644 index 00000000..9e3c89dd --- /dev/null +++ b/ceph-rbd-mirror/test-requirements.txt @@ -0,0 +1,54 @@ +# This file is managed centrally by release-tools and should not be modified +# within individual charm repos. See the 'global' dir contents for available +# choices of *requirements.txt files for OpenStack Charms: +# https://github.com/openstack-charmers/release-tools + +# NOTE: newer versions of cryptography require a Rust compiler to build, +# see +# * https://github.com/openstack-charmers/zaza/issues/421 +# * https://mail.python.org/pipermail/cryptography-dev/2021-January/001003.html +# +cryptography<3.4 + +requests>=2.18.4 + +stestr>=2.2.0 + +# Dependency of stestr. Workaround for +# https://github.com/mtreinish/stestr/issues/145 +cliff<3.0.0 + +# Dependencies of stestr. Newer versions use keywords that didn't exist in +# python 3.5 yet (e.g. "ModuleNotFoundError") +importlib-metadata<3.0.0; python_version < '3.6' +importlib-resources<3.0.0; python_version < '3.6' + +# Some Zuul nodes sometimes pull newer versions of these dependencies which +# dropped support for python 3.5: +osprofiler<2.7.0;python_version<'3.6' +stevedore<1.31.0;python_version<'3.6' +debtcollector<1.22.0;python_version<'3.6' +oslo.utils<=3.41.0;python_version<'3.6' + +coverage>=4.5.2 +pyudev # for ceph-* charm unit tests (need to fix the ceph-* charm unit tests/mocking) +git+https://github.com/openstack-charmers/zaza.git#egg=zaza +git+https://github.com/openstack-charmers/zaza-openstack-tests.git#egg=zaza.openstack + +# Needed for charm-glance: +git+https://opendev.org/openstack/tempest.git#egg=tempest;python_version>='3.8' +tempest<30.0.0;python_version<'3.8' and python_version >= '3.6' +tempest<24.0.0;python_version<'3.6' + +croniter # needed for charm-rabbitmq-server unit tests + +# icey: pyopenssl 22 introduces a requirement on newer OpenSSL which causes test +# failures. Pin pyopenssl to resolve the failure. 
+pyopenssl<=22.0.0 + +pydantic < 2 +cosl + +netifaces +git+https://github.com/openstack/charms.openstack.git#egg=charms.openstack +charms.reactive diff --git a/ceph-rbd-mirror/tox.ini b/ceph-rbd-mirror/tox.ini new file mode 100644 index 00000000..721bd6be --- /dev/null +++ b/ceph-rbd-mirror/tox.ini @@ -0,0 +1,107 @@ +# Source charm: ./tox.ini +# This file is managed centrally by release-tools and should not be modified +# within individual charm repos. See the 'global' dir contents for available +# choices of tox.ini for OpenStack Charms: +# https://github.com/openstack-charmers/release-tools + +[tox] +envlist = pep8,py3 +# NOTE: Avoid build/test env pollution by not enabling sitepackages. +sitepackages = False +# NOTE: Avoid false positives by not skipping missing interpreters. +skip_missing_interpreters = False + +[testenv] +setenv = VIRTUAL_ENV={envdir} + PYTHONHASHSEED=0 + TERM=linux + CHARM_LAYERS_DIR={toxinidir}/layers + CHARM_INTERFACES_DIR={toxinidir}/interfaces + JUJU_REPOSITORY={toxinidir}/build + TEST_JUJU3=1 +passenv = + no_proxy + http_proxy + https_proxy + CHARM_INTERFACES_DIR + CHARM_LAYERS_DIR + JUJU_REPOSITORY +allowlist_externals = + charmcraft + bash + tox + {toxinidir}/rename.sh +deps = + -r{toxinidir}/requirements.txt + +[testenv:build] +basepython = python3 +commands = + charmcraft clean + charmcraft -v pack + {toxinidir}/rename.sh + charmcraft clean + +[testenv:build-reactive] +basepython = python3 +commands = + charm-build --log-level DEBUG --use-lock-file-branches --binary-wheels-from-source -o {toxinidir}/build/builds src {posargs} + +[testenv:add-build-lock-file] +basepython = python3 +commands = + charm-build --log-level DEBUG --write-lock-file -o {toxinidir}/build/builds src {posargs} + +[testenv:py3] +basepython = python3 +deps = + -r{toxinidir}/test-requirements.txt +commands = stestr run --slowest {posargs} + +[testenv:py310] +basepython = python3.10 +deps = + -r{toxinidir}/test-requirements.txt +commands = stestr run --slowest {posargs} + +[testenv:pep8] +basepython = python3 +deps = flake8 + charm-tools +commands = flake8 {posargs} src unit_tests + +[testenv:cover] +# Technique based heavily upon +# https://github.com/openstack/nova/blob/master/tox.ini +basepython = python3 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt +setenv = + {[testenv]setenv} + PYTHON=coverage run +commands = + coverage erase + stestr run --slowest {posargs} + coverage combine + coverage html -d cover + coverage xml -o cover/coverage.xml + coverage report + +[coverage:run] +branch = True +concurrency = multiprocessing +parallel = True +source = + . +omit = + .tox/* + */charmhelpers/* + unit_tests/* + +[testenv:venv] +basepython = python3 +commands = {posargs} + +[flake8] +# E402 ignore necessary for path append before sys module import in actions +ignore = E402,W503,W504 diff --git a/ceph-rbd-mirror/unit_tests/__init__.py b/ceph-rbd-mirror/unit_tests/__init__.py new file mode 100644 index 00000000..7b5dac4f --- /dev/null +++ b/ceph-rbd-mirror/unit_tests/__init__.py @@ -0,0 +1,22 @@ +# Copyright 2018 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+
+sys.path.append('src')
+sys.path.append('src/lib')
+
+# Mock out charmhelpers so that we can test without it.
+import charms_openstack.test_mocks  # noqa
+charms_openstack.test_mocks.mock_charmhelpers()
diff --git a/ceph-rbd-mirror/unit_tests/test_actions.py b/ceph-rbd-mirror/unit_tests/test_actions.py
new file mode 100644
index 00000000..9c0964cb
--- /dev/null
+++ b/ceph-rbd-mirror/unit_tests/test_actions.py
@@ -0,0 +1,191 @@
+# Copyright 2019 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import collections
+import json
+from unittest import mock
+import sys
+
+sys.modules['charms.layer'] = mock.MagicMock()
+import actions.actions as actions
+import charm.openstack.ceph_rbd_mirror as crm
+
+import charms_openstack.test_utils as test_utils
+
+
+class TestCephRBDMirrorActions(test_utils.PatchHelper):
+
+    def setUp(self):
+        super().setUp()
+        self.patch_release(crm.CephRBDMirrorCharm.release)
+        self.crm_charm = mock.MagicMock()
+        self.patch_object(actions.charms_openstack.charm,
+                          'provide_charm_instance',
+                          new=mock.MagicMock())
+        self.provide_charm_instance().__enter__.return_value = \
+            self.crm_charm
+        self.provide_charm_instance().__exit__.return_value = None
+
+    def test_rbd_mirror_action(self):
+        self.patch_object(actions.reactive, 'endpoint_from_name')
+        self.patch_object(actions.ch_core.hookenv, 'action_get')
+        self.patch_object(actions.subprocess, 'check_output')
+        self.patch_object(actions.ch_core.hookenv, 'action_set')
+        endpoint = mock.MagicMock()
+        endpoint.pools = collections.OrderedDict(
+            {'apool': {'applications': {'rbd': {}}},
+             'bpool': {'applications': {'rbd': {}}}})
+        self.endpoint_from_name.return_value = endpoint
+        self.crm_charm.eligible_pools.return_value = endpoint.pools
+        self.crm_charm.ceph_id = 'acephid'
+        self.action_get.return_value = False
+        self.check_output.return_value = 'Promoted 0 mirrored images\n'
+        actions.rbd_mirror_action(['promote'])
+        self.endpoint_from_name.assert_called_once_with('ceph-local')
+        self.crm_charm.eligible_pools.assert_called_once_with(endpoint.pools)
+        self.action_get.assert_has_calls([
+            mock.call('pools'),
+            mock.call('force'),
+            mock.call('verbose'),
+            mock.call('format'),
+        ])
+        self.check_output.assert_has_calls([
+            mock.call(['rbd', '--id', 'acephid', 'mirror', 'pool', 'promote',
+                       'apool'],
+                      stderr=actions.subprocess.STDOUT,
+                      universal_newlines=True),
+            mock.call(['rbd', '--id', 'acephid', 'mirror', 'pool', 'promote',
+                       'bpool'],
+                      stderr=actions.subprocess.STDOUT,
+                      universal_newlines=True),
+        ], any_order=True)
+        # the order in which the pools appear in the output string is
+        # undefined
+        self.action_set.assert_called_once_with(
+            {'output': mock.ANY})
+        self.assertEqual(
+            sorted(self.action_set.call_args[0][0]['output'].split('\n')),
+            ['apool: Promoted 0 mirrored images',
+             'bpool: Promoted 0 mirrored images'])
+        self.action_get.side_effect = [None, True, True, False]
+        self.check_output.reset_mock()
+ actions.rbd_mirror_action(['promote']) + self.check_output.assert_has_calls([ + mock.call(['rbd', '--id', 'acephid', 'mirror', 'pool', 'promote', + '--force', '--verbose', 'apool'], + stderr=actions.subprocess.STDOUT, + universal_newlines=True), + mock.call(['rbd', '--id', 'acephid', 'mirror', 'pool', 'promote', + '--force', '--verbose', 'bpool'], + stderr=actions.subprocess.STDOUT, + universal_newlines=True), + ], any_order=True) + self.action_get.assert_has_calls([ + mock.call('pools'), + mock.call('force'), + mock.call('verbose'), + mock.call('format'), + ]) + self.action_get.side_effect = ['apool', True, True, False] + self.check_output.reset_mock() + actions.rbd_mirror_action(['promote']) + self.check_output.assert_called_once_with( + ['rbd', '--id', 'acephid', 'mirror', 'pool', 'promote', + '--force', '--verbose', 'apool'], + stderr=actions.subprocess.STDOUT, + universal_newlines=True) + self.action_get.assert_has_calls([ + mock.call('pools'), + mock.call('force'), + mock.call('verbose'), + mock.call('format'), + ]) + + def test_refresh_pools(self): + self.patch_object(actions.reactive, 'is_flag_set') + self.patch_object(actions.ch_core.hookenv, 'action_fail') + self.is_flag_set.return_value = False + actions.refresh_pools([]) + self.is_flag_set.assert_called_once_with('leadership.is_leader') + self.action_fail.assert_called_once_with( + 'run action on the leader unit') + self.is_flag_set.return_value = True + self.patch_object(actions.reactive, 'set_flag') + self.patch_object(actions.ch_core.unitdata, '_KV') + self.patch_object(actions.reactive, 'main') + actions.refresh_pools([]) + self.set_flag.assert_called_once_with('refresh.pools') + self._KV.flush.assert_called_once_with() + self.main.assert_called_once_with() + + def test_resync_pools(self): + self.patch_object(actions.reactive, 'endpoint_from_name') + self.patch_object(actions.ch_core.hookenv, 'action_get') + self.patch_object(actions.subprocess, 'check_output') + self.patch_object(actions.ch_core.hookenv, 'action_set') + endpoint = mock.MagicMock() + endpoint.pools = collections.OrderedDict( + {'apool': {'applications': {'rbd': {}}}}) + self.endpoint_from_name.return_value = endpoint + self.crm_charm.eligible_pools.return_value = endpoint.pools + self.crm_charm.ceph_id = 'acephid' + self.action_get.side_effect = [False, None] + actions.resync_pools([]) + self.action_get.assert_has_calls([ + mock.call('i-really-mean-it'), + ]) + self.assertFalse(self.check_output.called) + self.assertFalse(self.action_set.called) + self.action_get.side_effect = [True, 'bpool'] + self.check_output.return_value = json.dumps([]) + actions.resync_pools([]) + self.action_get.assert_has_calls([ + mock.call('i-really-mean-it'), + mock.call('pools'), + ]) + self.check_output.assert_called_once_with( + ['rbd', '--id', 'acephid', '--format', 'json', + '-p', 'bpool', 'ls'], + universal_newlines=True) + self.action_set.assert_called_once_with({'output': ''}) + self.action_get.side_effect = [True, None] + self.check_output.side_effect = [ + json.dumps(['imagea', 'imageb']), + json.dumps({'mirroring': {'state': 'enabled'}}), + 'resync flagged for imagea\n', + json.dumps({'mirroring': {'state': 'disabled'}}), + ] + self.check_output.reset_mock() + actions.resync_pools([]) + self.action_get.assert_has_calls([ + mock.call('i-really-mean-it'), + mock.call('pools'), + ]) + self.assertEqual( + sorted(self.action_set.call_args[0][0]['output'].split('\n')), + ['apool/imagea: resync flagged for imagea']) + + def test_main(self): + self.patch_object(actions, 
'ACTIONS') + self.patch_object(actions.ch_core.hookenv, 'action_fail') + args = ['/non-existent/path/to/charm/binary/promote'] + function = mock.MagicMock() + self.ACTIONS.__getitem__.return_value = function + actions.main(args) + function.assert_called_once_with(args) + self.ACTIONS.__getitem__.side_effect = KeyError + self.assertEqual(actions.main(args), 'Action promote is undefined') + self.ACTIONS.__getitem__.side_effect = None + function.side_effect = Exception('random exception') + actions.main(args) + self.action_fail.assert_called_once_with('random exception') diff --git a/ceph-rbd-mirror/unit_tests/test_ceph_rbd_mirror_handlers.py b/ceph-rbd-mirror/unit_tests/test_ceph_rbd_mirror_handlers.py new file mode 100644 index 00000000..3d5c5b28 --- /dev/null +++ b/ceph-rbd-mirror/unit_tests/test_ceph_rbd_mirror_handlers.py @@ -0,0 +1,194 @@ +# Copyright 2019 Canonical Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from unittest import mock + +import charm.openstack.ceph_rbd_mirror as crm +import reactive.ceph_rbd_mirror_handlers as handlers + +import charms_openstack.test_utils as test_utils + + +class TestRegisteredHooks(test_utils.TestRegisteredHooks): + + def test_hooks(self): + defaults = [ + 'charm.installed', + 'config.rendered', + 'update-status', + 'upgrade-charm', + ] + hook_set = { + 'when': { + 'config_changed': ( + 'config.changed', + 'ceph-local.available', + 'ceph-remote.available', + ), + 'render_stuff': ( + 'ceph-local.available', + 'ceph-remote.available', + ), + 'configure_pools': ( + 'leadership.is_leader', + 'config.rendered', + 'ceph-local.available', + 'ceph-remote.available', + ), + 'refresh_pools': ( + 'leadership.is_leader', + 'refresh.pools', + 'ceph-local.available', + 'ceph-remote.available', + ), + 'request_keys': ( + 'ceph-local.connected', + 'ceph-remote.connected', + ), + }, + 'when_none': { + 'config_changed': ( + 'is-update-status-hook',), + 'render_stuff': ( + 'is-update-status-hook',), + 'refresh_pools': ( + 'is-update-status-hook',), + 'configure_pools': ( + 'is-update-status-hook',), + 'request_keys': ( + 'is-update-status-hook', + 'ceph-local.available', + 'ceph-remote.available', + ), + }, + } + # test that the hooks were registered + self.registered_hooks_test_helper(handlers, hook_set, defaults) + + +class TestCephRBDMirrorHandlers(test_utils.PatchHelper): + + def setUp(self): + super().setUp() + self.patch_release(crm.CephRBDMirrorCharm.release) + self.crm_charm = mock.MagicMock() + self.patch_object(handlers.charm, 'provide_charm_instance', + new=mock.MagicMock()) + self.provide_charm_instance().__enter__.return_value = \ + self.crm_charm + self.provide_charm_instance().__exit__.return_value = None + + def test_request_keys(self): + self.patch_object(handlers.reactive, 'endpoint_from_flag') + endpoint_local = mock.MagicMock() + endpoint_remote = mock.MagicMock() + endpoint_local.endpoint_name = 'ceph-local' + endpoint_remote.endpoint_name = 'ceph-remote' + self.endpoint_from_flag.side_effect = [endpoint_local, + endpoint_remote] + 
handlers.request_keys() + self.endpoint_from_flag.assert_has_calls([ + mock.call('ceph-local.connected'), + mock.call('ceph-remote.connected'), + ]) + endpoint_local.request_key.assert_called_once_with() + endpoint_remote.request_key.assert_called_once_with() + self.crm_charm.assess_status.assert_called_once_with() + + def test_config_changed(self): + self.patch_object(handlers.reactive, 'endpoint_from_flag') + handlers.config_changed() + self.endpoint_from_flag.assert_has_calls([ + mock.call('ceph-local.available'), + mock.call('ceph-remote.available'), + ]) + self.crm_charm.upgrade_if_available.assert_called_once_with( + [self.endpoint_from_flag(), self.endpoint_from_flag()]) + self.crm_charm.assess_status.assert_called_once_with() + + def test_render_stuff(self): + self.patch_object(handlers.ch_core.host, 'service') + endpoint_local = mock.MagicMock() + endpoint_remote = mock.MagicMock() + endpoint_local.endpoint_name = 'ceph-local' + endpoint_local.pools = {} + endpoint_remote.endpoint_name = 'ceph-remote' + endpoint_remote.pools = {} + self.crm_charm.services = ['aservice'] + endpoint_local.key = None + handlers.render_stuff(endpoint_local, endpoint_remote) + self.assertFalse(self.crm_charm.configure_ceph_keyring.called) + endpoint_local.key = 'akey' + handlers.render_stuff(endpoint_local, endpoint_remote) + self.crm_charm.configure_ceph_keyring.assert_has_calls([ + mock.call(endpoint_local.key, cluster_name=None), + mock.call(endpoint_remote.key, cluster_name='remote'), + ]) + self.crm_charm.render_with_interfaces.assert_called_once_with( + (endpoint_local, endpoint_remote)) + + def test_refresh_pools(self): + self.patch_object(handlers.reactive, 'endpoint_from_name') + self.patch_object(handlers.reactive, 'clear_flag') + endpoint_local = mock.MagicMock() + endpoint_remote = mock.MagicMock() + self.endpoint_from_name.side_effect = [endpoint_local, endpoint_remote] + handlers.refresh_pools() + self.endpoint_from_name.assert_has_calls([ + mock.call('ceph-local'), + mock.call('ceph-remote'), + ]) + endpoint_local.refresh_pools.assert_called_once_with() + endpoint_remote.refresh_pools.assert_called_once_with() + self.clear_flag.assert_called_once_with('refresh.pools') + + def test_configure_pools(self): + self.patch_object(handlers.reactive, 'endpoint_from_flag') + endpoint_local = mock.MagicMock() + endpoint_remote = mock.MagicMock() + self.crm_charm.collapse_and_filter_broker_requests.side_effect = [ + endpoint_local, endpoint_remote] + endpoint_local.endpoint_name = 'ceph-local' + endpoint_local.pools = { + 'cinder-ceph': { + 'applications': {'rbd': {}}, + 'parameters': { + 'pg_num': 42, + 'size': 3, + 'rbd-mirroring-mode': 'pool' + }, + 'quota': {'max_bytes': 1024, 'max_objects': 51}, + }, + } + endpoint_remote.endpoint_name = 'ceph-remote' + self.endpoint_from_flag.side_effect = [endpoint_local, + endpoint_remote] + self.crm_charm.eligible_pools.return_value = endpoint_local.pools + self.crm_charm.mirror_pool_enabled.return_value = False + self.crm_charm.pool_mirroring_mode.return_value = 'pool' + + handlers.configure_pools() + self.endpoint_from_flag.assert_has_calls([ + mock.call('ceph-local.available'), + mock.call('ceph-remote.available'), + ]) + self.crm_charm.eligible_pools.assert_called_once_with( + endpoint_local.pools) + self.crm_charm.pool_mirroring_mode.assert_called_once_with( + 'cinder-ceph', [endpoint_local, endpoint_remote]) + self.crm_charm.mirror_pool_enabled.assert_called_once_with( + 'cinder-ceph', 'pool') + 
+        self.crm_charm.mirror_pool_enable.assert_called_once_with(
+            'cinder-ceph', 'pool')
+        endpoint_remote.maybe_send_rq.assert_called_once_with(endpoint_local)
diff --git a/ceph-rbd-mirror/unit_tests/test_lib_charm_openstack_ceph_rbd_mirror.py b/ceph-rbd-mirror/unit_tests/test_lib_charm_openstack_ceph_rbd_mirror.py
new file mode 100644
index 00000000..49039fad
--- /dev/null
+++ b/ceph-rbd-mirror/unit_tests/test_lib_charm_openstack_ceph_rbd_mirror.py
@@ -0,0 +1,250 @@
+# Copyright 2018 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import collections
+import json
+from unittest import mock
+import subprocess
+
+import charms_openstack.test_utils as test_utils
+
+import charm.openstack.ceph_rbd_mirror as ceph_rbd_mirror
+
+
+class Helper(test_utils.PatchHelper):
+
+    def setUp(self):
+        super().setUp()
+        self.patch_release(ceph_rbd_mirror.CephRBDMirrorCharm.release)
+
+
+class TestCephRBDMirrorCharm(Helper):
+
+    def test_custom_assess_status_check(self):
+        self.patch_object(ceph_rbd_mirror.socket, 'gethostname')
+        self.patch_object(ceph_rbd_mirror.reactive, 'is_flag_set')
+        self.is_flag_set.return_value = False
+        crmc = ceph_rbd_mirror.CephRBDMirrorCharm()
+        self.assertEqual(crmc.custom_assess_status_check(), (None, None))
+        self.is_flag_set.return_value = True
+        self.patch_object(ceph_rbd_mirror.reactive, 'endpoint_from_flag')
+        self.assertEqual(crmc.custom_assess_status_check(),
+                         ('waiting', 'Waiting for pools to be created'))
+        self.endpoint_from_flag.assert_called_once_with(
+            'ceph-local.available')
+        crmc.mirror_pools_summary = mock.MagicMock()
+        crmc.mirror_pools_summary.return_value = collections.OrderedDict({
+            'pool_health': collections.OrderedDict(
+                {'OK': 1, 'WARN': 1, 'ERROR': 1}),
+            'image_states': collections.OrderedDict(
+                {'stopped': 2, 'replaying': 2}),
+        })
+        result = crmc.custom_assess_status_check()
+        # Disabling blocked state until
+        # https://bugs.launchpad.net/charm-ceph-rbd-mirror/+bug/1879749
+        # is resolved
+        # self.assertTrue('blocked' in result[0])
+        # the order in which the statuses appear in the string is undefined
+        self.assertTrue('OK (1)' in result[1])
+        self.assertTrue('WARN (1)' in result[1])
+        self.assertTrue('ERROR (1)' in result[1])
+        self.assertTrue('Primary (2)' in result[1])
+        self.assertTrue('Secondary (2)' in result[1])
+        crmc.mirror_pools_summary.return_value = collections.OrderedDict({
+            'pool_health': collections.OrderedDict({'OK': 1}),
+            'image_states': collections.OrderedDict({'stopped': 2}),
+        })
+        self.assertEqual(crmc.custom_assess_status_check(),
+                         ('active', 'Unit is ready (Pools OK (1) '
+                          'Images Primary (2))'))
+        crmc.mirror_pools_summary.side_effect = subprocess.CalledProcessError(
+            42, [])
+        self.assertEqual(crmc.custom_assess_status_check(), (None, None))
+
+    def test__mirror_pool_info(self):
+        self.patch_object(ceph_rbd_mirror.socket, 'gethostname')
+        self.patch_object(ceph_rbd_mirror.subprocess, 'check_output')
+        self.gethostname.return_value = 'ahostname'
+        self.check_output.return_value = '{}'
+        crmc = 
ceph_rbd_mirror.CephRBDMirrorCharm() + crmc._mirror_pool_info('apool') + self.check_output.assert_called_once_with( + ['rbd', '--id', 'rbd-mirror.ahostname', 'mirror', 'pool', 'info', + '--format', 'json', 'apool'], universal_newlines=True) + + def test_mirror_pool_enabled(self): + self.patch_object(ceph_rbd_mirror.socket, 'gethostname') + crmc = ceph_rbd_mirror.CephRBDMirrorCharm() + _mirror_pool_info = mock.MagicMock() + _mirror_pool_info.return_value = { + 'mode': 'pool', + 'peers': [{ + 'uuid': '0e4dfe58-93fc-44f8-8c74-7e700f950118', + 'cluster_name': 'remote', + 'client_name': + 'client.rbd-mirror.juju-c50b1a-zaza-4ce96f1e7e43-12'}] + } + crmc._mirror_pool_info = _mirror_pool_info + self.assertTrue(crmc.mirror_pool_enabled('apool', mode='pool')) + _mirror_pool_info.assert_called_once_with('apool') + _mirror_pool_info.return_value = {'mode': 'disabled'} + self.assertFalse(crmc.mirror_pool_enabled('apool', mode='pool')) + + def test_mirror_pool_has_peers(self): + self.patch_object(ceph_rbd_mirror.socket, 'gethostname') + crmc = ceph_rbd_mirror.CephRBDMirrorCharm() + _mirror_pool_info = mock.MagicMock() + _mirror_pool_info.return_value = { + 'mode': 'pool', + 'peers': [{ + 'uuid': '0e4dfe58-93fc-44f8-8c74-7e700f950118', + 'cluster_name': 'remote', + 'client_name': + 'client.rbd-mirror.juju-c50b1a-zaza-4ce96f1e7e43-12'}] + } + crmc._mirror_pool_info = _mirror_pool_info + self.assertTrue(crmc.mirror_pool_has_peers('apool')) + _mirror_pool_info.assert_called_once_with('apool') + _mirror_pool_info.return_value = { + 'mode': 'pool', + 'peers': []} + self.assertFalse(crmc.mirror_pool_has_peers('apool')) + + def test_pools_in_broker_request(self): + rq = mock.MagicMock() + rq.api_version = 1 + rq.ops = [{'op': 'create-pool', 'name': 'fakepool'}] + crmc = ceph_rbd_mirror.CephRBDMirrorCharm() + self.assertIn('fakepool', crmc.pools_in_broker_request(rq)) + + def test_collapse_and_filter_broker_requests(self): + self.patch_object(ceph_rbd_mirror.ch_ceph, 'CephBrokerRq') + + class FakeCephBrokerRq(object): + + def __init__(self): + self.ops = [] + + def add_op(self, op): + self.ops.append(op) + + self.CephBrokerRq.side_effect = FakeCephBrokerRq + + broker_requests = [ + { + 'api-version': 1, + 'ops': [ + { + 'op': 'create-pool', + 'name': 'pool-rq0', + 'app-name': 'rbd', + }, + ] + }, + { + 'api-version': 1, + 'ops': [ + { + 'op': 'create-pool', + 'name': 'pool-rq1', + 'app-name': 'notrbd', + }, + ] + }, + { + 'api-version': 1, + 'ops': [ + { + 'op': 'create-pool', + 'name': 'pool-rq2', + 'app-name': 'rbd', + 'someotherkey': 'value', + }, + ] + }, + ] + crmc = ceph_rbd_mirror.CephRBDMirrorCharm() + rq = crmc.collapse_and_filter_broker_requests( + broker_requests, + set(('create-pool',)), + require_vp={'app-name': 'rbd'}) + self.assertDictEqual( + rq.ops[0], + {'app-name': 'rbd', 'name': 'pool-rq0', 'op': 'create-pool'}) + self.assertDictEqual( + rq.ops[1], + {'app-name': 'rbd', 'name': 'pool-rq2', 'op': 'create-pool', + 'someotherkey': 'value'}) + self.assertTrue(len(rq.ops) == 2) + rq = crmc.collapse_and_filter_broker_requests( + broker_requests, + set(('create-pool',)), + require_vp={'app-name': 'rbd', 'someotherkey': 'value'}) + self.assertDictEqual( + rq.ops[0], + {'app-name': 'rbd', 'name': 'pool-rq2', 'op': 'create-pool', + 'someotherkey': 'value'}) + self.assertTrue(len(rq.ops) == 1) + + def test_pool_mirroring_mode(self): + self.patch_object(ceph_rbd_mirror.ch_ceph, 'CephBrokerRq') + + class FakeCephBrokerRq(object): + def __init__(self, raw_request_data=None): + request_data = 
json.loads(raw_request_data) + self.api_version = request_data['api-version'] + self.request_id = request_data['request-id'] + self.set_ops(request_data['ops']) + + def set_ops(self, ops): + self.ops = ops + + def add_op(self, op): + self.ops.append(op) + + self.CephBrokerRq.side_effect = FakeCephBrokerRq + + brq1_data = json.dumps({ + 'api-version': 1, + 'request-id': 'broker_rq1', + 'ops': [ + { + 'op': 'create-pool', + 'name': 'pool-rq0', + 'app-name': 'rbd-pool', + 'rbd-mirroring-mode': 'pool' + }, + ] + }) + brq2_data = json.dumps({ + 'api-version': 1, + 'request-id': 'broker_rq2', + 'ops': [ + { + 'op': 'create-pool', + 'name': 'pool-rq1', + 'app-name': 'rbd-image', + 'rbd-mirroring-mode': 'image' + }, + ] + }) + + brq1 = self.CephBrokerRq(raw_request_data=brq1_data) + brq2 = self.CephBrokerRq(raw_request_data=brq2_data) + broker_requests = [brq1, brq2, None] + crmc = ceph_rbd_mirror.CephRBDMirrorCharm() + rq0 = crmc.pool_mirroring_mode('pool-rq0', broker_requests) + self.assertEqual('pool', rq0) + rq1 = crmc.pool_mirroring_mode('pool-rq1', broker_requests) + self.assertEqual('image', rq1) diff --git a/constraints/test-constraints.txt b/constraints/test-constraints.txt new file mode 100644 index 00000000..31e245a8 --- /dev/null +++ b/constraints/test-constraints.txt @@ -0,0 +1,2 @@ +# https://github.com/boto/boto3/issues/4392 +boto3<1.36.0 diff --git a/terraform/applications.tf b/terraform/applications.tf new file mode 100644 index 00000000..8e404961 --- /dev/null +++ b/terraform/applications.tf @@ -0,0 +1,31 @@ +# Copyright 2025 Canonical Ltd. +# See LICENSE file for licensing details. + +module "ceph_mon" { + source = "../ceph-mon/terraform" + + model = var.model + base = module.ceph_mon_config.config.base + constraints = module.ceph_mon_config.config.constraints + channel = module.ceph_mon_config.config.channel + + config = module.ceph_mon_config.config.config + resources = module.ceph_mon_config.config.resources + revision = module.ceph_mon_config.config.revision + units = module.ceph_mon_config.config.units +} + +module "ceph_osd" { + source = "../ceph-osd/terraform" + + model = var.model + base = module.ceph_osd_config.config.base + constraints = module.ceph_osd_config.config.constraints + channel = module.ceph_osd_config.config.channel + + config = module.ceph_osd_config.config.config + resources = module.ceph_osd_config.config.resources + storage = module.ceph_osd_config.config.storage + revision = module.ceph_osd_config.config.revision + units = module.ceph_osd_config.config.units +} diff --git a/terraform/configs.tf b/terraform/configs.tf new file mode 100644 index 00000000..13f50685 --- /dev/null +++ b/terraform/configs.tf @@ -0,0 +1,14 @@ +# Copyright 2025 Canonical Ltd. +# See LICENSE file for licensing details. + +module "ceph_mon_config" { + source = "./manifest" + manifest = var.manifest_yaml + app = "ceph_mon" +} + +module "ceph_osd_config" { + source = "./manifest" + manifest = var.manifest_yaml + app = "ceph_osd" +} diff --git a/terraform/integrations.tf b/terraform/integrations.tf new file mode 100644 index 00000000..03455214 --- /dev/null +++ b/terraform/integrations.tf @@ -0,0 +1,14 @@ +# Copyright 2025 Canonical Ltd. +# See LICENSE file for licensing details. 
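+
+# Relate ceph-mon's 'osd' provider endpoint to ceph-osd's 'mon' requirer so
+# the monitor and OSD applications are integrated in the target model.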
+
+resource "juju_integration" "ceph_mon" {
+  model = var.model
+  application {
+    name     = module.ceph_mon.app_name
+    endpoint = module.ceph_mon.provides.osd
+  }
+  application {
+    name     = module.ceph_osd.app_name
+    endpoint = module.ceph_osd.requires.mon
+  }
+}
diff --git a/terraform/manifest/main.tf b/terraform/manifest/main.tf
new file mode 100644
index 00000000..390769ed
--- /dev/null
+++ b/terraform/manifest/main.tf
@@ -0,0 +1,6 @@
+# Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+locals {
+  yaml_data = lookup(yamldecode(file("${var.manifest}")), var.app, {})
+}
diff --git a/terraform/manifest/outputs.tf b/terraform/manifest/outputs.tf
new file mode 100644
index 00000000..09464e0f
--- /dev/null
+++ b/terraform/manifest/outputs.tf
@@ -0,0 +1,16 @@
+# Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+output "config" {
+  value = {
+    app_name    = lookup(local.yaml_data, "app_name", null)
+    base        = lookup(local.yaml_data, "base", null)
+    channel     = lookup(local.yaml_data, "channel", null)
+    config      = lookup(local.yaml_data, "config", null)
+    constraints = lookup(local.yaml_data, "constraints", null)
+    resources   = lookup(local.yaml_data, "resources", null)
+    revision    = lookup(local.yaml_data, "revision", null)
+    units       = lookup(local.yaml_data, "units", null)
+    storage     = lookup(local.yaml_data, "storage", null)
+  }
+}
diff --git a/terraform/manifest/variables.tf b/terraform/manifest/variables.tf
new file mode 100644
index 00000000..b7b3fc44
--- /dev/null
+++ b/terraform/manifest/variables.tf
@@ -0,0 +1,12 @@
+# Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+variable "manifest" {
+  description = "Absolute path to a yaml file with config for a Juju application."
+  type        = string
+}
+
+variable "app" {
+  description = "Name of the application to load config for."
+  type        = string
+}
diff --git a/terraform/outputs.tf b/terraform/outputs.tf
new file mode 100644
index 00000000..a3c831cd
--- /dev/null
+++ b/terraform/outputs.tf
@@ -0,0 +1,12 @@
+# Copyright 2025 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+output "ceph_mon" {
+  description = "Object of the ceph_mon application."
+  value       = module.ceph_mon
+}
+
+output "ceph_osd" {
+  description = "Object of the ceph_osd application."
+  value       = module.ceph_osd
+}
diff --git a/terraform/variables.tf b/terraform/variables.tf
new file mode 100644
index 00000000..60c709f0
--- /dev/null
+++ b/terraform/variables.tf
@@ -0,0 +1,12 @@
+# Copyright 2025 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+variable "manifest_yaml" {
+  description = "Absolute path to the manifest yaml file for the charm configurations."
+  type        = string
+}
+
+variable "model" {
+  description = "Name of the Juju model to deploy to."
+  type        = string
+}
diff --git a/terraform/versions.tf b/terraform/versions.tf
new file mode 100644
index 00000000..e25f4383
--- /dev/null
+++ b/terraform/versions.tf
@@ -0,0 +1,12 @@
+# Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
+ +terraform { + required_version = ">= 1.6" + required_providers { + juju = { + source = "juju/juju" + version = "~> 0.14.0" + } + } +} diff --git a/tests/configs/dot.zaza.yaml b/tests/configs/dot.zaza.yaml new file mode 100644 index 00000000..4f0579f7 --- /dev/null +++ b/tests/configs/dot.zaza.yaml @@ -0,0 +1,6 @@ +--- +model_settings: + logging-config: "=INFO;unit=DEBUG" + +runtime_config: + TEST_MAX_RESOLVE_COUNT: 5 diff --git a/tests/configs/model-defaults.yaml b/tests/configs/model-defaults.yaml new file mode 100644 index 00000000..25a325c9 --- /dev/null +++ b/tests/configs/model-defaults.yaml @@ -0,0 +1,4 @@ +test-mode: true +automatically-retry-hooks: true +logging-config: "=DEBUG" +enable-os-upgrade: false diff --git a/tests/scripts/actionutils.sh b/tests/scripts/actionutils.sh new file mode 100755 index 00000000..ae48ac97 --- /dev/null +++ b/tests/scripts/actionutils.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash + +function cleaript() { + # Docker can inject rules causing firewall conflicts + sudo iptables -P FORWARD ACCEPT || true + sudo ip6tables -P FORWARD ACCEPT || true + sudo iptables -F FORWARD || true + sudo ip6tables -F FORWARD || true + +} + +function cacheimgs() { + local base="${1?missing}" + juju add-model dummy + juju add-machine --base "$base" + sleep 10 + juju add-machine --base "$base" --constraints "virt-type=virtual-machine" + while [ "$(juju machines | egrep -wc 'started')" -ne 2 ]; do + sleep 2 + done + juju destroy-model --force --timeout 20s --no-prompt dummy + sleep 5 +} + +function setup_functest() { + sudo apt -y install tox + if [ ! -d "$HOME/.local/share/juju" ]; then + sudo snap install juju --channel=3.6/stable + mkdir -p ~/.local/share/juju + juju bootstrap \ + --auto-upgrade=false \ + --model-default=tests/configs/model-defaults.yaml \ + localhost localhost + fi + sudo snap install --classic juju-crashdump + cp tests/configs/dot.zaza.yaml ~/.zaza.yaml +} + +run="${1}" +shift + +$run "$@" diff --git a/tests/terraform/default.yaml b/tests/terraform/default.yaml new file mode 100644 index 00000000..3e4af66f --- /dev/null +++ b/tests/terraform/default.yaml @@ -0,0 +1,14 @@ +ceph_mon: + channel: quincy/stable + constraints: arch=amd64 cores=2 mem=8192M root-disk=16384M virt-type=virtual-machine + units: 1 + config: + monitor-count: 1 + expected-osd-count: 2 +ceph_osd: + channel: quincy/stable + constraints: arch=amd64 cores=2 mem=8192M root-disk=16384M virt-type=virtual-machine + units: 2 + storage: + osd-devices: 1G,1 + osd-journals: 1G,1 diff --git a/tests/terraform/main.tf b/tests/terraform/main.tf new file mode 100644 index 00000000..a7d0a5ee --- /dev/null +++ b/tests/terraform/main.tf @@ -0,0 +1,31 @@ +# Copyright 2025 Canonical Ltd. +# See LICENSE file for licensing details. + +terraform { + required_version = ">= 1.6" + required_providers { + juju = { + source = "juju/juju" + version = "~> 0.14.0" + } + } +} + +provider "juju" {} + +variable "manifest_yaml" { + description = "Path to the manifest YAML file" + type = string +} + +variable "model" { + description = "Name of the model to deploy to" + type = string + default = "ceph-model" +} + +module "ceph" { + source = "../../terraform" + model = var.model + manifest_yaml = var.manifest_yaml +}
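+
+# Illustrative usage (assumes a bootstrapped Juju controller and the sample
+# manifest in this directory):
+#
+#   terraform init
+#   terraform apply -var="manifest_yaml=$PWD/default.yaml" -var="model=ceph-model"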