# .github/workflows/ci.yml

name: ci
on:
  pull_request:
    # Pull requests trigger this workflow only when they touch one of the
    # paths below.
    # NOTE: keep this list in sync with the paths that trigger the fuzzydata
    # GitHub Actions workflow in .github/workflows/fuzzydata-test.yml
    paths:
      - '.github/workflows/**'
      - '!.github/workflows/push-to-master.yml'
      - 'asv_bench/**'
      - 'modin/**'
      - 'requirements/**'
      - 'scripts/**'
      - 'environment-dev.yml'
      - 'requirements-dev.txt'
      - 'setup.cfg'
      - 'setup.py'
      - 'versioneer.py'
  push:
concurrency:
  # One group per workflow and ref. In-progress runs are cancelled only for
  # pull request refs: we don't care whether CI passes on old commits.
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
env:
  # Exported to every job in this workflow.
  MODIN_GITHUB_CI: true
jobs:
  lint-black:
    # Formatting gate: `black --check --diff` over the source directories.
    runs-on: ubuntu-latest
    name: lint (black)
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 1
      - uses: actions/setup-python@v4
        with:
          architecture: 'x64'
          python-version: '3.8.x'
      - run: pip install black
      # NOTE: the black invocation below must stay in sync with the pre-commit
      # hook in /contributing/pre-commit
      - run: black --check --diff modin/ asv_bench/benchmarks scripts/doc_checker.py

  lint-mypy:
    # Static type check: runs mypy with the configuration from mypy.ini.
    runs-on: ubuntu-latest
    name: lint (mypy)
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 1
      - uses: actions/setup-python@v4
        with:
          architecture: 'x64'
          python-version: '3.8.x'
      # Dev requirements are installed before mypy is invoked.
      - run: pip install -r requirements-dev.txt
      - run: mypy --config-file mypy.ini

  lint-pydocstyle:
    # Docstring checks driven by scripts/doc_checker.py; this job runs only for
    # pull_request events.
    if: github.event_name == 'pull_request'
    name: lint (pydocstyle)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 1
      - uses: actions/setup-python@v4
        with:
          python-version: "3.8.x"
          architecture: "x64"
      # The `numpydoc` version here MUST match the versions in the dev requirements files.
      - run: pip install pytest pytest-cov pydocstyle numpydoc==1.1.0 xgboost
      # Run the tests under scripts/test before the checker is used below.
      - run: python -m pytest scripts/test
      - run: pip install -e ".[all]"
      # Relaxed pass over modin/pandas modules: D101, D102, D103 and D105 are
      # passed to --add-ignore and --disable-numpydoc is set.
      - run: |
          python scripts/doc_checker.py --add-ignore=D101,D102,D103,D105 --disable-numpydoc \
            modin/pandas/dataframe.py modin/pandas/series.py \
            modin/pandas/groupby.py \
            modin/pandas/series_utils.py modin/pandas/general.py \
            modin/pandas/plotting.py modin/pandas/utils.py \
            modin/pandas/iterator.py modin/pandas/indexing.py
      - run: python scripts/doc_checker.py modin/core/dataframe
      - run: python scripts/doc_checker.py modin/core/execution/dask
      # Pass with the checker's default options over the listed files and
      # directories.
      - run: |
          python scripts/doc_checker.py \
            modin/pandas/accessor.py modin/pandas/general.py \
            modin/pandas/groupby.py modin/pandas/indexing.py \
            modin/pandas/iterator.py modin/pandas/plotting.py \
            modin/pandas/series_utils.py modin/pandas/utils.py \
            modin/pandas/base.py \
            modin/pandas/io.py \
            asv_bench/benchmarks/utils \
            asv_bench/benchmarks/__init__.py asv_bench/benchmarks/io/__init__.py \
            asv_bench/benchmarks/scalability/__init__.py \
            modin/core/io \
            modin/experimental/core/execution/ray/implementations/pandas_on_ray \
            modin/experimental/core/execution/ray/implementations/pyarrow_on_ray \
            modin/pandas/series.py \
            modin/core/execution/python \
            modin/pandas/dataframe.py \
            modin/config/__init__.py \
            modin/config/__main__.py \
            modin/config/envvars.py \
            modin/config/pubsub.py
      - run: python scripts/doc_checker.py modin/distributed
      - run: python scripts/doc_checker.py modin/utils.py
      - run: python scripts/doc_checker.py modin/experimental/sklearn
      - run: |
          python scripts/doc_checker.py modin/experimental/xgboost/__init__.py \
            modin/experimental/xgboost/utils.py modin/experimental/xgboost/xgboost.py \
            modin/experimental/xgboost/xgboost_ray.py
      - run: python scripts/doc_checker.py modin/core/execution/ray
      - run: |
          python scripts/doc_checker.py modin/core/execution/dispatching/factories/factories.py \
            modin/core/execution/dispatching/factories/dispatcher.py
      # The checker is also run on its own source file.
      - run: python scripts/doc_checker.py scripts/doc_checker.py
      - run: |
          python scripts/doc_checker.py modin/experimental/pandas/io.py \
            modin/experimental/pandas/numpy_wrap.py modin/experimental/pandas/__init__.py
      - run: python scripts/doc_checker.py modin/core/storage_formats/base
      - run: python scripts/doc_checker.py modin/experimental/core/storage_formats/pyarrow
      - run: python scripts/doc_checker.py modin/core/storage_formats/pandas
      - run: |
          python scripts/doc_checker.py \
            modin/experimental/core/execution/native/implementations/hdk_on_native/dataframe \
            modin/experimental/core/execution/native/implementations/hdk_on_native/io \
            modin/experimental/core/execution/native/implementations/hdk_on_native/partitioning \
            modin/experimental/core/execution/native/implementations/hdk_on_native/calcite_algebra.py \
            modin/experimental/core/execution/native/implementations/hdk_on_native/calcite_builder.py \
            modin/experimental/core/execution/native/implementations/hdk_on_native/calcite_serializer.py \
            modin/experimental/core/execution/native/implementations/hdk_on_native/df_algebra.py \
            modin/experimental/core/execution/native/implementations/hdk_on_native/expr.py \
            modin/experimental/core/execution/native/implementations/hdk_on_native/hdk_worker.py
      - run: python scripts/doc_checker.py modin/experimental/core/storage_formats/hdk
      - run: python scripts/doc_checker.py modin/experimental/core/execution/native/implementations/hdk_on_native/interchange/dataframe_protocol
      - run: python scripts/doc_checker.py modin/experimental/batch/pipeline.py
      - run: python scripts/doc_checker.py modin/logging

  lint-flake8:
    # Lint gate: flake8 plus the flake8-print and flake8-no-implicit-concat plugins.
    runs-on: ubuntu-latest
    name: lint (flake8)
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 1
      - uses: actions/setup-python@v4
        with:
          architecture: 'x64'
          python-version: '3.8.x'
      # NOTE: when changing the set of packages installed here, make sure the
      # dev requirements match them.
      - run: pip install flake8 flake8-print flake8-no-implicit-concat
      # NOTE: the flake8 invocation below must stay in sync with the pre-commit
      # hook in /contributing/pre-commit
      - run: flake8 modin/ asv_bench/benchmarks scripts/doc_checker.py

  test-api:
    name: test api
    runs-on: ubuntu-latest
    defaults:
      run:
        # Login shell (`bash -l {0}`): the special way to activate the modin environment
        shell: bash -l {0}
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 1
      # Cache of conda packages and pip downloads, keyed on the OS and the
      # hash of environment-dev.yml.
      - name: Cache conda
        uses: actions/cache@v3
        with:
          key: ${{ runner.os }}-conda-${{ hashFiles('environment-dev.yml') }}
          path: |
            ~/conda_pkgs_dir
            ~/.cache/pip
      - uses: conda-incubator/setup-miniconda@v2
        with:
          activate-environment: modin
          environment-file: environment-dev.yml
          python-version: 3.8
          miniforge-variant: Mambaforge
          miniforge-version: latest
          use-mamba: true
          channel-priority: strict
          # use-only-tar-bz2 is false so that conda can properly find new packages to install;
          # see https://github.com/conda-incubator/setup-miniconda/issues/264
          use-only-tar-bz2: false
      # Print the resolved environment into the job log.
      - name: Conda environment
        run: |
          conda info
          conda list
      - run: sudo apt update && sudo apt install -y libhdf5-dev
      - name: Api tests
        run: python -m pytest modin/pandas/test/test_api.py
      - name: Executions Api tests
        run: python -m pytest modin/test/test_executions_api.py

  test-headers:
    name: test-headers
    runs-on: ubuntu-latest
    defaults:
      run:
        # Every `run` step of this job uses a bash login shell.
        shell: bash -l {0}
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 1
      # Cache of conda packages and pip downloads, keyed on the OS and the
      # hash of environment-dev.yml.
      - name: Cache conda
        uses: actions/cache@v3
        with:
          key: ${{ runner.os }}-conda-${{ hashFiles('environment-dev.yml') }}
          path: |
            ~/conda_pkgs_dir
            ~/.cache/pip
      - uses: conda-incubator/setup-miniconda@v2
        with:
          activate-environment: modin
          environment-file: environment-dev.yml
          python-version: 3.8
          miniforge-variant: Mambaforge
          miniforge-version: latest
          use-mamba: true
          channel-priority: strict
          # use-only-tar-bz2 is false so that conda can properly find new packages to install;
          # see https://github.com/conda-incubator/setup-miniconda/issues/264
          use-only-tar-bz2: false
      # Print the resolved environment into the job log.
      - name: Conda environment
        run: |
          conda info
          conda list
      - name: Headers tests
        run: python -m pytest modin/test/test_headers.py

  test-clean-install-ubuntu:
    name: test-clean-install-ubuntu
    # Starts only after the lint jobs and the api/headers tests.
    needs:
      - lint-flake8
      - lint-black
      - lint-mypy
      - test-api
      - test-headers
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 1
      - uses: actions/setup-python@v4
        with:
          architecture: 'x64'
          python-version: '3.8.x'
      # Editable install with the `all` extra, then build and print a small
      # DataFrame once per engine (dask, ray, unidist on the MPI backend).
      - name: Clean install and run
        run: |
          python -m pip install -e ".[all]"
          MODIN_ENGINE=dask python -c "import modin.pandas as pd; print(pd.DataFrame([1,2,3]))"
          MODIN_ENGINE=ray python -c "import modin.pandas as pd; print(pd.DataFrame([1,2,3]))"
          MODIN_ENGINE=unidist UNIDIST_BACKEND=mpi mpiexec -n 1 python -c "import modin.pandas as pd; print(pd.DataFrame([1,2,3]))"

  test-clean-install-windows:
    name: test-clean-install-windows
    # Starts only after the lint jobs and the api/headers tests.
    needs:
      - lint-flake8
      - lint-black
      - lint-mypy
      - test-api
      - test-headers
    runs-on: windows-latest
    defaults:
      run:
        shell: bash -l {0}
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 1
      - uses: actions/setup-python@v4
        with:
          architecture: 'x64'
          python-version: '3.8.x'
      # Editable install with the `all` extra, then build and print a small
      # DataFrame once per engine (dask, ray, unidist on the MPI backend).
      - name: Clean install and run
        run: |
          python -m pip install -e ".[all]"
          MODIN_ENGINE=dask python -c "import modin.pandas as pd; print(pd.DataFrame([1,2,3]))"
          MODIN_ENGINE=ray python -c "import modin.pandas as pd; print(pd.DataFrame([1,2,3]))"
          MODIN_ENGINE=unidist UNIDIST_BACKEND=mpi mpiexec -n 1 python -c "import modin.pandas as pd; print(pd.DataFrame([1,2,3]))"

  test-internals:
    name: test-internals
    # Starts only after the lint jobs and the api/headers tests.
    needs:
      - lint-flake8
      - lint-black
      - lint-mypy
      - test-api
      - test-headers
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 1
      # Cache of conda packages and pip downloads, keyed on the OS and the
      # hash of environment-dev.yml.
      - name: Cache conda
        uses: actions/cache@v3
        with:
          key: ${{ runner.os }}-conda-${{ hashFiles('environment-dev.yml') }}
          path: |
            ~/conda_pkgs_dir
            ~/.cache/pip
      - uses: conda-incubator/setup-miniconda@v2
        with:
          activate-environment: modin
          environment-file: environment-dev.yml
          python-version: 3.8
          miniforge-variant: Mambaforge
          miniforge-version: latest
          use-mamba: true
          channel-priority: strict
          # use-only-tar-bz2 is false so that conda can properly find new packages to install;
          # see https://github.com/conda-incubator/setup-miniconda/issues/264
          use-only-tar-bz2: false
      # Print the resolved environment into the job log.
      - name: Conda environment
        run: |
          conda info
          conda list
      - name: Internals tests
        run: python -m pytest modin/core/execution/dispatching/factories/test/test_dispatcher.py modin/experimental/cloud/test/test_cloud.py
      # Each remaining suite runs as its own step.
      - run: python -m pytest modin/config/test
      - run: python -m pytest modin/test/test_envvar_catcher.py
      - run: python -m pytest modin/test/storage_formats/base/test_internals.py
      - run: python -m pytest modin/test/storage_formats/pandas/test_internals.py
      - run: python -m pytest modin/test/test_envvar_npartitions.py
      - run: python -m pytest modin/test/test_utils.py
      - run: python -m pytest asv_bench/test/test_utils.py
      - run: python -m pytest modin/test/interchange/dataframe_protocol/base
      - run: python -m pytest modin/test/test_logging.py
      # Local action from this repository.
      - uses: ./.github/workflows/upload-coverage

  test-no-engine:
    # Runs a single dispatcher test inside the conda environment described by
    # requirements/requirements-no-engine.yml.
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 1
      - name: Cache conda
        uses: actions/cache@v3
        with:
          path: |
            ~/conda_pkgs_dir
            ~/.cache/pip
          # The hashed path must be the same file that is passed as
          # `environment-file` below. hashFiles() returns an empty string for a
          # path that matches nothing, which would leave the key constant and
          # the cache stale whenever the environment changes.
          key:
            ${{ runner.os }}-conda-${{ hashFiles('requirements/requirements-no-engine.yml') }}
      - uses: conda-incubator/setup-miniconda@v2
        with:
          miniforge-variant: Mambaforge
          miniforge-version: latest
          use-mamba: true
          activate-environment: modin
          environment-file: requirements/requirements-no-engine.yml
          python-version: 3.8
          channel-priority: strict
          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
          use-only-tar-bz2: false
      # Print the resolved environment into the job log.
      - name: Conda environment
        run: |
          conda info
          conda list
      - run: python -m pytest modin/core/execution/dispatching/factories/test/test_dispatcher.py::test_add_option
      - uses: ./.github/workflows/upload-coverage

  test-defaults:
    name: Test ${{ matrix.execution }} execution, Python 3.8
    # Starts only after the lint jobs and the api/headers tests.
    needs:
      - lint-flake8
      - lint-black
      - lint-mypy
      - test-api
      - test-headers
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # The matrix value is forwarded to pytest as `--execution=<value>`.
        execution:
          - BaseOnPython
    env:
      MODIN_TEST_DATASET_SIZE: 'small'
    defaults:
      run:
        shell: bash -l {0}
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 2
      # Cache of conda packages and pip downloads, keyed on the OS and the
      # hash of environment-dev.yml.
      - name: Cache conda
        uses: actions/cache@v3
        with:
          key: ${{ runner.os }}-conda-${{ hashFiles('environment-dev.yml') }}
          path: |
            ~/conda_pkgs_dir
            ~/.cache/pip
      - uses: conda-incubator/setup-miniconda@v2
        with:
          activate-environment: modin
          environment-file: environment-dev.yml
          python-version: 3.8
          miniforge-variant: Mambaforge
          miniforge-version: latest
          use-mamba: true
          channel-priority: strict
          # use-only-tar-bz2 is false so that conda can properly find new packages to install;
          # see https://github.com/conda-incubator/setup-miniconda/issues/264
          use-only-tar-bz2: false
      # Print the resolved environment into the job log.
      - name: Conda environment
        run: |
          conda info
          conda list
      - name: Install HDF5
        run: sudo apt update && sudo apt install -y libhdf5-dev
      # The xgboost suite is the only one below invoked without `-n 2`.
      - run: python -m pytest modin/experimental/xgboost/test/test_default.py --execution=${{ matrix.execution }}
      - run: python -m pytest -n 2 modin/test/storage_formats/base/test_internals.py --execution=${{ matrix.execution }}
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_binary.py --execution=${{ matrix.execution }}
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_default.py --execution=${{ matrix.execution }}
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_indexing.py --execution=${{ matrix.execution }}
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_iter.py --execution=${{ matrix.execution }}
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_join_sort.py --execution=${{ matrix.execution }}
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_map_metadata.py --execution=${{ matrix.execution }}
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_reduce.py --execution=${{ matrix.execution }}
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_udf.py --execution=${{ matrix.execution }}
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_window.py --execution=${{ matrix.execution }}
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_pickle.py --execution=${{ matrix.execution }}
      - run: python -m pytest -n 2 modin/pandas/test/test_series.py --execution=${{ matrix.execution }}
      - run: python -m pytest -n 2 modin/pandas/test/test_rolling.py --execution=${{ matrix.execution }}
      - run: python -m pytest -n 2 modin/pandas/test/test_expanding.py --execution=${{ matrix.execution }}
      - run: python -m pytest -n 2 modin/pandas/test/test_concat.py --execution=${{ matrix.execution }}
      # The groupby suite runs twice: once as-is and once with
      # MODIN_EXPERIMENTAL_GROUPBY=1 set.
      - run: python -m pytest -n 2 modin/pandas/test/test_groupby.py --execution=${{ matrix.execution }}
      - run: MODIN_EXPERIMENTAL_GROUPBY=1 python -m pytest -n 2 modin/pandas/test/test_groupby.py --execution=${{ matrix.execution }}
      - run: python -m pytest -n 2 modin/pandas/test/test_reshape.py --execution=${{ matrix.execution }}
      - run: python -m pytest -n 2 modin/pandas/test/test_general.py --execution=${{ matrix.execution }}
      - uses: ./.github/workflows/upload-coverage

  test-hdk:
    name: Test HDK storage format, Python 3.8
    # Starts only after the lint jobs and the api/headers tests.
    needs:
      - lint-flake8
      - lint-black
      - lint-mypy
      - test-api
      - test-headers
    runs-on: ubuntu-latest
    env:
      # Selects the native engine with the hdk storage format for every step.
      MODIN_ENGINE: 'native'
      MODIN_STORAGE_FORMAT: 'hdk'
      MODIN_EXPERIMENTAL: 'True'
    defaults:
      run:
        shell: bash -l {0}
    services:
      # moto server container, published on port 5000.
      moto:
        image: motoserver/moto
        env:
          AWS_ACCESS_KEY_ID: foobar_key
          AWS_SECRET_ACCESS_KEY: foobar_secret
        ports:
          - '5000:5000'
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 2
      # Cache of conda packages and pip downloads, keyed on the OS and the
      # hash of requirements/env_hdk.yml.
      - name: Cache conda
        uses: actions/cache@v3
        with:
          key: ${{ runner.os }}-conda-${{ hashFiles('requirements/env_hdk.yml') }}
          path: |
            ~/conda_pkgs_dir
            ~/.cache/pip
      - name: Setting up Modin environment
        uses: conda-incubator/setup-miniconda@v2
        with:
          activate-environment: modin_on_hdk
          environment-file: requirements/env_hdk.yml
          python-version: 3.8
          miniforge-variant: Mambaforge
          miniforge-version: latest
          use-mamba: true
          # use-only-tar-bz2 is false so that conda can properly find new packages to install;
          # see https://github.com/conda-incubator/setup-miniconda/issues/264
          use-only-tar-bz2: false
      # Print the resolved environment into the job log.
      - name: Conda environment
        run: |
          conda info
          conda list
      - name: Install HDF5
        run: sudo apt update && sudo apt install -y libhdf5-dev
      # Test suites, one step each.
      - run: python -m pytest modin/test/storage_formats/hdk/test_internals.py
      - run: python -m pytest modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_init.py
      - run: python -m pytest modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py
      - run: python -m pytest modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_utils.py
      - run: python -m pytest modin/pandas/test/test_io.py --verbose
      - run: python -m pytest modin/test/interchange/dataframe_protocol/test_general.py
      - run: python -m pytest modin/test/interchange/dataframe_protocol/hdk
      - run: python -m pytest modin/experimental/sql/test/test_sql.py
      - run: python -m pytest modin/pandas/test/test_concat.py
      - run: python -m pytest modin/pandas/test/dataframe/test_binary.py
      - run: python -m pytest modin/pandas/test/dataframe/test_reduce.py
      - run: python -m pytest modin/pandas/test/dataframe/test_join_sort.py
      - run: python -m pytest modin/pandas/test/test_general.py
      - run: python -m pytest modin/pandas/test/dataframe/test_indexing.py
      - run: python -m pytest modin/pandas/test/test_series.py
      - run: python -m pytest modin/pandas/test/dataframe/test_map_metadata.py
      - run: python -m pytest modin/pandas/test/dataframe/test_window.py
      - run: python -m pytest modin/pandas/test/dataframe/test_default.py
      # Example scripts from examples/docker/modin-hdk, run against the 1k-row
      # CSV files under examples/data.
      - run: python examples/docker/modin-hdk/census-hdk.py examples/data/census_1k.csv -no-ml
      - run: python examples/docker/modin-hdk/nyc-taxi-hdk.py examples/data/nyc-taxi_1k.csv
      - run: |
          python examples/docker/modin-hdk/plasticc-hdk.py \
          examples/data/plasticc_training_set_1k.csv \
          examples/data/plasticc_test_set_1k.csv \
          examples/data/plasticc_training_set_metadata_1k.csv \
          examples/data/plasticc_test_set_metadata_1k.csv \
          -no-ml
      - uses: ./.github/workflows/upload-coverage

  test-asv-benchmarks:
    # Smoke-runs the ASV benchmarks (`asv run --quick`); this job runs only for
    # pull_request events.
    if: github.event_name == 'pull_request'
    needs: [lint-flake8, lint-black, lint-mypy, test-api, test-headers]
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    env:
      MODIN_ENGINE: ray
      MODIN_MEMORY: 1000000000
      MODIN_TEST_DATASET_SIZE: small
    name: test-asv-benchmarks
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 1
      - uses: conda-incubator/setup-miniconda@v2
        with:
          auto-activate-base: true
          activate-environment: ""
      - name: ASV installation
        run: |
          # FIXME: use the tag or release version of ASV as soon as it appears;
          # The ability to build a conda environment by specifying yml file has not
          # yet appeared in the release versions;
          pip install git+https://github.com/airspeed-velocity/asv.git@ef016e233cb9a0b19d517135104f49e0a3c380e9
      - name: Running benchmarks
        # NOTE(review): every `asv run` is piped into `tee` and the job shell is
        # `bash -l {0}` (no `-e`, no `pipefail`), so the step's exit status
        # appears to come from `tee`, not `asv` - confirm whether that is intended.
        run: |
          # ASV correctly creates environments for testing only from the branch
          # with `master` name
          git checkout -b master
          cd asv_bench
          asv check -v
          git remote add upstream https://github.com/modin-project/modin.git
          git fetch upstream
          if git diff upstream/master --name-only | grep -q "^asv_bench/"; then
              asv machine --yes

              # check Modin on Ray
              asv run --quick --strict --show-stderr --launch-method=spawn \
                -b ^benchmarks -b ^io -b ^scalability | tee benchmarks.log

              # check pure pandas
              # (`tee -a` appends, so the output of the previous run stays in the log)
              MODIN_ASV_USE_IMPL=pandas asv run --quick --strict --show-stderr --launch-method=spawn \
                -b ^benchmarks -b ^io | tee -a benchmarks.log

              # HDK: ERR_OUT_OF_CPU_MEM: Not enough host memory to execute the query (MODIN#4270)
              # just disable test for testing - it works well in a machine with more memory
              sed -i 's/def time_groupby_agg_nunique(self, \*args, \*\*kwargs):/# def time_groupby_agg_nunique(self, *args, **kwargs):/g' benchmarks/hdk/benchmarks.py
              sed -i 's/execute(self.df.groupby(by=self.groupby_columns).agg("nunique"))/# execute(self.df.groupby(by=self.groupby_columns).agg("nunique"))/g' benchmarks/hdk/benchmarks.py

              # check Modin on HDK
              MODIN_ENGINE=native MODIN_STORAGE_FORMAT=hdk MODIN_EXPERIMENTAL=true asv run --quick --strict --show-stderr \
                --launch-method=forkserver --config asv.conf.hdk.json \
                -b ^hdk | tee -a benchmarks.log
          else
              echo "Benchmarks did not run, no changes detected"
          fi
        if: always()

      # The log is uploaded only when an earlier step has failed.
      - name: Publish benchmarks artifact
        # Pinned to a released major version rather than a moving branch ref.
        uses: actions/upload-artifact@v4
        with:
          name: Benchmarks log
          path: asv_bench/benchmarks.log
        if: failure()

  test-all-unidist:
    name: test-ubuntu (engine unidist ${{matrix.unidist-backend}}, python ${{matrix.python-version}})
    # Starts only after the lint jobs and the api/headers tests.
    needs:
      - lint-flake8
      - lint-black
      - lint-mypy
      - test-api
      - test-headers
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version:
          - '3.8'
        unidist-backend:
          - 'mpi'
    env:
      MODIN_ENGINE: 'Unidist'
      UNIDIST_BACKEND: ${{matrix.unidist-backend}}
      # Reading from SQL server and postgres is only tested on ubuntu for now.
      # Eventually, we should test on Windows, too, but we will have to set up
      # the servers differently.
      MODIN_TEST_READ_FROM_SQL_SERVER: true
      MODIN_TEST_READ_FROM_POSTGRES: true
    defaults:
      run:
        shell: bash -l {0}
    services:
      # moto server container, published on port 5000.
      moto:
        image: motoserver/moto
        env:
          AWS_ACCESS_KEY_ID: foobar_key
          AWS_SECRET_ACCESS_KEY: foobar_secret
        ports:
          - '5000:5000'
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 2
      # Cache of conda packages and pip downloads, keyed on the OS and the
      # hash of requirements/env_unidist.yml.
      - name: Cache conda
        uses: actions/cache@v3
        with:
          key: ${{ runner.os }}-conda-${{ hashFiles('requirements/env_unidist.yml') }}
          path: |
            ~/conda_pkgs_dir
            ~/.cache/pip
      - uses: conda-incubator/setup-miniconda@v2
        with:
          activate-environment: modin_on_unidist
          environment-file: requirements/env_unidist.yml
          python-version: ${{matrix.python-version}}
          miniforge-variant: Mambaforge
          miniforge-version: latest
          use-mamba: true
          channel-priority: strict
          # use-only-tar-bz2 is false so that conda can properly find new packages to install;
          # see https://github.com/conda-incubator/setup-miniconda/issues/264
          use-only-tar-bz2: false
      # Print the resolved environment into the job log.
      - name: Conda environment
        run: |
          conda info
          conda list
      - name: Install HDF5
        run: sudo apt update && sudo apt install -y libhdf5-dev
      - name: Set up postgres
        # Locally, the port mapping 2345:5432 works, while 2345:2345 and 5432:5432 do not. This solution is from
        # https://stackoverflow.com/questions/36415654/cant-connect-docker-postgresql-9-3
        run: |
          sudo docker pull postgres
          sudo docker run --name some-postgres -e POSTGRES_USER=sa -e POSTGRES_PASSWORD=Strong.Pwd-123 -e POSTGRES_DB=postgres -d -p 2345:5432 postgres
      # Every pytest invocation below is launched through `mpiexec -n 1`.
      - run: MODIN_BENCHMARK_MODE=True mpiexec -n 1 python -m pytest modin/pandas/test/internals/test_benchmark_mode.py
      - run: mpiexec -n 1 python -m pytest modin/pandas/test/internals/test_repartition.py
      - run: mpiexec -n 1 python -m pytest modin/test/test_partition_api.py
      - run: mpiexec -n 1 python -m pytest modin/pandas/test/dataframe/test_binary.py
      - run: mpiexec -n 1 python -m pytest modin/pandas/test/dataframe/test_default.py
      - run: mpiexec -n 1 python -m pytest modin/pandas/test/dataframe/test_indexing.py
      - run: mpiexec -n 1 python -m pytest modin/pandas/test/dataframe/test_iter.py
      - run: mpiexec -n 1 python -m pytest modin/pandas/test/dataframe/test_join_sort.py
      - run: mpiexec -n 1 python -m pytest modin/pandas/test/dataframe/test_map_metadata.py
      - run: mpiexec -n 1 python -m pytest modin/pandas/test/dataframe/test_reduce.py
      - run: mpiexec -n 1 python -m pytest modin/pandas/test/dataframe/test_udf.py
      - run: mpiexec -n 1 python -m pytest modin/pandas/test/dataframe/test_window.py
      - run: mpiexec -n 1 python -m pytest modin/pandas/test/dataframe/test_pickle.py
      - run: mpiexec -n 1 python -m pytest modin/pandas/test/test_series.py
      - run: mpiexec -n 1 python -m pytest modin/pandas/test/test_rolling.py
      - run: mpiexec -n 1 python -m pytest modin/pandas/test/test_expanding.py
      - run: mpiexec -n 1 python -m pytest modin/pandas/test/test_concat.py
      # The groupby suite runs twice: once as-is and once with
      # MODIN_EXPERIMENTAL_GROUPBY=1 set.
      - run: mpiexec -n 1 python -m pytest modin/pandas/test/test_groupby.py
      - run: MODIN_EXPERIMENTAL_GROUPBY=1 mpiexec -n 1 python -m pytest modin/pandas/test/test_groupby.py
      - run: mpiexec -n 1 python -m pytest modin/pandas/test/test_reshape.py
      - run: mpiexec -n 1 python -m pytest modin/pandas/test/test_general.py
      - run: mpiexec -n 1 python -m pytest modin/numpy/test/test_array.py
      - run: mpiexec -n 1 python -m pytest modin/numpy/test/test_array_creation.py
      - run: mpiexec -n 1 python -m pytest modin/numpy/test/test_array_arithmetic.py
      - run: mpiexec -n 1 python -m pytest modin/numpy/test/test_array_axis_functions.py
      - run: mpiexec -n 1 python -m pytest modin/numpy/test/test_array_logic.py
      - run: mpiexec -n 1 python -m pytest modin/numpy/test/test_array_linalg.py
      - run: mpiexec -n 1 python -m pytest modin/numpy/test/test_array_indexing.py
      - run: mpiexec -n 1 python -m pytest modin/numpy/test/test_array_math.py
      - run: mpiexec -n 1 python -m pytest modin/numpy/test/test_array_shaping.py
      # Make the SQL server setup script executable, then run it.
      - run: chmod +x ./.github/workflows/sql_server/set_up_sql_server.sh
      - run: ./.github/workflows/sql_server/set_up_sql_server.sh
      # mpiexec needs the extra "genv" argument to set environment variables. These
      # variables are needed to test writing to the mock s3 filesystem.
      - run: mpiexec -n 1 -genv AWS_ACCESS_KEY_ID foobar_key -genv AWS_SECRET_ACCESS_KEY foobar_secret python -m pytest modin/pandas/test/test_io.py --verbose
      - run: mpiexec -n 1 python -m pytest modin/experimental/pandas/test/test_io_exp.py
      - run: pip install "dfsql>=0.4.2" "pyparsing<=2.4.7" && mpiexec -n 1 python -m pytest modin/experimental/sql/test/test_sql.py
      - run: mpiexec -n 1 python -m pytest modin/test/interchange/dataframe_protocol/test_general.py
      - run: mpiexec -n 1 python -m pytest modin/test/interchange/dataframe_protocol/pandas/test_protocol.py
      # lazy_import is installed right before the integrations suite.
      - run: |
          python -m pip install lazy_import
          mpiexec -n 1 python -m pytest modin/pandas/test/integrations/
      - uses: ./.github/workflows/upload-coverage

  test-all:
    needs: [lint-flake8, lint-black, lint-mypy, test-api, test-headers]
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    strategy:
      matrix:
        python-version: ["3.8"]
        engine: ["python", "ray", "dask"]
        test_task:
          - group_1
          - group_2
          - group_3
          - group_4
        exclude: # python engine only have one task group that contains all the tests
          - engine: "python"
            test_task: "group_2"
          - engine: "python"
            test_task: "group_3"
          - engine: "python"
            test_task: "group_4"
    env:
      MODIN_ENGINE: ${{matrix.engine}}
      # Only test reading from SQL server and postgres on ubuntu for now.
      # Eventually, we should test on Windows, too, but we will have to set up
      # the servers differently.
      MODIN_TEST_READ_FROM_SQL_SERVER: true
      MODIN_TEST_READ_FROM_POSTGRES: true
    name: test-ubuntu (engine ${{matrix.engine}}, python ${{matrix.python-version}}, ${{matrix.test_task}})
    services:
      # This service only needs to run for test_task group_4; however, GitHub does not
      # currently support conditionally running services. The issue
      # https://github.com/actions/runner/issues/822 is open; until GitHub implements
      # this feature, we will just have to run `moto` for all groups.
      moto:
        image: motoserver/moto
        ports:
          - 5000:5000
        env:
          AWS_ACCESS_KEY_ID: foobar_key
          AWS_SECRET_ACCESS_KEY: foobar_secret
    steps:
      - name: Limit ray memory
        run: echo "MODIN_MEMORY=1000000000" >> $GITHUB_ENV
        if: matrix.engine == 'ray'
      - uses: actions/checkout@v3
        with:
          fetch-depth: 2
      - name: Cache conda
        uses: actions/cache@v3
        with:
          path: |
            ~/conda_pkgs_dir
            ~/.cache/pip
          key:
            ${{ runner.os }}-conda-${{ hashFiles('environment-dev.yml') }}
      - uses: conda-incubator/setup-miniconda@v2
        with:
          miniforge-variant: Mambaforge
          miniforge-version: latest
          use-mamba: true
          activate-environment: modin
          environment-file: environment-dev.yml
          python-version: ${{matrix.python-version}}
          channel-priority: strict
          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
          conda list
      - name: Install HDF5
        run: sudo apt update && sudo apt install -y libhdf5-dev
      - name: Set up postgres
        # Locally, specifying port 2345:5432 works, but 2345:2345 and 5432:5432 do not. This solution is from
        # https://stackoverflow.com/questions/36415654/cant-connect-docker-postgresql-9-3
        run: |
          sudo docker pull postgres
          sudo docker run --name some-postgres -e POSTGRES_USER=sa -e POSTGRES_PASSWORD=Strong.Pwd-123 -e POSTGRES_DB=postgres -d -p 2345:5432 postgres
      - run: MODIN_BENCHMARK_MODE=True python -m pytest modin/pandas/test/internals/test_benchmark_mode.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_1'
      - run: python -m pytest modin/pandas/test/internals/test_repartition.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_1'
      - run: python -m pytest modin/test/test_partition_api.py
        if: matrix.engine != 'python' && matrix.test_task == 'group_1'
      - run: python -m pytest -n 2 modin/experimental/xgboost/test/test_default.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_1'
      - run: python -m pytest -n 2 modin/experimental/xgboost/test/test_xgboost.py
        if: matrix.engine == 'ray' && matrix.test_task == 'group_1'
      - run: python -m pytest -n 2 modin/experimental/xgboost/test/test_dmatrix.py
        if: matrix.engine == 'ray' && matrix.test_task == 'group_1'
      - run: python -m pytest -n 2 modin/experimental/batch/test/test_pipeline.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_1'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_binary.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_1'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_default.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_1'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_indexing.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_1'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_iter.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_2'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_join_sort.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_2'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_reduce.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_2'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_udf.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_2'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_window.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_2'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_pickle.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_2'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_map_metadata.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_3'
      - run: python -m pytest -n 2 modin/pandas/test/test_series.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_3'
      - run: MODIN_EXPERIMENTAL_GROUPBY=1 python -m pytest -n 2 modin/pandas/test/test_groupby.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_3'
      - run: python -m pytest -n 2 modin/pandas/test/test_rolling.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/pandas/test/test_expanding.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest modin/pandas/test/test_concat.py # Ray and Dask versions fails with -n 2
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/numpy/test/test_array.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/numpy/test/test_array_creation.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/numpy/test/test_array_arithmetic.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/numpy/test/test_array_axis_functions.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/numpy/test/test_array_logic.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/numpy/test/test_array_linalg.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/numpy/test/test_array_indexing.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/numpy/test/test_array_math.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/numpy/test/test_array_shaping.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/pandas/test/test_groupby.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/pandas/test/test_reshape.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/pandas/test/test_general.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: chmod +x ./.github/workflows/sql_server/set_up_sql_server.sh
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: ./.github/workflows/sql_server/set_up_sql_server.sh
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      # Do not add parallelism (`-n` argument) here - it will cause mock S3 service to fail.
      - run: python -m pytest modin/pandas/test/test_io.py --verbose
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest modin/experimental/pandas/test/test_io_exp.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: pip install "dfsql>=0.4.2" "pyparsing<=2.4.7" && python -m pytest modin/experimental/sql/test/test_sql.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest modin/test/interchange/dataframe_protocol/test_general.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest modin/test/interchange/dataframe_protocol/pandas/test_protocol.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: |
          python -m pip install lazy_import
          python -m pytest modin/pandas/test/integrations/
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - uses: ./.github/workflows/upload-coverage

  test-experimental:
    # Runs a subset of the pandas tests on the python engine with
    # MODIN_EXPERIMENTAL enabled.
    needs: [lint-flake8, lint-black, lint-mypy, test-api, test-headers]
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    env:
      MODIN_ENGINE: "python"
      MODIN_EXPERIMENTAL: "True"
    name: test experimental
    services:
      # Mock S3 server; see the note on the test_io.py step below.
      moto:
        image: motoserver/moto
        ports:
          - 5000:5000
        env:
          AWS_ACCESS_KEY_ID: foobar_key
          AWS_SECRET_ACCESS_KEY: foobar_secret
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 2
      - name: Cache conda
        uses: actions/cache@v3
        with:
          path: |
            ~/conda_pkgs_dir
            ~/.cache/pip
          key:
            ${{ runner.os }}-conda-${{ hashFiles('environment-dev.yml') }}
      - uses: conda-incubator/setup-miniconda@v2
        with:
          miniforge-variant: Mambaforge
          miniforge-version: latest
          use-mamba: true
          activate-environment: modin
          environment-file: environment-dev.yml
          # Quoted so YAML keeps the version as a string, matching the matrix
          # jobs in this workflow; an unquoted float would silently mangle a
          # future bump (e.g. 3.10 would be read as 3.1).
          python-version: "3.8"
          channel-priority: strict
          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
          conda list
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_map_metadata.py
      - run: python -m pytest -n 2 modin/pandas/test/test_series.py
      # Do not add parallelism (`-n` argument) here - it will cause mock S3 service to fail.
      - run: python -m pytest modin/pandas/test/test_io.py --verbose
      - uses: ./.github/workflows/upload-coverage

  test-cloud:
    # Runs a small, hand-picked set of tests with `--simulate-cloud=normal`
    # on the python engine with MODIN_EXPERIMENTAL enabled.
    needs: [lint-flake8, lint-black, lint-mypy, test-api, test-headers]
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    env:
      MODIN_ENGINE: "python"
      MODIN_EXPERIMENTAL: "True"
    name: test cloud
    services:
      moto:
        image: motoserver/moto
        ports:
          - 5000:5000
        env:
          AWS_ACCESS_KEY_ID: foobar_key
          AWS_SECRET_ACCESS_KEY: foobar_secret
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 2
      - name: Cache conda
        uses: actions/cache@v3
        with:
          path: |
            ~/conda_pkgs_dir
            ~/.cache/pip
          key:
            ${{ runner.os }}-conda-${{ hashFiles('environment-dev.yml') }}
      - uses: conda-incubator/setup-miniconda@v2
        with:
          miniforge-variant: Mambaforge
          miniforge-version: latest
          use-mamba: true
          activate-environment: modin
          environment-file: environment-dev.yml
          # Quoted so YAML keeps the version as a string, matching the matrix
          # jobs in this workflow; an unquoted float would silently mangle a
          # future bump (e.g. 3.10 would be read as 3.1).
          python-version: "3.8"
          channel-priority: strict
          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
          conda list
      # TODO(https://github.com/modin-project/modin/issues/4004): Re-add
      # "python -m pytest --simulate-cloud=normal modin/pandas/test/test_io.py --verbose"
      # once that test stops crashing.
      - run: python -m pytest --simulate-cloud=normal modin/pandas/test/dataframe/test_default.py::test_kurt_kurtosis --verbose
      # When running without parameters, some of the tests fail, so a single
      # parametrization is selected. The node id is quoted because `[...]` is
      # a glob bracket expression in bash and must reach pytest verbatim.
      - run: python -m pytest --simulate-cloud=normal "modin/pandas/test/dataframe/test_binary.py::test_math_functions[add-rows-scalar]"
      - uses: ./.github/workflows/upload-coverage

  test-windows:
    # Windows test job for the ray and dask engines, split into four task
    # groups (one matrix job per engine/group combination).
    name: test-windows (engine ${{ matrix.engine }}, python ${{ matrix.python-version }}, ${{ matrix.test_task }})
    needs:
      - lint-flake8
      - lint-black
      - lint-mypy
      - test-api
      - test-headers
    runs-on: windows-latest
    defaults:
      run:
        shell: bash -l {0}
    strategy:
      matrix:
        python-version:
          - '3.8'
        engine:
          - 'ray'
          - 'dask'
        test_task:
          - group_1
          - group_2
          - group_3
          - group_4
    env:
      MODIN_ENGINE: ${{ matrix.engine }}
    steps:
      # ---- environment setup ----
      - uses: actions/checkout@v3
        with:
          fetch-depth: 2
      - uses: conda-incubator/setup-miniconda@v2
        with:
          miniforge-variant: Mambaforge
          miniforge-version: latest
          use-mamba: true
          activate-environment: modin
          environment-file: environment-dev.yml
          python-version: ${{ matrix.python-version }}
          channel-priority: strict
          # use-only-tar-bz2 is set to false so that conda can properly find
          # new packages to install; for more info see
          # https://github.com/conda-incubator/setup-miniconda/issues/264
          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
          conda list
      - name: Start local ray cluster
        if: matrix.engine == 'ray'
        # Try a few times to start ray to work around
        # https://github.com/modin-project/modin/issues/4562
        uses: nick-fields/retry@v2
        with:
          timeout_minutes: 5
          max_attempts: 5
          command: |
            ray start --head --port=6379 --object-store-memory=1000000000
      - name: Tell Modin to use existing ray cluster
        if: matrix.engine == 'ray'
        run: echo "MODIN_RAY_CLUSTER=True" >> $GITHUB_ENV

      # ---- group_1 ----
      - if: matrix.test_task == 'group_1'
        run: python -m pytest -n 2 modin/pandas/test/dataframe/test_binary.py
      - if: matrix.test_task == 'group_1'
        run: python -m pytest -n 2 modin/pandas/test/dataframe/test_default.py
      - if: matrix.test_task == 'group_1'
        run: python -m pytest -n 2 modin/pandas/test/dataframe/test_indexing.py
      - if: matrix.test_task == 'group_1'
        run: python -m pytest -n 2 modin/pandas/test/dataframe/test_iter.py

      # ---- group_2 ----
      - if: matrix.test_task == 'group_2'
        run: python -m pytest -n 2 modin/pandas/test/dataframe/test_join_sort.py
      - if: matrix.test_task == 'group_2'
        run: python -m pytest -n 2 modin/pandas/test/dataframe/test_reduce.py
      - if: matrix.test_task == 'group_2'
        run: python -m pytest -n 2 modin/pandas/test/dataframe/test_udf.py
      - if: matrix.test_task == 'group_2'
        run: python -m pytest -n 2 modin/pandas/test/dataframe/test_window.py
      - if: matrix.test_task == 'group_2'
        run: python -m pytest -n 2 modin/pandas/test/dataframe/test_pickle.py

      # ---- group_3 ----
      - if: matrix.test_task == 'group_3'
        run: python -m pytest -n 2 modin/pandas/test/test_series.py
      - if: matrix.test_task == 'group_3'
        run: python -m pytest -n 2 modin/pandas/test/dataframe/test_map_metadata.py
      # Groupby tests with MODIN_EXPERIMENTAL_GROUPBY=1; the same file is run
      # again without that variable in group_4.
      - if: matrix.test_task == 'group_3'
        run: MODIN_EXPERIMENTAL_GROUPBY=1 python -m pytest -n 2 modin/pandas/test/test_groupby.py

      # ---- group_4 ----
      - if: matrix.test_task == 'group_4'
        run: python -m pytest -n 2 modin/pandas/test/test_rolling.py
      - if: matrix.test_task == 'group_4'
        run: python -m pytest -n 2 modin/pandas/test/test_expanding.py
      # Ray and Dask versions fail with `-n 2`, so test_concat runs serially.
      - if: matrix.test_task == 'group_4'
        run: python -m pytest modin/pandas/test/test_concat.py
      - if: matrix.test_task == 'group_4'
        run: python -m pytest -n 2 modin/numpy/test/test_array.py
      - if: matrix.test_task == 'group_4'
        run: python -m pytest -n 2 modin/numpy/test/test_array_creation.py
      - if: matrix.test_task == 'group_4'
        run: python -m pytest -n 2 modin/numpy/test/test_array_arithmetic.py
      - if: matrix.test_task == 'group_4'
        run: python -m pytest -n 2 modin/numpy/test/test_array_axis_functions.py
      - if: matrix.test_task == 'group_4'
        run: python -m pytest -n 2 modin/numpy/test/test_array_logic.py
      - if: matrix.test_task == 'group_4'
        run: python -m pytest -n 2 modin/numpy/test/test_array_linalg.py
      - if: matrix.test_task == 'group_4'
        run: python -m pytest -n 2 modin/numpy/test/test_array_indexing.py
      - if: matrix.test_task == 'group_4'
        run: python -m pytest -n 2 modin/numpy/test/test_array_math.py
      - if: matrix.test_task == 'group_4'
        run: python -m pytest -n 2 modin/numpy/test/test_array_shaping.py
      - if: matrix.test_task == 'group_4'
        run: python -m pytest -n 2 modin/pandas/test/test_groupby.py
      - if: matrix.test_task == 'group_4'
        run: python -m pytest -n 2 modin/pandas/test/test_reshape.py
      - if: matrix.test_task == 'group_4'
        run: python -m pytest -n 2 modin/pandas/test/test_general.py
      # The IO tests run without `-n` and are capped at 60 minutes.
      - if: matrix.test_task == 'group_4'
        timeout-minutes: 60
        run: python -m pytest modin/pandas/test/test_io.py --verbose

      # ---- coverage and teardown ----
      - uses: ./.github/workflows/upload-coverage
      - name: Stop local ray cluster
        if: matrix.engine == 'ray'
        run: ray stop
      - name: Rename the folder with conda packages so it won't be deleted, it's too slow on Windows.
        run: mv "${CONDA_PKGS_DIR}" "${CONDA_PKGS_DIR}_do_not_cache"

  test-pyarrow:
    # Runs the CSV tests from test_io.py with the pyarrow storage format and
    # MODIN_EXPERIMENTAL enabled.
    name: test (pyarrow, python ${{ matrix.python-version }})
    needs:
      - lint-flake8
      - lint-black
      - lint-mypy
      - test-api
      - test-headers
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    strategy:
      matrix:
        python-version:
          - '3.8'
    env:
      MODIN_STORAGE_FORMAT: pyarrow
      MODIN_EXPERIMENTAL: 'True'
    services:
      moto:
        image: motoserver/moto
        ports:
          - '5000:5000'
        env:
          AWS_ACCESS_KEY_ID: foobar_key
          AWS_SECRET_ACCESS_KEY: foobar_secret
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 1
      - name: Cache conda
        uses: actions/cache@v3
        with:
          key: ${{ runner.os }}-conda-${{ hashFiles('environment-dev.yml') }}
          path: |
            ~/conda_pkgs_dir
            ~/.cache/pip
      - uses: conda-incubator/setup-miniconda@v2
        with:
          miniforge-variant: Mambaforge
          miniforge-version: latest
          use-mamba: true
          activate-environment: modin
          environment-file: environment-dev.yml
          python-version: ${{ matrix.python-version }}
          channel-priority: strict
          # use-only-tar-bz2 is set to false so that conda can properly find
          # new packages to install; for more info see
          # https://github.com/conda-incubator/setup-miniconda/issues/264
          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
          conda list
      # Install the HDF5 development package from apt before running the tests.
      - run: sudo apt update && sudo apt install -y libhdf5-dev
      - run: python -m pytest modin/pandas/test/test_io.py::TestCsv --verbose

  test-spreadsheet:
    # Runs the experimental spreadsheet tests on the ray and dask engines with
    # MODIN_EXPERIMENTAL enabled.
    name: test-spreadsheet (engine ${{ matrix.engine }}, python ${{ matrix.python-version }})
    needs:
      - lint-flake8
      - lint-black
      - lint-mypy
      - test-api
      - test-headers
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    strategy:
      matrix:
        python-version:
          - '3.8'
        engine:
          - 'ray'
          - 'dask'
    env:
      MODIN_EXPERIMENTAL: 'True'
      MODIN_ENGINE: ${{ matrix.engine }}
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 1
      - name: Cache conda
        uses: actions/cache@v3
        with:
          key: ${{ runner.os }}-conda-${{ hashFiles('environment-dev.yml') }}
          path: |
            ~/conda_pkgs_dir
            ~/.cache/pip
      - uses: conda-incubator/setup-miniconda@v2
        with:
          miniforge-variant: Mambaforge
          miniforge-version: latest
          use-mamba: true
          activate-environment: modin
          environment-file: environment-dev.yml
          python-version: ${{ matrix.python-version }}
          channel-priority: strict
          # use-only-tar-bz2 is set to false so that conda can properly find
          # new packages to install; for more info see
          # https://github.com/conda-incubator/setup-miniconda/issues/264
          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
          conda list
      - run: python -m pytest modin/experimental/spreadsheet/test/test_general.py

  upload-coverage:
    # Final job: waits for the listed test jobs, downloads the shared
    # `coverage-data` artifact, combines it into one report and uploads that
    # report via the codecov action.
    needs: [test-internals, test-no-engine, test-defaults, test-hdk, test-all-unidist, test-all, test-experimental, test-cloud, test-windows]
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    steps:
      # Use the same checkout major version as every other job in this
      # workflow (previously this job alone was still on v2).
      - uses: actions/checkout@v3
        with:
          fetch-depth: 1
      - name: Download coverage data
        uses: actions/download-artifact@v3.0.2
        with:
          name: coverage-data
      # NOTE(review): no python-version is given here, unlike the lint jobs
      # ("3.8.x"); confirm that relying on the action's default is intended.
      - uses: actions/setup-python@v4
      - run: pip install coverage
      - name: Combine coverage
        run: python -m coverage combine
      - name: Generate coverage report in xml format
        run: python -m coverage xml
      - uses: codecov/codecov-action@v3
        with:
          # Fail this job if the upload to Codecov errors out.
          fail_ci_if_error: true