From 410accb82b240413371897a58f4ec517323c848f Mon Sep 17 00:00:00 2001 From: Shanshan Shen <467638484@qq.com> Date: Mon, 13 Jan 2025 03:44:48 +0000 Subject: [PATCH] merge main --- .github/workflows/actionlint.yml | 40 +++++++++++++++++++ .gitignore | 2 +- Dockerfile | 2 +- README.md | 2 +- mypy.ini | 9 ++++- setup.py | 28 +++++++------ tools/mypy.sh | 3 +- .../__init__.py | 2 +- .../attention.py | 8 ++-- .../communicator.py | 0 .../model_runner.py | 0 vllm_ascend/ops/__init__.py | 1 + vllm_ascend/ops/layernorm.py | 24 +++++++++++ .../platform.py | 14 +++---- {vllm_ascend_plugin => vllm_ascend}/worker.py | 2 +- vllm_ascend_plugin/ops/__init__.py | 10 ----- vllm_ascend_plugin/ops/layernorm.py | 23 ----------- 17 files changed, 106 insertions(+), 64 deletions(-) create mode 100644 .github/workflows/actionlint.yml rename {vllm_ascend_plugin => vllm_ascend}/__init__.py (50%) rename {vllm_ascend_plugin => vllm_ascend}/attention.py (99%) rename {vllm_ascend_plugin => vllm_ascend}/communicator.py (100%) rename {vllm_ascend_plugin => vllm_ascend}/model_runner.py (100%) create mode 100644 vllm_ascend/ops/__init__.py create mode 100644 vllm_ascend/ops/layernorm.py rename {vllm_ascend_plugin => vllm_ascend}/platform.py (87%) rename {vllm_ascend_plugin => vllm_ascend}/worker.py (99%) delete mode 100644 vllm_ascend_plugin/ops/__init__.py delete mode 100644 vllm_ascend_plugin/ops/layernorm.py diff --git a/.github/workflows/actionlint.yml b/.github/workflows/actionlint.yml new file mode 100644 index 0000000..0226cf0 --- /dev/null +++ b/.github/workflows/actionlint.yml @@ -0,0 +1,40 @@ +name: Lint GitHub Actions workflows +on: + push: + branches: + - "main" + paths: + - '.github/workflows/*.ya?ml' + - '.github/workflows/actionlint.*' + - '.github/workflows/matchers/actionlint.json' + pull_request: + branches: + - "main" + paths: + - '.github/workflows/*.ya?ml' + - '.github/workflows/actionlint.*' + - '.github/workflows/matchers/actionlint.json' + +env: + LC_ALL: en_US.UTF-8 + +defaults: + run: + shell: bash + +permissions: + contents: read + +jobs: + actionlint: + runs-on: ubuntu-latest + steps: + - name: "Checkout" + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + fetch-depth: 0 + + - name: "Run actionlint" + run: | + echo "::add-matcher::.github/workflows/matchers/actionlint.json" + tools/actionlint.sh -color diff --git a/.gitignore b/.gitignore index ac224af..8da361f 100644 --- a/.gitignore +++ b/.gitignore @@ -10,7 +10,7 @@ __pycache__/ .vscode/ # egg-info -vllm_ascend_plugin.egg-info/ +vllm_ascend.egg-info/ # mypy .mypy_cache/ diff --git a/Dockerfile b/Dockerfile index 09a871d..4302d02 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,7 +21,7 @@ RUN pip install cmake>=3.26 wheel packaging ninja "setuptools-scm>=8" numpy RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" python3 -m pip install -r /workspace/vllm_ascend//vllm/requirements-build.txt # build vLLM with NPU backend RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" VLLM_TARGET_DEVICE="cpu" python3 -m pip install /workspace/vllm_ascend/vllm/ -# install vllm_ascend_plugin +# install vllm_ascend RUN python3 -m pip install /workspace/vllm_ascend/ CMD ["/bin/bash"] diff --git a/README.md b/README.md index d21a8e7..b06806a 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ docker exec -it vllm bash ### 1. Prepare CANN env -Before install vllm_ascend_plugin, you need to install the Ascend CANN Toolkit and Kernels. 
Please follow the [installation tutorial](https://ascend.github.io/docs/sources/ascend/quick_install.html#id1) or use the following commands for quick installation: +Before install vllm_ascend, you need to install the Ascend CANN Toolkit and Kernels. Please follow the [installation tutorial](https://ascend.github.io/docs/sources/ascend/quick_install.html#id1) or use the following commands for quick installation: ```bash # replace the url according to your CANN version and devices diff --git a/mypy.ini b/mypy.ini index 316d335..fe0fd66 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,3 +1,10 @@ -# Suppress all missing import errors from torch_npu for mypy. +[mypy] +; warn_return_any = True +warn_unused_configs = True + +; Suppress all missing import errors from torch_npu for mypy. [mypy-torch_npu.*] ignore_missing_imports = True + +[mypy-transformers.*] +ignore_missing_imports = True diff --git a/setup.py b/setup.py index 7817e48..59fdd41 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,6 @@ import os -from typing import Dict, List +from typing import List + from setuptools import setup ROOT_DIR = os.path.dirname(__file__) @@ -8,6 +9,7 @@ def get_path(*filepath) -> str: return os.path.join(ROOT_DIR, *filepath) + def get_requirements() -> List[str]: """Get Python package dependencies from requirements.txt.""" @@ -23,25 +25,25 @@ def _read_requirements(filename: str) -> List[str]: else: resolved_requirements.append(line) return resolved_requirements - + try: requirements = _read_requirements("requirements.txt") except ValueError: - print("Failed to read requirements.txt in vllm_ascend_plugin.") + print("Failed to read requirements.txt in vllm_ascend.") return requirements -setup(name='vllm_ascend_plugin', - version='0.1', - packages=['vllm_ascend_plugin'], - install_requires=get_requirements(), - extras_require={ +setup( + name='vllm_ascend', + version='0.1', + packages=['vllm_ascend'], + install_requires=get_requirements(), + extras_require={ "tensorizer": ["tensorizer>=2.9.0"], "runai": ["runai-model-streamer", "runai-model-streamer-s3", "boto3"], "audio": ["librosa", "soundfile"], # Required for audio processing "video": ["decord"] # Required for video processing - }, - entry_points={ - 'vllm.platform_plugins': - ["ascend_plugin = vllm_ascend_plugin:register"] - }) + }, + entry_points={ + 'vllm.platform_plugins': ["ascend_plugin = vllm_ascend:register"] + }) diff --git a/tools/mypy.sh b/tools/mypy.sh index 1c6339a..5c789fa 100755 --- a/tools/mypy.sh +++ b/tools/mypy.sh @@ -17,4 +17,5 @@ run_mypy() { } run_mypy # Note that this is less strict than CI -run_mypy vllm_ascend_plugin +run_mypy vllm_ascend +run_mypy examples diff --git a/vllm_ascend_plugin/__init__.py b/vllm_ascend/__init__.py similarity index 50% rename from vllm_ascend_plugin/__init__.py rename to vllm_ascend/__init__.py index 07dea97..16e91da 100644 --- a/vllm_ascend_plugin/__init__.py +++ b/vllm_ascend/__init__.py @@ -1,3 +1,3 @@ def register(): """Register the NPU platform.""" - return "vllm_ascend_plugin.platform.NPUPlatform" + return "vllm_ascend.platform.NPUPlatform" diff --git a/vllm_ascend_plugin/attention.py b/vllm_ascend/attention.py similarity index 99% rename from vllm_ascend_plugin/attention.py rename to vllm_ascend/attention.py index 1ab9779..6afaf63 100644 --- a/vllm_ascend_plugin/attention.py +++ b/vllm_ascend/attention.py @@ -5,9 +5,9 @@ import torch try: - import torch_npu # noqa: F401 + import torch_npu except ImportError: - print("Failed to import torch_npu.") + print("Failed to import torch_npu") from 
vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl, AttentionMetadata, AttentionType) @@ -19,7 +19,7 @@ PagedAttentionMetadata) if TYPE_CHECKING: - from vllm_ascend_plugin.model_runner import ModelInputForNPUBuilder + from vllm_ascend.model_runner import ModelInputForNPUBuilder SHARE_MASK_TRIL_PREFIX_CACHE = None SHARE_MASK_TRIL = None @@ -70,7 +70,7 @@ def swap_blocks( @staticmethod def copy_blocks( - kv_caches: List[torch.Tensor], + kv_caches: List[torch.Tensor], src_to_dists: torch.Tensor, ) -> None: src_indices = src_to_dists[:, 0] diff --git a/vllm_ascend_plugin/communicator.py b/vllm_ascend/communicator.py similarity index 100% rename from vllm_ascend_plugin/communicator.py rename to vllm_ascend/communicator.py diff --git a/vllm_ascend_plugin/model_runner.py b/vllm_ascend/model_runner.py similarity index 100% rename from vllm_ascend_plugin/model_runner.py rename to vllm_ascend/model_runner.py diff --git a/vllm_ascend/ops/__init__.py b/vllm_ascend/ops/__init__.py new file mode 100644 index 0000000..424da25 --- /dev/null +++ b/vllm_ascend/ops/__init__.py @@ -0,0 +1 @@ +import vllm_ascend.ops.layernorm # noqa diff --git a/vllm_ascend/ops/layernorm.py b/vllm_ascend/ops/layernorm.py new file mode 100644 index 0000000..7480613 --- /dev/null +++ b/vllm_ascend/ops/layernorm.py @@ -0,0 +1,24 @@ +from typing import Optional, Tuple, Union + +import torch + +from vllm.model_executor.layers.layernorm import RMSNorm + + +def forward_oot( + self, + x: torch.Tensor, + residual: Optional[torch.Tensor] = None, +) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: + import torch_npu + + if residual is not None: + x, _, residual = torch_npu.npu_add_rms_norm( + x, residual, self.weight, self.variance_epsilon) + return x, residual + + x, residual = torch_npu.npu_rms_norm(x, self.weight, + self.variance_epsilon) + return x + +RMSNorm.forward_oot = forward_oot diff --git a/vllm_ascend_plugin/platform.py b/vllm_ascend/platform.py similarity index 87% rename from vllm_ascend_plugin/platform.py rename to vllm_ascend/platform.py index 3445f20..6ed8bd6 100644 --- a/vllm_ascend_plugin/platform.py +++ b/vllm_ascend/platform.py @@ -4,9 +4,9 @@ import torch try: - import torch_npu # noqa: F401 + import torch_npu except ImportError: - print("Failed to import torch_npu.") + print("Failed to import torch_npu") from vllm.config import VllmConfig from vllm.platforms import Platform @@ -32,7 +32,7 @@ class NPUPlatform(Platform): _enum = "Ascend" device_name: str = "npu" device_type: str = "npu" - torch_compile_backend: str = "npu" + simple_compile_backend: str = "npu" ray_device_key: str = "NPU" visible_device_name: str = "ASCEND_RT" @@ -72,11 +72,11 @@ def mem_get_info(cls) -> Tuple[int, int]: @classmethod def check_and_update_config(cls, vllm_config: VllmConfig) -> None: # Register ops when setup. 
- from vllm_ascend_plugin import ops # noqa: F401 + from vllm_ascend import ops parallel_config = vllm_config.parallel_config if parallel_config.worker_cls == "auto": - parallel_config.worker_cls = "vllm_ascend_plugin.worker.NPUWorker" + parallel_config.worker_cls = "vllm_ascend.worker.NPUWorker" cache_config = vllm_config.cache_config if cache_config and cache_config.block_size is None: cache_config.block_size = 16 @@ -84,7 +84,7 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None: @classmethod def get_attn_backend_cls(cls, selected_backend, head_size, dtype, kv_cache_dtype, block_size, use_v1): - return "vllm_ascend_plugin.attention.AscendAttentionBackend" + return "vllm_ascend.attention.AscendAttentionBackend" @classmethod def get_current_memory_usage(cls, @@ -95,4 +95,4 @@ def get_current_memory_usage(cls, @classmethod def get_device_communicator_cls(cls) -> str: - return "vllm_ascend_plugin.communicator.NPUCommunicator" + return "vllm_ascend.communicator.NPUCommunicator" diff --git a/vllm_ascend_plugin/worker.py b/vllm_ascend/worker.py similarity index 99% rename from vllm_ascend_plugin/worker.py rename to vllm_ascend/worker.py index 5a90484..e0ddd64 100644 --- a/vllm_ascend_plugin/worker.py +++ b/vllm_ascend/worker.py @@ -18,7 +18,7 @@ from vllm.worker.worker import Worker from vllm.worker.worker_base import WorkerBase -from vllm_ascend_plugin.model_runner import NPUModelRunner +from vllm_ascend.model_runner import NPUModelRunner class NPUWorker(Worker): diff --git a/vllm_ascend_plugin/ops/__init__.py b/vllm_ascend_plugin/ops/__init__.py deleted file mode 100644 index 63f5be5..0000000 --- a/vllm_ascend_plugin/ops/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from vllm.model_executor.custom_op import CustomOp - -import vllm_ascend_plugin.ops.layernorm - -def forward_npu(self, *args, **kwargs): - # By default, we assume that NPU ops are compatible with the - # PyTorch-native implementation. - return self.forward_native(*args, **kwargs) - -CustomOp.set_foward_method(forward_npu) diff --git a/vllm_ascend_plugin/ops/layernorm.py b/vllm_ascend_plugin/ops/layernorm.py deleted file mode 100644 index 3c25380..0000000 --- a/vllm_ascend_plugin/ops/layernorm.py +++ /dev/null @@ -1,23 +0,0 @@ -from typing import Optional, Tuple, Union - -import torch -from vllm.model_executor.layers.layernorm import RMSNorm - - -def forward_npu( - self, - x: torch.Tensor, - residual: Optional[torch.Tensor] = None, -) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: - import torch_npu # noqa: F401 - - if residual is not None: - x, _, residual = torch_npu.npu_add_rms_norm(x, residual, self.weight, - self.variance_epsilon) - return x, residual - - x, residual = torch_npu.npu_rms_norm(x, self.weight, self.variance_epsilon) - return x - - -RMSNorm.set_foward_method(forward_npu)
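The rename keeps the plugin wired into vLLM through the `vllm.platform_plugins` entry point declared in setup.py, with `register()` returning the dotted path of the platform class. Below is a minimal sketch of the discovery side, assuming vLLM resolves this group via `importlib.metadata` on Python 3.10+; the loader is illustrative, not vLLM's actual code — only the group name and the `register()` contract come from the diff above.

```python
from importlib import import_module
from importlib.metadata import entry_points


def load_platform_plugins():
    """Resolve every installed `vllm.platform_plugins` entry point (illustrative)."""
    plugins = {}
    for ep in entry_points(group="vllm.platform_plugins"):
        register = ep.load()   # e.g. vllm_ascend.register
        dotted = register()    # "vllm_ascend.platform.NPUPlatform"
        module_name, _, cls_name = dotted.rpartition(".")
        plugins[ep.name] = getattr(import_module(module_name), cls_name)
    return plugins
```

After `pip install` of this package, such a loader would map `ascend_plugin` to `NPUPlatform`. Because discovery goes through installed distribution metadata, the package rename needs no change on the vLLM side — only the entry point target in setup.py moves from `vllm_ascend_plugin:register` to `vllm_ascend:register`.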
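The deleted `vllm_ascend_plugin/ops/__init__.py` attached NPU ops through a `CustomOp.set_foward_method` hook; the new `vllm_ascend/ops/layernorm.py` instead assigns `RMSNorm.forward_oot` directly at import time. A pure-Python sketch of why that single assignment is sufficient, assuming vLLM's `CustomOp` dispatches to `forward_oot` on out-of-tree platforms — the classes below are stand-ins for the real vLLM implementation:

```python
class CustomOp:
    def forward(self, *args, **kwargs):
        # On an out-of-tree platform, dispatch to forward_oot if a plugin
        # has attached one; otherwise fall back to the native implementation.
        impl = getattr(self, "forward_oot", self.forward_native)
        return impl(*args, **kwargs)

    def forward_native(self, x):
        raise NotImplementedError


class RMSNorm(CustomOp):
    def forward_native(self, x):
        return ("native", x)


def forward_oot(self, x):
    # Stand-in for the torch_npu.npu_rms_norm call in the diff above.
    return ("npu", x)


# Importing vllm_ascend.ops.layernorm performs exactly this assignment.
RMSNorm.forward_oot = forward_oot

assert RMSNorm().forward("hidden_states") == ("npu", "hidden_states")
```

Importing `vllm_ascend.ops` (as `check_and_update_config` does in platform.py) triggers the module-level assignment, so the NPU kernel replaces the native RMSNorm path without subclassing or any registry call.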