
Commit: merge main
shen-shanshan committed Jan 13, 2025
1 parent 8a9cad2 commit 410accb
Showing 17 changed files with 106 additions and 64 deletions.
40 changes: 40 additions & 0 deletions .github/workflows/actionlint.yml
@@ -0,0 +1,40 @@
+name: Lint GitHub Actions workflows
+on:
+  push:
+    branches:
+      - "main"
+    paths:
+      - '.github/workflows/*.ya?ml'
+      - '.github/workflows/actionlint.*'
+      - '.github/workflows/matchers/actionlint.json'
+  pull_request:
+    branches:
+      - "main"
+    paths:
+      - '.github/workflows/*.ya?ml'
+      - '.github/workflows/actionlint.*'
+      - '.github/workflows/matchers/actionlint.json'
+
+env:
+  LC_ALL: en_US.UTF-8
+
+defaults:
+  run:
+    shell: bash
+
+permissions:
+  contents: read
+
+jobs:
+  actionlint:
+    runs-on: ubuntu-latest
+    steps:
+      - name: "Checkout"
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          fetch-depth: 0
+
+      - name: "Run actionlint"
+        run: |
+          echo "::add-matcher::.github/workflows/matchers/actionlint.json"
+          tools/actionlint.sh -color
2 changes: 1 addition & 1 deletion .gitignore
@@ -10,7 +10,7 @@ __pycache__/
 .vscode/
 
 # egg-info
-vllm_ascend_plugin.egg-info/
+vllm_ascend.egg-info/
 
 # mypy
 .mypy_cache/
2 changes: 1 addition & 1 deletion Dockerfile
@@ -21,7 +21,7 @@ RUN pip install cmake>=3.26 wheel packaging ninja "setuptools-scm>=8" numpy
 RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" python3 -m pip install -r /workspace/vllm_ascend//vllm/requirements-build.txt
 # build vLLM with NPU backend
 RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" VLLM_TARGET_DEVICE="cpu" python3 -m pip install /workspace/vllm_ascend/vllm/
-# install vllm_ascend_plugin
+# install vllm_ascend
 RUN python3 -m pip install /workspace/vllm_ascend/
 
 CMD ["/bin/bash"]
2 changes: 1 addition & 1 deletion README.md
@@ -38,7 +38,7 @@ docker exec -it vllm bash
 
 ### 1. Prepare CANN env
 
-Before install vllm_ascend_plugin, you need to install the Ascend CANN Toolkit and Kernels. Please follow the [installation tutorial](https://ascend.github.io/docs/sources/ascend/quick_install.html#id1) or use the following commands for quick installation:
+Before install vllm_ascend, you need to install the Ascend CANN Toolkit and Kernels. Please follow the [installation tutorial](https://ascend.github.io/docs/sources/ascend/quick_install.html#id1) or use the following commands for quick installation:
 
 ```bash
 # replace the url according to your CANN version and devices
9 changes: 8 additions & 1 deletion mypy.ini
@@ -1,3 +1,10 @@
-# Suppress all missing import errors from torch_npu for mypy.
+[mypy]
+; warn_return_any = True
+warn_unused_configs = True
+
+; Suppress all missing import errors from torch_npu for mypy.
 [mypy-torch_npu.*]
 ignore_missing_imports = True
+
+[mypy-transformers.*]
+ignore_missing_imports = True
28 changes: 15 additions & 13 deletions setup.py
@@ -1,5 +1,6 @@
 import os
-from typing import Dict, List
+from typing import List
 
 from setuptools import setup
+
 ROOT_DIR = os.path.dirname(__file__)
@@ -8,6 +9,7 @@
 def get_path(*filepath) -> str:
     return os.path.join(ROOT_DIR, *filepath)
 
+
 def get_requirements() -> List[str]:
     """Get Python package dependencies from requirements.txt."""
 
@@ -23,25 +25,25 @@ def _read_requirements(filename: str) -> List[str]:
             else:
                 resolved_requirements.append(line)
         return resolved_requirements
 
     try:
         requirements = _read_requirements("requirements.txt")
     except ValueError:
-        print("Failed to read requirements.txt in vllm_ascend_plugin.")
+        print("Failed to read requirements.txt in vllm_ascend.")
     return requirements
 
 
-setup(name='vllm_ascend_plugin',
-      version='0.1',
-      packages=['vllm_ascend_plugin'],
-      install_requires=get_requirements(),
-      extras_require={
+setup(
+    name='vllm_ascend',
+    version='0.1',
+    packages=['vllm_ascend'],
+    install_requires=get_requirements(),
+    extras_require={
         "tensorizer": ["tensorizer>=2.9.0"],
         "runai": ["runai-model-streamer", "runai-model-streamer-s3", "boto3"],
         "audio": ["librosa", "soundfile"], # Required for audio processing
         "video": ["decord"] # Required for video processing
-      },
-      entry_points={
-          'vllm.platform_plugins':
-          ["ascend_plugin = vllm_ascend_plugin:register"]
-      })
+    },
+    entry_points={
+        'vllm.platform_plugins': ["ascend_plugin = vllm_ascend:register"]
+    })
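The `vllm.platform_plugins` entry point declared above is how vLLM discovers the Ascend backend: it resolves to `vllm_ascend:register`, and `register()` (see the renamed `vllm_ascend/__init__.py` below) returns the dotted path of the platform class. As a rough illustration of the mechanism — a minimal sketch using `importlib.metadata`, not vLLM's actual plugin-loading code — the installed entry point could be enumerated and loaded like this:

```python
# Illustrative sketch only: enumerate the "vllm.platform_plugins" entry-point
# group once vllm_ascend is installed. entry_points(group=...) needs
# Python >= 3.10; vLLM's real loader may differ.
from importlib.metadata import entry_points

for ep in entry_points(group="vllm.platform_plugins"):
    register_fn = ep.load()        # e.g. vllm_ascend.register
    platform_path = register_fn()  # -> "vllm_ascend.platform.NPUPlatform"
    print(ep.name, "->", platform_path)
```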
3 changes: 2 additions & 1 deletion tools/mypy.sh
@@ -17,4 +17,5 @@ run_mypy() {
 }
 
 run_mypy # Note that this is less strict than CI
-run_mypy vllm_ascend_plugin
+run_mypy vllm_ascend
+run_mypy examples
2 changes: 1 addition & 1 deletion vllm_ascend_plugin/__init__.py → vllm_ascend/__init__.py
@@ -1,3 +1,3 @@
 def register():
     """Register the NPU platform."""
-    return "vllm_ascend_plugin.platform.NPUPlatform"
+    return "vllm_ascend.platform.NPUPlatform"
8 changes: 4 additions & 4 deletions vllm_ascend_plugin/attention.py → vllm_ascend/attention.py
@@ -5,9 +5,9 @@
 import torch
 
 try:
-    import torch_npu # noqa: F401
+    import torch_npu
 except ImportError:
-    print("Failed to import torch_npu.")
+    print("Failed to import torch_npu")
 
 from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
                                               AttentionMetadata, AttentionType)
@@ -19,7 +19,7 @@
                                            PagedAttentionMetadata)
 
 if TYPE_CHECKING:
-    from vllm_ascend_plugin.model_runner import ModelInputForNPUBuilder
+    from vllm_ascend.model_runner import ModelInputForNPUBuilder
 
 SHARE_MASK_TRIL_PREFIX_CACHE = None
 SHARE_MASK_TRIL = None
@@ -70,7 +70,7 @@ def swap_blocks(
 
     @staticmethod
     def copy_blocks(
-        kv_caches: List[torch.Tensor], 
+        kv_caches: List[torch.Tensor],
         src_to_dists: torch.Tensor,
     ) -> None:
         src_indices = src_to_dists[:, 0]
File renamed without changes.
File renamed without changes.
1 change: 1 addition & 0 deletions vllm_ascend/ops/__init__.py
@@ -0,0 +1 @@
+import vllm_ascend.ops.layernorm # noqa
24 changes: 24 additions & 0 deletions vllm_ascend/ops/layernorm.py
@@ -0,0 +1,24 @@
+from typing import Optional, Tuple, Union
+
+import torch
+
+from vllm.model_executor.layers.layernorm import RMSNorm
+
+
+def forward_oot(
+    self,
+    x: torch.Tensor,
+    residual: Optional[torch.Tensor] = None,
+) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
+    import torch_npu
+
+    if residual is not None:
+        x, _, residual = torch_npu.npu_add_rms_norm(
+            x, residual, self.weight, self.variance_epsilon)
+        return x, residual
+
+    x, residual = torch_npu.npu_rms_norm(x, self.weight,
+                                         self.variance_epsilon)
+    return x
+
+RMSNorm.forward_oot = forward_oot
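This new module patches `RMSNorm.forward_oot` so that, once `vllm_ascend.ops` is imported (the new `vllm_ascend/ops/__init__.py` above pulls in this module, and `platform.py` below imports `vllm_ascend.ops` inside `check_and_update_config`), RMSNorm calls into the fused `torch_npu` kernels. For readers without an NPU, a plain-PyTorch sketch of what those kernels are assumed to compute follows — standard RMSNorm semantics; the `torch_npu` kernels remain the authoritative implementation:

```python
import torch


def rms_norm_reference(x: torch.Tensor, weight: torch.Tensor,
                       eps: float) -> torch.Tensor:
    # y = x / sqrt(mean(x^2) + eps) * weight, normalized over the last dim.
    variance = x.pow(2).mean(dim=-1, keepdim=True)
    return x * torch.rsqrt(variance + eps) * weight


def add_rms_norm_reference(x: torch.Tensor, residual: torch.Tensor,
                           weight: torch.Tensor, eps: float):
    # Assumed fused add + RMSNorm: the pre-norm sum becomes the new residual.
    summed = x + residual
    return rms_norm_reference(summed, weight, eps), summed
```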
14 changes: 7 additions & 7 deletions vllm_ascend_plugin/platform.py → vllm_ascend/platform.py
@@ -4,9 +4,9 @@
 import torch
 
 try:
-    import torch_npu # noqa: F401
+    import torch_npu
 except ImportError:
-    print("Failed to import torch_npu.")
+    print("Failed to import torch_npu")
 
 from vllm.config import VllmConfig
 from vllm.platforms import Platform
@@ -32,7 +32,7 @@ class NPUPlatform(Platform):
     _enum = "Ascend"
     device_name: str = "npu"
     device_type: str = "npu"
-    torch_compile_backend: str = "npu"
+    simple_compile_backend: str = "npu"
     ray_device_key: str = "NPU"
     visible_device_name: str = "ASCEND_RT"
 
@@ -72,19 +72,19 @@ def mem_get_info(cls) -> Tuple[int, int]:
     @classmethod
     def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
         # Register ops when setup.
-        from vllm_ascend_plugin import ops # noqa: F401
+        from vllm_ascend import ops
 
         parallel_config = vllm_config.parallel_config
         if parallel_config.worker_cls == "auto":
-            parallel_config.worker_cls = "vllm_ascend_plugin.worker.NPUWorker"
+            parallel_config.worker_cls = "vllm_ascend.worker.NPUWorker"
         cache_config = vllm_config.cache_config
         if cache_config and cache_config.block_size is None:
             cache_config.block_size = 16
 
     @classmethod
     def get_attn_backend_cls(cls, selected_backend, head_size, dtype,
                              kv_cache_dtype, block_size, use_v1):
-        return "vllm_ascend_plugin.attention.AscendAttentionBackend"
+        return "vllm_ascend.attention.AscendAttentionBackend"
 
     @classmethod
     def get_current_memory_usage(cls,
@@ -95,4 +95,4 @@ def get_current_memory_usage(cls,
 
     @classmethod
     def get_device_communicator_cls(cls) -> str:
-        return "vllm_ascend_plugin.communicator.NPUCommunicator"
+        return "vllm_ascend.communicator.NPUCommunicator"
2 changes: 1 addition & 1 deletion vllm_ascend_plugin/worker.py → vllm_ascend/worker.py
@@ -18,7 +18,7 @@
 from vllm.worker.worker import Worker
 from vllm.worker.worker_base import WorkerBase
 
-from vllm_ascend_plugin.model_runner import NPUModelRunner
+from vllm_ascend.model_runner import NPUModelRunner
 
 
 class NPUWorker(Worker):
10 changes: 0 additions & 10 deletions vllm_ascend_plugin/ops/__init__.py

This file was deleted.

23 changes: 0 additions & 23 deletions vllm_ascend_plugin/ops/layernorm.py

This file was deleted.
