From 410accb82b240413371897a58f4ec517323c848f Mon Sep 17 00:00:00 2001 From: Shanshan Shen <467638484@qq.com> Date: Mon, 13 Jan 2025 03:44:48 +0000 Subject: [PATCH] merge main --- .github/workflows/actionlint.yml | 40 +++++++++++++++++++ .gitignore | 2 +- Dockerfile | 2 +- README.md | 2 +- mypy.ini | 9 ++++- setup.py | 28 +++++++------ tools/mypy.sh | 3 +- .../__init__.py | 2 +- .../attention.py | 8 ++-- .../communicator.py | 0 .../model_runner.py | 0 vllm_ascend/ops/__init__.py | 1 + vllm_ascend/ops/layernorm.py | 24 +++++++++++ .../platform.py | 14 +++---- {vllm_ascend_plugin => vllm_ascend}/worker.py | 2 +- vllm_ascend_plugin/ops/__init__.py | 10 ----- vllm_ascend_plugin/ops/layernorm.py | 23 ----------- 17 files changed, 106 insertions(+), 64 deletions(-) create mode 100644 .github/workflows/actionlint.yml rename {vllm_ascend_plugin => vllm_ascend}/__init__.py (50%) rename {vllm_ascend_plugin => vllm_ascend}/attention.py (99%) rename {vllm_ascend_plugin => vllm_ascend}/communicator.py (100%) rename {vllm_ascend_plugin => vllm_ascend}/model_runner.py (100%) create mode 100644 vllm_ascend/ops/__init__.py create mode 100644 vllm_ascend/ops/layernorm.py rename {vllm_ascend_plugin => vllm_ascend}/platform.py (87%) rename {vllm_ascend_plugin => vllm_ascend}/worker.py (99%) delete mode 100644 vllm_ascend_plugin/ops/__init__.py delete mode 100644 vllm_ascend_plugin/ops/layernorm.py diff --git a/.github/workflows/actionlint.yml b/.github/workflows/actionlint.yml new file mode 100644 index 0000000..0226cf0 --- /dev/null +++ b/.github/workflows/actionlint.yml @@ -0,0 +1,40 @@ +name: Lint GitHub Actions workflows +on: + push: + branches: + - "main" + paths: + - '.github/workflows/*.ya?ml' + - '.github/workflows/actionlint.*' + - '.github/workflows/matchers/actionlint.json' + pull_request: + branches: + - "main" + paths: + - '.github/workflows/*.ya?ml' + - '.github/workflows/actionlint.*' + - '.github/workflows/matchers/actionlint.json' + +env: + LC_ALL: en_US.UTF-8 + +defaults: + run: + shell: bash + +permissions: + contents: read + +jobs: + actionlint: + runs-on: ubuntu-latest + steps: + - name: "Checkout" + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + fetch-depth: 0 + + - name: "Run actionlint" + run: | + echo "::add-matcher::.github/workflows/matchers/actionlint.json" + tools/actionlint.sh -color diff --git a/.gitignore b/.gitignore index ac224af..8da361f 100644 --- a/.gitignore +++ b/.gitignore @@ -10,7 +10,7 @@ __pycache__/ .vscode/ # egg-info -vllm_ascend_plugin.egg-info/ +vllm_ascend.egg-info/ # mypy .mypy_cache/ diff --git a/Dockerfile b/Dockerfile index 09a871d..4302d02 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,7 +21,7 @@ RUN pip install cmake>=3.26 wheel packaging ninja "setuptools-scm>=8" numpy RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" python3 -m pip install -r /workspace/vllm_ascend//vllm/requirements-build.txt # build vLLM with NPU backend RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" VLLM_TARGET_DEVICE="cpu" python3 -m pip install /workspace/vllm_ascend/vllm/ -# install vllm_ascend_plugin +# install vllm_ascend RUN python3 -m pip install /workspace/vllm_ascend/ CMD ["/bin/bash"] diff --git a/README.md b/README.md index d21a8e7..b06806a 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ docker exec -it vllm bash ### 1. Prepare CANN env -Before install vllm_ascend_plugin, you need to install the Ascend CANN Toolkit and Kernels. 
Please follow the [installation tutorial](https://ascend.github.io/docs/sources/ascend/quick_install.html#id1) or use the following commands for quick installation: +Before install vllm_ascend, you need to install the Ascend CANN Toolkit and Kernels. Please follow the [installation tutorial](https://ascend.github.io/docs/sources/ascend/quick_install.html#id1) or use the following commands for quick installation: ```bash # replace the url according to your CANN version and devices diff --git a/mypy.ini b/mypy.ini index 316d335..fe0fd66 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,3 +1,10 @@ -# Suppress all missing import errors from torch_npu for mypy. +[mypy] +; warn_return_any = True +warn_unused_configs = True + +; Suppress all missing import errors from torch_npu for mypy. [mypy-torch_npu.*] ignore_missing_imports = True + +[mypy-transformers.*] +ignore_missing_imports = True diff --git a/setup.py b/setup.py index 7817e48..59fdd41 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,6 @@ import os -from typing import Dict, List +from typing import List + from setuptools import setup ROOT_DIR = os.path.dirname(__file__) @@ -8,6 +9,7 @@ def get_path(*filepath) -> str: return os.path.join(ROOT_DIR, *filepath) + def get_requirements() -> List[str]: """Get Python package dependencies from requirements.txt.""" @@ -23,25 +25,25 @@ def _read_requirements(filename: str) -> List[str]: else: resolved_requirements.append(line) return resolved_requirements - + try: requirements = _read_requirements("requirements.txt") except ValueError: - print("Failed to read requirements.txt in vllm_ascend_plugin.") + print("Failed to read requirements.txt in vllm_ascend.") return requirements -setup(name='vllm_ascend_plugin', - version='0.1', - packages=['vllm_ascend_plugin'], - install_requires=get_requirements(), - extras_require={ +setup( + name='vllm_ascend', + version='0.1', + packages=['vllm_ascend'], + install_requires=get_requirements(), + extras_require={ "tensorizer": ["tensorizer>=2.9.0"], "runai": ["runai-model-streamer", "runai-model-streamer-s3", "boto3"], "audio": ["librosa", "soundfile"], # Required for audio processing "video": ["decord"] # Required for video processing - }, - entry_points={ - 'vllm.platform_plugins': - ["ascend_plugin = vllm_ascend_plugin:register"] - }) + }, + entry_points={ + 'vllm.platform_plugins': ["ascend_plugin = vllm_ascend:register"] + }) diff --git a/tools/mypy.sh b/tools/mypy.sh index 1c6339a..5c789fa 100755 --- a/tools/mypy.sh +++ b/tools/mypy.sh @@ -17,4 +17,5 @@ run_mypy() { } run_mypy # Note that this is less strict than CI -run_mypy vllm_ascend_plugin +run_mypy vllm_ascend +run_mypy examples diff --git a/vllm_ascend_plugin/__init__.py b/vllm_ascend/__init__.py similarity index 50% rename from vllm_ascend_plugin/__init__.py rename to vllm_ascend/__init__.py index 07dea97..16e91da 100644 --- a/vllm_ascend_plugin/__init__.py +++ b/vllm_ascend/__init__.py @@ -1,3 +1,3 @@ def register(): """Register the NPU platform.""" - return "vllm_ascend_plugin.platform.NPUPlatform" + return "vllm_ascend.platform.NPUPlatform" diff --git a/vllm_ascend_plugin/attention.py b/vllm_ascend/attention.py similarity index 99% rename from vllm_ascend_plugin/attention.py rename to vllm_ascend/attention.py index 1ab9779..6afaf63 100644 --- a/vllm_ascend_plugin/attention.py +++ b/vllm_ascend/attention.py @@ -5,9 +5,9 @@ import torch try: - import torch_npu # noqa: F401 + import torch_npu except ImportError: - print("Failed to import torch_npu.") + print("Failed to import torch_npu") from 
vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl, AttentionMetadata, AttentionType) @@ -19,7 +19,7 @@ PagedAttentionMetadata) if TYPE_CHECKING: - from vllm_ascend_plugin.model_runner import ModelInputForNPUBuilder + from vllm_ascend.model_runner import ModelInputForNPUBuilder SHARE_MASK_TRIL_PREFIX_CACHE = None SHARE_MASK_TRIL = None @@ -70,7 +70,7 @@ def swap_blocks( @staticmethod def copy_blocks( - kv_caches: List[torch.Tensor], + kv_caches: List[torch.Tensor], src_to_dists: torch.Tensor, ) -> None: src_indices = src_to_dists[:, 0] diff --git a/vllm_ascend_plugin/communicator.py b/vllm_ascend/communicator.py similarity index 100% rename from vllm_ascend_plugin/communicator.py rename to vllm_ascend/communicator.py diff --git a/vllm_ascend_plugin/model_runner.py b/vllm_ascend/model_runner.py similarity index 100% rename from vllm_ascend_plugin/model_runner.py rename to vllm_ascend/model_runner.py diff --git a/vllm_ascend/ops/__init__.py b/vllm_ascend/ops/__init__.py new file mode 100644 index 0000000..424da25 --- /dev/null +++ b/vllm_ascend/ops/__init__.py @@ -0,0 +1 @@ +import vllm_ascend.ops.layernorm # noqa diff --git a/vllm_ascend/ops/layernorm.py b/vllm_ascend/ops/layernorm.py new file mode 100644 index 0000000..7480613 --- /dev/null +++ b/vllm_ascend/ops/layernorm.py @@ -0,0 +1,24 @@ +from typing import Optional, Tuple, Union + +import torch + +from vllm.model_executor.layers.layernorm import RMSNorm + + +def forward_oot( + self, + x: torch.Tensor, + residual: Optional[torch.Tensor] = None, +) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: + import torch_npu + + if residual is not None: + x, _, residual = torch_npu.npu_add_rms_norm( + x, residual, self.weight, self.variance_epsilon) + return x, residual + + x, residual = torch_npu.npu_rms_norm(x, self.weight, + self.variance_epsilon) + return x + +RMSNorm.forward_oot = forward_oot diff --git a/vllm_ascend_plugin/platform.py b/vllm_ascend/platform.py similarity index 87% rename from vllm_ascend_plugin/platform.py rename to vllm_ascend/platform.py index 3445f20..6ed8bd6 100644 --- a/vllm_ascend_plugin/platform.py +++ b/vllm_ascend/platform.py @@ -4,9 +4,9 @@ import torch try: - import torch_npu # noqa: F401 + import torch_npu except ImportError: - print("Failed to import torch_npu.") + print("Failed to import torch_npu") from vllm.config import VllmConfig from vllm.platforms import Platform @@ -32,7 +32,7 @@ class NPUPlatform(Platform): _enum = "Ascend" device_name: str = "npu" device_type: str = "npu" - torch_compile_backend: str = "npu" + simple_compile_backend: str = "npu" ray_device_key: str = "NPU" visible_device_name: str = "ASCEND_RT" @@ -72,11 +72,11 @@ def mem_get_info(cls) -> Tuple[int, int]: @classmethod def check_and_update_config(cls, vllm_config: VllmConfig) -> None: # Register ops when setup. 
- from vllm_ascend_plugin import ops # noqa: F401 + from vllm_ascend import ops parallel_config = vllm_config.parallel_config if parallel_config.worker_cls == "auto": - parallel_config.worker_cls = "vllm_ascend_plugin.worker.NPUWorker" + parallel_config.worker_cls = "vllm_ascend.worker.NPUWorker" cache_config = vllm_config.cache_config if cache_config and cache_config.block_size is None: cache_config.block_size = 16 @@ -84,7 +84,7 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None: @classmethod def get_attn_backend_cls(cls, selected_backend, head_size, dtype, kv_cache_dtype, block_size, use_v1): - return "vllm_ascend_plugin.attention.AscendAttentionBackend" + return "vllm_ascend.attention.AscendAttentionBackend" @classmethod def get_current_memory_usage(cls, @@ -95,4 +95,4 @@ def get_current_memory_usage(cls, @classmethod def get_device_communicator_cls(cls) -> str: - return "vllm_ascend_plugin.communicator.NPUCommunicator" + return "vllm_ascend.communicator.NPUCommunicator" diff --git a/vllm_ascend_plugin/worker.py b/vllm_ascend/worker.py similarity index 99% rename from vllm_ascend_plugin/worker.py rename to vllm_ascend/worker.py index 5a90484..e0ddd64 100644 --- a/vllm_ascend_plugin/worker.py +++ b/vllm_ascend/worker.py @@ -18,7 +18,7 @@ from vllm.worker.worker import Worker from vllm.worker.worker_base import WorkerBase -from vllm_ascend_plugin.model_runner import NPUModelRunner +from vllm_ascend.model_runner import NPUModelRunner class NPUWorker(Worker): diff --git a/vllm_ascend_plugin/ops/__init__.py b/vllm_ascend_plugin/ops/__init__.py deleted file mode 100644 index 63f5be5..0000000 --- a/vllm_ascend_plugin/ops/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from vllm.model_executor.custom_op import CustomOp - -import vllm_ascend_plugin.ops.layernorm - -def forward_npu(self, *args, **kwargs): - # By default, we assume that NPU ops are compatible with the - # PyTorch-native implementation. - return self.forward_native(*args, **kwargs) - -CustomOp.set_foward_method(forward_npu) diff --git a/vllm_ascend_plugin/ops/layernorm.py b/vllm_ascend_plugin/ops/layernorm.py deleted file mode 100644 index 3c25380..0000000 --- a/vllm_ascend_plugin/ops/layernorm.py +++ /dev/null @@ -1,23 +0,0 @@ -from typing import Optional, Tuple, Union - -import torch -from vllm.model_executor.layers.layernorm import RMSNorm - - -def forward_npu( - self, - x: torch.Tensor, - residual: Optional[torch.Tensor] = None, -) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: - import torch_npu # noqa: F401 - - if residual is not None: - x, _, residual = torch_npu.npu_add_rms_norm(x, residual, self.weight, - self.variance_epsilon) - return x, residual - - x, residual = torch_npu.npu_rms_norm(x, self.weight, self.variance_epsilon) - return x - - -RMSNorm.set_foward_method(forward_npu)
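The rename keeps the plugin wired into vLLM through the `vllm.platform_plugins` entry point declared in setup.py, with `register()` returning the dotted path of the platform class. Below is a minimal sketch of the discovery side, assuming vLLM resolves this group via `importlib.metadata` on Python 3.10+; the loader is illustrative, not vLLM's actual code — only the group name and the `register()` contract come from the diff above.

```python
from importlib import import_module
from importlib.metadata import entry_points


def load_platform_plugins():
    """Resolve every installed `vllm.platform_plugins` entry point (illustrative)."""
    plugins = {}
    for ep in entry_points(group="vllm.platform_plugins"):
        register = ep.load()   # e.g. vllm_ascend.register
        dotted = register()    # "vllm_ascend.platform.NPUPlatform"
        module_name, _, cls_name = dotted.rpartition(".")
        plugins[ep.name] = getattr(import_module(module_name), cls_name)
    return plugins
```

After `pip install` of this package, such a loader would map `ascend_plugin` to `NPUPlatform`. Because discovery goes through installed distribution metadata, the package rename needs no change on the vLLM side — only the entry point target in setup.py moves from `vllm_ascend_plugin:register` to `vllm_ascend:register`.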
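The deleted `vllm_ascend_plugin/ops/__init__.py` attached NPU ops through a `CustomOp.set_foward_method` hook; the new `vllm_ascend/ops/layernorm.py` instead assigns `RMSNorm.forward_oot` directly at import time. A pure-Python sketch of why that single assignment is sufficient, assuming vLLM's `CustomOp` dispatches to `forward_oot` on out-of-tree platforms — the classes below are stand-ins for the real vLLM implementation:

```python
class CustomOp:
    def forward(self, *args, **kwargs):
        # On an out-of-tree platform, dispatch to forward_oot if a plugin
        # has attached one; otherwise fall back to the native implementation.
        impl = getattr(self, "forward_oot", self.forward_native)
        return impl(*args, **kwargs)

    def forward_native(self, x):
        raise NotImplementedError


class RMSNorm(CustomOp):
    def forward_native(self, x):
        return ("native", x)


def forward_oot(self, x):
    # Stand-in for the torch_npu.npu_rms_norm call in the diff above.
    return ("npu", x)


# Importing vllm_ascend.ops.layernorm performs exactly this assignment.
RMSNorm.forward_oot = forward_oot

assert RMSNorm().forward("hidden_states") == ("npu", "hidden_states")
```

Importing `vllm_ascend.ops` (as `check_and_update_config` does in platform.py) triggers the module-level assignment, so the NPU kernel replaces the native RMSNorm path without subclassing or any registry call.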