Skip to content
This repository has been archived by the owner on Feb 18, 2025. It is now read-only.

Commit

Permalink
Merge pull request #9 from wangxiyuan/rename
Browse files Browse the repository at this point in the history
Rename to vllm-ascend
  • Loading branch information
MengqingCao authored Jan 13, 2025
2 parents 8baa637 + 98b01b8 commit 319f283
Show file tree
Hide file tree
Showing 13 changed files with 18 additions and 27 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ __pycache__/
.vscode/

# egg-info
vllm_ascend_plugin.egg-info/
vllm_ascend.egg-info/
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ RUN pip install cmake>=3.26 wheel packaging ninja "setuptools-scm>=8" numpy
RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" python3 -m pip install -r /workspace/vllm_ascend//vllm/requirements-build.txt
# build vLLM with NPU backend
RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" VLLM_TARGET_DEVICE="cpu" python3 -m pip install /workspace/vllm_ascend/vllm/
# install vllm_ascend_plugin
# install vllm_ascend
RUN python3 -m pip install /workspace/vllm_ascend/

CMD ["/bin/bash"]
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ docker exec -it vllm bash

### 1. Prepare CANN env

Before install vllm_ascend_plugin, you need to install the Ascend CANN Toolkit and Kernels. Please follow the [installation tutorial](https://ascend.github.io/docs/sources/ascend/quick_install.html#id1) or use the following commands for quick installation:
Before installing vllm_ascend, you need to install the Ascend CANN Toolkit and Kernels. Please follow the [installation tutorial](https://ascend.github.io/docs/sources/ascend/quick_install.html#id1) or use the following commands for quick installation:

```bash
# replace the url according to your CANN version and devices
Expand Down
8 changes: 4 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@ def _read_requirements(filename: str) -> List[str]:
try:
requirements = _read_requirements("requirements.txt")
except ValueError:
print("Failed to read requirements.txt in vllm_ascend_plugin.")
print("Failed to read requirements.txt in vllm_ascend.")
return requirements


setup(name='vllm_ascend_plugin',
setup(name='vllm_ascend',
version='0.1',
packages=['vllm_ascend_plugin'],
packages=['vllm_ascend'],
install_requires=get_requirements(),
extras_require={
"tensorizer": ["tensorizer>=2.9.0"],
Expand All @@ -43,5 +43,5 @@ def _read_requirements(filename: str) -> List[str]:
},
entry_points={
'vllm.platform_plugins':
["ascend_plugin = vllm_ascend_plugin:register"]
["ascend_plugin = vllm_ascend:register"]
})
2 changes: 1 addition & 1 deletion vllm_ascend_plugin/__init__.py → vllm_ascend/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
def register():
"""Register the NPU platform."""
return "vllm_ascend_plugin.platform.NPUPlatform"
return "vllm_ascend.platform.NPUPlatform"
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
PagedAttentionMetadata)

if TYPE_CHECKING:
from vllm_ascend_plugin.worker import ModelInputForNPUBuilder
from vllm_ascend.model_runner import ModelInputForNPUBuilder

SHARE_MASK_TRIL_PREFIX_CACHE = None
SHARE_MASK_TRIL = None
Expand Down
File renamed without changes.
File renamed without changes.
1 change: 1 addition & 0 deletions vllm_ascend/ops/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
import vllm_ascend.ops.layernorm # noqa
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from vllm.model_executor.layers.layernorm import RMSNorm

def forward_npu(
def forward_oot(
self,
x: torch.Tensor,
residual: Optional[torch.Tensor] = None,
Expand All @@ -20,4 +20,4 @@ def forward_npu(
self.variance_epsilon)
return x

RMSNorm.set_foward_method(forward_npu)
RMSNorm.forward_oot = forward_oot
10 changes: 5 additions & 5 deletions vllm_ascend_plugin/platform.py → vllm_ascend/platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class NPUPlatform(Platform):
_enum = "Ascend"
device_name: str = "npu"
device_type: str = "npu"
torch_compile_backend: str = "npu"
simple_compile_backend: str = "npu"
ray_device_key: str = "NPU"
visible_device_name: str = "ASCEND_RT"

Expand Down Expand Up @@ -72,19 +72,19 @@ def mem_get_info(cls) -> Tuple[int, int]:
@classmethod
def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
# Register ops when setup.
from vllm_ascend_plugin import ops
from vllm_ascend import ops

parallel_config = vllm_config.parallel_config
if parallel_config.worker_cls == "auto":
parallel_config.worker_cls = "vllm_ascend_plugin.worker.NPUWorker"
parallel_config.worker_cls = "vllm_ascend.worker.NPUWorker"
cache_config = vllm_config.cache_config
if cache_config and cache_config.block_size is None:
cache_config.block_size = 16

@classmethod
def get_attn_backend_cls(cls, selected_backend, head_size, dtype,
kv_cache_dtype, block_size, use_v1):
return "vllm_ascend_plugin.attention.AscendAttentionBackend"
return "vllm_ascend.attention.AscendAttentionBackend"

@classmethod
def get_current_memory_usage(cls,
Expand All @@ -95,4 +95,4 @@ def get_current_memory_usage(cls,

@classmethod
def get_device_communicator_cls(cls) -> str:
return "vllm_ascend_plugin.communicator.NPUCommunicator"
return "vllm_ascend.communicator.NPUCommunicator"
2 changes: 1 addition & 1 deletion vllm_ascend_plugin/worker.py → vllm_ascend/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from vllm.worker.worker import Worker
from vllm.worker.worker_base import WorkerBase

from vllm_ascend_plugin.model_runner import NPUModelRunner
from vllm_ascend.model_runner import NPUModelRunner


class NPUWorker(Worker):
Expand Down
10 changes: 0 additions & 10 deletions vllm_ascend_plugin/ops/__init__.py

This file was deleted.

0 comments on commit 319f283

Please sign in to comment.