
Commit: merge main
shen-shanshan committed Jan 13, 2025
1 parent 8a9cad2 commit 410accb
Showing 17 changed files with 106 additions and 64 deletions.
40 changes: 40 additions & 0 deletions .github/workflows/actionlint.yml
@@ -0,0 +1,40 @@
+name: Lint GitHub Actions workflows
+on:
+  push:
+    branches:
+      - "main"
+    paths:
+      - '.github/workflows/*.ya?ml'
+      - '.github/workflows/actionlint.*'
+      - '.github/workflows/matchers/actionlint.json'
+  pull_request:
+    branches:
+      - "main"
+    paths:
+      - '.github/workflows/*.ya?ml'
+      - '.github/workflows/actionlint.*'
+      - '.github/workflows/matchers/actionlint.json'
+
+env:
+  LC_ALL: en_US.UTF-8
+
+defaults:
+  run:
+    shell: bash
+
+permissions:
+  contents: read
+
+jobs:
+  actionlint:
+    runs-on: ubuntu-latest
+    steps:
+      - name: "Checkout"
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          fetch-depth: 0
+
+      - name: "Run actionlint"
+        run: |
+          echo "::add-matcher::.github/workflows/matchers/actionlint.json"
+          tools/actionlint.sh -color
2 changes: 1 addition & 1 deletion .gitignore
@@ -10,7 +10,7 @@ __pycache__/
 .vscode/
 
 # egg-info
-vllm_ascend_plugin.egg-info/
+vllm_ascend.egg-info/
 
 # mypy
 .mypy_cache/
2 changes: 1 addition & 1 deletion Dockerfile
@@ -21,7 +21,7 @@ RUN pip install cmake>=3.26 wheel packaging ninja "setuptools-scm>=8" numpy
 RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" python3 -m pip install -r /workspace/vllm_ascend//vllm/requirements-build.txt
 # build vLLM with NPU backend
 RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" VLLM_TARGET_DEVICE="cpu" python3 -m pip install /workspace/vllm_ascend/vllm/
-# install vllm_ascend_plugin
+# install vllm_ascend
 RUN python3 -m pip install /workspace/vllm_ascend/
 
 CMD ["/bin/bash"]
2 changes: 1 addition & 1 deletion README.md
@@ -38,7 +38,7 @@ docker exec -it vllm bash
 
 ### 1. Prepare CANN env
 
-Before install vllm_ascend_plugin, you need to install the Ascend CANN Toolkit and Kernels. Please follow the [installation tutorial](https://ascend.github.io/docs/sources/ascend/quick_install.html#id1) or use the following commands for quick installation:
+Before install vllm_ascend, you need to install the Ascend CANN Toolkit and Kernels. Please follow the [installation tutorial](https://ascend.github.io/docs/sources/ascend/quick_install.html#id1) or use the following commands for quick installation:
 
 ```bash
 # replace the url according to your CANN version and devices
9 changes: 8 additions & 1 deletion mypy.ini
@@ -1,3 +1,10 @@
-# Suppress all missing import errors from torch_npu for mypy.
+[mypy]
+; warn_return_any = True
+warn_unused_configs = True
+
+; Suppress all missing import errors from torch_npu for mypy.
 [mypy-torch_npu.*]
 ignore_missing_imports = True
+
+[mypy-transformers.*]
+ignore_missing_imports = True
28 changes: 15 additions & 13 deletions setup.py
@@ -1,5 +1,6 @@
 import os
-from typing import Dict, List
+from typing import List
 
 from setuptools import setup
+
 ROOT_DIR = os.path.dirname(__file__)
@@ -8,6 +9,7 @@
 def get_path(*filepath) -> str:
     return os.path.join(ROOT_DIR, *filepath)
 
+
 def get_requirements() -> List[str]:
     """Get Python package dependencies from requirements.txt."""
 
@@ -23,25 +25,25 @@ def _read_requirements(filename: str) -> List[str]:
             else:
                 resolved_requirements.append(line)
         return resolved_requirements
 
     try:
         requirements = _read_requirements("requirements.txt")
     except ValueError:
-        print("Failed to read requirements.txt in vllm_ascend_plugin.")
+        print("Failed to read requirements.txt in vllm_ascend.")
     return requirements
 
 
-setup(name='vllm_ascend_plugin',
-      version='0.1',
-      packages=['vllm_ascend_plugin'],
-      install_requires=get_requirements(),
-      extras_require={
+setup(
+    name='vllm_ascend',
+    version='0.1',
+    packages=['vllm_ascend'],
+    install_requires=get_requirements(),
+    extras_require={
         "tensorizer": ["tensorizer>=2.9.0"],
         "runai": ["runai-model-streamer", "runai-model-streamer-s3", "boto3"],
         "audio": ["librosa", "soundfile"], # Required for audio processing
         "video": ["decord"] # Required for video processing
-      },
-      entry_points={
-          'vllm.platform_plugins':
-          ["ascend_plugin = vllm_ascend_plugin:register"]
-      })
+    },
+    entry_points={
+        'vllm.platform_plugins': ["ascend_plugin = vllm_ascend:register"]
+    })
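The `vllm.platform_plugins` entry point declared above is how vLLM discovers the Ascend backend: it resolves to `vllm_ascend:register`, and `register()` (see the renamed `vllm_ascend/__init__.py` below) returns the dotted path of the platform class. As a rough illustration of the mechanism — a minimal sketch using `importlib.metadata`, not vLLM's actual plugin-loading code — the installed entry point could be enumerated and loaded like this:

```python
# Illustrative sketch only: enumerate the "vllm.platform_plugins" entry-point
# group once vllm_ascend is installed. entry_points(group=...) needs
# Python >= 3.10; vLLM's real loader may differ.
from importlib.metadata import entry_points

for ep in entry_points(group="vllm.platform_plugins"):
    register_fn = ep.load()        # e.g. vllm_ascend.register
    platform_path = register_fn()  # -> "vllm_ascend.platform.NPUPlatform"
    print(ep.name, "->", platform_path)
```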
3 changes: 2 additions & 1 deletion tools/mypy.sh
@@ -17,4 +17,5 @@ run_mypy() {
 }
 
 run_mypy # Note that this is less strict than CI
-run_mypy vllm_ascend_plugin
+run_mypy vllm_ascend
+run_mypy examples
2 changes: 1 addition & 1 deletion vllm_ascend_plugin/__init__.py → vllm_ascend/__init__.py
@@ -1,3 +1,3 @@
 def register():
     """Register the NPU platform."""
-    return "vllm_ascend_plugin.platform.NPUPlatform"
+    return "vllm_ascend.platform.NPUPlatform"
8 changes: 4 additions & 4 deletions vllm_ascend_plugin/attention.py → vllm_ascend/attention.py
@@ -5,9 +5,9 @@
 import torch
 
 try:
-    import torch_npu # noqa: F401
+    import torch_npu
 except ImportError:
-    print("Failed to import torch_npu.")
+    print("Failed to import torch_npu")
 
 from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
                                               AttentionMetadata, AttentionType)
@@ -19,7 +19,7 @@
                                            PagedAttentionMetadata)
 
 if TYPE_CHECKING:
-    from vllm_ascend_plugin.model_runner import ModelInputForNPUBuilder
+    from vllm_ascend.model_runner import ModelInputForNPUBuilder
 
 SHARE_MASK_TRIL_PREFIX_CACHE = None
 SHARE_MASK_TRIL = None
@@ -70,7 +70,7 @@ def swap_blocks(
 
     @staticmethod
     def copy_blocks(
-        kv_caches: List[torch.Tensor], 
+        kv_caches: List[torch.Tensor],
         src_to_dists: torch.Tensor,
     ) -> None:
         src_indices = src_to_dists[:, 0]
File renamed without changes.
File renamed without changes.
1 change: 1 addition & 0 deletions vllm_ascend/ops/__init__.py
@@ -0,0 +1 @@
+import vllm_ascend.ops.layernorm # noqa
24 changes: 24 additions & 0 deletions vllm_ascend/ops/layernorm.py
@@ -0,0 +1,24 @@
+from typing import Optional, Tuple, Union
+
+import torch
+
+from vllm.model_executor.layers.layernorm import RMSNorm
+
+
+def forward_oot(
+    self,
+    x: torch.Tensor,
+    residual: Optional[torch.Tensor] = None,
+) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
+    import torch_npu
+
+    if residual is not None:
+        x, _, residual = torch_npu.npu_add_rms_norm(
+            x, residual, self.weight, self.variance_epsilon)
+        return x, residual
+
+    x, residual = torch_npu.npu_rms_norm(x, self.weight,
+                                         self.variance_epsilon)
+    return x
+
+RMSNorm.forward_oot = forward_oot
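This new module patches `RMSNorm.forward_oot` so that, once `vllm_ascend.ops` is imported (the new `vllm_ascend/ops/__init__.py` above pulls in this module, and `platform.py` below imports `vllm_ascend.ops` inside `check_and_update_config`), RMSNorm calls into the fused `torch_npu` kernels. For readers without an NPU, a plain-PyTorch sketch of what those kernels are assumed to compute follows — standard RMSNorm semantics; the `torch_npu` kernels remain the authoritative implementation:

```python
import torch


def rms_norm_reference(x: torch.Tensor, weight: torch.Tensor,
                       eps: float) -> torch.Tensor:
    # y = x / sqrt(mean(x^2) + eps) * weight, normalized over the last dim.
    variance = x.pow(2).mean(dim=-1, keepdim=True)
    return x * torch.rsqrt(variance + eps) * weight


def add_rms_norm_reference(x: torch.Tensor, residual: torch.Tensor,
                           weight: torch.Tensor, eps: float):
    # Assumed fused add + RMSNorm: the pre-norm sum becomes the new residual.
    summed = x + residual
    return rms_norm_reference(summed, weight, eps), summed
```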
14 changes: 7 additions & 7 deletions vllm_ascend_plugin/platform.py → vllm_ascend/platform.py
@@ -4,9 +4,9 @@
 import torch
 
 try:
-    import torch_npu # noqa: F401
+    import torch_npu
 except ImportError:
-    print("Failed to import torch_npu.")
+    print("Failed to import torch_npu")
 
 from vllm.config import VllmConfig
 from vllm.platforms import Platform
@@ -32,7 +32,7 @@ class NPUPlatform(Platform):
     _enum = "Ascend"
     device_name: str = "npu"
     device_type: str = "npu"
-    torch_compile_backend: str = "npu"
+    simple_compile_backend: str = "npu"
     ray_device_key: str = "NPU"
     visible_device_name: str = "ASCEND_RT"
 
@@ -72,19 +72,19 @@ def mem_get_info(cls) -> Tuple[int, int]:
     @classmethod
     def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
         # Register ops when setup.
-        from vllm_ascend_plugin import ops # noqa: F401
+        from vllm_ascend import ops
 
         parallel_config = vllm_config.parallel_config
         if parallel_config.worker_cls == "auto":
-            parallel_config.worker_cls = "vllm_ascend_plugin.worker.NPUWorker"
+            parallel_config.worker_cls = "vllm_ascend.worker.NPUWorker"
         cache_config = vllm_config.cache_config
         if cache_config and cache_config.block_size is None:
             cache_config.block_size = 16
 
     @classmethod
     def get_attn_backend_cls(cls, selected_backend, head_size, dtype,
                              kv_cache_dtype, block_size, use_v1):
-        return "vllm_ascend_plugin.attention.AscendAttentionBackend"
+        return "vllm_ascend.attention.AscendAttentionBackend"
 
     @classmethod
     def get_current_memory_usage(cls,
@@ -95,4 +95,4 @@ def get_current_memory_usage(cls,
 
     @classmethod
     def get_device_communicator_cls(cls) -> str:
-        return "vllm_ascend_plugin.communicator.NPUCommunicator"
+        return "vllm_ascend.communicator.NPUCommunicator"
2 changes: 1 addition & 1 deletion vllm_ascend_plugin/worker.py → vllm_ascend/worker.py
@@ -18,7 +18,7 @@
 from vllm.worker.worker import Worker
 from vllm.worker.worker_base import WorkerBase
 
-from vllm_ascend_plugin.model_runner import NPUModelRunner
+from vllm_ascend.model_runner import NPUModelRunner
 
 
 class NPUWorker(Worker):
10 changes: 0 additions & 10 deletions vllm_ascend_plugin/ops/__init__.py

This file was deleted.

23 changes: 0 additions & 23 deletions vllm_ascend_plugin/ops/layernorm.py

This file was deleted.
