diff --git a/.github/workflows/actionlint.yml b/.github/workflows/actionlint.yml new file mode 100644 index 0000000..0226cf0 --- /dev/null +++ b/.github/workflows/actionlint.yml @@ -0,0 +1,40 @@ +name: Lint GitHub Actions workflows +on: + push: + branches: + - "main" + paths: + - '.github/workflows/*.ya?ml' + - '.github/workflows/actionlint.*' + - '.github/workflows/matchers/actionlint.json' + pull_request: + branches: + - "main" + paths: + - '.github/workflows/*.ya?ml' + - '.github/workflows/actionlint.*' + - '.github/workflows/matchers/actionlint.json' + +env: + LC_ALL: en_US.UTF-8 + +defaults: + run: + shell: bash + +permissions: + contents: read + +jobs: + actionlint: + runs-on: ubuntu-latest + steps: + - name: "Checkout" + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + fetch-depth: 0 + + - name: "Run actionlint" + run: | + echo "::add-matcher::.github/workflows/matchers/actionlint.json" + tools/actionlint.sh -color diff --git a/.github/workflows/matchers/actionlint.json b/.github/workflows/matchers/actionlint.json new file mode 100644 index 0000000..4613e16 --- /dev/null +++ b/.github/workflows/matchers/actionlint.json @@ -0,0 +1,17 @@ +{ + "problemMatcher": [ + { + "owner": "actionlint", + "pattern": [ + { + "regexp": "^(?:\\x1b\\[\\d+m)?(.+?)(?:\\x1b\\[\\d+m)*:(?:\\x1b\\[\\d+m)*(\\d+)(?:\\x1b\\[\\d+m)*:(?:\\x1b\\[\\d+m)*(\\d+)(?:\\x1b\\[\\d+m)*: (?:\\x1b\\[\\d+m)*(.+?)(?:\\x1b\\[\\d+m)* \\[(.+?)\\]$", + "file": 1, + "line": 2, + "column": 3, + "message": 4, + "code": 5 + } + ] + } + ] +} diff --git a/.github/workflows/matchers/mypy.json b/.github/workflows/matchers/mypy.json new file mode 100644 index 0000000..f048fce --- /dev/null +++ b/.github/workflows/matchers/mypy.json @@ -0,0 +1,16 @@ +{ + "problemMatcher": [ + { + "owner": "mypy", + "pattern": [ + { + "regexp": "^(.+):(\\d+):\\s(error|warning):\\s(.+)$", + "file": 1, + "line": 2, + "severity": 3, + "message": 4 + } + ] + } + ] +} diff --git a/.github/workflows/matchers/ruff.json b/.github/workflows/matchers/ruff.json new file mode 100644 index 0000000..f6d4479 --- /dev/null +++ b/.github/workflows/matchers/ruff.json @@ -0,0 +1,17 @@ +{ + "problemMatcher": [ + { + "owner": "ruff", + "pattern": [ + { + "regexp": "^(.+?):(\\d+):(\\d+): (\\w+): (.+)$", + "file": 1, + "line": 2, + "column": 3, + "code": 4, + "message": 5 + } + ] + } + ] + } diff --git a/.gitignore b/.gitignore index adcb690..8da361f 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,12 @@ __pycache__/ # egg-info vllm_ascend.egg-info/ + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Linting +actionlint +shellcheck*/ diff --git a/README.md b/README.md index 81d13bf..b06806a 100644 --- a/README.md +++ b/README.md @@ -109,6 +109,3 @@ bash format.sh # 3. Commit changed files with message 'Run yapf and ruff' git commit -m "Run yapf and ruff" ``` - -> [!NOTE] -> The warning "F401 `torch_npu` imported but unused" doesn't matter because the api `torch.npu` will call this library indirectly while there are not `torch_npu` in the codes explicitly. 
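The actionlint problem matcher above has to strip ANSI color escapes, since the workflow invokes `tools/actionlint.sh -color`. Below is a minimal sketch of how that regex parses one diagnostic line; the sample line and its exact color codes are hypothetical (actionlint may color its output differently), but the pattern is the one from matchers/actionlint.json, written as a raw Python string instead of JSON's double-escaped form. The mypy and ruff matchers added alongside it follow the same idea with much simpler patterns, presumably because those tools are run without colored output.

    import re

    # Same pattern as .github/workflows/matchers/actionlint.json.
    ACTIONLINT_RE = re.compile(
        r"^(?:\x1b\[\d+m)?(.+?)(?:\x1b\[\d+m)*:(?:\x1b\[\d+m)*(\d+)(?:\x1b\[\d+m)*:"
        r"(?:\x1b\[\d+m)*(\d+)(?:\x1b\[\d+m)*: (?:\x1b\[\d+m)*(.+?)(?:\x1b\[\d+m)*"
        r" \[(.+?)\]$")

    # Hypothetical diagnostic in the shape `actionlint -color` prints:
    # <file>:<line>:<col>: <message> [<rule>], wrapped in ANSI escapes.
    sample = ("\x1b[36m.github/workflows/actionlint.yml\x1b[0m:"
              "\x1b[32m7\x1b[0m:\x1b[32m11\x1b[0m: "
              "\x1b[1mproperty \"foo\" is not defined\x1b[0m [expression]")

    match = ACTIONLINT_RE.match(sample)
    assert match is not None
    # Capture groups 1-5 map to file, line, column, message and code,
    # exactly as declared in the JSON matcher.
    path, line, col, message, code = match.groups()
    print(f"{path}:{line}:{col}: {message} [{code}]")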
diff --git a/examples/offline_inference_audio_language.py b/examples/offline_inference_audio_language.py index a0b7aff..57fa98a 100644 --- a/examples/offline_inference_audio_language.py +++ b/examples/offline_inference_audio_language.py @@ -7,7 +7,6 @@ """ from transformers import AutoTokenizer - from vllm import LLM, SamplingParams from vllm.assets.audio import AudioAsset from vllm.utils import FlexibleArgumentParser diff --git a/format.sh b/format.sh index 5ffe784..11e6240 100755 --- a/format.sh +++ b/format.sh @@ -43,7 +43,7 @@ ISORT_VERSION=$(isort --vn) CLANGFORMAT_VERSION=$(clang-format --version | awk '{print $3}') SPHINX_LINT_VERSION=$(sphinx-lint --version | awk '{print $2}') -# # params: tool name, tool version, required version +# params: tool name, tool version, required version tool_version_check() { expected=$(grep "$1" requirements-lint.txt | cut -d'=' -f3) if [[ "$2" != "$expected" ]]; then @@ -112,9 +112,9 @@ fi echo 'vLLM yapf: Done' # Run mypy -# echo 'vLLM mypy:' -# tools/mypy.sh -# echo 'vLLM mypy: Done' +echo 'vLLM mypy:' +tools/mypy.sh +echo 'vLLM mypy: Done' # If git diff returns a file that is in the skip list, the file may be checked anyway: @@ -316,6 +316,6 @@ else echo "✨🎉 Format check passed! Congratulations! 🎉✨" fi -echo 'vLLM sphinx-lint:' -tools/sphinx-lint.sh -echo 'vLLM sphinx-lint: Done' +# echo 'vLLM sphinx-lint:' +# tools/sphinx-lint.sh +# echo 'vLLM sphinx-lint: Done' diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000..fe0fd66 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,10 @@ +[mypy] +; warn_return_any = True +warn_unused_configs = True + +; Suppress all missing import errors from torch_npu for mypy. +[mypy-torch_npu.*] +ignore_missing_imports = True + +[mypy-transformers.*] +ignore_missing_imports = True diff --git a/setup.py b/setup.py index d34c5ac..59fdd41 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,6 @@ import os -from typing import Dict, List +from typing import List + from setuptools import setup ROOT_DIR = os.path.dirname(__file__) @@ -8,6 +9,7 @@ def get_path(*filepath) -> str: return os.path.join(ROOT_DIR, *filepath) + def get_requirements() -> List[str]: """Get Python package dependencies from requirements.txt.""" @@ -23,7 +25,7 @@ def _read_requirements(filename: str) -> List[str]: else: resolved_requirements.append(line) return resolved_requirements - + try: requirements = _read_requirements("requirements.txt") except ValueError: @@ -31,17 +33,17 @@ def _read_requirements(filename: str) -> List[str]: return requirements -setup(name='vllm_ascend', - version='0.1', - packages=['vllm_ascend'], - install_requires=get_requirements(), - extras_require={ +setup( + name='vllm_ascend', + version='0.1', + packages=['vllm_ascend'], + install_requires=get_requirements(), + extras_require={ "tensorizer": ["tensorizer>=2.9.0"], "runai": ["runai-model-streamer", "runai-model-streamer-s3", "boto3"], "audio": ["librosa", "soundfile"], # Required for audio processing "video": ["decord"] # Required for video processing - }, - entry_points={ - 'vllm.platform_plugins': - ["ascend_plugin = vllm_ascend:register"] - }) + }, + entry_points={ + 'vllm.platform_plugins': ["ascend_plugin = vllm_ascend:register"] + }) diff --git a/tools/actionlint.sh b/tools/actionlint.sh new file mode 100755 index 0000000..f6a8b5e --- /dev/null +++ b/tools/actionlint.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +if command -v actionlint &> /dev/null; then + actionlint "$@" + exit 0 +elif [ -x ./actionlint ]; then + ./actionlint "$@" + exit 0 +fi + +# download a binary to the 
current directory - v1.7.3 +bash <(curl https://raw.githubusercontent.com/rhysd/actionlint/aa0a7be8e566b096e64a5df8ff290ec24fa58fbc/scripts/download-actionlint.bash) +./actionlint "$@" diff --git a/tools/check_repo.sh b/tools/check_repo.sh new file mode 100644 index 0000000..48eba5b --- /dev/null +++ b/tools/check_repo.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# Checks whether the repo is clean and whether tags are available (necessary to correctly produce vllm version at build time) + +if ! git diff --quiet; then + echo "Repo is dirty" >&2 + + exit 1 +fi + +if ! git describe --tags; then + echo "No tags are present. Is this a shallow clone? git fetch --unshallow --tags" >&2 + + exit 1 +fi diff --git a/tools/mypy.sh b/tools/mypy.sh new file mode 100755 index 0000000..5c789fa --- /dev/null +++ b/tools/mypy.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +CI=${1:-0} +PYTHON_VERSION=${2:-3.9} + +if [ "$CI" -eq 1 ]; then + set -e +fi + +run_mypy() { + echo "Running mypy on $1" + if [ "$CI" -eq 1 ] && [ -z "$1" ]; then + mypy --python-version "${PYTHON_VERSION}" "$@" + return + fi + mypy --follow-imports skip --python-version "${PYTHON_VERSION}" "$@" +} + +run_mypy # Note that this is less strict than CI +run_mypy vllm_ascend +run_mypy examples diff --git a/tools/png-lint.sh b/tools/png-lint.sh new file mode 100755 index 0000000..a80fe98 --- /dev/null +++ b/tools/png-lint.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# Ensure that *.excalidraw.png files have the excalidraw metadata +# embedded in them. This ensures they can be loaded back into +# the tool and edited in the future. + +find . -iname '*.excalidraw.png' | while read -r file; do + if git check-ignore -q "$file"; then + continue + fi + if ! grep -q "excalidraw+json" "$file"; then + echo "$file was not exported from excalidraw with 'Embed Scene' enabled." + exit 1 + fi +done diff --git a/tools/shellcheck.sh b/tools/shellcheck.sh new file mode 100755 index 0000000..bd46f7c --- /dev/null +++ b/tools/shellcheck.sh @@ -0,0 +1,24 @@ +#!/bin/bash +set -e + +scversion="stable" + +if [ -d "shellcheck-${scversion}" ]; then + PATH="$PATH:$(pwd)/shellcheck-${scversion}" + export PATH +fi + +if ! [ -x "$(command -v shellcheck)" ]; then + if [ "$(uname -s)" != "Linux" ] || [ "$(uname -m)" != "x86_64" ]; then + echo "Please install shellcheck: https://github.com/koalaman/shellcheck?tab=readme-ov-file#installing" + exit 1 + fi + + # automatic local install if linux x86_64 + wget -qO- "https://github.com/koalaman/shellcheck/releases/download/${scversion?}/shellcheck-${scversion?}.linux.x86_64.tar.xz" | tar -xJv + PATH="$PATH:$(pwd)/shellcheck-${scversion}" + export PATH +fi + +# TODO - fix warnings in .buildkite/run-amd-test.sh +find . 
-name "*.sh" -not -path "./.buildkite/run-amd-test.sh" -print0 | xargs -0 -I {} sh -c 'git check-ignore -q "{}" || shellcheck "{}"' diff --git a/tools/sphinx-lint.sh b/tools/sphinx-lint.sh new file mode 100755 index 0000000..04f8075 --- /dev/null +++ b/tools/sphinx-lint.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +sphinx-lint --disable trailing-whitespace,missing-final-newline docs diff --git a/vllm_ascend/attention.py b/vllm_ascend/attention.py index 2c0ab1f..199130f 100644 --- a/vllm_ascend/attention.py +++ b/vllm_ascend/attention.py @@ -5,9 +5,9 @@ import torch try: - import torch_npu + import torch_npu # noqa: F401 except ImportError: - print("Failed to import torch_npu") + print("Failed to import torch_npu.") from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl, AttentionMetadata, AttentionType) @@ -375,7 +375,7 @@ def _add_seq_group( # TODO(sang): Combine chunked prefill and prefix caching by # only allowing multiple of block_size chunk size. # NOTE: This only works for oooooooxxx style attention. - block_table = [] + block_table: List[int] = [] prefix_cache_hit = any([ inter_data.prefix_cache_hit for inter_data in self.input_builder.inter_data_list @@ -383,7 +383,8 @@ def _add_seq_group( if prefix_cache_hit: # NOTE(woosuk): For flash-attn, the block table should # include the entries for the incoming prefill tokens. - block_table = block_tables[seq_id] + if block_tables is not None: + block_table = block_tables[seq_id] elif ((chunked_prefill_enabled or not is_prompt) and block_tables is not None): if curr_sliding_window_block == 0: diff --git a/vllm_ascend/model_runner.py b/vllm_ascend/model_runner.py index 7ffaa00..b1dfb54 100644 --- a/vllm_ascend/model_runner.py +++ b/vllm_ascend/model_runner.py @@ -3,13 +3,12 @@ import torch import torch.distributed - from vllm.distributed import get_pp_group from vllm.logger import init_logger from vllm.lora.layers import LoRAMapping from vllm.lora.request import LoRARequest from vllm.model_executor import SamplingMetadata -from vllm.multimodal import MultiModalKwargs +from vllm.multimodal import MultiModalKwargs, MultiModalPlaceholderMap from vllm.platforms import current_platform from vllm.prompt_adapter.layers import PromptAdapterMapping from vllm.prompt_adapter.request import PromptAdapterRequest @@ -214,6 +213,218 @@ def build(self) -> ModelInputForGPU: prompt_adapter_mapping=prompt_adapter_mapping, prompt_adapter_requests=prompt_adapter_requests) + class InterDataForSeqGroup: + """Intermediate data for the current sequence group.""" + + def simple_reinit(self): + self.input_tokens[0].clear() # type: ignore + self.input_positions[0].clear() # type: ignore + self.token_types[0].clear() # type: ignore + self.mrope_input_positions = None # type: ignore + self.seq_lens[0] = 0 # type: ignore + self.orig_seq_lens[0] = 0 # type: ignore + self.query_lens[0] = 0 # type: ignore + self.context_lens[0] = 0 # type: ignore + self.curr_sliding_window_blocks[0] = 0 # type: ignore + self.lora_index_mapping.clear() # type: ignore + self.lora_prompt_mapping.clear() # type: ignore + self.lora_requests.clear() # type: ignore + self.prompt_adapter_index_mapping.clear() # type: ignore + self.prompt_adapter_prompt_mapping.clear() # type: ignore + + def __init__( + self, + *, + # From sequence group metadata. + request_id: str, + seq_ids: List[int], + is_prompt: bool, + block_tables: Optional[Dict[int, List[int]]], + computed_block_nums: List[int], + n_seqs: int = 0, + + # Input tokens and positions. 
+ input_tokens: Optional[List[List[int]]] = None, + input_positions: Optional[List[List[int]]] = None, + token_types: Optional[List[List[int]]] = None, + mrope_input_positions: Optional[List[List[List[int]]]] = None, + + # The sequence length (may be capped to the sliding window). + seq_lens: Optional[List[int]] = None, + # The original sequence length (before applying sliding window). + # This is used to compute slot mapping. + orig_seq_lens: Optional[List[int]] = None, + # The query length. + query_lens: Optional[List[int]] = None, + # The number of tokens that are already computed. + context_lens: Optional[List[int]] = None, + # The current sliding window block. + curr_sliding_window_blocks: Optional[List[int]] = None, + + # LoRA inputs. + lora_index_mapping: Optional[List[List[int]]] = None, + lora_prompt_mapping: Optional[List[List[int]]] = None, + lora_requests: Optional[Set[LoRARequest]] = None, + + # Prompt adapter inputs. + prompt_adapter_index_mapping: Optional[List[int]] = None, + prompt_adapter_prompt_mapping: Optional[List[int]] = None, + prompt_adapter_request: Optional[PromptAdapterRequest] = None, + + # Multi-modal inputs. + multi_modal_kwargs: Optional[MultiModalKwargs] = None, + multi_modal_placeholder_maps: Optional[Dict[ + str, MultiModalPlaceholderMap]] = None, + + # Whether the prefix cache is hit (prefill only). + prefix_cache_hit: bool = False, + reinit: bool = False, + reinit_use_defaults: bool = False, + encoder_seq_len: int = 0, + ): + if reinit: + assert len(self.seq_ids) == len(seq_ids) # type: ignore + for i, seq_id in enumerate(seq_ids): + self.seq_ids[i] = seq_id # type: ignore + else: + self.seq_ids = seq_ids + + self.request_id = request_id + self.is_prompt = is_prompt + self.block_tables = block_tables + self.computed_block_nums = computed_block_nums + self.n_seqs = n_seqs + self.encoder_seq_len = encoder_seq_len + + if reinit: + if len(self.seq_ids) == 1 and reinit_use_defaults: + self.simple_reinit() + else: + if input_tokens: + self.input_tokens = input_tokens + else: + for seq_id in range(len(self.seq_ids)): + self.input_tokens[seq_id].clear() + + if input_positions: + self.input_positions = input_positions + else: + for seq_id in range(len(self.seq_ids)): + self.input_positions[seq_id].clear() + + if token_types: + self.token_types = token_types + else: + for seq_id in range(len(self.seq_ids)): + self.token_types[seq_id].clear() + + self.mrope_input_positions = None + + if seq_lens: + self.seq_lens = seq_lens + else: + for seq_id in range(len(self.seq_ids)): + self.seq_lens[seq_id] = 0 + + if orig_seq_lens: + self.orig_seq_lens = orig_seq_lens + else: + for seq_id in range(len(self.seq_ids)): + self.orig_seq_lens[seq_id] = 0 + + if query_lens: + self.query_lens = query_lens + else: + for seq_id in range(len(self.seq_ids)): + self.query_lens[seq_id] = 0 + + if context_lens: + self.context_lens = context_lens + else: + for seq_id in range(len(self.seq_ids)): + self.context_lens[seq_id] = 0 + + if curr_sliding_window_blocks: + self.curr_sliding_window_blocks = \ + curr_sliding_window_blocks + else: + for seq_id in range(len(self.seq_ids)): + self.curr_sliding_window_blocks[seq_id] = 0 + + if lora_index_mapping: + self.lora_index_mapping = lora_index_mapping + else: + self.lora_index_mapping.clear() + + if lora_prompt_mapping: + self.lora_prompt_mapping = lora_prompt_mapping + else: + self.lora_prompt_mapping.clear() + + if lora_requests: + self.lora_requests = lora_requests + else: + self.lora_requests.clear() + + if prompt_adapter_index_mapping: + 
self.prompt_adapter_index_mapping = \ + prompt_adapter_index_mapping + else: + self.prompt_adapter_index_mapping.clear() + + if prompt_adapter_prompt_mapping: + self.prompt_adapter_prompt_mapping = \ + prompt_adapter_prompt_mapping + else: + self.prompt_adapter_prompt_mapping.clear() + + else: + self.input_tokens = input_tokens or [] + self.input_positions = input_positions or [] + self.token_types = token_types or [] + self.mrope_input_positions = mrope_input_positions or None + self.seq_lens = seq_lens or [] + self.orig_seq_lens = orig_seq_lens or [] + self.query_lens = query_lens or [] + self.context_lens = context_lens or [] + self.curr_sliding_window_blocks = \ + curr_sliding_window_blocks or [] + + self.lora_index_mapping = lora_index_mapping or [] + self.lora_prompt_mapping = lora_prompt_mapping or [] + self.lora_requests = lora_requests or set() + + self.prompt_adapter_index_mapping = ( + prompt_adapter_index_mapping or []) + self.prompt_adapter_prompt_mapping = ( + prompt_adapter_prompt_mapping or []) + + self.prompt_adapter_request = prompt_adapter_request + self.multi_modal_kwargs = multi_modal_kwargs + self.multi_modal_placeholder_maps = multi_modal_placeholder_maps + self.prefix_cache_hit = prefix_cache_hit + + self.n_seqs = len(self.seq_ids) + + if not reinit: + self.__post_init__() + + def __post_init__(self): + self.n_seqs = len(self.seq_ids) + + self.input_tokens = [[] for _ in range(self.n_seqs)] + self.input_positions = [[] for _ in range(self.n_seqs)] + self.token_types = [[] for _ in range(self.n_seqs)] + self.mrope_input_positions = None + self.seq_lens = [0] * self.n_seqs + self.orig_seq_lens = [0] * self.n_seqs + self.query_lens = [0] * self.n_seqs + self.context_lens = [0] * self.n_seqs + self.curr_sliding_window_blocks = [0] * self.n_seqs + + self.lora_index_mapping = [] + self.lora_prompt_mapping = [] + class NPUModelRunner(ModelRunner): """ @@ -375,7 +586,7 @@ def prepare_model_input( self.sampling_metadata_cache, # TODO (cmq): enable this after supported in vllm # pad_for_invariant_seq_len=True, - ) + ) else: sampling_metadata = None is_prompt = (seq_group_metadata_list[0].is_prompt diff --git a/vllm_ascend/ops/__init__.py b/vllm_ascend/ops/__init__.py index 7162ac1..424da25 100644 --- a/vllm_ascend/ops/__init__.py +++ b/vllm_ascend/ops/__init__.py @@ -1 +1 @@ -import vllm_ascend.ops.layernorm # noqa +import vllm_ascend.ops.layernorm # noqa diff --git a/vllm_ascend/ops/layernorm.py b/vllm_ascend/ops/layernorm.py index 30310e6..a03fb43 100644 --- a/vllm_ascend/ops/layernorm.py +++ b/vllm_ascend/ops/layernorm.py @@ -1,9 +1,9 @@ from typing import Optional, Tuple, Union import torch - from vllm.model_executor.layers.layernorm import RMSNorm + def forward_oot( self, x: torch.Tensor, @@ -12,12 +12,12 @@ def forward_oot( import torch_npu if residual is not None: - x, _, residual = torch_npu.npu_add_rms_norm( - x, residual, self.weight, self.variance_epsilon) + x, _, residual = torch_npu.npu_add_rms_norm(x, residual, self.weight, + self.variance_epsilon) return x, residual - x, residual = torch_npu.npu_rms_norm(x, self.weight, - self.variance_epsilon) + x, residual = torch_npu.npu_rms_norm(x, self.weight, self.variance_epsilon) return x + RMSNorm.forward_oot = forward_oot diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py index 6ed8bd6..41a23a3 100644 --- a/vllm_ascend/platform.py +++ b/vllm_ascend/platform.py @@ -4,9 +4,9 @@ import torch try: - import torch_npu + import torch_npu # noqa: F401 except ImportError: - print("Failed to import torch_npu") + 
print("Failed to import torch_npu.") from vllm.config import VllmConfig from vllm.platforms import Platform @@ -72,7 +72,7 @@ def mem_get_info(cls) -> Tuple[int, int]: @classmethod def check_and_update_config(cls, vllm_config: VllmConfig) -> None: # Register ops when setup. - from vllm_ascend import ops + from vllm_ascend import ops # noqa: F401 parallel_config = vllm_config.parallel_config if parallel_config.worker_cls == "auto":