Skip to content

Commit

Permalink
Merge pull request #164 from attilamester/feature/156-simplified-mode…
Browse files Browse the repository at this point in the history
…l-keep-only-mnemonic-when-encoding-an-instruction-into-pixel

Feature/156 simplified model keep only mnemonic when encoding an instruction into pixel
  • Loading branch information
attilamester authored May 21, 2024
2 parents 5c29aeb + 0c8bfce commit 8a2cd36
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 4 deletions.
6 changes: 5 additions & 1 deletion src/core/model/function.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,11 @@ def set_instructions_from_function_disassembly(self, pdfj: Dict):
if "disasm" not in op or "bytes" not in op:
continue
refs = op.get("refs", []) if ("call" in op["type"] or "jmp" in op["type"]) else []
self.instructions.append(Instruction(op["disasm"], op["bytes"].encode(), refs))
try:
instruction = Instruction(op["disasm"], op["bytes"].encode(), refs)
self.instructions.append(instruction)
except Exception as e:
Logger.error(f"Skipping instruction on {self.rva}. Could not process instruction: {e}")

# Render this object as "CGNode(<label>, <rva>, <type>)" using its label, rva and type attributes.
def __str__(self):
return f"CGNode({self.label}, {self.rva}, {self.type})"
Expand Down
2 changes: 1 addition & 1 deletion src/core/model/instruction.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def is_section(token: str):

# Report whether a token names a block, decided purely by the token's string prefix.
@staticmethod
def is_block(token: str):
# NOTE(review): this hunk is listed as 1 addition & 1 deletion, so the next line appears to be
# the removed version: only the "case." and "switch." prefixes count as blocks.
return token.startswith("case.") or token.startswith("switch.")
# Replacement line: same check, additionally accepting the "segment." prefix.
return token.startswith("case.") or token.startswith("switch.") or token.startswith("segment.")

@staticmethod
def is_function(token: str):
Expand Down
10 changes: 10 additions & 0 deletions src/core/model/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,16 @@ def get_size_fmt(self):
self.size_fmt = display_size(len(self.content))
return self.size_fmt

# Return the feature vector that ember's PEFeatureExtractor computes from self.content.
def get_ember_features(self):
import numpy as np
# Bind the names np.int, np.float and np.bool on the numpy module before ember is imported.
# NOTE(review): presumably a compatibility shim for NumPy releases that dropped these aliases
# while ember (or one of its dependencies) still references them — confirm.
# NOTE(review): the assignments mutate the process-wide numpy module on every call, and np.int
# is bound to np.int32 rather than the builtin int — verify this width is intended.
np.int = np.int32
np.float = np.float64
np.bool = np.bool_

# Imported here, after the assignments above, rather than at module level.
from ember.features import PEFeatureExtractor
# NOTE(review): the positional 2 is presumably the EMBER feature version — confirm against ember.
ex = PEFeatureExtractor(2, print_feature_warning=False)
return ex.feature_vector(self.content)

def __str__(self):
return (f"""Sample( {self.filepath}
md5={self.md5}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def get_model(debug: bool = False) -> torch.nn.Module:
MODEL = torchvision.models.resnet50(weights=weights)
MODEL.fc = torch.nn.Linear(512 * 4, DATASET.num_classes)
elif hp_model == "resnet1d":
MODEL = ResNet1D(3, 2, 9, 1, 1, 3, DATASET.num_classes)
MODEL = ResNet1D(3, 64, 9, 1, 1, 4, DATASET.num_classes)
elif hp_model.startswith("alexnet"):
weights = None if not hp_model_pretrained else torchvision.models.AlexNet_Weights.IMAGENET1K_V1
MODEL = torchvision.models.alexnet(weights=weights)
Expand Down
3 changes: 2 additions & 1 deletion src/core/processors/r2_scanner/scan_samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from core.data.bodmas import Bodmas
from core.model import CallGraph, CallGraphCompressed
from core.model.sample import Sample
from core.processors.r2_scanner.create_dfs import create_callgraph_dfs
from core.processors.r2_scanner.create_dfs import create_callgraph_dfs, create_callgraph_function_blocks
from core.processors.r2_scanner.scan_instructions import extract_callgraph_instructions_stats
from core.processors.util import process_samples
from util import config
Expand All @@ -30,6 +30,7 @@ def scan_sample(dset: Type[DatasetProvider], sample: Sample):
scan(dset, cg)
extract_callgraph_instructions_stats(dset, cg)
create_callgraph_dfs(dset, cg, img_dims=[(30, 30), (100, 100), (224, 224), (300, 300)])
create_callgraph_function_blocks(dset, cg, img_dims=[(30, 30), (100, 100), (224, 224), (300, 300)])
else:
Logger.info(f">> Already existing r2 found on disk: {md5}")

Expand Down

0 comments on commit 8a2cd36

Please sign in to comment.