Skip to content

Commit 14b7b0a

Browse files
Fix Neural Solution SQL/CMD injection (#1627)
Signed-off-by: Kaihui-intel <kaihui.tang@intel.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 8ddd755 commit 14b7b0a

File tree

12 files changed

+187
-13
lines changed

12 files changed

+187
-13
lines changed

neural_solution/backend/scheduler.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ def prepare_task(self, task: Task):
154154
if not task.optimized:
155155
# Generate quantization code with Neural Coder API
156156
neural_coder_cmd = ["python -m neural_coder --enable --approach"]
157-
# for users to define approach: "static, ""static_ipex", "dynamic", "auto"
157+
# for users to define approach: "static", "static_ipex", "dynamic", "auto"
158158
approach = task.approach
159159
neural_coder_cmd.append(approach)
160160
if is_remote_url(task.script_url):

neural_solution/examples/custom_models_optimized/tf_example1/README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ optional arguments:
9191
"script_url": "tf_example1",
9292
"optimized": "True",
9393
"arguments": [
94-
"--dataset_location=dataset --model_path=model"
94+
"--dataset_location=dataset", "--model_path=model"
9595
],
9696
"approach": "static",
9797
"requirements": [
@@ -106,7 +106,7 @@ When using distributed quantization, the `workers` needs to be set to greater th
106106
"script_url": "tf_example1",
107107
"optimized": "True",
108108
"arguments": [
109-
"--dataset_location=dataset --model_path=model"
109+
"--dataset_location=dataset", "--model_path=model"
110110
],
111111
"approach": "static",
112112
"requirements": [

neural_solution/examples/custom_models_optimized/tf_example1/task_request.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"script_url": "custom_models_optimized/tf_example1",
33
"optimized": "True",
44
"arguments": [
5-
"--dataset_location=dataset --model_path=model"
5+
"--dataset_location=dataset", "--model_path=model"
66
],
77
"approach": "static",
88
"requirements": ["tensorflow"

neural_solution/examples/custom_models_optimized/tf_example1/task_request_distributed.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"script_url": "custom_models_optimized/tf_example1",
33
"optimized": "True",
44
"arguments": [
5-
"--dataset_location=dataset --model_path=model"
5+
"--dataset_location=dataset", "--model_path=model"
66
],
77
"approach": "static",
88
"requirements": ["tensorflow"

neural_solution/examples/hf_models/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ optional arguments:
7272
"script_url": "https://github.com/huggingface/transformers/blob/v4.21-release/examples/pytorch/text-classification/run_glue.py",
7373
"optimized": "False",
7474
"arguments": [
75-
"--model_name_or_path bert-base-cased --task_name mrpc --do_eval --output_dir result"
75+
"--model_name_or_path=bert-base-cased", "--task_name=mrpc", "--do_eval", "--output_dir=result"
7676
],
7777
"approach": "static",
7878
"requirements": [],

neural_solution/examples/hf_models/task_request.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"script_url": "https://github.com/huggingface/transformers/blob/v4.21-release/examples/pytorch/text-classification/run_glue.py",
33
"optimized": "False",
44
"arguments": [
5-
"--model_name_or_path bert-base-cased --task_name mrpc --do_eval --output_dir result"
5+
"--model_name_or_path=bert-base-cased", "--task_name=mrpc", "--do_eval", "--output_dir=result"
66
],
77
"approach": "static",
88
"requirements": ["datasets", "transformers=4.21.0", "torch"],

neural_solution/examples/hf_models_grpc/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ optional arguments:
6868
"script_url": "https://github.com/huggingface/transformers/blob/v4.21-release/examples/pytorch/text-classification/run_glue.py",
6969
"optimized": "False",
7070
"arguments": [
71-
"--model_name_or_path bert-base-cased --task_name mrpc --do_eval --output_dir result"
71+
"--model_name_or_path=bert-base-cased", "--task_name=mrpc", "--do_eval", "--output_dir=result"
7272
],
7373
"approach": "static",
7474
"requirements": [],

neural_solution/examples/hf_models_grpc/task_request.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"script_url": "https://github.com/huggingface/transformers/blob/v4.21-release/examples/pytorch/text-classification/run_glue.py",
33
"optimized": "False",
44
"arguments": [
5-
"--model_name_or_path bert-base-cased --task_name mrpc --do_eval --output_dir result"
5+
"--model_name_or_path=bert-base-cased", "--task_name=mrpc", "--do_eval", "--output_dir=result"
66
],
77
"approach": "static",
88
"requirements": ["datasets", "transformers=4.21.0", "torch"],

neural_solution/frontend/fastapi/main_server.py

+5
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
get_cluster_info,
3737
get_cluster_table,
3838
get_res_during_tuning,
39+
is_valid_task,
3940
list_to_string,
4041
serialize,
4142
)
@@ -153,10 +154,14 @@ async def submit_task(task: Task):
153154
Returns:
154155
json: status , id of task and messages.
155156
"""
157+
if not is_valid_task(task.dict()):
158+
raise HTTPException(status_code=422, detail="Invalid task")
159+
156160
msg = "Task submitted successfully"
157161
status = "successfully"
158162
# search the current
159163
db_path = get_db_path(config.workspace)
164+
160165
if os.path.isfile(db_path):
161166
conn = sqlite3.connect(db_path)
162167
cursor = conn.cursor()

neural_solution/frontend/utility.py

+58
Original file line numberDiff line numberDiff line change
@@ -295,3 +295,61 @@ def list_to_string(lst: list):
295295
str: string
296296
"""
297297
return " ".join(str(i) for i in lst)
298+
299+
300+
def is_invalid_str(to_test_str: str):
    """Report whether a value is unsafe to embed in a shell command or SQL statement.

    Note the polarity: the function returns True when the input must be
    REJECTED and False when it is acceptable.

    Args:
        to_test_str (str): string to be tested.

    Returns:
        bool: True if the value is not a string or contains a forbidden
            character, False otherwise (the empty string is accepted).
    """
    # A non-string cannot be checked character by character: an int would raise
    # TypeError on `in`, and a nested list would turn `in` into an element
    # membership test that lets any payload through. Reject such values outright.
    if not isinstance(to_test_str, str):
        return True

    # Whitespace splits shell words (newline also terminates a command), quotes
    # break out of SQL/shell string literals, and the rest are shell operators:
    # chaining (& | ;), command substitution (` $) and redirection (> <).
    forbidden_chars = (" ", "\t", "\n", "\r", '"', "'", "&", "|", ";", "`", "$", ">", "<")
    return any(char in to_test_str for char in forbidden_chars)
310+
311+
312+
def is_valid_task(task: dict) -> bool:
    """Verify whether a task request is well-formed and free of injection characters.

    A task is valid only when all of the following hold:
      * every required field is present;
      * "script_url" is a string accepted by is_invalid_str;
      * "optimized" is a bool or one of the strings "True" / "False";
      * "arguments" and "requirements" are lists whose elements are all
        strings accepted by is_invalid_str;
      * "approach" is one of "static", "static_ipex", "dynamic", "auto";
      * "workers" is an integer >= 1 (a bool is not accepted).

    Args:
        task (dict): task request

    Returns:
        bool: True if the task is valid, False otherwise.
    """
    required_fields = ("script_url", "optimized", "arguments", "approach", "requirements", "workers")
    if any(field not in task for field in required_fields):
        return False

    script_url = task["script_url"]
    if not isinstance(script_url, str) or is_invalid_str(script_url):
        return False

    optimized = task["optimized"]
    if isinstance(optimized, str):
        if optimized not in ("True", "False"):
            return False
    elif not isinstance(optimized, bool):
        return False

    for list_field in ("arguments", "requirements"):
        items = task[list_field]
        if not isinstance(items, list):
            return False
        for item in items:
            # Check the element type here rather than relying on the helper:
            # a non-string element must yield "invalid" instead of raising
            # TypeError or slipping through a character-membership test.
            if not isinstance(item, str) or is_invalid_str(item):
                return False

    approach = task["approach"]
    if not isinstance(approach, str) or approach not in ("static", "static_ipex", "dynamic", "auto"):
        return False

    workers = task["workers"]
    # bool is a subclass of int, so True would otherwise be accepted as 1 worker.
    if isinstance(workers, bool) or not isinstance(workers, int) or workers < 1:
        return False

    return True

neural_solution/test/frontend/fastapi/test_main_server.py

+18-4
Original file line numberDiff line numberDiff line change
@@ -118,12 +118,26 @@ def test_get_description(self):
118118
def test_submit_task(self, mock_submit_task):
119119
task = {
120120
"script_url": "http://example.com/script.py",
121-
"optimized": True,
121+
"optimized": "True",
122122
"arguments": ["arg1", "arg2"],
123-
"approach": "approach1",
123+
"approach": "static",
124+
"requirements": ["req1", "req2"],
125+
"workers": 3,
126+
}
127+
128+
# test invalid task
129+
task_invalid = {
130+
"script_url": "http://example.com/script.py",
131+
"optimized": "True",
132+
"arguments": "invalid str, should be list",
133+
"approach": "static",
124134
"requirements": ["req1", "req2"],
125135
"workers": 3,
126136
}
137+
response = client.post("/task/submit/", json=task_invalid)
138+
print(response)
139+
self.assertEqual(response.status_code, 422)
140+
self.assertIn("arguments", response.text)
127141

128142
# test no db case
129143
delete_db()
@@ -174,7 +188,7 @@ def test_get_task_by_id(self, mock_submit_task):
174188
"script_url": "http://example.com/script.py",
175189
"optimized": True,
176190
"arguments": ["arg1", "arg2"],
177-
"approach": "approach1",
191+
"approach": "static",
178192
"requirements": ["req1", "req2"],
179193
"workers": 3,
180194
}
@@ -200,7 +214,7 @@ def test_get_task_status_by_id(self, mock_submit_task):
200214
"script_url": "http://example.com/script.py",
201215
"optimized": True,
202216
"arguments": ["arg1", "arg2"],
203-
"approach": "approach1",
217+
"approach": "static",
204218
"requirements": ["req1", "req2"],
205219
"workers": 3,
206220
}

neural_solution/test/frontend/fastapi/test_utils.py

+97
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
get_cluster_info,
1111
get_cluster_table,
1212
get_res_during_tuning,
13+
is_valid_task,
1314
list_to_string,
1415
serialize,
1516
)
@@ -110,6 +111,102 @@ def test_list_to_string(self):
110111
expected_result = "Hello Neural Solution"
111112
self.assertEqual(list_to_string(lst), expected_result)
112113

114+
def test_is_valid_task(self):
    """Check that is_valid_task accepts a well-formed task and rejects malformed ones.

    Every negative case differs from the valid baseline in exactly one field,
    so a rejection can only come from the check that the case is named after.
    """
    # Baseline request that satisfies every validation rule.
    task_normal = {
        "script_url": "custom_models_optimized/tf_example1",
        "optimized": "True",
        "arguments": ["--dataset_location=dataset", "--model_path=model"],
        "approach": "static",
        "requirements": ["tensorflow"],
        "workers": 1,
    }
    self.assertTrue(is_valid_task(task_normal))

    # SQL injection attempt carried in the "approach" field.
    task_sql_injection = {
        "script_url": "https://github.com/huggingface/transformers/blob/v4.21-release/examples/pytorch/text-classification/run_glue.py",
        "optimized": "False",
        "arguments": [],
        "approach": "5', '6', 7, 'pending'), ('1b9ff5c2fd2143d58522bd71d18845a3', '2', 3, '4', '5', '6', 7, 'pending') ON CONFLICT (id) DO UPDATE SET id = '1b9ff5c2fd2143d58522bd71d18845a3', q_model_path = '/home/victim/.ssh' --",
        "requirements": [],
        "workers": 1,
    }
    self.assertFalse(is_valid_task(task_sql_injection))

    # Shell command injection attempt carried in the "script_url" field; the
    # arguments are kept valid so that only the URL can trigger the rejection.
    task_cmd_injection = {
        "script_url": 'https://github.com/huggingface/transformers/blob/v4.21-release/examples/pytorch/text-classification/run_glue.py & eval "$(echo ZWNobyAiRG9tYWluIGV4cGFuc2lvbiIgPiB+L2F0dGFjay5weSI= | base64 --decode)"',
        "optimized": "False",
        "arguments": ["--model_name_or_path=bert-base-cased", "--task_name=mrpc", "--do_eval", "--output_dir=result"],
        "approach": "static",
        "requirements": [],
        "workers": 1,
    }
    self.assertFalse(is_valid_task(task_cmd_injection))

    # Required fields are missing.
    task_lack_field = {
        "optimized": "True",
    }
    self.assertFalse(is_valid_task(task_lack_field))

    # "script_url" must be a string.
    task_script_url_not_str = dict(task_normal, script_url=["custom_models_optimized/tf_example1"])
    self.assertFalse(is_valid_task(task_script_url_not_str))

    # "optimized" given as a string must be exactly "True" or "False".
    task_optimized_not_bool_str = dict(task_normal, optimized="True or False")
    self.assertFalse(is_valid_task(task_optimized_not_bool_str))

    # "arguments" must be a list.
    task_arguments_not_list = dict(task_normal, arguments=123)
    self.assertFalse(is_valid_task(task_arguments_not_list))

    # A single argument must not contain spaces (several options in one string).
    task_arguments_invalid = dict(task_normal, arguments=["--dataset_location=dataset --model_path=model"])
    self.assertFalse(is_valid_task(task_arguments_invalid))

    # "approach" must be one of the supported approaches.
    task_approach_is_invalid = dict(task_normal, approach="static or dynamic")
    self.assertFalse(is_valid_task(task_approach_is_invalid))

    # "requirements" must be a list.
    task_requirements_not_list = dict(task_normal, requirements="tensorflow")
    self.assertFalse(is_valid_task(task_requirements_not_list))
209+
113210

114211
if __name__ == "__main__":
115212
unittest.main()

0 commit comments

Comments
 (0)