Default ramalama serve to only listen on localhost #876

Open · wants to merge 1 commit into main
docs/ramalama-serve.1.md (2 additions, 1 deletion)
@@ -68,7 +68,8 @@ Generate specified configuration format for running the AI Model as a service
 show this help message and exit

 #### **--host**="0.0.0.0"
-IP address for llama.cpp to listen on.
+IP address for llama.cpp to listen on. Only supported when llama.cpp is running
+without a container.

 #### **--name**, **-n**
 Name of the container to run the Model in.
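The practical difference behind the new default: a server bound to 0.0.0.0 accepts connections on every network interface, while 127.0.0.1 is reachable only from the local machine. A quick standalone illustration using only the Python standard library (nothing ramalama-specific, port chosen by the OS):

```python
# Demonstrates the difference between the old and new default bind addresses.
import socket

for host in ("0.0.0.0", "127.0.0.1"):
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as srv:
        srv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        srv.bind((host, 0))  # port 0: let the OS pick a free port
        srv.listen()
        # 0.0.0.0 listens on all interfaces (reachable from the LAN);
        # 127.0.0.1 only accepts connections originating on this machine.
        print(host, "->", srv.getsockname())
```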
docs/ramalama.conf (3 additions, 2 deletions)
@@ -40,9 +40,10 @@
 #
 #image = "quay.io/ramalama/ramalama:latest"

-# IP address for llama.cpp to listen on.
+# IP address for llama.cpp to listen on. Only supported when llama.cpp is
+# running without a container.
 #
-#host = "0.0.0.0"
+#host = "127.0.0.1"

 # Pass `--group-add keep-groups` to podman, when using podman.
 # In some cases this is needed to access the gpu from a rootless container
docs/ramalama.conf.5.md (3 additions, 2 deletions)
@@ -78,9 +78,10 @@ Run RamaLama using the specified container engine.
 Valid options are: Podman and Docker
 This field can be overridden by the RAMALAMA_CONTAINER_ENGINE environment variable.

-**host**="0.0.0.0"
+**host**="127.0.0.1"

-IP address for llama.cpp to listen on.
+IP address for llama.cpp to listen on. Only supported when llama.cpp is running
+without a container.

 **image**="quay.io/ramalama/ramalama:latest"

ramalama/config.py (1 addition, 1 deletion)
@@ -70,7 +70,7 @@ def load_config_defaults(config: Dict[str, Any]):
 config['ctx_size'] = config.get('ctx_size', 2048)
 config['pull'] = config.get('pull', "newer")
 config['temp'] = config.get('temp', "0.8")
-config['host'] = config.get('host', "0.0.0.0")
+config['host'] = config.get('host', "127.0.0.1")
 config['port'] = config.get('port', "8080")
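The new default relies on `dict.get` falling back only when the key is absent, so a `host` set explicitly in ramalama.conf still wins. A minimal standalone sketch of that pattern (the helper name below is illustrative, not ramalama's API):

```python
# Illustrative only: mirrors the dict.get() fallback used in
# load_config_defaults; fill_host_default is a made-up helper name.
from typing import Any, Dict


def fill_host_default(config: Dict[str, Any]) -> Dict[str, Any]:
    # Fall back to loopback only when the config supplies no 'host' value.
    config['host'] = config.get('host', "127.0.0.1")
    config['port'] = config.get('port', "8080")
    return config


print(fill_host_default({}))                   # {'host': '127.0.0.1', 'port': '8080'}
print(fill_host_default({"host": "0.0.0.0"}))  # {'host': '0.0.0.0', 'port': '8080'}
```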
ramalama/model.py (2 additions, 1 deletion)
@@ -527,7 +527,8 @@ def handle_runtime(self, args, exec_args, exec_model_path):
 gpu_args = self.gpu_args(args=args)
 if gpu_args is not None:
     exec_args.extend(gpu_args)
-exec_args.extend(["--host", args.host])
+if not args.container:
+    exec_args.extend(["--host", args.host])
 return exec_args

 def generate_container_config(self, model_path, args, exec_args):
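A rough sketch of the resulting control flow: the `--host` flag now reaches the backend command line only when ramalama runs it directly on the host, while containerized runs leave the bind address to the container setup. The function and base command below are simplified stand-ins, not the actual `handle_runtime` implementation:

```python
# Simplified stand-in for the conditional added above; the function name and
# the "llama-server" base command are assumptions for illustration only.
from argparse import Namespace
from typing import List


def build_backend_args(args: Namespace, exec_args: List[str]) -> List[str]:
    if not args.container:
        # Native run: forward --host, so the server binds 127.0.0.1 by default.
        exec_args.extend(["--host", args.host])
    # Containerized run: no --host is forwarded to the backend process.
    return exec_args


print(build_backend_args(Namespace(container=False, host="127.0.0.1"), ["llama-server"]))
print(build_backend_args(Namespace(container=True, host="127.0.0.1"), ["llama-server"]))
```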
test/system/040-serve.bats (4 additions, 5 deletions)
@@ -17,12 +17,11 @@ verify_begin=".*run --rm -i --label ai.ramalama --name"
 run_ramalama --dryrun serve --name foobar ${model}
 is "$output" "${verify_begin} foobar .*" "dryrun correct with --name"
 assert "$output" !~ ".*--network" "--network is not part of the output"
-assert "$output" =~ ".*--host 0.0.0.0" "verify host 0.0.0.0 is added when run within container"
 is "$output" ".*${model}" "verify model name"
+assert "$output" !~ ".*--host" "verify --host not added when run within container"
 assert "$output" !~ ".*--seed" "assert seed does not show by default"

 run_ramalama --dryrun serve --network bridge --host 127.1.2.3 --name foobar ${model}
-assert "$output" =~ "--network bridge.*--host 127.1.2.3" "verify --host is modified when run within container"
 is "$output" ".*${model}" "verify model name"
 is "$output" ".*--temp 0.8" "verify temp is set"

@@ -53,9 +52,9 @@ verify_begin=".*run --rm -i --label ai.ramalama --name"
 run_ramalama stop --all
 else
 run_ramalama --dryrun serve ${model}
-assert "$output" =~ ".*--host 0.0.0.0" "Outside container sets host to 0.0.0.0"
-run_ramalama --dryrun serve --seed abcd --host 127.0.0.1 ${model}
-assert "$output" =~ ".*--host 127.0.0.1" "Outside container overrides host to 127.0.0.1"
+assert "$output" =~ ".*--host 127.0.0.1" "Outside container sets host to 127.0.0.1"
+run_ramalama --dryrun serve --seed abcd --host 127.0.0.2 ${model}
+assert "$output" =~ ".*--host 127.0.0.2" "Outside container overrides host to 127.0.0.2"
 assert "$output" =~ ".*--seed abcd" "Verify seed is set"
 run_ramalama 1 --nocontainer serve --name foobar tiny
 is "${lines[0]}" "Error: --nocontainer and --name options conflict. The --name option requires a container." "conflict between nocontainer and --name line"
test/unit/test_config.py (22 additions, 2 deletions)
@@ -63,7 +63,7 @@ def test_load_config_from_env(env, config, expected):
"ctx_size": 2048,
"pull": "newer",
"temp": "0.8",
"host": "0.0.0.0",
"host": "127.0.0.1",
"port": "8080",
},
),
@@ -80,10 +80,30 @@ def test_load_config_from_env(env, config, expected):
"ctx_size": 2048,
"pull": "newer",
"temp": "0.8",
"host": "0.0.0.0",
"host": "127.0.0.1",
"port": "8080",
},
),
(
{
"host": "1.2.3.4",
"port": "8081",
"temp": "10.0",
"pull": "never",
},
{
"nocontainer": False,
"carimage": "registry.access.redhat.com/ubi9-micro:latest",
"runtime": "llama.cpp",
"ngl": -1,
"keep_groups": False,
"ctx_size": 2048,
"pull": "never",
"temp": "10.0",
"host": "1.2.3.4",
"port": "8081",
},
),
],
)
def test_load_config_defaults(config, expected):
Expand Down
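The new parametrize case checks that explicit `host`, `port`, `temp`, and `pull` values survive default filling untouched. A self-contained approximation of that assertion, re-implementing the default step rather than importing ramalama (so `apply_defaults` is only a stand-in for `load_config_defaults`):

```python
# Standalone sketch of the behaviour the new test case covers; not the
# project's own test code.
import pytest


def apply_defaults(config):
    defaults = {"host": "127.0.0.1", "port": "8080", "temp": "0.8", "pull": "newer"}
    return {key: config.get(key, value) for key, value in defaults.items()}


@pytest.mark.parametrize(
    "config,expected_host",
    [
        ({}, "127.0.0.1"),                 # nothing set: localhost default applies
        ({"host": "1.2.3.4"}, "1.2.3.4"),  # explicit host overrides the default
    ],
)
def test_host_default(config, expected_host):
    assert apply_defaults(config)["host"] == expected_host
```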