Add preemptive param in session to give pod a good chance of being sc… #192

Merged: 4 commits, Mar 15, 2021
Changes from 2 commits
2 changes: 2 additions & 0 deletions .gitignore
@@ -3,6 +3,8 @@ __pycache__/
*.pyc
.ipynb_checkpoints
*.egg-info/
+ *.eggs
+ python/*.eggs

# coordinator
coordinator/proto
33 changes: 22 additions & 11 deletions coordinator/gscoordinator/cluster.py
@@ -176,6 +176,7 @@ def __init__(
image_pull_secrets=None,
volumes=None,
num_workers=None,
+ preemptive=None,
instance_id=None,
log_level=None,
timeout_seconds=None,
@@ -211,6 +212,7 @@ def __init__(
self._namespace = namespace
self._service_type = service_type
self._num_workers = num_workers
+ self._preemptive = preemptive

self._coordinator_name = coordinator_name
self._coordinator_service_name = coordinator_service_name
@@ -298,6 +300,10 @@ def get_namespace(self):
def get_gie_graph_manager_service_name(self):
return self._gie_graph_manager_service_name

+ @property
+ def preemptive(self):
+ return self._preemptive
+
def _create_engine_replicaset(self):
logger.info("Launching GraphScope engines pod ...")
labels = {"name": self._engine_name}
@@ -340,20 +346,22 @@ def _create_engine_replicaset(self):
engine_builder.add_simple_envs({"GLOG_v": str(self._glog_level)})
# add vineyard container
engine_builder.add_vineyard_container(
- self._vineyard_container_name,
- self._gs_image,
- self._vineyard_cpu,
- self._vineyard_mem,
- self._vineyard_shared_mem,
- self._etcd_endpoint,
- self._vineyard_service_port,
+ name=self._vineyard_container_name,
+ image=self._gs_image,
+ cpu=self._vineyard_cpu,
+ mem=self._vineyard_mem,
+ shared_mem=self._vineyard_shared_mem,
+ preemptive=self._preemptive,
+ etcd_endpoint=self._etcd_endpoint,
+ port=self._vineyard_service_port,
)
# add engine container
engine_builder.add_engine_container(
- self._engine_container_name,
- self._gs_image,
- self._engine_cpu,
- self._engine_mem,
+ name=self._engine_container_name,
+ image=self._gs_image,
+ cpu=self._engine_cpu,
+ mem=self._engine_mem,
+ preemptive=self._preemptive,
)
for name in self._image_pull_secrets:
engine_builder.add_image_pull_secret(name)
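The switch to keyword arguments above threads preemptive into every container builder (vineyard, engine, and, further down, etcd, zookeeper and the GIE graph manager). The builders themselves are outside this diff; the sketch below only illustrates the intended mapping using the official kubernetes Python client, and its names (build_resource_requirements, minimum_cpu, minimum_mem) are hypothetical rather than taken from the repo:

from kubernetes import client as k8s_client

# Hypothetical sketch, not the repo's builder code: map `preemptive` plus the
# user-supplied cpu/mem values onto Kubernetes resource settings.
def build_resource_requirements(cpu, mem, preemptive, minimum_cpu="0.2", minimum_mem="64Mi"):
    limits = {"cpu": cpu, "memory": mem}
    if preemptive:
        # Minimal requests, user-supplied limits -> Burstable QoS; the pod is
        # easy to schedule but may be preempted under resource pressure.
        requests = {"cpu": minimum_cpu, "memory": minimum_mem}
    else:
        # requests == limits -> Guaranteed QoS.
        requests = dict(limits)
    return k8s_client.V1ResourceRequirements(requests=requests, limits=limits)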
@@ -399,6 +407,7 @@ def _create_etcd(self):
image=self._etcd_image,
cpu=self._etcd_cpu,
mem=self._etcd_mem,
+ preemptive=self._preemptive,
listen_peer_service_port=self._random_etcd_listen_peer_service_port,
listen_client_service_port=self._random_etcd_listen_client_service_port,
)
@@ -516,6 +525,7 @@ def _create_interactive_engine_service(self):
image=self._gie_graph_manager_image,
cpu=self._gie_graph_manager_cpu,
mem=self._gie_graph_manager_mem,
+ preemptive=self._preemptive,
port=self._interactive_engine_manager_port,
)

@@ -525,6 +535,7 @@ def _create_interactive_engine_service(self):
image=self._zookeeper_image,
cpu=self._zookeeper_cpu,
mem=self._zookeeper_mem,
+ preemptive=self._preemptive,
port=self._zookeeper_port,
)

10 changes: 10 additions & 0 deletions coordinator/gscoordinator/coordinator.py
@@ -454,6 +454,7 @@ def CreateInteractiveInstance(self, request, context):
"schemaJson": schema_json,
"podNameList": ",".join(self._pods_list),
"containerName": ENGINE_CONTAINER,
"preemptive": str(self._launcher.preemptive),
"gremlinServerCpu": str(gremlin_server_cpu),
"gremlinServerMem": gremlin_server_mem,
}
@@ -678,6 +679,14 @@ def parse_sys_args():
default=4,
help="The number of engine workers.",
)
+ parser.add_argument(
+ "--preemptive",
+ type=str2bool,
+ nargs="?",
+ const=True,
+ default=True,
+ help="Support resource preemption or resource guarantee",
+ )
parser.add_argument(
"--instance_id",
type=str,
@@ -911,6 +920,7 @@ def launch_graphscope():
image_pull_secrets=args.k8s_image_pull_secrets,
volumes=args.k8s_volumes,
num_workers=args.num_workers,
+ preemptive=args.preemptive,
instance_id=args.instance_id,
log_level=args.log_level,
timeout_seconds=args.timeout_seconds,
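The --preemptive flag registered in parse_sys_args above relies on a str2bool converter that is defined elsewhere in the coordinator and not shown in this diff. Assuming it follows the usual argparse pattern, it would look roughly like this:

import argparse

# Assumed behaviour of the str2bool helper referenced by --preemptive; the
# actual implementation lives elsewhere in the coordinator, outside this diff.
def str2bool(value):
    if isinstance(value, bool):
        return value
    if value.lower() in ("yes", "true", "t", "y", "1"):
        return True
    if value.lower() in ("no", "false", "f", "n", "0"):
        return False
    raise argparse.ArgumentTypeError("Boolean value expected.")

With nargs="?" and const=True, a bare --preemptive turns preemption on, --preemptive false switches to resource guarantee, and omitting the flag keeps the default of True.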
@@ -35,11 +35,22 @@ function _create_pod {
coordinator_image=$(printf '%s\n' "$coordinator_image" | sed -e 's/[\/&]/\\&/g')
# node_host=`kubectl describe pods -l "app=manager" | grep "Node:" | head -1 | awk -F '[ /]+' '{print $2}'`
kubectl create configmap $config_name --from-file /home/maxgraph/config_$object_id
- sed -e "s/unique_pod_name/$pod_name/g" -e "s/unique_config_name/$config_name/g" \
- -e "s/gremlin_image/$gremlin_image/g" -e "s/unique_object_id/$object_id/g" \
- -e "s/gremlin_server_cpu/$gremlin_server_cpu/g" -e "s/gremlin_server_mem/$gremlin_server_mem/g" \
- -e "s/coordinator_image/$coordinator_image/g" \
- /root/maxgraph/pod.yaml > /root/maxgraph/pod_${object_id}.yaml

if [ "$preemptive" = "True" ]; then
sed -e "s/unique_pod_name/$pod_name/g" -e "s/unique_config_name/$config_name/g" \
-e "s/gremlin_image/$gremlin_image/g" -e "s/unique_object_id/$object_id/g" \
-e "s/requests_cpu/$requests_cpu/g" -e "s/requests_mem/$requests_mem/g" \
-e "s/limits_cpu/$gremlin_server_cpu/g" -e "s/limits_mem/$gremlin_server_mem/g" \
-e "s/coordinator_image/$coordinator_image/g" \
/root/maxgraph/pod.yaml > /root/maxgraph/pod_${object_id}.yaml
else
sed -e "s/unique_pod_name/$pod_name/g" -e "s/unique_config_name/$config_name/g" \
-e "s/gremlin_image/$gremlin_image/g" -e "s/unique_object_id/$object_id/g" \
-e "s/requests_cpu/$gremlin_server_cpu/g" -e "s/requests_mem/$gremlin_server_mem/g" \
-e "s/limits_cpu/$gremlin_server_cpu/g" -e "s/limits_mem/$gremlin_server_mem/g" \
-e "s/coordinator_image/$coordinator_image/g" \
/root/maxgraph/pod.yaml > /root/maxgraph/pod_${object_id}.yaml
fi
kubectl apply -f /root/maxgraph/pod_${object_id}.yaml
}
function _render_schema_path {
@@ -99,8 +110,12 @@ export schema_path=$2
export engine_count=`echo $3 | awk -F"," '{print NF}'`
export pod_hosts=`echo $3 | awk -F"," '{for(i=1;i<=NF;++i) {print $i" "}}'`
export ENGINE_CONTAINER=$4
- export gremlin_server_cpu=$5
- export gremlin_server_mem=$6
- export engine_paras=$7
+ export preemptive=$5
+ export gremlin_server_cpu=$6
+ export gremlin_server_mem=$7
+ export engine_paras=$8
export launch_engine_cmd="export object_id=${object_id} && /home/maxgraph/executor-entrypoint.sh"

+ export requests_cpu=1.0
+ export requests_mem="1Gi"
_create_maxgraph_instance
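Together, the launcher script above and the pod.yaml template below decide whether the gremlin and coordinator containers run with Burstable or Guaranteed QoS. As a rough restatement of the two sed branches (illustrative only; fill_template and pod_template are hypothetical names, the real substitution is done by sed):

# Illustrative only; it restates which values the two sed branches substitute
# into the requests_*/limits_* placeholders of the pod.yaml template below.
def fill_template(pod_template, preemptive, gremlin_server_cpu, gremlin_server_mem,
                  requests_cpu="1.0", requests_mem="1Gi"):
    if preemptive:
        # Burstable QoS: fixed minimal requests, user-supplied limits.
        values = {"requests_cpu": requests_cpu, "requests_mem": requests_mem}
    else:
        # Guaranteed QoS: requests mirror the limits.
        values = {"requests_cpu": gremlin_server_cpu, "requests_mem": gremlin_server_mem}
    values.update({"limits_cpu": gremlin_server_cpu, "limits_mem": gremlin_server_mem})
    for placeholder, value in values.items():
        pod_template = pod_template.replace(placeholder, str(value))
    return pod_template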
16 changes: 8 additions & 8 deletions interactive_engine/deploy/docker/dockerfile/pod.yaml
@@ -18,11 +18,11 @@ spec:
- image: gremlin_image
resources:
requests:
- cpu: gremlin_server_cpu
- memory: gremlin_server_mem
+ cpu: requests_cpu
+ memory: requests_mem
limits:
- cpu: gremlin_server_cpu
- memory: gremlin_server_mem
+ cpu: limits_cpu
+ memory: limits_mem
name: gremlin-container
imagePullPolicy: IfNotPresent
args: ["/home/maxgraph/frontend-entrypoint.sh"]
@@ -35,11 +35,11 @@ spec:
- image: coordinator_image
resources:
requests:
- cpu: gremlin_server_cpu
- memory: gremlin_server_mem
+ cpu: requests_cpu
+ memory: requests_mem
limits:
- cpu: gremlin_server_cpu
- memory: gremlin_server_mem
+ cpu: limits_cpu
+ memory: limits_mem
name: coordinator-container
imagePullPolicy: IfNotPresent
args: ["/home/maxgraph/coordinator-entrypoint.sh"]
@@ -93,6 +93,7 @@ public CreateInstanceEntity createInstance(@RequestParam("graphName") String gra
@RequestParam("schemaJson") String schemaJson,
@RequestParam("podNameList") String podNameList,
@RequestParam("containerName") String containerName,
@RequestParam("preemptive") String preemptive,
@RequestParam("gremlinServerCpu") String gremlinServerCpu,
@RequestParam("gremlinServerMem") String gremlinServerMem,
@RequestParam("engineParams") String engineParams) throws Exception {
@@ -126,6 +127,7 @@ public CreateInstanceEntity createInstance(@RequestParam("graphName") String gra
createCommandList.add(schemaPath);
createCommandList.add(podNameList);
createCommandList.add(containerName);
+ createCommandList.add(preemptive);
createCommandList.add(gremlinServerCpu);
createCommandList.add(gremlinServerMem);
createCommandList.add(engineParams);
8 changes: 8 additions & 0 deletions python/graphscope/client/session.py
@@ -147,6 +147,7 @@ def __init__(
config=None,
addr=gs_config.addr,
num_workers=gs_config.num_workers,
+ preemptive=gs_config.preemptive,
k8s_namespace=gs_config.k8s_namespace,
k8s_service_type=gs_config.k8s_service_type,
k8s_gs_image=gs_config.k8s_gs_image,
@@ -188,6 +189,11 @@ def __init__(

num_workers (int, optional): The number of workers to launch GraphScope engine. Defaults to 2.

+ preemptive (bool, optional): If True, the GraphScope instance treats resource params (e.g. k8s_coordinator_cpu)
+ as limits and sets the minimum available value as requests. This gives the pod a `Burstable` QoS,
+ so it can be preempted by pods with a higher QoS class. Otherwise, both requests and limits are set to
+ the same value.
+
k8s_namespace (str, optional): Contains the namespace to create all resource inside.
If param missing, it will try to read namespace from kubernetes context, or
a random namespace will be created and deleted if namespace not exist.
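From the client side, the new parameter is just another session argument. A usage sketch, assuming a Kubernetes deployment with otherwise default settings:

import graphscope

# Default (preemptive=True): pods request minimal resources and use the
# configured cpu/mem values only as limits -> Burstable QoS, may be preempted.
sess = graphscope.session(num_workers=2)

# Resource guarantee: requests == limits for every pod -> Guaranteed QoS.
sess = graphscope.session(num_workers=2, preemptive=False)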
@@ -324,6 +330,7 @@ def __init__(
self._accessable_params = (
"addr",
"num_workers",
"preemptive",
"k8s_namespace",
"k8s_service_type",
"k8s_gs_image",
@@ -703,6 +710,7 @@ def _connect(self):
service_type=self._config_params["k8s_service_type"],
num_workers=self._config_params["num_workers"],
gs_image=self._config_params["k8s_gs_image"],
+ preemptive=self._config_params["preemptive"],
etcd_image=self._config_params["k8s_etcd_image"],
gie_graph_manager_image=self._config_params[
"k8s_gie_graph_manager_image"
3 changes: 3 additions & 0 deletions python/graphscope/config.py
@@ -75,6 +75,9 @@ class GSConfig(object):

k8s_service_type = "NodePort"

+ # support resource preemption or resource guarantee
+ preemptive = True
+
k8s_waiting_for_delete = False
num_workers = 2
show_log = False
6 changes: 6 additions & 0 deletions python/graphscope/deploy/kubernetes/cluster.py
@@ -68,6 +68,9 @@ class KubernetesCluster(object):
num_workers: int
Number of workers to launch graphscope engine.

+ preemptive: bool, optional
+ Support resource preemption or resource guarantee.
+
gs_image: str
GraphScope engine image.

@@ -161,6 +164,7 @@ def __init__(
namespace=None,
service_type=None,
num_workers=None,
+ preemptive=None,
gs_image=None,
etcd_image=None,
gie_graph_manager_image=None,
@@ -195,6 +199,7 @@ def __init__(
self._service_type = service_type
self._gs_image = gs_image
self._num_workers = num_workers
+ self._preemptive = preemptive
self._etcd_image = etcd_image
self._gie_graph_manager_image = gie_graph_manager_image
self._zookeeper_image = zookeeper_image
@@ -432,6 +437,7 @@ def _create_coordinator(self):
name=self._coordinator_container_name,
port=self._random_coordinator_service_port,
num_workers=self._num_workers,
+ preemptive=self._preemptive,
instance_id=self._instance_id,
log_level=gs_config.log_level,
namespace=self._namespace,