diff --git a/azure-pipelines.yml b/azure-pipelines.yml index c6426c750..d822e53be 100755 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -3,7 +3,7 @@ # Add steps that build, run tests, deploy, and more: # https://aka.ms/yaml trigger: -- citest +- dltsdev pool: name: 'DLTS-Platform' @@ -24,7 +24,7 @@ steps: echo $CONFIG_TYPE ./set_config.sh $CONFIG_TYPE cd .. - ./bash_step_by_step_deploy.sh + ./step_by_step.sh azure displayName: 'Deploy DLWorkspace' - script: | diff --git a/src/ClusterBootstrap/az_params.py b/src/ClusterBootstrap/az_params.py index d9c50dd7e..6e37a8227 100755 --- a/src/ClusterBootstrap/az_params.py +++ b/src/ClusterBootstrap/az_params.py @@ -17,5 +17,8 @@ "nfs_data_disk_num": 1, "nfs_data_disk_path": '/data', "nfs_vm": [], + "eviction_policy": "Deallocate", + "single_placement_group": "false", + "default_low_priority_domain": "redmond.corp.microsoft.com", }, } diff --git a/src/ClusterBootstrap/az_tools.py b/src/ClusterBootstrap/az_tools.py index 7dda34332..da6617aa1 100755 --- a/src/ClusterBootstrap/az_tools.py +++ b/src/ClusterBootstrap/az_tools.py @@ -45,8 +45,6 @@ def init_config(): config[k] = v for k, v in default_az_parameters.iteritems(): config[k] = v - # print config - # exit() return config @@ -377,6 +375,7 @@ def create_nfs_nsg(): print(output) print type(config["cloud_config"]["nfs_ssh"]["source_ips"]), config["cloud_config"]["nfs_ssh"]["source_ips"],type(source_addresses_prefixes), source_addresses_prefixes + merged_ip = utils.keep_widest_subnet(config["cloud_config"]["nfs_ssh"]["source_ips"] + source_addresses_prefixes) cmd = """ az network nsg rule create \ --resource-group %s \ @@ -389,8 +388,10 @@ def create_nfs_nsg(): """ % ( config["azure_cluster"]["resource_group_name"], config["azure_cluster"]["nfs_nsg_name"], config["cloud_config"]["nfs_ssh"]["port"], - " ".join(list(set(config["cloud_config"]["nfs_ssh"]["source_ips"] + source_addresses_prefixes))), + " ".join(merged_ip), ) + if verbose: + print(cmd) if not no_execution: output = utils.exec_cmd_local(cmd) print(output) @@ -466,8 +467,15 @@ def create_cluster(arm_vm_password=None, parallelism=1): create_vm_param(i, "infra", config["azure_cluster"]["infra_vm_size"], arm_vm_password is not None, arm_vm_password) + add_workers(arm_vm_password, parallelism) + + # create nfs server if specified. + for i in range(int(config["azure_cluster"]["nfs_node_num"])): + create_vm_param(i, "nfs", config["azure_cluster"]["nfs_vm_size"], False, + arm_vm_password, config["azure_cluster"]["nfs_vm"][i] if i < len(config["azure_cluster"]["nfs_vm"]) else None ) + +def add_workers(arm_vm_password=None, parallelism=1): if config["priority"] == "regular": - print("entering") if parallelism > 1: # TODO: Tolerate faults from multiprocessing import Pool @@ -479,15 +487,10 @@ def create_cluster(arm_vm_password=None, parallelism=1): else: for i in range(int(config["azure_cluster"]["worker_node_num"])): create_vm_param(i, "worker", config["azure_cluster"]["worker_vm_size"], - arm_vm_password is not None, arm_vm_password) + arm_vm_password is not None, arm_vm_password) elif config["priority"] == "low": utils.render_template("./template/vmss/vmss.sh.template", "scripts/vmss.sh",config) - utils.exec_cmd_local("chmod +x scripts/vmss.sh;./scripts/vmss.sh") - - # create nfs server if specified. - for i in range(int(config["azure_cluster"]["nfs_node_num"])): - create_vm_param(i, "nfs", config["azure_cluster"]["nfs_vm_size"], False, - arm_vm_password, config["azure_cluster"]["nfs_vm"][i] if i < len(config["azure_cluster"]["nfs_vm"]) else None ) + utils.exec_cmd_local("chmod +x scripts/vmss.sh; ./scripts/vmss.sh") def create_vm_param_wrapper(arg_tuple): i, role, vm_size, no_az, arm_vm_password = arg_tuple @@ -666,8 +669,8 @@ def get_disk_from_vm(vmname): def gen_cluster_config(output_file_name, output_file=True, no_az=False): if config["priority"] == "low": utils.render_template("./template/dns/cname_and_private_ips.sh.template", "scripts/cname_and_ips.sh", config) - utils.exec_cmd_local("chmod +x scripts/cname_and_ips.sh") - print "\nPlease copy the commands in dns_add_commands and register the DNS records on http://servicebook/dns/self-service.html\n" + utils.exec_cmd_local("chmod +x scripts/cname_and_ips.sh; bash scripts/cname_and_ips.sh") + print "\nPlease copy the commands in dns_add_commands and register the DNS records \n" bSQLOnly = (config["azure_cluster"]["infra_node_num"] <= 0) if useAzureFileshare() and not no_az: # theoretically it could be supported, but would require storage account to be created first in nested template and then @@ -718,14 +721,13 @@ def gen_cluster_config(output_file_name, output_file=True, no_az=False): cc["deploydockerETCD"] = False cc["platform-scripts"] = "ubuntu" cc["basic_auth"] = "%s,admin,1000" % uuid.uuid4().hex[:16] - domain_mapping = {"regular":"%s.cloudapp.azure.com" % config["azure_cluster"]["azure_location"], "low": config["domain_name"]} + domain_mapping = {"regular":"%s.cloudapp.azure.com" % config["azure_cluster"]["azure_location"], "low": config.get("domain_name",config["azure_cluster"]["default_low_priority_domain"])} if not bSQLOnly: cc["network"] = {"domain": domain_mapping[config["priority"]]} cc["machines"] = {} for i in range(int(config["azure_cluster"]["infra_node_num"])): - vmname = "%s-infra%02d" % (config["azure_cluster"] - ["cluster_name"].lower(), i + 1) + vmname = "{}-infra{:02d}".format(config["azure_cluster"]["cluster_name"], i + 1).lower() cc["machines"][vmname] = {"role": "infrastructure", "private-ip": get_vm_ip(i, "infra")} # Generate the workers in machines. @@ -747,7 +749,7 @@ def gen_cluster_config(output_file_name, output_file=True, no_az=False): for l in rf: worker_machines += l.split()[0], for vmname in worker_machines: - cc["machines"][vmname] = {"role": "worker","node-group": config["azure_cluster"]["worker_vm_size"], + cc["machines"][vmname.lower()] = {"role": "worker","node-group": config["azure_cluster"]["worker_vm_size"], "gpu-type":sku_mapping[config["azure_cluster"]["worker_vm_size"]]["gpu-type"]} elif config["priority"] == "regular": for vm in vm_list: @@ -756,18 +758,18 @@ def gen_cluster_config(output_file_name, output_file=True, no_az=False): worker_machines += vmname, for vmname in worker_machines: if isNewlyScaledMachine(vmname): - cc["machines"][vmname] = { + cc["machines"][vmname.lower()] = { "role": "worker", "scaled": True, "node-group": vm["vmSize"],"gpu-type":sku_mapping.get(vm["vmSize"],sku_mapping["default"])["gpu-type"]} else: - cc["machines"][vmname] = { + cc["machines"][vmname.lower()] = { "role": "worker", "node-group": vm["vmSize"],"gpu-type":sku_mapping.get(vm["vmSize"],sku_mapping["default"])["gpu-type"]} nfs_nodes = [] for vm in vm_list: vmname = vm["name"] if "-nfs" in vmname: - cc["machines"][vmname] = { + cc["machines"][vmname.lower()] = { "role": "nfs", "node-group": vm["vmSize"]} @@ -920,10 +922,12 @@ def run_command(args, command, nargs, parser): else: check_subscription() if command == "create": - # print config["azure_cluster"]["infra_vm_size"] create_cluster(args.arm_password, args.parallelism) vm_interconnects() + elif command == "addworkers": + add_workers(args.arm_password, args.parallelism) + vm_interconnects() elif command == "list": list_vm() @@ -964,7 +968,6 @@ def run_command(args, command, nargs, parser): if __name__ == '__main__': # the program always run at the current directory. dirpath = os.path.dirname(os.path.abspath(os.path.realpath(__file__))) - # print "Directory: " + dirpath os.chdir(dirpath) config = init_config() parser = argparse.ArgumentParser(prog='az_utils.py', @@ -1089,15 +1092,11 @@ def run_command(args, command, nargs, parser): if os.path.exists(config_file): with open(config_file) as cf: tmpconfig = yaml.load(cf) - assert tmpconfig["cluster_name"] in tmpconfig["azure_cluster"] merge_config(config, tmpconfig, verbose) if tmpconfig is not None and "cluster_name" in tmpconfig: config["azure_cluster"]["cluster_name"] = tmpconfig["cluster_name"] if tmpconfig is not None and "datasource" in tmpconfig: config["azure_cluster"]["datasource"] = tmpconfig["datasource"] - if tmpconfig is not None and "azure_cluster" in tmpconfig and config["azure_cluster"]["cluster_name"] in tmpconfig["azure_cluster"]: - merge_config(config["azure_cluster"], tmpconfig["azure_cluster"][ - config["azure_cluster"]["cluster_name"]], verbose) if (args.cluster_name is not None): config["azure_cluster"]["cluster_name"] = args.cluster_name @@ -1123,7 +1122,6 @@ def run_command(args, command, nargs, parser): config["azure_cluster"]["file_share_name"] = args.file_share_name config = update_config(config) - # print (config) with open(config_cluster, 'w') as outfile: yaml.dump(config, outfile, default_flow_style=False) diff --git a/src/ClusterBootstrap/deploy.py b/src/ClusterBootstrap/deploy.py index 51d2c7759..5c854b55c 100755 --- a/src/ClusterBootstrap/deploy.py +++ b/src/ClusterBootstrap/deploy.py @@ -96,7 +96,6 @@ def expand_path_in_config(key_in_config): raise Exception("Error: no %s in config " % key_in_config) def parse_capacity_in_GB( inp ): - # print "match capacity of %s" % inp mt = capacityMatch.search(inp) if mt is None: return 0.0 @@ -500,7 +499,6 @@ def init_deployment(): utils.render_template( template_file, target_file ,config) def check_node_availability(ipAddress): - # print "Check node availability on: " + str(ipAddress) status = os.system('ssh -o "StrictHostKeyChecking no" -o "UserKnownHostsFile=/dev/null" -i %s -oBatchMode=yes %s@%s hostname > /dev/null' % (config["admin_username"], config["ssh_cert"], ipAddress)) #status = sock.connect_ex((ipAddress,22)) return status == 0 @@ -513,7 +511,7 @@ def get_domain(): domain = "" return domain -# Get a list of nodes from cluster.yaml +# Get a list of nodes DNS from cluster.yaml def get_nodes_from_config(machinerole): machinerole = "infrastructure" if machinerole == "infra" else machinerole if "machines" not in config: @@ -572,6 +570,13 @@ def get_ETCD_master_nodes_from_cluster_portal(clusterId): def get_ETCD_master_nodes_from_config(clusterId): Nodes = get_nodes_from_config("infrastructure") + if int(config["etcd_node_num"]) == 1: + for nodename in config["machines"]: + nodeInfo = config["machines"][nodename] + if "role" in nodeInfo and nodeInfo["role"]=="infrastructure": + assert "private-ip" in nodeInfo and "private IP of the infrastructure node is not provided!" + config["etcd_private_ip"] = nodeInfo["private-ip"] + break config["etcd_node"] = Nodes config["kubernetes_master_node"] = Nodes return Nodes @@ -705,8 +710,8 @@ def GetCertificateProperty(): masterdns.append(value) config["apiserver_ssl_dns"] = "\n".join(["DNS."+str(i+5)+" = "+dns for i,dns in enumerate(masterdns)]) - config["apiserver_ssl_ip"] = "IP.1 = "+config["api-server-ip"]+"\nIP.2 = 127.0.0.1\n"+ "\n".join(["IP."+str(i+3)+" = "+ip for i,ip in enumerate(masterips)]) - + config["apiserver_ssl_ip"] = "\n".join(["IP.{} = {}".format(i, sslip) for i, sslip in enumerate([config["api-server-ip"]] + config["ssl_localhost_ips"] + masterips)]) + # config["apiserver_ssl_ip"] = "IP.1 = "+config["api-server-ip"]+"\nIP.2 = 127.0.0.1\n"+ "\n".join(["IP."+str(i+3)+" = "+ip for i,ip in enumerate(masterips)]) # kube-apiserver aggregator use easyrsa to generate crt files, we need to generate a group of master names for it. # It does not care if it's a DNS name or IP. @@ -725,7 +730,8 @@ def GetCertificateProperty(): etcddns.append(value) config["etcd_ssl_dns"] = "\n".join(["DNS."+str(i+5)+" = "+dns for i,dns in enumerate(etcddns)]) - config["etcd_ssl_ip"] = "IP.1 = 127.0.0.1\n" + "\n".join(["IP."+str(i+2)+" = "+ip for i,ip in enumerate(etcdips)]) + config["etcd_ssl_ip"] = "\n".join(["IP.{} = {}".format(i, sslip) for i, sslip in enumerate(config["ssl_localhost_ips"] + etcdips)]) + # config["etcd_ssl_ip"] = "IP.1 = 127.0.0.1\n" + "\n".join(["IP."+str(i+2)+" = "+ip for i,ip in enumerate(etcdips)]) def gen_worker_certificates(): @@ -747,7 +753,38 @@ def gen_ETCD_certificates(): utils.render_template_directory("./template/ssl", "./deploy/ssl",config) os.system("cd ./deploy/ssl && bash ./gencerts_etcd.sh") - +def load_az_params_as_default(): + from az_params import default_az_parameters + # need az_params default, in case we don't have the key in config.yaml + default_cfg = { k: v for k, v in default_az_parameters.items() } + azure_cluster_cfg = { k: v for k, v in config["azure_cluster"].items() } if "azure_cluster" in config else {} + merge_config(config["azure_cluster"], default_cfg["azure_cluster"]) + merge_config(config["azure_cluster"], azure_cluster_cfg) + +def on_premise_params(): + print("Warning: remember to set parameters:\ngpu_count_per_node, gpu_type, worker_node_num\n when using on premise machine!") + +def load_platform_type(): + platform_type = list(set(config.keys()) & set(config["supported_platform"])) + assert len(platform_type) == 1 and "platform type should be specified explicitly and unique!" + platform_type = platform_type[0] + config["platform_type"] = platform_type + +def gen_platform_wise_config(): + load_platform_type() + azdefault = { 'network_domain':"config['network']['domain']", + 'worker_node_num':"config['azure_cluster']['worker_node_num']", + 'gpu_count_per_node':'config["sku_mapping"].get(config["azure_cluster"]["worker_vm_size"],config["sku_mapping"]["default"])["gpu-count"]', + 'gpu_type':'config["sku_mapping"].get(config["azure_cluster"]["worker_vm_size"],config["sku_mapping"]["default"])["gpu-type"]' } + on_premise_default = {'network_domain':"config['network']['domain']"} + platform_dict = { 'azure_cluster': azdefault, 'onpremise': on_premise_default } + platform_func = { 'azure_cluster': load_az_params_as_default, 'onpremise': on_premise_params } + default_dict, default_func = platform_dict[config["platform_type"]], platform_func[config["platform_type"]] + default_func() + need_val = ['network_domain', 'worker_node_num', 'gpu_count_per_node', 'gpu_type'] + for ky in need_val: + if ky not in config: + config[ky] = eval(default_dict[ky]) def gen_configs(): print "===============================================" @@ -805,6 +842,7 @@ def gen_configs(): add_ssh_key() check_config(config) + gen_platform_wise_config() utils.render_template_directory("./template/etcd", "./deploy/etcd",config) utils.render_template_directory("./template/master", "./deploy/master",config) @@ -950,32 +988,32 @@ def deploy_masters(force = False): deploycmd = """ until curl -q http://127.0.0.1:8080/version/ ; do sleep 5; - echo 'waiting for master...'; + echo 'waiting for master kubernetes service...'; done; until sudo /opt/bin/kubectl apply -f /opt/addons/kube-addons/weave.yaml --validate=false ; do sleep 5; - echo 'waiting for master...'; + echo 'waiting for master kube-addons weave...'; done ; until sudo /opt/bin/kubectl apply -f /opt/addons/kube-addons/dashboard.yaml --validate=false ; do sleep 5; - echo 'waiting for master...'; + echo 'waiting for master kube-addons dashboard...'; done ; until sudo /opt/bin/kubectl apply -f /opt/addons/kube-addons/dns-addon.yml --validate=false ; do sleep 5; - echo 'waiting for master...'; + echo 'waiting for master kube-addons dns-addon...'; done ; until sudo /opt/bin/kubectl apply -f /opt/addons/kube-addons/kube-proxy.json --validate=false ; do sleep 5; - echo 'waiting for master...'; + echo 'waiting for master kube-addons kube-proxy.json...'; done ; until sudo /opt/bin/kubectl create -f /etc/kubernetes/clusterroles/ ; do sleep 5; - echo 'waiting for master...'; + echo 'waiting for master kubernetes clusterroles...'; done ; sudo ln -s /opt/bin/kubectl /usr/bin/; """ @@ -1047,7 +1085,7 @@ def deploy_ETCD_docker(): def deploy_ETCD(): - + # this condition would not be satisfied at least when deploying new clusters if "deploydockerETCD" in config and config["deploydockerETCD"]: deploy_ETCD_docker() return @@ -1110,17 +1148,16 @@ def set_nfs_disk(): """ we assume there's only 1 cluster. """ + load_platform_type() etcd_server_user = config["nfs_user"] nfs_servers = config["nfs_node"] if len(config["nfs_node"]) > 0 else config["etcd_node"] machine_name_2_full = {nm.split('.')[0]:nm for nm in nfs_servers} for srvr_nm, nfs_cnf in config["nfs_disk_mnt"].items(): - nfs_cnf["cloud_config"] = {"vnet_range":config["cloud_config"]["vnet_range"], "samba_range": config["cloud_config"]["samba_range"]} + nfs_cnf["nfs_client_CIDR"] = config["nfs_client_CIDR"] + nfs_cnf["platform_type"] = config["platform_type"] nfs_server = machine_name_2_full[srvr_nm] - # print nfs_cnf, nfs_server - utils.render_template("./template/nfs/nfs_config.sh.template","./deploy/scripts/setup_nfs_server.sh",nfs_cnf) - # os.system("cat ./deploy/scripts/setup_nfs_server.sh") - # print("------------------>nfs_server<------------------------"+nfs_server) - utils.SSH_exec_script( config["ssh_cert"], etcd_server_user, nfs_server, "./deploy/scripts/setup_nfs_server.sh") + utils.render_template("./template/nfs/nfs_config.sh.template", "./scripts/setup_nfs_server.sh", nfs_cnf) + utils.SSH_exec_script( config["ssh_cert"], etcd_server_user, nfs_server, "./scripts/setup_nfs_server.sh") def create_ISO(): imagename = "./deploy/iso/dlworkspace-cluster-deploy-"+config["cluster_name"]+".iso" @@ -1141,18 +1178,10 @@ def create_PXE(): os.system("cp -r ./deploy/cloud-config/* ./deploy/pxe/tftp/usr/share/oem") dockername = push_one_docker("./deploy/pxe", config["dockerprefix"], config["dockertag"], "pxe-coreos", config ) - - #tarname = "deploy/docker/dlworkspace-pxe-%s.tar" % config["cluster_name"] - # os.system("docker save " + dockername + " > " + tarname ) print ("A DL workspace docker is built at: "+ dockername) - # print ("It is also saved as a tar file to: "+ tarname) - - #os.system("docker rmi dlworkspace-pxe:%s" % config["cluster_name"]) def config_ubuntu(): - # print config["ubuntuconfig"] ubuntuConfig = fetch_config( config, ["ubuntuconfig"] ) - # print ubuntuConfig useversion = fetch_dictionary( ubuntuConfig, [ "version" ] ) specificConfig = fetch_dictionary( ubuntuConfig, [ useversion ] ) for key, value in specificConfig.iteritems(): @@ -1166,11 +1195,8 @@ def create_PXE_ubuntu(): utils.render_template_directory("./template/pxe-ubuntu", "./deploy/pxe-ubuntu",config, verbose=verbose ) dockername = push_one_docker("./deploy/pxe-ubuntu", config["dockerprefix"], config["dockertag"], "pxe-ubuntu", config ) - # tarname = "deploy/docker/pxe-ubuntu.tar" - # os.system("docker save " + dockername + " > " + tarname ) print ("A DL workspace docker is built at: "+ dockername) - # print ("It is also saved as a tar file to: "+ tarname) def clean_worker_nodes(): @@ -1256,7 +1282,6 @@ def update_worker_nodes( nargs ): os.system('sed "s/##api_servers##/%s/" ./deploy/kubelet/kubelet.service.template > ./deploy/kubelet/kubelet.service' % config["api_servers"].replace("/","\\/")) os.system('sed "s/##api_servers##/%s/" ./deploy/kubelet/worker-kubeconfig.yaml.template > ./deploy/kubelet/worker-kubeconfig.yaml' % config["api_servers"].replace("/","\\/")) - #urllib.urlretrieve ("http://ccsdatarepo.westus.cloudapp.azure.com/data/kube/kubelet/kubelet", "./deploy/bin/kubelet") get_hyperkube_docker() workerNodes = get_worker_nodes(config["clusterId"], False) @@ -1269,10 +1294,6 @@ def update_worker_nodes( nargs ): os.system("rm ./deploy/kubelet/kubelet.service") os.system("rm ./deploy/kubelet/worker-kubeconfig.yaml") - #if len(config["kubernetes_master_node"]) > 0: - #utils.SSH_exec_cmd(config["ssh_cert"], config["admin_username"], config["kubernetes_master_node"][0], "sudo /opt/bin/kubelet get nodes") - - def update_worker_nodes_in_parallel(nargs): # TODO: Merge with update_worker_nodes utils.render_template_directory("./template/kubelet", "./deploy/kubelet", config) @@ -1335,12 +1356,9 @@ def deploy_restful_API_on_node(ipAddress): utils.SSH_exec_cmd(config["ssh_cert"], config["admin_username"], masterIP, "sudo chown -R %s /etc/kubernetes" % config["admin_username"]) utils.SSH_exec_cmd(config["ssh_cert"], config["admin_username"], masterIP, "sudo cp /etc/kubernetes/certs/client.crt /etc/kubernetes/ssl/apiserver.pem") utils.SSH_exec_cmd(config["ssh_cert"], config["admin_username"], masterIP, "sudo cp /etc/kubernetes/certs/client.key /etc/kubernetes/ssl/apiserver-key.pem") - utils.SSH_exec_cmd(config["ssh_cert"], config["admin_username"], masterIP, "sudo cp /etc/kubernetes/certs/ca.crt /etc/kubernetes/ssl/ca.pem") - # overwrite ~/.kube/config (to be mounted from /etc/kubernetes/restapi-kubeconfig.yaml) + utils.SSH_exec_cmd(config["ssh_cert"], config["admin_username"], masterIP, "sudo cp /etc/kubernetes/certs/ca.crt /etc/kubernetes/ssl/ca.pem") utils.SSH_exec_cmd(config["ssh_cert"], config["admin_username"], masterIP, "sudo cp /home/%s/.kube/config /etc/kubernetes/restapi-kubeconfig.yaml" % config["admin_username"]) - # utils.SSH_exec_cmd(config["ssh_cert"], config["admin_username"], masterIP, "sudo mkdir -p /dlws-data && sudo mount %s /dlws-data ; docker rm -f restfulapi; docker rm -f jobScheduler ; docker pull %s ; docker run -d -p %s:80 --restart always -v /etc/RestfulAPI:/RestfulAPI --name restfulapi %s ; docker run -d -v /dlws-data:/dlws-data -v /etc/RestfulAPI:/RestfulAPI -v /etc/kubernetes/restapi-kubeconfig.yaml:/root/.kube/config -v /etc/kubernetes/ssl:/etc/kubernetes/ssl --restart always --name jobScheduler %s /runScheduler.sh ;" % (config["nfs-server"], dockername,config["restfulapiport"],dockername,dockername)) - print "===============================================" print "restful api is running at: http://%s:%s" % (masterIP,config["restfulapiport"]) config["restapi"] = "http://%s:%s" % (masterIP,config["restfulapiport"]) @@ -1385,7 +1403,6 @@ def deploy_webUI_on_node(ipAddress): utils.sudo_scp(config["ssh_cert"],"./deploy/WebUI/dashboardConfig.json","/etc/WebUI/dashboardConfig.json", sshUser, webUIIP ) utils.render_template("./template/WebUI/Master-Templates.json", "./deploy/WebUI/Master-Templates.json", config) - #os.system("cp --verbose ./template/WebUI/Master-Templates.json ./deploy/WebUI/Master-Templates.json") os.system("cp --verbose ./deploy/WebUI/Master-Templates.json ../WebUI/dotnet/WebPortal/Master-Templates.json") utils.sudo_scp(config["ssh_cert"],"./deploy/WebUI/Master-Templates.json","/etc/WebUI/Master-Templates.json", sshUser, webUIIP ) @@ -1395,8 +1412,6 @@ def deploy_webUI_on_node(ipAddress): utils.sudo_scp(config["ssh_cert"],"./deploy/RestfulAPI/config.yaml","/etc/RestfulAPI/config.yaml", sshUser, webUIIP ) - # utils.SSH_exec_cmd(config["ssh_cert"], sshUser, webUIIP, "docker pull %s ; docker rm -f webui ; docker run -d -p %s:80 -v /etc/WebUI:/WebUI --restart always --name webui %s ;" % (dockername,str(config["webuiport"]),dockername)) - print "===============================================" print "Web UI is running at: http://%s:%s" % (webUIIP,str(config["webuiport"])) @@ -1547,9 +1562,6 @@ def acs_untaint_nodes(): def acs_post_deploy(): # set nodes get_nodes(config["clusterId"]) - #print "Master: {0}".format(config["kubernetes_master_node"]) - #print "Worker: {0}".format(config["worker_node"]) - # Label nodes acs_label_webui() kubernetes_label_nodes("active", [], args.yes) @@ -1565,9 +1577,6 @@ def acs_post_deploy(): # get CNI binary get_cni_binary() # deploy - #print config["master_predeploy"] - #print config["master_filesdeploy"] - #print config["master_postdeploy"] deploy_on_nodes(config["master_predeploy"], config["master_filesdeploy"], config["master_postdeploy"], config["kubernetes_master_node"]) deploy_on_nodes(config["worker_predeploy"], config["worker_filesdeploy"], config["worker_postdeploy"], @@ -1577,7 +1586,6 @@ def acs_post_deploy(): def acs_prepare_machines(): nodes = get_nodes(config["clusterId"]) for node in nodes: - #exec_rmt_cmd(node, "curl -L -sf https://raw.githubusercontent.com/ritazh/acs-k8s-gpu/master/install-nvidia-driver.sh | sudo sh") run_script(node, ["./scripts/prepare_ubuntu.sh"], True) # restart kubelet incase GPU installed utils.SSH_exec_cmd(config["ssh_cert"], config["admin_username"], node, "sudo systemctl restart kubelet.service") @@ -1603,9 +1611,7 @@ def get_mount_fileshares(curNode = None): physicalmountpoint = config["physical-mount-path"] storagemountpoint = config["storage-mount-path"] mountshares = {} - # print(config["mountpoints"]) for k,v in config["mountpoints"].iteritems(): - # print("<<<<<<<<<<<<<<<<<<=0 else "None" drivename = drivesInfo[i*2+1] + drivesInfo[i*2+2][:pos_semi] driveString = drivesInfo[i*2+2][pos_semi+1:] - #print "Drive Name: " + drivename - #print "Drive String: " + driveString if not (prog.match(drivename) is None): - # print driveString capacity = parse_capacity_in_GB( driveString ) lines = driveString.splitlines() @@ -2037,7 +2030,6 @@ def get_partions_of_node(node, prog): if capacity > 0 and len(parted)==0: parted[0] = capacity - # print drivename + " Capacity: " + str(capacity) + " GB, " + str(parted) deviceinfo["modelName"] = modelName deviceinfo["name"] = drivename deviceinfo["capacity"] = capacity @@ -2078,8 +2070,6 @@ def calculate_partitions( capacity, partitionConfig): npart = len(partitionConfig) partitionSize = [0.0]*npart sumProportion = 0.0 - #print "Beginning Capacity " + str(capacity) - #print partitionSize for i in range(npart): if partitionConfig[i] < 0.0: if capacity > 0.0: @@ -2089,8 +2079,6 @@ def calculate_partitions( capacity, partitionConfig): partitionSize[i] = 0.0 else: sumProportion += partitionConfig[i] - #print "Ending Capacity " + str(capacity) - #print partitionSize for i in range(npart): if partitionConfig[i] >= 0.0: if sumProportion == 0.0: @@ -2112,12 +2100,10 @@ def repartition_nodes(nodes, nodesinfo, partitionConfig): removedPartitions = [] for part in existingPartitions: removedPartitions.append(part) - # print removedPartitions removedPartitions.sort(reverse=True) for part in removedPartitions: cmd += "sudo parted -s " + deviceinfo["name"] + " rm " + str(part) + "; " partitionSize = calculate_partitions( deviceinfo["capacity"], partitionConfig) - # print partitionSize totalPartitionSize = sum( partitionSize ) start = 0 npart = len(partitionSize) @@ -2170,7 +2156,6 @@ def regmatch_glusterFS( glusterFSargs ): regexp = "/dev/[s|h]d[^a]"+str(glusterFSargs) else: regexp = glusterFSargs - #print regexp regmatch = re.compile(regexp) return regmatch @@ -2180,11 +2165,9 @@ def find_matched_volume( alldeviceinfo, regmatch ): deviceinfo = alldeviceinfo[bdevice] for part in deviceinfo["parted"]: bdevicename = deviceinfo["name"] + str(part) - # print bdevicename match = regmatch.search(bdevicename) if not ( match is None ): deviceList[match.group(0)] = deviceinfo["parted"][part] - #print deviceList; return deviceList # Form a configuration file for operation of glusterfs @@ -2244,8 +2227,6 @@ def stop_glusterFS_endpoint( ): def format_mount_partition_volume( nodes, deviceSelect, format=True ): nodesinfo = get_partitions(nodes, deviceSelect ) - #if verbose: - # print nodesinfo reg = re.compile( deviceSelect ) for node in nodesinfo: alldeviceinfo = nodesinfo[node] @@ -2269,8 +2250,6 @@ def format_mount_partition_volume( nodes, deviceSelect, format=True ): def unmount_partition_volume( nodes, deviceSelect ): nodesinfo = get_partitions(nodes, deviceSelect ) - #if verbose: - # print nodesinfo reg = re.compile( deviceSelect ) for node in nodesinfo: alldeviceinfo = nodesinfo[node] @@ -2327,8 +2306,6 @@ def hdfs_config( nodes, deviceSelect): if verbose: print "HDFS Configuration: %s " % hdfsconfig nodesinfo = get_partitions(nodes, deviceSelect ) - #if verbose: - # print nodesinfo reg = re.compile( deviceSelect ) for node in nodesinfo: alldeviceinfo = nodesinfo[node] @@ -2369,13 +2346,10 @@ def create_glusterFS_volume( nodesinfo, glusterFSargs ): utils.render_template_directory("./storage/glusterFS", "./deploy/storage/glusterFS", config, verbose) config_glusterFS = write_glusterFS_configuration( nodesinfo, glusterFSargs ) regmatch = regmatch_glusterFS(glusterFSargs) - # print nodesinfo for node in nodesinfo: alldeviceinfo = nodesinfo[node] volumes = find_matched_volume( alldeviceinfo, regmatch ) print "................. Node %s ................." % node - # print volumes - # print alldeviceinfo remotecmd = "" remotecmd += "sudo modprobe dm_thin_pool; " remotecmd += "sudo apt-get install -y thin-provisioning-tools; " @@ -2463,7 +2437,6 @@ def remove_glusterFS_volume( nodesinfo, glusterFSargs ): break; for volume in volumes: remotecmd += "sudo pvremove -y %s; " % volume - # print remotecmd utils.SSH_exec_cmd( config["ssh_cert"], config["admin_username"], node, remotecmd ) def display_glusterFS_volume( nodesinfo, glusterFSargs ): @@ -2573,7 +2546,6 @@ def create_mac_dictionary( machineEntry ): add_mac_dictionary(dic, name, mac) else: print "Error, machine " + name + ", mac entry is of unknown type: " + str(macs) - #print dic return dic def set_host_names_by_lookup(): @@ -2594,7 +2566,6 @@ def set_host_names_by_lookup(): if len(namelist) > 1: print "Error, machine with mac "+str(macs)+" has more than 1 name entries " +str(namelist) elif len(namelist) == 0: - # print "Warning, cannot find an entry for machine with mac "+str(macs) hostname = node.split(".")[0] cmd = "sudo hostnamectl set-hostname " + hostname print "Set hostname of node " + node + " to " + hostname @@ -2699,7 +2670,6 @@ def kubernetes_get_node_name(node): if len(domain) < 2: kube_node_name = node elif domain in node: - # print "Remove domain %d" % len(domain) kube_node_name = node[:-(len(domain))] else: kube_node_name = node @@ -2759,7 +2729,6 @@ def get_service_name(service_config_file): except: return None f.close() - # print service_config name = fetch_dictionary(service_config, ["metadata","name"]) if not name is None: return name @@ -2772,13 +2741,11 @@ def get_service_name(service_config_file): def get_service_yaml( use_service ): servicedic = get_all_services() - #print servicedic newentries = {} for service in servicedic: servicename = get_service_name(servicedic[service]) newentries[servicename] = servicedic[service] servicedic.update(newentries) - #print servicedic fname = servicedic[use_service] return fname @@ -2826,17 +2793,12 @@ def get_node_lists_for_service(service): # The kubernete node will be marked accordingly to facilitate the running of daemon service. def kubernetes_label_nodes( verb, servicelists, force ): servicedic = get_all_services() - # print servicedic get_nodes(config["clusterId"]) labels = fetch_config(config, ["kubelabels"]) - # print labels for service, serviceinfo in servicedic.iteritems(): servicename = get_service_name(servicedic[service]) - # print "Service %s - %s" %(service, servicename ) if (not service in labels) and (not servicename in labels) and "default" in labels and (not servicename is None): labels[servicename] = labels["default"] - # print servicelists - # print labels if len(servicelists)==0: servicelists = labels else: @@ -2865,7 +2827,7 @@ def kubernetes_label_nodes( verb, servicelists, force ): # Label kubernete nodes with gpu types.skip for CPU workers def kubernetes_label_GpuTypes(): for nodename,nodeInfo in config["machines"].items(): - if nodeInfo["role"] == "worker" and nodeInfo["gpu-type"] != "NULL": + if nodeInfo["role"] == "worker": kubernetes_label_node("--overwrite", nodename, "gpuType="+nodeInfo["gpu-type"]) @@ -2925,7 +2887,6 @@ def stop_one_kube_service(fname): def start_kube_service( servicename ): fname = get_service_yaml( servicename ) - # print "start service %s with %s" % (servicename, fname) dirname = os.path.dirname(fname) if os.path.exists(os.path.join(dirname,"launch_order")) and "/" not in servicename: with open(os.path.join(dirname,"launch_order"),'r') as f: @@ -3000,7 +2961,6 @@ def check_buildable_images(nargs): def run_docker_image( imagename, native = False, sudo = False ): dockerConfig = fetch_config( config, ["docker-run", imagename ]) full_dockerimage_name, local_dockerimage_name = build_docker_fullname( config, imagename ) - # print full_dockerimage_name matches = find_dockers( full_dockerimage_name ) if len( matches ) == 0: matches = find_dockers( local_dockerimage_name ) @@ -3027,7 +2987,6 @@ def gen_warm_up_cluster_script(): def run_command( args, command, nargs, parser ): # If necessary, show parsed arguments. - # print args global discoverserver global homeinserver global verbose @@ -3063,7 +3022,6 @@ def run_command( args, command, nargs, parser ): config_file = os.path.join(dirpath,"config.yaml") - # print "Config file: " + config_file if not os.path.exists(config_file): parser.print_help() print "ERROR: config.yaml does not exist!" @@ -3072,7 +3030,6 @@ def run_command( args, command, nargs, parser ): f = open(config_file) merge_config(config, yaml.load(f)) f.close() - # print config if os.path.exists("./deploy/clusterID.yml"): f = open("./deploy/clusterID.yml") tmp = yaml.load(f) @@ -3145,7 +3102,6 @@ def run_command( args, command, nargs, parser ): elif command == "connect": check_master_ETCD_status() role2connect = nargs[0] - # print(role2connect, config["ssh_cert"], config["admin_username"]) if len(nargs) < 1 or role2connect == "master": nodes = config["kubernetes_master_node"] elif role2connect in ["etcd", "worker", "nfs", "samba"]: @@ -3509,7 +3465,6 @@ def run_command( args, command, nargs, parser ): elif command == "runscriptonall" and len(nargs)>=1: nodes = get_nodes(config["clusterId"]) - # print(nodes) run_script_on_all(nodes, nargs, sudo = args.sudo ) elif command == "runscriptonallinparallel" and len(nargs)>=1: @@ -3526,8 +3481,7 @@ def run_command( args, command, nargs, parser ): else: break nodes = get_nodes_by_roles(nodeset) - # print(nodes) - run_script_on_all(nodes, nargs[scripts_start:], sudo = args.sudo ) + run_script_on_all_in_parallel(nodes, nargs[scripts_start:], sudo = args.sudo ) elif command == "runscriptonrandmaster" and len(nargs)>=1: run_script_on_rand_master(nargs, args) @@ -3680,7 +3634,6 @@ def run_command( args, command, nargs, parser ): servicenames = [] for service in allservices: servicenames.append(service) - # print servicenames generate_hdfs_containermounts() configuration( config, verbose ) if nargs[0] == "start": @@ -3737,6 +3690,7 @@ def run_command( args, command, nargs, parser ): kubernetes_label_GpuTypes() elif command == "genscripts": + gen_platform_wise_config() gen_dns_config_script() gen_pass_secret_script() gen_warm_up_cluster_script() @@ -3929,32 +3883,32 @@ def upgrade_masters(hypekube_url="gcr.io/google-containers/hyperkube:v1.15.2"): deploy_cmd = """ until curl -q http://127.0.0.1:8080/version/ ; do sleep 5; - echo 'waiting for master...'; + echo 'waiting for master kubernetes service...'; done; until sudo /opt/bin/kubectl apply -f /opt/addons/kube-addons/weave.yaml --validate=false ; do sleep 5; - echo 'waiting for master...'; + echo 'waiting for master kube-addons weave...'; done ; until sudo /opt/bin/kubectl apply -f /opt/addons/kube-addons/dashboard.yaml --validate=false ; do sleep 5; - echo 'waiting for master...'; + echo 'waiting for master kube-addons dashboard...'; done ; until sudo /opt/bin/kubectl apply -f /opt/addons/kube-addons/dns-addon.yml --validate=false ; do sleep 5; - echo 'waiting for master...'; + echo 'waiting for master kube-addons dns-addon...'; done ; until sudo /opt/bin/kubectl apply -f /opt/addons/kube-addons/kube-proxy.json --validate=false ; do sleep 5; - echo 'waiting for master...'; + echo 'waiting for master kube-addons kube-proxy...'; done ; until sudo /opt/bin/kubectl apply -f /etc/kubernetes/clusterroles/ ; do sleep 5; - echo 'waiting for master...'; + echo 'waiting for master kubernetes clusterroles...'; done ; """ utils.SSH_exec_cmd(config["ssh_cert"], kubernetes_master_user, kubernetes_masters[0], deploy_cmd , False) @@ -3962,7 +3916,6 @@ def upgrade_masters(hypekube_url="gcr.io/google-containers/hyperkube:v1.15.2"): if __name__ == '__main__': # the program always run at the current directory. dirpath = os.path.dirname(os.path.abspath(os.path.realpath(__file__))) - # print "Directory: " + dirpath os.chdir(dirpath) parser = argparse.ArgumentParser( prog='deploy.py', formatter_class=argparse.RawDescriptionHelpFormatter, diff --git a/src/ClusterBootstrap/params.py b/src/ClusterBootstrap/params.py index f0451374b..f843a2499 100755 --- a/src/ClusterBootstrap/params.py +++ b/src/ClusterBootstrap/params.py @@ -1,9 +1,12 @@ # These are the default configuration parameter default_config_parameters = { + "supported_platform": ["azure_cluster", "onpremise"], "allroles": {"infra", "infrastructure", "worker", "nfs", "sql", "dev"}, # Kubernetes setting "service_cluster_ip_range": "10.3.0.0/16", "pod_ip_range": "10.2.0.0/16", + "ssl_localhost_ips": [ "127.0.0.1", "127.0.1.1" ], + "dns_server": {"azure_cluster": '8.8.8.8', 'onpremise':'10.50.10.50'}, # Home in server, to aide Kubernete setup "homeinserver": "http://dlws-clusterportal.westus.cloudapp.azure.com:5000", "cloud_influxdb_node": "dlws-influxdb.westus.cloudapp.azure.com", @@ -626,6 +629,11 @@ }, }, + "nfs_client_CIDR": { + "node_range": ["192.168.0.0/16"], + "samba_range": [], + }, + "nfs_mnt_setup": [ { "mnt_point": {"rootshare":{"curphysicalmountpoint":"/mntdlws/infranfs","filesharename":"/infradata/share","mountpoints":""}}} @@ -634,7 +642,6 @@ "VC-Default":["*"], }, "registry_credential": {}, - "domain_name": "redmond.corp.microsoft.com", "priority": "regular", "sku_mapping": { "Standard_ND6s":{"gpu-type": "P40","gpu-count": 1}, @@ -671,7 +678,7 @@ "genscripts", "runscriptonroles infra worker ./scripts/dns.sh", "-y deploy", - "-y updateworker", + "-y updateworkerinparallel", "-y kubernetes labels", "-y gpulabel", "kubernetes start nvidia-device-plugin", diff --git a/src/ClusterBootstrap/scripts/setup_nfs_server.sh b/src/ClusterBootstrap/scripts/setup_nfs_server.sh deleted file mode 100755 index 48cc90586..000000000 --- a/src/ClusterBootstrap/scripts/setup_nfs_server.sh +++ /dev/null @@ -1,10 +0,0 @@ -sudo apt-get update -sudo apt-get install -y nfs-kernel-server - -sudo mkdir -p /data/share -sudo chown nobody:nogroup /data/share - -echo "/data/share {{cnf["cloud_config"]["vnet_range"]}}(rw,sync,no_subtree_check,no_root_squash)" | sudo tee /etc/exports -sudo systemctl restart nfs-kernel-server - - diff --git a/src/ClusterBootstrap/bash_step_by_step_deploy.sh b/src/ClusterBootstrap/step_by_step.sh similarity index 62% rename from src/ClusterBootstrap/bash_step_by_step_deploy.sh rename to src/ClusterBootstrap/step_by_step.sh index a36f4af06..0a2e5443c 100755 --- a/src/ClusterBootstrap/bash_step_by_step_deploy.sh +++ b/src/ClusterBootstrap/step_by_step.sh @@ -1,7 +1,12 @@ -./deploy.py -y build -./az_tools.py create -./az_tools.py genconfig -./deploy.py runscriptonroles infra worker ./scripts/prepare_vm_disk.sh +platform=$1 +if [ $platform == "azure" ]; then + ./deploy.py -y build + ./az_tools.py create + ./az_tools.py genconfig + ./deploy.py runscriptonroles infra worker ./scripts/prepare_vm_disk.sh +elif [ $platform == "onpremise" ]; then + echo "make sure that you've run ./deploy.py build and set the correct ssh keys in deploy/sshkey before run this script" +fi ./deploy.py nfs-server create ./deploy.py runscriptonroles infra worker ./scripts/prepare_ubuntu.sh ./deploy.py runscriptonroles infra worker ./scripts/disable_kernel_auto_updates.sh @@ -9,7 +14,7 @@ ./deploy.py genscripts ./deploy.py runscriptonroles infra worker ./scripts/dns.sh ./deploy.py -y deploy -./deploy.py -y updateworker +./deploy.py -y updateworkerinparallel ./deploy.py -y kubernetes labels ./deploy.py -y gpulabel ./deploy.py kubernetes start nvidia-device-plugin @@ -17,6 +22,10 @@ ./deploy.py webui ./deploy.py docker push restfulapi ./deploy.py docker push webui +./deploy.py docker push watchdog +./deploy.py docker push gpu-reporter +./deploy.py docker push reaper +./deploy.py docker push job-exporter ./deploy.py mount ./deploy.py kubernetes start mysql ./deploy.py kubernetes start jobmanager diff --git a/src/ClusterBootstrap/template/RestfulAPI/config.yaml b/src/ClusterBootstrap/template/RestfulAPI/config.yaml index 92f2575f5..26c47b435 100755 --- a/src/ClusterBootstrap/template/RestfulAPI/config.yaml +++ b/src/ClusterBootstrap/template/RestfulAPI/config.yaml @@ -33,10 +33,9 @@ webportal_node: {{cnf["webportal_node"]}} datasource : {{cnf["datasource"]}} kube_custom_scheduler: {{cnf["kube_custom_scheduler"]}} WinbindServers: {{cnf["WinbindServers"]}} -azure_cluster : - worker_node_num : {{cnf["azure_cluster"][cnf["cluster_name"]]["worker_node_num"]}} - worker_vm_size : {{cnf["azure_cluster"][cnf["cluster_name"]]["worker_vm_size"]}} -sku_mapping: {{cnf["sku_mapping"]}} +gpu_count_per_node: {{cnf["gpu_count_per_node"]}} +worker_node_num: {{cnf["worker_node_num"]}} +gpu_type: {{cnf["gpu_type"]}} defalt_virtual_cluster_name: {{cnf["defalt_virtual_cluster_name"]}} {% if cnf["job-manager"] %} job-manager: diff --git a/src/ClusterBootstrap/template/dns/cname_and_private_ips.sh.template b/src/ClusterBootstrap/template/dns/cname_and_private_ips.sh.template new file mode 100755 index 000000000..09062eb86 --- /dev/null +++ b/src/ClusterBootstrap/template/dns/cname_and_private_ips.sh.template @@ -0,0 +1,40 @@ +# try to use immediately connectable IP/dns name, since the DNS records take time to broadcast. For workers, the Azure DNS name in fqdns_sorted is preferred, for infra, they have fixed public IPs + +# sort the worker machine names, append their pairs to dns_add_commands + +az vmss list-instance-public-ips --name {{cnf["cluster_name"]}}-worker --resource-group {{cnf["cluster_name"]}}ResGrp | grep "fqdn" | awk '{print $2}' | sed 's/[",]//g' > fqdns +first_line=$(head -n 1 fqdns) +cat fqdns | cut -d'.' -f1 | cut -d'm' -f2 | sort -n | awk '{printf("vm%s.%s\n", $1, DOMAIN_SUFFIX)}' DOMAIN_SUFFIX=$(echo ${first_line#*.}) > fqdns_sorted +# get host name (string, like lowpr68f3000000) of each worker node +rm -rf hostnames && for fqdn in `cat fqdns_sorted`; do ssh -oStrictHostKeyChecking=no -i ./deploy/sshkey/id_rsa core@${fqdn} hostname >> hostnames; done +paste -d' ' hostnames fqdns_sorted > hostname_fqdn_map +cat hostname_fqdn_map | awk '{ printf("add %s.{{cnf["domain_name"]}} CNAME %s DLTSPAdmin\n", $1, $2) }' > dns_add_commands + +# get infra machine names and public ips, append their pairs to dns_add_commands + +# use [?contains(virtualMachine.name,'infra')] in query if want only infra +# but since we can not ping redmond domain name when /etc/resolv.conf is 8.8.8.8, we need to add both infra and nfs DNS to hosts +az vm list-ip-addresses -g {{cnf["cluster_name"]}}ResGrp --query "[].{Name:virtualMachine.name,pubIP:virtualMachine.network.publicIpAddresses[0].ipAddress, privIP:virtualMachine.network.privateIpAddresses[0]}" -o table | tail -n +3 > infra_ips +cat infra_ips | awk '{ printf("add %s.{{cnf["domain_name"]}} A %s DLTSPAdmin\n", $1, $2) }' >> dns_add_commands + +# ADD HOSTS to all nodes + +# get private IP of worker nodes +# this would not work: cannot guarantee order:az vmss nic list -g lowpriResGrp --vmss-name lowpri-worker --query [].{ip:ipConfigurations[0].privateIpAddress} -o tsv > private_ips + +rm -rf private_ips && for fqdn in `cat fqdns_sorted`; do ssh -oStrictHostKeyChecking=no -i ./deploy/sshkey/id_rsa core@${fqdn} hostname -I | awk '{print $1}' >> private_ips;done +# add worker triplets +paste -d' ' private_ips hostnames > worksheet_hosts +rm -rf hosts && cat worksheet_hosts | awk '{ printf("%s %s.{{cnf["domain_name"]}} %s\n", $1, $2, $2) }' > hosts +# add infra triplets +cat infra_ips | awk '{ printf("%s %s.{{cnf["domain_name"]}} %s\n", $3, $1, $1) }' >> hosts +# set hosts file on workers +workernum=$(wc -l < fqdns_sorted) +parallel-scp -t 0 -p $workernum -h fqdns_sorted -x "-oStrictHostKeyChecking=no -oUserKnownHostsFile=/dev/null -i ./deploy/sshkey/id_rsa" -l {{cnf["admin_username"]}} hosts /home/{{cnf["admin_username"]}} +parallel-ssh -o pssh-log/stdout -e pssh-log/stderr -t 0 -p $workernum -h fqdns_sorted -x "-oStrictHostKeyChecking=no -oUserKnownHostsFile=/dev/null -i deploy/sshkey/id_rsa" -l {{cnf["admin_username"]}} "sudo chmod 666 /etc/hosts && cat /home/{{cnf["admin_username"]}}/hosts >> /etc/hosts && sudo chmod 644 /etc/hosts" + +# set hosts file on infras +infranum=$(wc -l < infra_ips) +cat infra_ips | awk '{print $2}' > infra_ipv4 +parallel-scp -t 0 -p $infranum -h infra_ipv4 -x "-oStrictHostKeyChecking=no -oUserKnownHostsFile=/dev/null -i ./deploy/sshkey/id_rsa" -l {{cnf["admin_username"]}} hosts /home/{{cnf["admin_username"]}} +parallel-ssh -o pssh-log/stdout -e pssh-log/stderr -t 0 -p $infranum -h infra_ipv4 -x "-oStrictHostKeyChecking=no -oUserKnownHostsFile=/dev/null -i deploy/sshkey/id_rsa" -l {{cnf["admin_username"]}} "sudo chmod 666 /etc/hosts && cat /home/{{cnf["admin_username"]}}/hosts >> /etc/hosts && sudo chmod 644 /etc/hosts" \ No newline at end of file diff --git a/src/ClusterBootstrap/template/dns/dns.sh.template b/src/ClusterBootstrap/template/dns/dns.sh.template index f0d910661..06600d739 100755 --- a/src/ClusterBootstrap/template/dns/dns.sh.template +++ b/src/ClusterBootstrap/template/dns/dns.sh.template @@ -2,6 +2,6 @@ sudo systemctl disable systemd-resolved.service sudo systemctl stop systemd-resolved # echo "dns=default" | sudo tee -a /etc/NetworkManager/NetworkManager.conf sudo rm /etc/resolv.conf -echo "nameserver 8.8.8.8" | sudo tee -a /etc/resolv.conf -echo 'search {{cnf["azure_cluster"][cnf["cluster_name"]]["azure_location"]}}.cloudapp.azure.com' | sudo tee -a /etc/resolv.conf +echo "nameserver {{cnf["dns_server"][cnf["platform_type"]]}}" | sudo tee -a /etc/resolv.conf +echo 'search {{cnf["network_domain"]}}' | sudo tee -a /etc/resolv.conf # sudo service network-manager restart \ No newline at end of file diff --git a/src/ClusterBootstrap/template/etcd/docker_etcd.sh b/src/ClusterBootstrap/template/etcd/docker_etcd.sh index 9097f69eb..04a2b1888 100755 --- a/src/ClusterBootstrap/template/etcd/docker_etcd.sh +++ b/src/ClusterBootstrap/template/etcd/docker_etcd.sh @@ -6,10 +6,16 @@ docker run -d -v /usr/share/ca-certificates/:/etc/ssl/certs -v /var/etcd:/var/et --restart always \ --name etcd dlws/etcd:3.1.10 /usr/local/bin/etcd \ -name $HOSTNAME \ - -advertise-client-urls http://{{cnf["etcd_node_ip"]}}:{{cnf["etcd3port1"]}} \ - -listen-client-urls http://0.0.0.0:{{cnf["etcd3port1"]}} \ - -initial-advertise-peer-urls http://{{cnf["etcd_node_ip"]}}:{{cnf["etcd3portserver"]}} \ - -listen-peer-urls http://0.0.0.0:2380 \ + {% if cnf["etcd_node_num"] == 1 %}-initial-cluster {{cnf["hostname"]}}=https://{{cnf["etcd_private_ip"]}}:{{cnf["etcd3portserver"]}} \ + -initial-cluster-state new \ + -initial-cluster-token {{cnf["clusterId"]}} \ + -advertise-client-urls https://{{cnf["etcd_private_ip"]}}:{{cnf["etcd3port1"]}} \ + -listen-client-urls https://0.0.0.0:{{cnf["etcd3port1"]}} \ + -initial-advertise-peer-urls https://{{cnf["etcd_private_ip"]}}:{{cnf["etcd3portserver"]}} \ + -listen-peer-urls https://0.0.0.0:{{cnf["etcd3portserver"]}} \ + {% else %}-advertise-client-urls https://{{cnf["etcd_node_ip"]}}:{{cnf["etcd3port1"]}} \ + -listen-client-urls https://0.0.0.0:{{cnf["etcd3port1"]}} \ + -initial-advertise-peer-urls https://{{cnf["etcd_node_ip"]}}:{{cnf["etcd3portserver"]}} \ + -listen-peer-urls https://0.0.0.0:{{cnf["etcd3portserver"]}} \ -discovery {{cnf["discovery_url"]}} \ - -data-dir /var/etcd/data - + {% endif %}-data-dir /var/etcd/data \ \ No newline at end of file diff --git a/src/ClusterBootstrap/template/etcd/docker_etcd_ssl.sh b/src/ClusterBootstrap/template/etcd/docker_etcd_ssl.sh index 285866890..1d640dd8b 100755 --- a/src/ClusterBootstrap/template/etcd/docker_etcd_ssl.sh +++ b/src/ClusterBootstrap/template/etcd/docker_etcd_ssl.sh @@ -9,12 +9,19 @@ docker run -d -v /usr/share/ca-certificates/mozilla:/etc/ssl/certs -v /etc/etcd/ --restart always \ --name philly-etcd3 dlws/etcd:3.1.10 /usr/local/bin/etcd \ -name $HOSTNAME \ - -advertise-client-urls https://{{cnf["etcd_node_ip"]}}:{{cnf["etcd3port1"]}} \ + {% if cnf["etcd_node_num"] == 1 %}-initial-cluster {{cnf["hostname"]}}=https://{{cnf["etcd_private_ip"]}}:{{cnf["etcd3portserver"]}} \ + -initial-cluster-state new \ + -initial-cluster-token {{cnf["clusterId"]}} \ + -advertise-client-urls https://{{cnf["etcd_private_ip"]}}:{{cnf["etcd3port1"]}} \ + -listen-client-urls https://0.0.0.0:{{cnf["etcd3port1"]}} \ + -initial-advertise-peer-urls https://{{cnf["etcd_private_ip"]}}:{{cnf["etcd3portserver"]}} \ + -listen-peer-urls https://0.0.0.0:{{cnf["etcd3portserver"]}} \ + {% else %}-advertise-client-urls https://{{cnf["etcd_node_ip"]}}:{{cnf["etcd3port1"]}} \ -listen-client-urls https://0.0.0.0:{{cnf["etcd3port1"]}} \ -initial-advertise-peer-urls https://{{cnf["etcd_node_ip"]}}:{{cnf["etcd3portserver"]}} \ -listen-peer-urls https://0.0.0.0:{{cnf["etcd3portserver"]}} \ -discovery {{cnf["discovery_url"]}} \ - -data-dir /var/etcd/data \ + {% endif %}-data-dir /var/etcd/data \ -client-cert-auth \ -trusted-ca-file=/etc/etcd/ssl/ca.pem \ -cert-file=/etc/etcd/ssl/etcd.pem \ @@ -22,4 +29,4 @@ docker run -d -v /usr/share/ca-certificates/mozilla:/etc/ssl/certs -v /etc/etcd/ -peer-client-cert-auth \ -peer-trusted-ca-file=/etc/etcd/ssl/ca.pem \ -peer-cert-file=/etc/etcd/ssl/etcd.pem \ - -peer-key-file=/etc/etcd/ssl/etcd-key.pem + -peer-key-file=/etc/etcd/ssl/etcd-key.pem \ No newline at end of file diff --git a/src/ClusterBootstrap/template/etcd/etcd3.service b/src/ClusterBootstrap/template/etcd/etcd3.service index 07aee7caf..5784ebe40 100755 --- a/src/ClusterBootstrap/template/etcd/etcd3.service +++ b/src/ClusterBootstrap/template/etcd/etcd3.service @@ -1,28 +1,35 @@ -[Service] -ExecStart=/usr/bin/docker run -v /usr/share/ca-certificates/mozilla:/etc/ssl/certs -v /etc/etcd/ssl:/etc/etcd/ssl -v /var/etcd:/var/etcd -p {{cnf["etcd3port1"]}}:{{cnf["etcd3port1"]}} -p {{cnf["etcd3portserver"]}}:{{cnf["etcd3portserver"]}} \ - --net=host \ - --name etcd3 {{cnf["dockers"]["container"]["etcd"]["fullname"]}} /usr/local/bin/etcd \ - -name {{cnf["hostname"]}} \ - -advertise-client-urls https://{{cnf["etcd_node_ip"]}}:{{cnf["etcd3port1"]}} \ - -listen-client-urls https://0.0.0.0:{{cnf["etcd3port1"]}} \ - -initial-advertise-peer-urls https://{{cnf["etcd_node_ip"]}}:{{cnf["etcd3portserver"]}} \ - -listen-peer-urls https://0.0.0.0:{{cnf["etcd3portserver"]}} \ - -discovery {{cnf["discovery_url"]}} \ - -data-dir /var/etcd/data \ - -client-cert-auth \ - -trusted-ca-file=/etc/etcd/ssl/ca.pem \ - -cert-file=/etc/etcd/ssl/etcd.pem \ - -key-file=/etc/etcd/ssl/etcd-key.pem \ - -peer-client-cert-auth \ - -peer-trusted-ca-file=/etc/etcd/ssl/ca.pem \ - -peer-cert-file=/etc/etcd/ssl/etcd.pem \ - -peer-key-file=/etc/etcd/ssl/etcd-key.pem - - - - -Restart=always -RestartSec=5 - -[Install] +[Service] +ExecStart=/usr/bin/docker run -v /usr/share/ca-certificates/mozilla:/etc/ssl/certs -v /etc/etcd/ssl:/etc/etcd/ssl -v /var/etcd:/var/etcd -p {{cnf["etcd3port1"]}}:{{cnf["etcd3port1"]}} -p {{cnf["etcd3portserver"]}}:{{cnf["etcd3portserver"]}} \ + --net=host \ + --name etcd3 {{cnf["dockers"]["container"]["etcd"]["fullname"]}} /usr/local/bin/etcd \ + -name {{cnf["hostname"]}} \ + {% if cnf["etcd_node_num"] == 1 %}-initial-cluster {{cnf["hostname"]}}=https://{{cnf["etcd_private_ip"]}}:{{cnf["etcd3portserver"]}} \ + -initial-cluster-state new \ + -initial-cluster-token {{cnf["clusterId"]}} \ + -advertise-client-urls https://{{cnf["etcd_private_ip"]}}:{{cnf["etcd3port1"]}} \ + -listen-client-urls https://0.0.0.0:{{cnf["etcd3port1"]}} \ + -initial-advertise-peer-urls https://{{cnf["etcd_private_ip"]}}:{{cnf["etcd3portserver"]}} \ + -listen-peer-urls https://0.0.0.0:{{cnf["etcd3portserver"]}} \ + {% else %}-advertise-client-urls https://{{cnf["etcd_node_ip"]}}:{{cnf["etcd3port1"]}} \ + -listen-client-urls https://0.0.0.0:{{cnf["etcd3port1"]}} \ + -initial-advertise-peer-urls https://{{cnf["etcd_node_ip"]}}:{{cnf["etcd3portserver"]}} \ + -listen-peer-urls https://0.0.0.0:{{cnf["etcd3portserver"]}} \ + -discovery {{cnf["discovery_url"]}} \ + {% endif %}-data-dir /var/etcd/data \ + -client-cert-auth \ + -trusted-ca-file=/etc/etcd/ssl/ca.pem \ + -cert-file=/etc/etcd/ssl/etcd.pem \ + -key-file=/etc/etcd/ssl/etcd-key.pem \ + -peer-client-cert-auth \ + -peer-trusted-ca-file=/etc/etcd/ssl/ca.pem \ + -peer-cert-file=/etc/etcd/ssl/etcd.pem \ + -peer-key-file=/etc/etcd/ssl/etcd-key.pem + + + + +Restart=always +RestartSec=5 + +[Install] WantedBy=multi-user.target \ No newline at end of file diff --git a/src/ClusterBootstrap/template/kube-addons/weave.yaml b/src/ClusterBootstrap/template/kube-addons/weave.yaml index 9ef1d9021..929cd5bb8 100755 --- a/src/ClusterBootstrap/template/kube-addons/weave.yaml +++ b/src/ClusterBootstrap/template/kube-addons/weave.yaml @@ -17,7 +17,7 @@ items: labels: name: weave-net namespace: kube-system - - apiVersion: rbac.authorization.k8s.io/v1beta1 + - apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: name: weave-net @@ -58,7 +58,7 @@ items: verbs: - patch - update - - apiVersion: rbac.authorization.k8s.io/v1beta1 + - apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: name: weave-net @@ -81,7 +81,7 @@ items: - kind: ServiceAccount name: weave-net namespace: kube-system - - apiVersion: rbac.authorization.k8s.io/v1beta1 + - apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: name: weave-net diff --git a/src/ClusterBootstrap/template/kubelet/daemon.json b/src/ClusterBootstrap/template/kubelet/daemon.json index 78fb7af09..6a9bbeb7c 100755 --- a/src/ClusterBootstrap/template/kubelet/daemon.json +++ b/src/ClusterBootstrap/template/kubelet/daemon.json @@ -1,4 +1,4 @@ -{% if cnf["azure_cluster"][cnf["cluster_name"]]["worker_vm_size"] in cnf["sku_mapping"] %} +{% if cnf["gpu_type"] != "None" %} { "default-runtime": "nvidia", "runtimes": { @@ -10,5 +10,4 @@ } {% else %} {} -{% endif %} - +{% endif %} \ No newline at end of file diff --git a/src/ClusterBootstrap/template/nfs/nfs_config.sh.template b/src/ClusterBootstrap/template/nfs/nfs_config.sh.template index fa4ba2b17..64c7d4555 100755 --- a/src/ClusterBootstrap/template/nfs/nfs_config.sh.template +++ b/src/ClusterBootstrap/template/nfs/nfs_config.sh.template @@ -1,3 +1,4 @@ +{% if cnf["platform_type"] == "azure_cluster" %} sudo parted -l 2>&1 >/dev/null | awk -F': ' '{print $2}' > unlabeled_disk_file # Partition for disk in `cat unlabeled_disk_file`; do printf "n\n1\n\n\n8e00\nw\nY\n" | sudo gdisk ${disk}; done @@ -9,6 +10,7 @@ sudo vgcreate dlts-data-lvm ${pv_list} sudo lvcreate -l 100%FREE -n dlts-data-lvm-vol1 dlts-data-lvm sudo mkfs.ext4 /dev/mapper/dlts--data--lvm-dlts--data--lvm--vol1 echo "UUID=$(sudo blkid | grep dlts | sed -n 's/.*UUID=\"\(.*\)\" TYPE.*/\1/p') {{cnf["path"]}} ext4 defaults,discard 0 0" | sudo tee -a /etc/fstab +{% endif %} sudo mkdir -p {{cnf["path"]}} sudo mount {{cnf["path"]}} @@ -20,8 +22,12 @@ sudo apt-get install -y nfs-kernel-server sudo mkdir -p {{ fileshare }} sudo chmod -R 777 {{ fileshare }} sudo chown nobody:nogroup {{fileshare}} -echo "{{ fileshare }} {{cnf["cloud_config"]["vnet_range"]}}(rw,sync,no_subtree_check,no_root_squash)" | sudo tee -a /etc/exports -echo "{{ fileshare }} {{cnf["cloud_config"]["samba_range"]}}(rw,fsid=1,nohide,insecure,sync,no_subtree_check,no_root_squash)" | sudo tee -a /etc/exports +{% for range in cnf["nfs_client_CIDR"]["node_range"] %} +echo "{{ fileshare }} {{range}}(rw,sync,no_subtree_check,no_root_squash)" | sudo tee -a /etc/exports +{% endfor %} +{% for range in cnf["nfs_client_CIDR"]["samba_range"] %} +echo "{{ fileshare }} {{range}}(rw,fsid=1,nohide,insecure,sync,no_subtree_check,no_root_squash)" | sudo tee -a /etc/exports +{% endfor %} {% endfor %} # Get number of CPU diff --git a/src/ClusterBootstrap/template/secret/pass_secret.sh.template b/src/ClusterBootstrap/template/secret/pass_secret.sh.template index 1f198bc65..ddf9bb32e 100755 --- a/src/ClusterBootstrap/template/secret/pass_secret.sh.template +++ b/src/ClusterBootstrap/template/secret/pass_secret.sh.template @@ -1,6 +1,6 @@ {% for regi_name, regi_cred in cnf["registry_credential"].items() %} docker login {{ regi_name }} -u {{ regi_cred["username"] }} -p {{ regi_cred["password"] }} {% endfor %} -chown -R {{cnf["cloud_config"]["default_admin_username"]}}:{{cnf["cloud_config"]["default_admin_username"]}} /home/{{cnf["cloud_config"]["default_admin_username"]}}/.docker/ -chown -R {{cnf["cloud_config"]["default_admin_username"]}}:{{cnf["cloud_config"]["default_admin_username"]}} /home/{{cnf["cloud_config"]["default_admin_username"]}}/.kube/ -/opt/bin/kubectl create secret generic regcred --from-file=.dockerconfigjson=/home/{{cnf["cloud_config"]["default_admin_username"]}}/.docker/config.json --type=kubernetes.io/dockerconfigjson --dry-run -o yaml | /opt/bin/kubectl apply -f - \ No newline at end of file +chown -R {{cnf["admin_username"]}}:{{cnf["admin_username"]}} /home/{{cnf["admin_username"]}}/.docker/ +chown -R {{cnf["admin_username"]}}:{{cnf["admin_username"]}} /home/{{cnf["admin_username"]}}/.kube/ +/opt/bin/kubectl create secret generic regcred --from-file=.dockerconfigjson=/home/{{cnf["admin_username"]}}/.docker/config.json --type=kubernetes.io/dockerconfigjson --dry-run -o yaml | /opt/bin/kubectl apply -f - \ No newline at end of file diff --git a/src/ClusterBootstrap/template/vmss/vmss.sh.template b/src/ClusterBootstrap/template/vmss/vmss.sh.template new file mode 100755 index 000000000..99313c778 --- /dev/null +++ b/src/ClusterBootstrap/template/vmss/vmss.sh.template @@ -0,0 +1,23 @@ +az vmss create \ + --resource-group {{cnf["cluster_name"]}}ResGrp \ + --name {{cnf["cluster_name"].lower()}}-worker \ + --image {{cnf["azure_cluster"]["vm_image"]}} \ + --generate-ssh-keys \ + --public-ip-address-dns-name {{cnf["cluster_name"].lower()}}-worker \ + --public-ip-per-vm \ + --public-ip-address-allocation static \ + --vm-domain-name {{cnf["cluster_name"].lower()}} \ + --vm-sku {{cnf["azure_cluster"]["worker_vm_size"]}} \ + --vnet-name {{cnf["cluster_name"]}}-VNet \ + --subnet mySubnet \ + --nsg {{cnf["cluster_name"]}}-nsg \ + --admin-username {{cnf["admin_username"]}} \ + --storage-sku Premium_LRS \ + --data-disk-sizes-gb {{cnf["azure_cluster"]["worker_local_storage_sz"]}} \ + --data-disk-caching ReadWrite \ + --ssh-key-values ./deploy/sshkey/id_rsa.pub \ + --instance-count {{cnf["azure_cluster"]["worker_node_num"]}} \ + --priority {{cnf["priority"]}} \ + --disable-overprovision \ + --eviction-policy {{cnf["azure_cluster"]["eviction_policy"]}} \ + --single-placement-group {{cnf["azure_cluster"]["single_placement_group"]}} \ No newline at end of file diff --git a/src/ClusterBootstrap/utils.py b/src/ClusterBootstrap/utils.py index 6742b6854..ba636e727 100755 --- a/src/ClusterBootstrap/utils.py +++ b/src/ClusterBootstrap/utils.py @@ -351,13 +351,16 @@ def SSH_exec_script( identity_file, user, host, script, supressWarning = False, def get_ETCD_discovery_URL(size): + if size == 1: + output = "we don't use discovery url for 1 node etcd" + else: try: output = urllib.urlopen("https://discovery.etcd.io/new?size=%d" % size ).read() if not "https://discovery.etcd.io" in output: raise Exception("ERROR: we cannot get etcd discovery url from 'https://discovery.etcd.io/new?size=%d', got message %s" % (size,output)) except Exception as e: raise Exception("ERROR: we cannot get etcd discovery url from 'https://discovery.etcd.io/new?size=%d'" % size) - return output + return output def get_cluster_ID_from_file(): @@ -622,3 +625,44 @@ def mergeDict(configDst, configSrc, bOverwrite): elif isinstance(configSrc[entry], dict) and isinstance(configDst[entry], dict): mergeDict(configDst[entry], configSrc[entry], bOverwrite) +def ip2int(addr): + return struct.unpack("!I", socket.inet_aton(addr))[0] + +def mask_num(valid_bit): + return int('1'*valid_bit+'0'*(32 - valid_bit), 2) + +def remain_num(valid_bit): + return int('0'*valid_bit+'1'*(32 - valid_bit), 2) + +def check_covered_by_ipvals(ipvals, masked2check): + for wider_ipval in ipvals: + if wider_ipval == masked2check: + return True + return False + +def check_covered_by_wider_ips(mask2ip, ipval2check, mask4ipval): + for msk in mask2ip.keys(): + # wider mask range + if msk < mask4ipval: + this_masked = ipval2check & mask_num(msk) + if check_covered_by_ipvals(mask2ip[msk], this_masked): + return True + return False + +def keep_widest_subnet(ips): + res = set() + mask2ip = {} + ips = sorted(ips, key = lambda x: int(x[-2:])) + for ip in ips: + ipv4, mask = ip.split("/") + mask = int(mask) + ipval = ip2int(ipv4) + remnmsk = remain_num(mask) + assert (remnmsk & ipval == 0), "invalid ip/mask {}!".format(ip) + if check_covered_by_wider_ips(mask2ip, ipval, mask): + continue + if mask not in mask2ip: + mask2ip[mask] = set() + mask2ip[mask].add(ipval) + res.add(ip) + return list(res) \ No newline at end of file diff --git a/src/utils/MySQLDataHandler.py b/src/utils/MySQLDataHandler.py index 4a4395527..a8bfadcc4 100755 --- a/src/utils/MySQLDataHandler.py +++ b/src/utils/MySQLDataHandler.py @@ -185,11 +185,10 @@ def CreateTable(self): # when the VC has vm of same GPU type but different VMsizes, e.g., when VC has Standard_NC6s_v3 and Standard_NC12s_v3 both? # impossible since there's no way to do it with current config mechanism - worker_cnt = int(config["azure_cluster"]["worker_node_num"]) - sku_mapping = config["sku_mapping"] - sku = sku_mapping.get(config["azure_cluster"]["worker_vm_size"],sku_mapping["default"]) - n_gpu_pernode = sku["gpu-count"] - gpu_type = sku["gpu-type"] + gpu_count_per_node = config["gpu_count_per_node"] + worker_node_num = config["worker_node_num"] + gpu_type = config["gpu_type"] + sql = """ CREATE TABLE IF NOT EXISTS `%s` ( @@ -203,7 +202,7 @@ def CreateTable(self): CONSTRAINT `hierarchy` FOREIGN KEY (`parent`) REFERENCES `%s` (`vcName`) ) AS SELECT \'%s\' AS vcName, NULL AS parent, '{\\\"%s\\\":%s}' AS quota, '{\\\"%s\\\":{\\\"num_gpu_per_node\\\":%s}}' AS metadata; - """ % (self.vctablename, self.vctablename, config['defalt_virtual_cluster_name'], gpu_type, n_gpu_pernode*worker_cnt, gpu_type,n_gpu_pernode) + """ % (self.vctablename, self.vctablename, config['defalt_virtual_cluster_name'], gpu_type, gpu_count_per_node*worker_node_num, gpu_type,gpu_count_per_node) cursor = self.conn.cursor() cursor.execute(sql)