Skip to content

Commit

Permalink
Merge pull request #675 from tmarballi/allow-type-per-role
Browse files Browse the repository at this point in the history
Allow different instance type per role
  • Loading branch information
scragraham authored Mar 1, 2018
2 parents e68cf2f + 331c164 commit dadad21
Show file tree
Hide file tree
Showing 10 changed files with 200 additions and 105 deletions.
5 changes: 4 additions & 1 deletion appscale/tools/agents/azure_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,6 @@ class AzureAgent(BaseAgent):
PARAM_APP_SECRET,
PARAM_APP_ID,
PARAM_IMAGE_ID,
PARAM_INSTANCE_TYPE,
PARAM_KEYNAME,
PARAM_SUBSCRIBER_ID,
PARAM_TENANT_ID,
Expand Down Expand Up @@ -491,6 +490,7 @@ def add_instances_to_existing_ss(self, count, parameters):
credentials = self.open_connection(parameters)
subscription_id = str(parameters[self.PARAM_SUBSCRIBER_ID])
resource_group = parameters[self.PARAM_RESOURCE_GROUP]
instance_type = parameters[self.PARAM_INSTANCE_TYPE]
compute_client = ComputeManagementClient(credentials, subscription_id)

num_instances_added = 0
Expand All @@ -505,6 +505,9 @@ def add_instances_to_existing_ss(self, count, parameters):
if ss_instance_count >= self.MAX_VMSS_CAPACITY:
continue

if not vmss.sku.name == instance_type:
continue

scaleset = compute_client.virtual_machine_scale_sets.get(
resource_group, vmss.name)
ss_upgrade_policy = scaleset.upgrade_policy
Expand Down
1 change: 0 additions & 1 deletion appscale/tools/agents/ec2_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ class EC2Agent(BaseAgent):
PARAM_CREDENTIALS,
PARAM_GROUP,
PARAM_IMAGE_ID,
PARAM_INSTANCE_TYPE,
PARAM_KEYNAME,
PARAM_SPOT
)
Expand Down
1 change: 0 additions & 1 deletion appscale/tools/agents/gce_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,6 @@ class GCEAgent(BaseAgent):
REQUIRED_CREDENTIALS = (
PARAM_GROUP,
PARAM_IMAGE_ID,
PARAM_INSTANCE_TYPE,
PARAM_KEYNAME,
PARAM_PROJECT,
PARAM_ZONE
Expand Down
60 changes: 43 additions & 17 deletions appscale/tools/node_layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from agents.factory import InfrastructureAgentFactory
from appscale_logger import AppScaleLogger
from custom_exceptions import BadConfigurationException
from local_state import LocalState
from parse_args import ParseArgs


class NodeLayout():
Expand Down Expand Up @@ -134,6 +136,9 @@ def __init__(self, options):
self.replication = options.get('replication')
self.database_type = options.get('table', 'cassandra')
self.add_to_existing = options.get('add_to_existing')
self.default_instance_type = options.get('instance_type')
self.test = options.get('test')
self.force = options.get('force')

if 'login_host' in options and options['login_host'] is not None:
self.login_host = options['login_host']
Expand Down Expand Up @@ -261,6 +266,22 @@ def validate_node_layout(self):
for node, disk in zip(nodes, disks):
node.disk = disk

instance_type = node_set.get('instance_type', self.default_instance_type)

if self.infrastructure:
if not instance_type:
self.invalid("Must set a default instance type or specify instance "
"type per role.")

# Check if this is an allowed instance type.
if instance_type in ParseArgs.DISALLOWED_INSTANCE_TYPES and \
not (self.force or self.test):
reason = "the suggested 4GB of RAM"
if 'database' in roles:
reason += " to run Cassandra"
LocalState.confirm_or_abort("The {0} instance type does not have {1}."
"Please consider using a larger instance "
"type.".format(instance_type, reason))
# Assign master.
if 'master' in roles:
self.master = nodes[0]
Expand All @@ -271,6 +292,7 @@ def validate_node_layout(self):
node.add_role(role)
if role == 'login':
node.public_ip = self.login_host or node.public_ip
node.instance_type = instance_type
if not node.is_valid():
self.invalid(",".join(node.errors()))

Expand Down Expand Up @@ -526,8 +548,9 @@ def from_locations_json_list(self, locations_nodes_list):
open_nodes.append(old_node)
continue
for _, node in enumerate(nodes_copy):
# Match nodes based on jobs/roles.
if set(old_node_roles) == set(node.roles):
# Match nodes based on jobs/roles and the instance type specified.
if set(old_node_roles) == set(node.roles) \
and old_node.get('instance_type') == node.instance_type:
nodes_copy.remove(node)
node.from_json(old_node)
if node.is_valid():
Expand All @@ -537,19 +560,19 @@ def from_locations_json_list(self, locations_nodes_list):
return None
break
for open_node in open_nodes:
try:
node = nodes_copy.pop()
except IndexError:
return None
# Match nodes based on jobs/roles.
roles = node.roles
node.from_json(open_node)
node.roles = roles
if node.is_valid():
nodes.append(node)
else:
# Locations JSON is incorrect if we get here.
return None
for node in nodes_copy:
# Match nodes based on jobs/roles and the instance type specified.
if node.instance_type == open_node.get('instance_type'):
roles = node.roles
node.from_json(open_node)
node.roles = roles
if node.is_valid():
nodes.append(node)
else:
# Locations JSON is incorrect if we get here.
return None
else:
continue

# If these lengths are equal all nodes were matched.
if len(nodes) == len(self.nodes):
Expand All @@ -572,7 +595,7 @@ class Node():

DUMMY_INSTANCE_ID = "i-APPSCALE"

def __init__(self, public_ip, cloud, roles=[], disk=None):
def __init__(self, public_ip, cloud, roles=[], disk=None, instance_type=None):
"""Creates a new Node, representing the given id in the specified cloud.
Expand All @@ -589,6 +612,7 @@ def __init__(self, public_ip, cloud, roles=[], disk=None):
self.cloud = cloud
self.roles = roles
self.disk = disk
self.instance_type = instance_type
self.expand_roles()


Expand Down Expand Up @@ -712,7 +736,8 @@ def to_json(self):
'private_ip': self.private_ip,
'instance_id': self.instance_id,
'jobs': self.roles,
'disk': self.disk
'disk': self.disk,
'instance_type' : self.instance_type
}


Expand All @@ -735,3 +760,4 @@ def from_json(self, node_dict):
self.instance_id = node_dict.get('instance_id')
self.roles = node_dict.get('jobs')
self.disk = node_dict.get('disk')
self.instance_type = node_dict.get('instance_type')
11 changes: 3 additions & 8 deletions appscale/tools/parse_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -651,16 +651,11 @@ def validate_infrastructure_flags(self):
raise BadConfigurationException("--disks must be a dict, but was a " \
"{0}".format(type(self.args.disks)))

if not self.args.instance_type:
raise BadConfigurationException("Cannot start a cloud instance without " \
"the instance type.")

if self.args.instance_type in self.DISALLOWED_INSTANCE_TYPES and \
not (self.args.force or self.args.test):
LocalState.confirm_or_abort("The {0} instance type does not have " \
"enough RAM to run Cassandra in a production setting. Please " \
"consider using a larger instance type.".format(
self.args.instance_type))
LocalState.confirm_or_abort("The {0} instance type does not have "
"the suggested 4GB of RAM. Please consider using a larger instance "
"type.".format(self.args.instance_type))

if self.args.infrastructure == 'azure':
if not self.args.azure_subscription_id:
Expand Down
145 changes: 101 additions & 44 deletions appscale/tools/remote_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import uuid
import yaml

from boto.exception import BotoServerError

# AppScale-specific imports
from agents.factory import InfrastructureAgentFactory
Expand All @@ -24,6 +25,7 @@
from custom_exceptions import BadConfigurationException
from custom_exceptions import ShellException
from custom_exceptions import TimeoutException
from agents.base_agent import AgentRuntimeException
from agents.gce_agent import CredentialTypes
from agents.gce_agent import GCEAgent
from local_state import APPSCALE_VERSION
Expand Down Expand Up @@ -151,27 +153,86 @@ def start_all_nodes(cls, options, node_layout):

agent.configure_instance_security(params)

load_balancer_nodes = node_layout.get_nodes('load_balancer', True)
instance_ids, public_ips, private_ips = cls.spawn_load_balancers_in_cloud(
options, agent, params,
len(load_balancer_nodes))
load_balancer_roles = {}
instance_type_roles = {}

for node_index, node in enumerate(load_balancer_nodes):
index = node_layout.nodes.index(node)
node_layout.nodes[index].public_ip = public_ips[node_index]
node_layout.nodes[index].private_ip = private_ips[node_index]
node_layout.nodes[index].instance_id = instance_ids[node_index]
for node in node_layout.get_nodes('load_balancer', True):
load_balancer_roles.setdefault(node.instance_type, []).append(node)

for node in node_layout.get_nodes('load_balancer', False):
instance_type = instance_type_roles
instance_type.setdefault(node.instance_type, []).append(node)

spawned_instance_ids = []

for instance_type, load_balancer_nodes in load_balancer_roles.items():
# Copy parameters so we can modify the instance type.
instance_type_params = params.copy()
instance_type_params['instance_type'] = instance_type

try:
instance_ids, public_ips, private_ips = cls.spawn_nodes_in_cloud(
agent, instance_type_params, count=len(load_balancer_nodes),
load_balancer=True)
except (AgentRuntimeException, BotoServerError):
AppScaleLogger.warn("AppScale was unable to start the requested number "
"of instances, attempting to terminate those that "
"were started.")
if len(spawned_instance_ids) > 0:
AppScaleLogger.warn("Attempting to terminate those that were started.")
cls.terminate_spawned_instances(spawned_instance_ids, agent, params)

# Cleanup the keyname since it failed.
LocalState.cleanup_keyname(options.keyname)

# Re-raise the original exception.
raise

# Keep track of instances we have started.
spawned_instance_ids.extend(instance_ids)

for node_index, node in enumerate(load_balancer_nodes):
index = node_layout.nodes.index(node)
node_layout.nodes[index].public_ip = public_ips[node_index]
node_layout.nodes[index].private_ip = private_ips[node_index]
node_layout.nodes[index].instance_id = instance_ids[node_index]

if options.static_ip:
node = node_layout.head_node()
agent.associate_static_ip(params, node.instance_id,
options.static_ip)
node.public_ip = options.static_ip
AppScaleLogger.log("Static IP associated with head node.")

AppScaleLogger.log("\nPlease wait for AppScale to prepare your machines "
"for use. This can take few minutes.")

other_nodes = node_layout.get_nodes('load_balancer', False)
if len(other_nodes) > 0:
_instance_ids, _public_ips, _private_ips = cls.spawn_other_nodes_in_cloud(
agent, params,
len(other_nodes))
for instance_type, nodes in instance_type_roles.items():
# Copy parameters so we can modify the instance type.
instance_type_params = params.copy()
instance_type_params['instance_type'] = instance_type

for node_index, node in enumerate(other_nodes):
try:
_instance_ids, _public_ips, _private_ips = cls.spawn_nodes_in_cloud(
agent, instance_type_params, count=len(nodes))
except (AgentRuntimeException, BotoServerError):
AppScaleLogger.warn("AppScale was unable to start the requested number "
"of instances, attempting to terminate those that "
"were started.")
if len(spawned_instance_ids) > 0:
AppScaleLogger.warn("Attempting to terminate those that were started.")
cls.terminate_spawned_instances(spawned_instance_ids, agent, params)

# Cleanup the keyname since it failed.
LocalState.cleanup_keyname(options.keyname)

# Re-raise the original exception.
raise

# Keep track of instances we have started.
spawned_instance_ids.extend(_instance_ids)

for node_index, node in enumerate(nodes):
index = node_layout.nodes.index(node)
node_layout.nodes[index].public_ip = _public_ips[node_index]
node_layout.nodes[index].private_ip = _private_ips[node_index]
Expand Down Expand Up @@ -276,52 +337,27 @@ def start_head_node(cls, options, my_id, node_layout):


@classmethod
def spawn_load_balancers_in_cloud(cls, options, agent, params, count=1):
def spawn_nodes_in_cloud(cls, agent, params, count=1, load_balancer=False):
"""Starts count number of virtual machines in a cloud infrastructure with
public ips.
This method also prepares the virtual machine for use by the AppScale Tools.
Args:
options: A Namespace that specifies the cloud infrastructure to use, as
well as how to interact with that cloud.
agent: The agent to start VMs with, must be passed as an argument
because agents cannot be made twice.
params: The parameters to be sent to the agent.
count: A int, the number of instances to start.
load_balancer: A boolean indicating whether the spawned instance should
have a public ip or not.
Returns:
The instance ID, public IP address, and private IP address of the machine
that was started.
"""
instance_ids, public_ips, private_ips = agent.run_instances(
count=count, parameters=params, security_configured=True,
public_ip_needed=True)
public_ip_needed=load_balancer)

if options.static_ip:
agent.associate_static_ip(params, instance_ids[0], options.static_ip)
public_ips[0] = options.static_ip
AppScaleLogger.log("Static IP associated with head node.")
return instance_ids, public_ips, private_ips


@classmethod
def spawn_other_nodes_in_cloud(cls, agent, params, count=1):
"""Starts count number of virtual machines in a cloud infrastructure.
This method also prepares the virtual machine for use by the AppScale Tools.
Args:
agent: The agent to start VMs with, must be passed as an argument
because agents cannot be made twice.
params: The parameters to be sent to the agent.
count: A int, the number of instances to start.
Returns:
The instance ID, public IP address, and private IP address of the machine
that was started.
"""
instance_ids, public_ips, private_ips = agent.run_instances(
count=count, parameters=params, security_configured=True,
public_ip_needed=False)
return instance_ids, public_ips, private_ips

@classmethod
Expand Down Expand Up @@ -850,6 +886,27 @@ def wait_for_machines_to_finish_loading(cls, host, keyname):
time.sleep(cls.WAIT_TIME)


@classmethod
def terminate_spawned_instances(cls, spawned_instance_ids, agent, params):
""" Shuts down instances specified. For use when AppScale has failed to
start all the instances for the deployment since we do not check or clean
any local files.
Args:
spawned_instance_ids: A list of instance ids we have started that
should be terminated.
agent: The agent to call terminate instance with.
params: Agent parameters.
"""
terminate_params = params.copy()
terminate_params[agent.PARAM_INSTANCE_IDS] = spawned_instance_ids
try:
agent.terminate_instances(terminate_params)
except (AgentRuntimeException, BotoServerError):
AppScaleLogger.warn("AppScale failed to terminate instance(s) with "
"id(s): {}".format(spawned_instance_ids))


@classmethod
def terminate_cloud_instance(cls, instance_id, options):
""" Powers off a single instance in the currently AppScale deployment and
Expand Down
Loading

0 comments on commit dadad21

Please sign in to comment.