Skip to content

Commit 7abfed7

Browse files
hanwen-cluster authored and hanwen-pcluste committed
Remove prolog/epilog used as a workaround for clusters without an Internet connection.
The Slurm bug has been fixed in Slurm 22.05, so the workaround is no longer necessary. Signed-off-by: Hanwen <[email protected]>
1 parent 50823c6 commit 7abfed7

File tree

6 files changed

+0
-157
lines changed

6 files changed

+0
-157
lines changed

attributes/default.rb

-1
Original file line numberDiff line numberDiff line change
@@ -552,7 +552,6 @@
552552
default['cluster']['raid_vol_ids'] = ''
553553
default['cluster']['dns_domain'] = nil
554554
default['cluster']['use_private_hostname'] = 'false'
555-
default['cluster']['add_node_hostnames_in_hosts_file'] = node['cluster']['use_private_hostname']
556555
default['cluster']['skip_install_recipes'] = 'yes'
557556
default['cluster']['enable_nss_slurm'] = node['cluster']['directory_service']['enabled']
558557
default['cluster']['realmemory_to_ec2memory_ratio'] = 0.95

cookbooks/aws-parallelcluster-config/files/default/cloudwatch_agent/cloudwatch_log_files.json

-22
Original file line numberDiff line numberDiff line change
@@ -540,28 +540,6 @@
540540
],
541541
"feature_conditions": []
542542
},
543-
{
544-
"timestamp_format_key": "default",
545-
"file_path": "/var/log/parallelcluster/slurm_prolog_epilog.log",
546-
"log_stream_name": "slurm_prolog_epilog",
547-
"schedulers": [
548-
"slurm"
549-
],
550-
"platforms": [
551-
"centos",
552-
"ubuntu",
553-
"amazon"
554-
],
555-
"node_roles": [
556-
"ComputeFleet"
557-
],
558-
"feature_conditions": [
559-
{
560-
"dna_key": "use_private_hostname",
561-
"satisfying_values": ["true"]
562-
}
563-
]
564-
},
565543
{
566544
"timestamp_format_key": "default",
567545
"file_path": "/var/log/parallelcluster/clusterstatusmgtd",

cookbooks/aws-parallelcluster-slurm/files/default/head_node_slurm/epilog

-39
This file was deleted.

cookbooks/aws-parallelcluster-slurm/files/default/head_node_slurm/prolog

-54
This file was deleted.

cookbooks/aws-parallelcluster-slurm/recipes/config_head_node.rb

-28
Original file line numberDiff line numberDiff line change
@@ -192,34 +192,6 @@
192192
action :create
193193
end
194194

195-
if node['cluster']['add_node_hostnames_in_hosts_file'] == "true"
196-
directory "#{node['cluster']['slurm']['install_dir']}/etc/pcluster/prolog.d" do
197-
user 'root'
198-
group 'root'
199-
mode '0755'
200-
end
201-
202-
cookbook_file "#{node['cluster']['slurm']['install_dir']}/etc/pcluster/prolog.d/01-pcluster-prolog" do
203-
source 'head_node_slurm/prolog'
204-
owner node['cluster']['slurm']['user']
205-
group node['cluster']['slurm']['group']
206-
mode '0744'
207-
end
208-
209-
directory "#{node['cluster']['slurm']['install_dir']}/etc/pcluster/epilog.d" do
210-
user 'root'
211-
group 'root'
212-
mode '0755'
213-
end
214-
215-
cookbook_file "#{node['cluster']['slurm']['install_dir']}/etc/pcluster/epilog.d/01-pcluster-epilog" do
216-
source 'head_node_slurm/epilog'
217-
owner node['cluster']['slurm']['user']
218-
group node['cluster']['slurm']['group']
219-
mode '0744'
220-
end
221-
end
222-
223195
service "slurmctld" do
224196
supports restart: false
225197
action %i(enable start)

cookbooks/aws-parallelcluster-slurm/templates/default/slurm/slurm.conf.erb

-13
Original file line numberDiff line numberDiff line change
@@ -40,19 +40,6 @@ SuspendTimeout=120
4040
PrivateData=cloud
4141
ResumeRate=0
4242
SuspendRate=0
43-
<% if node["cluster"]["add_node_hostnames_in_hosts_file"] == 'true' -%>
44-
#
45-
# PROLOG AND EPILOG
46-
# prolog is executed to add nodes info to /etc/hosts on compute nodes when each job is allocated
47-
# epilog is executed to clean contents written by prolog
48-
# PrologFlags specifies that the prolog is executed at job allocation and that prologs and epilogs of different jobs are executed serially
49-
# SchedulerParameters allows jobs to be requeued to other nodes if the prolog exits with an error.
50-
# Note the error exit of prolog drains a node, because the error of prolog is considered as a node error.
51-
Epilog=<%= node['cluster']['slurm']['install_dir'] %>/etc/pcluster/epilog.d/*
52-
Prolog=<%= node['cluster']['slurm']['install_dir'] %>/etc/pcluster/prolog.d/*
53-
PrologFlags=alloc,serial
54-
SchedulerParameters=nohold_on_prolog_fail
55-
<% end -%>
5643
#
5744
# TIMERS
5845
SlurmctldTimeout=300

0 commit comments

Comments
 (0)