Skip to content

Commit

Permalink
exec created new process SHOULD enforce slurm cgroup limit (China-HPC#50)
Browse files Browse the repository at this point in the history

fix: exec created new process SHOULD enforce slurm cgroup limit
  • Loading branch information
ansiz authored Dec 16, 2018
1 parent 7ddf9e5 commit 63f9fc0
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 7 deletions.
28 changes: 22 additions & 6 deletions pkg/socker/socker.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
const (
cmdDocker = "docker"
cmdCgclassify = "cgclassify"
cmdPs = "ps"
cmdPgrep = "pgrep"
sepColon = ":"
sepPipe = "|"
Expand Down Expand Up @@ -379,13 +380,21 @@ func isContainerRan(containerName string) (bool, error) {
}

// queryContainerPID resolves a PID for the named container and returns it as
// a string. It reads .State.Pid via `docker inspect`, then asks `ps` for the
// parent PID of that process and returns the trimmed parent PID. If either
// command fails, the error is logged and returned with an empty string.
//
// NOTE(review): this span appears to be a rendered diff without +/- markers:
// the two `args :=` lines and the two result-trimming statements look like
// the removed and added versions of the same code — confirm against the
// actual pkg/socker/socker.go before relying on it.
func queryContainerPID(containerName string) (string, error) {
// Variant that wraps the format template in literal single quotes.
args := []string{"inspect", "-f", "'{{ .State.Pid }}'", containerName}
// Variant that passes the bare format template.
args := []string{"inspect", "-f", "{{ .State.Pid }}", containerName}
output, err := exec.Command(cmdDocker, args...).CombinedOutput()
if err != nil {
log.Errorf("query container pid failed: %v:%s", err, output)
return "", err
}
// Strips CR, LF and single-quote characters from both ends of the output.
containerPID := strings.Trim(string(output), "\r\n'")
// Strips surrounding whitespace; cmdPid is the PID printed by docker inspect.
cmdPid := strings.TrimSpace(string(output))
// Ask ps for the parent PID of cmdPid; output is reused for the ps result.
output, err = exec.Command(cmdPs, "-o", "ppid=",
"-p", cmdPid).CombinedOutput()
// Logged after the ps command has already run and before its error is checked.
log.Debugf("find cmdPid command: ps -o ppid= -p %s", cmdPid)
if err != nil {
log.Errorf("can't find docker-containe pid: %v,%s", err, output)
return "", err
}
// The value returned to the caller is the parent PID reported by ps.
containerPID := strings.TrimSpace(string(output))
log.Debugf("container PID is: %s", containerPID)
return containerPID, nil
}
Expand Down Expand Up @@ -435,11 +444,18 @@ func (s *Socker) enforceLimit() error {
}
cgroupID := fmt.Sprintf("slurm/uid_%s/job_%s/", s.CurrentUID, s.slurmJobID)
log.Debugf("target cgroup id is: %s", cgroupID)
pids, err := QueryChildPIDs(containerPID)
if err != nil {
log.Errorf("query child process ids failed: %v", err)
for {
pids, err := QueryChildPIDs(containerPID)
if err != nil {
log.Errorf("query child process ids failed: %v", err)
}
err = s.setCgroupLimit(pids, cgroupID)
if err != nil {
return err
}
// TODO: find a better way to watch cgroup new tasks without polling.
time.Sleep(time.Second * 1)
}
return s.setCgroupLimit(append(pids, containerPID), cgroupID)
}

func (s *Socker) setCgroupLimit(pids []string, cgroupID string) error {
Expand Down
2 changes: 1 addition & 1 deletion scripts/epilog.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ if [ -f $recordFile ];then
pidRecord=$ownerRecord"-pids"
docker rm -f $containerName
for pid in `cat $pidRecord`; do
kill -- $pid
kill $pid
done
rm -f $recordFile $ownerRecord $pidRecord
fi

0 comments on commit 63f9fc0

Please sign in to comment.