Skip to content

Commit d382056

Browse files
LeoHongyiYinYangOfDao
authored and committed
Add nfs storage info in gpu card page, fix the issue of blank load in job page, issue of preemptible job total and training job submission error (microsoft#613)
1 parent 4629a1a commit d382056

File tree

5 files changed

+259
-55
lines changed

5 files changed

+259
-55
lines changed

src/dashboard/src/pages/ClusterStatus/index.tsx

+12-8
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,8 @@ const ClusterStatus: FC = () => {
6565
response['prometheus'] = prometheus;
6666
return response;
6767
}
68-
const fetchClusterStatus = () => {
69-
if (clusters) {
68+
const fetchClusterStatus = (mount: boolean) => {
69+
if (clusters && mount) {
7070
const params = new URLSearchParams({
7171
query:`count+(task_gpu_percent{vc_name="${selectedTeam}"}+==+0)+by+(username)`,
7272
});
@@ -172,6 +172,9 @@ const ClusterStatus: FC = () => {
172172
if (!mu.hasOwnProperty('idle')) {
173173
mu['idle'] = "0";
174174
}
175+
if (!mu.hasOwnProperty('preemptableGPU')) {
176+
mu['preemptableGPU'] = "0";
177+
}
175178
});
176179
let finalUserStatus = _.values(mergeTwoObjsByKey(tmpMerged,prometheusResp,'userName'));
177180
let totalRow: any = {};
@@ -180,8 +183,9 @@ const ClusterStatus: FC = () => {
180183
totalRow['idle'] = 0;
181184
totalRow['usedGPU'] = 0;
182185
totalRow['idleGPU'] = 0;
186+
totalRow['preemptableGPU'] = 0;
183187
for (let us of finalUserStatus) {
184-
console.log(us);
188+
console.log(us['preemptableGPU']);
185189
totalRow['booked'] += parseInt(us['booked']);
186190
totalRow['idle'] += parseInt(us['idle']);
187191
totalRow['usedGPU'] += parseInt(us['usedGPU']);
@@ -210,21 +214,21 @@ const ClusterStatus: FC = () => {
210214
let mount = true;
211215
let timeout: any;
212216
if (mount) {
213-
fetchClusterStatus()
214-
timeout = setTimeout(() => {fetchClusterStatus()},30000)
217+
fetchClusterStatus(mount)
218+
timeout = setTimeout(() => {fetchClusterStatus(mount)}, 30000)
215219
}
216220

217221
return () => {
218222
mount = false;
219223
clearTimeout(timeout)
220224
}
221225
},[clusters, selectedTeam])
222-
const handleChange = (event: React.ChangeEvent<HTMLInputElement>) => {
226+
const handleChange = (event: React.ChangeEvent<HTMLInputElement>, mount: boolean) => {
223227
setSelectedValue(event.target.value);
224228
localStorage.setItem('selectedCluster', event.target.value);
225229
const filteredVCStatus: any = vcStatus.filter((vc)=>vc['ClusterName'] === event.target.value);
226230
console.log(vcStatus)
227-
fetchClusterStatus()
231+
fetchClusterStatus(mount)
228232
setNodeStatus(filteredVCStatus[0]['node_status']);
229233
setIframeUrl((filteredVCStatus[0]['GranaUrl']));
230234
}
@@ -238,7 +242,7 @@ const ClusterStatus: FC = () => {
238242
onChangeIndex={(value) => handleChangeIndex(value, setValue)}
239243
>
240244
<DLTSTabPanel value={value} index={0} dir={theme.direction} title={ClusterStatusTitles[value]}>
241-
<TeamVirtualClusterStatus vcStatus={vcStatus} selectedValue={selectedValue} handleChange={handleChange}/>
245+
<TeamVirtualClusterStatus vcStatus={vcStatus} selectedValue={selectedValue} handleChange={(event: React.ChangeEvent<HTMLInputElement>) => handleChange(event, true)}/>
242246
</DLTSTabPanel>
243247
<DLTSTabPanel value={value} index={1} dir={theme.direction} title={ClusterStatusTitles[value]}>
244248
<TeamVCUserStatus userStatus={userStatus} currentCluster={selectedValue} showCurrentUser={showCurrentUser} handleSwitch={handleSwitch}/>

src/dashboard/src/pages/CommonComponents/DLTSTabs.tsx

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ export const DLTSTabs = (props: TabsProps) => {
2525
{...other}
2626
>
2727
{ titles && titles.map((title, index)=>(
28-
<Tab label={title} />
28+
<Tab label={title} key={index}/>
2929
)) }
3030
{children}
3131
</Tabs>

src/dashboard/src/pages/Home/GPUCard.tsx

+179-7
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,52 @@
11
import React, {useEffect, useState} from "react";
22
import { Link } from "react-router-dom";
33
import useFetch from "use-http";
4+
import Table from '@material-ui/core/Table';
5+
import TableBody from '@material-ui/core/TableBody';
6+
import TableCell from '@material-ui/core/TableCell';
7+
import TableRow from '@material-ui/core/TableRow';
8+
import LinearProgress from "@material-ui/core/LinearProgress";
49
import {
10+
Box,
511
Button,
612
Card,
713
CardActions,
814
CardContent,
9-
CardHeader,
15+
CardHeader, createMuiTheme,
1016
Divider,
1117
IconButton,
1218
InputAdornment,
1319
Menu,
14-
MenuItem,
20+
MenuItem, MuiThemeProvider,
1521
TextField,
16-
Tooltip
22+
Tooltip, Typography, withStyles
1723
} from "@material-ui/core";
18-
import { makeStyles, createStyles, useTheme, Theme } from "@material-ui/core/styles";
24+
import {
25+
makeStyles,
26+
createStyles,
27+
useTheme,
28+
Theme,
29+
lighten
30+
} from "@material-ui/core/styles";
1931
import { MoreVert, FileCopyRounded} from "@material-ui/icons";
2032

2133
import {Cell, PieChart, Pie, ResponsiveContainer,Sector} from "recharts";
2234
import UserContext from "../../contexts/User";
2335
import TeamsContext from '../../contexts/Teams';
24-
import {green, lightGreen, deepOrange } from "@material-ui/core/colors";
36+
import {
37+
green,
38+
lightGreen,
39+
deepOrange,
40+
red,
41+
yellow
42+
} from "@material-ui/core/colors";
2543
import copy from 'clipboard-copy'
2644
import {checkObjIsEmpty, sumValues} from "../../utlities/ObjUtlities";
2745
import {DLTSSnackbar} from "../CommonComponents/DLTSSnackbar";
46+
47+
import _ from "lodash";
48+
import {type} from "os";
49+
import useCheckIsDesktop from "../../utlities/layoutUtlities";
2850
const useStyles = makeStyles((theme: Theme) => createStyles({
2951
avatar: {
3052
backgroundColor: theme.palette.secondary.main,
@@ -48,6 +70,14 @@ const useStyles = makeStyles((theme: Theme) => createStyles({
4870
},
4971
container: {
5072
margin: '0 auto',
73+
},
74+
tableTitle: {
75+
display: "flex",
76+
justifyContent: "center"
77+
},
78+
tableInfo: {
79+
justifyContent: "space-between",
80+
display: "flex"
5181
}
5282
}));
5383

@@ -242,6 +272,7 @@ const GPUCard: React.FC<{ cluster: string }> = ({ cluster }) => {
242272
const name = typeof email === 'string' ? email.split('@', 1)[0] : email;
243273
setDataStorage(data.dataStorage);
244274
setWorkStorage(`${data.workStorage}/${name}`);
275+
return data;
245276
}
246277
const fetchClusterStatusUrl = `/api`;
247278
const requestClusterStatus = useFetch(fetchClusterStatusUrl);
@@ -250,9 +281,71 @@ const GPUCard: React.FC<{ cluster: string }> = ({ cluster }) => {
250281
const data = await requestClusterStatus.get(`/teams/${selectedTeam}/clusters/${cluster}`);
251282
return data;
252283
}
284+
const [nfsStorage, setNfsStorage] = useState([]);
253285
useEffect(()=>{
254-
fetchDirectories();
255-
fetchClusterStatus().then((res)=>{
286+
fetchDirectories().then((res) => {
287+
let fetchStorage = [];
288+
let freeBytesSubPath = '/prometheus/api/v1/query?query=node_filesystem_free_bytes%7Bfstype%3D%27nfs4%27%7D';
289+
let sizeBytesSubPath = '/prometheus/api/v1/query?query=node_filesystem_size_bytes%7Bfstype%3D%27nfs4%27%7D';
290+
fetchStorage.push(fetch(`${res['prometheus']}${freeBytesSubPath}`));
291+
fetchStorage.push(fetch(`${res['prometheus']}${sizeBytesSubPath}`));
292+
let storageRes: any = [];
293+
let tmpStorage: any = [];
294+
Promise.all(fetchStorage).then((responses) => {
295+
responses.forEach(async (response: any) => {
296+
const res = await response.json();
297+
if (res['data']) {
298+
for (let item of res['data']["result"]) {
299+
let tmp = {} as any;
300+
if (item['metric']['__name__'] == "node_filesystem_size_bytes") {
301+
let mountpointName = item['metric']['mountpoint']
302+
let val = Math.floor(item['value'][1] / (Math.pow(10, 9)))
303+
tmp['mountpointName'] = mountpointName;
304+
tmp['total'] = val;
305+
}
306+
let tmpUsed = {} as any;
307+
//node_filesystem_free_bytes
308+
if (item['metric']['__name__'] == "node_filesystem_free_bytes") {
309+
let mountpointName = item['metric']['mountpoint']
310+
let val = Math.floor(item['value'][1] / (Math.pow(10, 9)))
311+
tmpUsed['mountpointName'] = mountpointName;
312+
tmpUsed['Free'] = val;
313+
}
314+
tmpStorage.push(tmp)
315+
tmpStorage.push(tmpUsed)
316+
}
317+
}
318+
//({ mountpointName: key, users: value })
319+
storageRes = tmpStorage.filter((store: any) => !checkObjIsEmpty(store));
320+
let finalStorageRes: any = [];
321+
if (storageRes && storageRes.length > 0) {
322+
finalStorageRes = _.chain(storageRes).groupBy('mountpointName').map((value, key) => {
323+
let tmpTotal: any = value.filter((item: any) => item.hasOwnProperty('total'));
324+
let tmpFree: any = value.filter((item: any) => item.hasOwnProperty('Free'));
325+
let total = 0;
326+
let used = 0;
327+
if (typeof tmpTotal[0] !== "undefined" && typeof tmpFree[0] !== "undefined") {
328+
total = tmpTotal[0]["total"];
329+
used = tmpTotal[0]["total"] - tmpFree[0]["Free"]
330+
}
331+
return {
332+
mountpointName: key, total:total, used: used
333+
}
334+
}).value();
335+
}
336+
finalStorageRes.forEach((item: any,i: number) => {
337+
if(item["mountpointName"].indexOf("dlws/nfs") !== -1){
338+
finalStorageRes.splice(i, 1);
339+
finalStorageRes.unshift(item);
340+
}
341+
});
342+
setNfsStorage(finalStorageRes.filter((store: any) => {
343+
return store['mountpointName'].indexOf(selectedTeam) !== -1 || store['mountpointName'].indexOf("dlws/nfs") !== -1;
344+
}));
345+
});
346+
});
347+
});
348+
fetchClusterStatus().then((res) => {
256349
const availableGpu = !checkObjIsEmpty(res['gpu_avaliable']) ? (Number)(sumValues(res['gpu_avaliable'])) : 0;
257350
setAvailable(availableGpu);
258351
const usedGpu = !checkObjIsEmpty(res['gpu_used']) ? (Number)(sumValues(res['gpu_used'])) : 0;
@@ -263,6 +356,50 @@ const GPUCard: React.FC<{ cluster: string }> = ({ cluster }) => {
263356
setActivate(true);
264357
})
265358
},[selectedTeam]);
359+
const tableTheme = createMuiTheme({
360+
overrides: {
361+
MuiTableCell: {
362+
root: {
363+
paddingTop: 10,
364+
paddingBottom: 10,
365+
paddingLeft:2,
366+
paddingRight:5,
367+
}
368+
}
369+
}
370+
});
371+
const BorderLinearProgress = withStyles({
372+
root: {
373+
height: 10,
374+
backgroundColor: lighten('#363636', 0.5),
375+
},
376+
bar: {
377+
borderRadius: 20,
378+
backgroundColor: green[400],
379+
},
380+
})(LinearProgress);
381+
const GenernalLinerProgress = withStyles({
382+
root: {
383+
height: 10,
384+
backgroundColor: lighten('#363636', 0.5),
385+
},
386+
bar: {
387+
borderRadius: 20,
388+
backgroundColor: yellow[800],
389+
},
390+
})(LinearProgress);
391+
const FullBorderLinearProgress = withStyles({
392+
root: {
393+
height: 10,
394+
backgroundColor: lighten('#363636', 0.5),
395+
},
396+
bar: {
397+
borderRadius: 20,
398+
backgroundColor: red[400],
399+
},
400+
})(LinearProgress);
401+
const theme = useTheme();
402+
266403
return (
267404
<Card>
268405
<CardHeader
@@ -278,6 +415,41 @@ const GPUCard: React.FC<{ cluster: string }> = ({ cluster }) => {
278415
/>
279416
<CardContent className={styles.chart}>
280417
<Chart available={available} used={used} reserved={reversed} isActive={activate} />
418+
<Divider />
419+
<Typography variant="h6" id="tableTitle" className={styles.tableTitle}>
420+
{"Storage (GB)"}
421+
</Typography>
422+
<Box minHeight={100} style={{ overflow: 'auto' }}>
423+
<MuiThemeProvider theme={tableTheme}>
424+
<Table>
425+
<TableBody>
426+
{
427+
nfsStorage.map((nfs: any, index: number) => {
428+
let nfsMountNames = nfs['mountpointName'].split("/");
429+
let mounName = "";
430+
if (nfs['mountpointName'].indexOf("dlws") !== -1) {
431+
mounName = "/data";
432+
} else {
433+
nfsMountNames.splice(0, nfsMountNames.length - 1);
434+
mounName = "/" + nfsMountNames.join('/');
435+
}
436+
let value = nfs['total'] == 0 ? 0 : (nfs['used'] / nfs['total']) * 100;
437+
return (
438+
<TableRow key={index}>
439+
<TableCell>
440+
{
441+
value < 80 ? <BorderLinearProgress value={value} variant={"determinate"}/> : value >= 80 && value < 90 ? <GenernalLinerProgress value={value} variant={"determinate"}/> : <FullBorderLinearProgress value={value} variant={"determinate"}/>
442+
}
443+
<div className={styles.tableInfo}><span>{`${mounName}`}</span><span>{`(${nfs['used']}/${nfs['total']}) ${Math.floor(value)}% used`}</span></div>
444+
</TableCell>
445+
</TableRow>
446+
)
447+
})
448+
}
449+
</TableBody>
450+
</Table>
451+
</MuiThemeProvider>
452+
</Box>
281453
</CardContent>
282454
<CardActions>
283455
<Button component={Link}

src/dashboard/src/pages/Jobs/index.tsx

+14-14
Original file line numberDiff line numberDiff line change
@@ -96,19 +96,6 @@ const Jobs: React.FC = (props: any) => {
9696
const classes = useStyles();
9797
const [value, setValue] = React.useState(0);
9898
const [refresh, setRefresh] = React.useState(window.navigator.userAgent.indexOf('Edge') == -1);
99-
useEffect(()=>{
100-
let mount = true;
101-
let timeout: any;
102-
if (window.navigator.userAgent.indexOf('Edge') != -1) {
103-
timeout = setTimeout(()=>{
104-
setRefresh(true);
105-
},1000);
106-
}
107-
return () => {
108-
mount = false;
109-
clearTimeout(timeout)
110-
}
111-
},[])
11299
const[open, setOpen] = React.useState(false);
113100
const[openApprove, setOpenApprove] = React.useState(false);
114101
const[openPause, setOpenPause] = React.useState(false);
@@ -123,6 +110,19 @@ const Jobs: React.FC = (props: any) => {
123110
const [currentJob, setCurrentJob] = React.useState({jobId:'',cluster:'',priority: 100});
124111
const deleteUrl = `/api/clusters/`;
125112
const requestDelete = useFetch(deleteUrl);
113+
useEffect(() => {
114+
let mount = true;
115+
let timeout: any;
116+
if (window.navigator.userAgent.indexOf('Edge') != -1) {
117+
timeout = setTimeout(()=>{
118+
setRefresh(true);
119+
}, 1000);
120+
}
121+
return () => {
122+
mount = false;
123+
clearTimeout(timeout)
124+
}
125+
}, [])
126126
const killJob = async () => {
127127
const body = {"status":"killing"};
128128
const data = await requestDelete.put(`${currentJob.cluster}/jobs/${currentJob.jobId}/status/`,body);
@@ -303,7 +303,7 @@ const Jobs: React.FC = (props: any) => {
303303
disabled={!isAdmin}
304304
key={rowData['jobId']}
305305
type="number"
306-
id={rowData.tableData.id}
306+
id={rowData.tableData.id.toString()}
307307
defaultValue={rowData.priority}
308308
onKeyPress={(event) => handlePriorityKeyPress(rowData, event)}
309309
onChange={(event)=>handleChangePriority(rowData, event)}

0 commit comments

Comments
 (0)