Skip to content

Commit a4c2e06

Browse files
tbeuseanm
authored and committed
Fixed read performance of cell/struct array from HDF5 MAT file
* The performance gain is obtained by removing calls to the slow HDF5 API function H5Iget_name, which was the main bottleneck. Handles of HDF5 groups or datasets are now kept open for the lifetime of the matvar_t instance.
* As a side effect, hdf5_name could be removed from matvar_t.internal, too.
* Fix reference counting in Mat_VarDuplicate.
* As reported by tbeu#65 and tbeu#198.
1 parent 6b94c1a commit a4c2e06

File tree

4 files changed

+80
-131
lines changed

4 files changed

+80
-131
lines changed

src/mat.c

+7-12
Original file line number | Diff line number | Diff line change
@@ -935,9 +935,8 @@ Mat_VarCalloc(void)
935935
matvar = NULL;
936936
} else {
937937
#if defined(MAT73) && MAT73
938-
matvar->internal->hdf5_name = NULL;
939938
matvar->internal->hdf5_ref = 0;
940-
matvar->internal->id = -1;
939+
matvar->internal->id = H5I_INVALID_HID;
941940
#endif
942941
matvar->internal->datapos = 0;
943942
matvar->internal->num_fields = 0;
@@ -1468,11 +1467,11 @@ Mat_VarDuplicate(const matvar_t *in, int opt)
14681467

14691468
if ( NULL != in->internal ) {
14701469
#if defined(MAT73) && MAT73
1471-
if ( NULL != in->internal->hdf5_name )
1472-
out->internal->hdf5_name = strdup(in->internal->hdf5_name);
1473-
14741470
out->internal->hdf5_ref = in->internal->hdf5_ref;
14751471
out->internal->id = in->internal->id;
1472+
if ( out->internal->id >= 0 ) {
1473+
H5Iinc_ref(out->internal->id);
1474+
}
14761475
#endif
14771476
out->internal->datapos = in->internal->datapos;
14781477
#if HAVE_ZLIB
@@ -1771,24 +1770,20 @@ Mat_VarFree(matvar_t *matvar)
17711770
}
17721771
#endif
17731772
#if defined(MAT73) && MAT73
1774-
if ( -1 < matvar->internal->id ) {
1773+
if ( H5I_INVALID_HID != matvar->internal->id ) {
17751774
switch ( H5Iget_type(matvar->internal->id) ) {
17761775
case H5I_GROUP:
17771776
H5Gclose(matvar->internal->id);
1778-
matvar->internal->id = -1;
1777+
matvar->internal->id = H5I_INVALID_HID;
17791778
break;
17801779
case H5I_DATASET:
17811780
H5Dclose(matvar->internal->id);
1782-
matvar->internal->id = -1;
1781+
matvar->internal->id = H5I_INVALID_HID;
17831782
break;
17841783
default:
17851784
break;
17861785
}
17871786
}
1788-
if ( NULL != matvar->internal->hdf5_name ) {
1789-
free(matvar->internal->hdf5_name);
1790-
matvar->internal->hdf5_name = NULL;
1791-
}
17921787
#endif
17931788
if ( NULL != matvar->internal->fieldnames && matvar->internal->num_fields > 0 ) {
17941789
size_t i;

src/mat73.c

+31-76
Original file line number | Diff line number | Diff line change
@@ -460,20 +460,10 @@ static int
460460
Mat_H5ReadVarInfo(matvar_t *matvar, hid_t dset_id)
461461
{
462462
hid_t attr_id, type_id;
463-
ssize_t name_len;
464463
int err = MATIO_E_NO_ERROR;
465464
char *class_str;
466465

467-
/* Get the HDF5 name of the variable */
468-
name_len = H5Iget_name(dset_id, NULL, 0);
469-
if ( name_len > 0 ) {
470-
matvar->internal->hdf5_name = (char *)malloc(name_len + 1);
471-
(void)H5Iget_name(dset_id, matvar->internal->hdf5_name, name_len + 1);
472-
} else {
473-
/* Can not get an internal name, so leave the identifier open */
474-
matvar->internal->id = dset_id;
475-
}
476-
466+
matvar->internal->id = dset_id;
477467
attr_id = H5Aopen_by_name(dset_id, ".", "MATLAB_class", H5P_DEFAULT, H5P_DEFAULT);
478468
type_id = H5Aget_type(attr_id);
479469
class_str = (char *)calloc(H5Tget_size(type_id) + 1, 1);
@@ -922,19 +912,17 @@ Mat_H5ReadGroupInfo(mat_t *mat, matvar_t *matvar, hid_t dset_id)
922912
struct ReadGroupInfoIterData group_data = {0, NULL};
923913

924914
/* First iteration to retrieve number of relevant links */
925-
herr = H5Literate_by_name(dset_id, matvar->internal->hdf5_name, H5_INDEX_NAME,
926-
H5_ITER_NATIVE, NULL, Mat_H5ReadGroupInfoIterate,
927-
(void *)&group_data, H5P_DEFAULT);
915+
herr = H5Literate(dset_id, H5_INDEX_NAME, H5_ITER_NATIVE, NULL,
916+
Mat_H5ReadGroupInfoIterate, (void *)&group_data);
928917
if ( herr > 0 && group_data.nfields > 0 ) {
929918
matvar->internal->fieldnames = (char **)calloc(
930919
(size_t)(group_data.nfields), sizeof(*matvar->internal->fieldnames));
931920
group_data.nfields = 0;
932921
group_data.matvar = matvar;
933922
if ( matvar->internal->fieldnames != NULL ) {
934923
/* Second iteration to fill fieldnames */
935-
H5Literate_by_name(dset_id, matvar->internal->hdf5_name, H5_INDEX_NAME,
936-
H5_ITER_NATIVE, NULL, Mat_H5ReadGroupInfoIterate,
937-
(void *)&group_data, H5P_DEFAULT);
924+
H5Literate(dset_id, H5_INDEX_NAME, H5_ITER_NATIVE, NULL,
925+
Mat_H5ReadGroupInfoIterate, (void *)&group_data);
938926
}
939927
matvar->internal->num_fields = (unsigned)group_data.nfields;
940928
nfields = group_data.nfields;
@@ -1085,19 +1073,18 @@ Mat_H5ReadGroupInfo(mat_t *mat, matvar_t *matvar, hid_t dset_id)
10851073
} else {
10861074
err = MATIO_E_OUT_OF_MEMORY;
10871075
}
1076+
H5Dclose(field_id);
10881077
} else {
10891078
fields[k] = Mat_VarCalloc();
10901079
fields[k]->name = strdup(matvar->internal->fieldnames[k]);
10911080
err = Mat_H5ReadDatasetInfo(mat, fields[k], field_id);
10921081
}
1093-
H5Dclose(field_id);
10941082
} else if ( object_info.type == H5O_TYPE_GROUP ) {
10951083
field_id = H5Gopen(dset_id, matvar->internal->fieldnames[k], H5P_DEFAULT);
10961084
if ( -1 < field_id ) {
10971085
fields[k] = Mat_VarCalloc();
10981086
fields[k]->name = strdup(matvar->internal->fieldnames[k]);
10991087
err = Mat_H5ReadGroupInfo(mat, fields[k], field_id);
1100-
H5Gclose(field_id);
11011088
}
11021089
}
11031090
if ( err ) {
@@ -1225,8 +1212,10 @@ Mat_H5ReadNextReferenceData(matvar_t *matvar, mat_t *mat)
12251212
int err = MATIO_E_NO_ERROR;
12261213
size_t nelems = 1;
12271214

1228-
if ( NULL == matvar || NULL == matvar->internal || matvar->internal->id < 0 )
1229-
return err;
1215+
if ( NULL == mat || NULL == matvar )
1216+
return MATIO_E_BAD_ARGUMENT;
1217+
if ( matvar->internal->id < 0 )
1218+
return MATIO_E_FAIL_TO_IDENTIFY;
12301219

12311220
/* If the datatype with references is a cell, we've already read info into
12321221
* the variable data, so just loop over each cell element and call
@@ -1273,7 +1262,7 @@ Mat_H5ReadNextReferenceData(matvar_t *matvar, mat_t *mat)
12731262
err |= Mul(&matvar->nbytes, nelems, matvar->data_size);
12741263
if ( err || matvar->nbytes < 1 ) {
12751264
H5Dclose(matvar->internal->id);
1276-
matvar->internal->id = -1;
1265+
matvar->internal->id = H5I_INVALID_HID;
12771266
break;
12781267
}
12791268

@@ -1286,8 +1275,6 @@ Mat_H5ReadNextReferenceData(matvar_t *matvar, mat_t *mat)
12861275
err = Mat_H5ReadData(matvar->internal->id, data_type_id, H5S_ALL, H5S_ALL,
12871276
matvar->isComplex, matvar->data);
12881277
}
1289-
H5Dclose(matvar->internal->id);
1290-
matvar->internal->id = -1;
12911278
break;
12921279
}
12931280
case H5I_GROUP: {
@@ -1303,7 +1290,7 @@ Mat_H5ReadNextReferenceData(matvar_t *matvar, mat_t *mat)
13031290
fields = (matvar_t **)matvar->data;
13041291
for ( i = 0; i < nelems; i++ ) {
13051292
if ( NULL != fields[i] && 0 < fields[i]->internal->hdf5_ref &&
1306-
-1 < fields[i]->internal->id ) {
1293+
fields[i]->internal->id >= 0 ) {
13071294
/* Dataset of references */
13081295
err = Mat_H5ReadNextReferenceData(fields[i], mat);
13091296
} else {
@@ -2679,8 +2666,8 @@ Mat_VarRead73(mat_t *mat, matvar_t *matvar)
26792666

26802667
if ( NULL == mat || NULL == matvar )
26812668
return MATIO_E_BAD_ARGUMENT;
2682-
else if ( NULL == matvar->internal->hdf5_name && 0 > matvar->internal->id )
2683-
return MATIO_E_READ_VARIABLE_DOES_NOT_EXIST;
2669+
else if ( matvar->internal->id < 0 )
2670+
return MATIO_E_FAIL_TO_IDENTIFY;
26842671

26852672
fid = *(hid_t *)mat->fp;
26862673

@@ -2711,15 +2698,9 @@ Mat_VarRead73(mat_t *mat, matvar_t *matvar)
27112698
if ( nelems < 1 )
27122699
break;
27132700

2714-
if ( NULL != matvar->internal->hdf5_name ) {
2715-
ref_id = H5Dopen(fid, matvar->internal->hdf5_name, H5P_DEFAULT);
2716-
if ( ref_id == H5I_INVALID_HID ) {
2717-
Mat_Critical("Unexpected error from H5Dopen");
2718-
}
2719-
} else {
2720-
ref_id = matvar->internal->id;
2721-
H5Iinc_ref(ref_id);
2722-
}
2701+
ref_id = matvar->internal->id;
2702+
H5Iinc_ref(ref_id);
2703+
27232704
if ( 0 < matvar->internal->hdf5_ref ) {
27242705
dset_id = H5RDEREFERENCE(ref_id, H5R_OBJECT, &matvar->internal->hdf5_ref);
27252706
} else {
@@ -2754,15 +2735,9 @@ Mat_VarRead73(mat_t *mat, matvar_t *matvar)
27542735
return err;
27552736
}
27562737

2757-
if ( NULL != matvar->internal->hdf5_name ) {
2758-
dset_id = H5Dopen(fid, matvar->internal->hdf5_name, H5P_DEFAULT);
2759-
if ( dset_id == H5I_INVALID_HID ) {
2760-
Mat_Critical("Unexpected error from H5Dopen");
2761-
}
2762-
} else {
2763-
dset_id = matvar->internal->id;
2764-
H5Iinc_ref(dset_id);
2765-
}
2738+
dset_id = matvar->internal->id;
2739+
H5Iinc_ref(dset_id);
2740+
27662741
if ( matvar->nbytes > 0 ) {
27672742
matvar->data = malloc(matvar->nbytes);
27682743
if ( NULL != matvar->data ) {
@@ -2799,7 +2774,7 @@ Mat_VarRead73(mat_t *mat, matvar_t *matvar)
27992774
fields = (matvar_t **)matvar->data;
28002775
for ( i = 0; i < nelems_x_nfields; i++ ) {
28012776
if ( NULL != fields[i] && 0 < fields[i]->internal->hdf5_ref &&
2802-
-1 < fields[i]->internal->id ) {
2777+
fields[i]->internal->id >= 0 ) {
28032778
/* Dataset of references */
28042779
err = Mat_H5ReadNextReferenceData(fields[i], mat);
28052780
} else {
@@ -2836,15 +2811,8 @@ Mat_VarRead73(mat_t *mat, matvar_t *matvar)
28362811
hid_t sparse_dset_id;
28372812
mat_sparse_t *sparse_data = (mat_sparse_t *)calloc(1, sizeof(*sparse_data));
28382813

2839-
if ( NULL != matvar->internal->hdf5_name ) {
2840-
dset_id = H5Gopen(fid, matvar->internal->hdf5_name, H5P_DEFAULT);
2841-
if ( dset_id == H5I_INVALID_HID ) {
2842-
Mat_Critical("Unexpected error from H5Dopen");
2843-
}
2844-
} else {
2845-
dset_id = matvar->internal->id;
2846-
H5Iinc_ref(dset_id);
2847-
}
2814+
dset_id = matvar->internal->id;
2815+
H5Iinc_ref(dset_id);
28482816

28492817
if ( H5Lexists(dset_id, "ir", H5P_DEFAULT) ) {
28502818
size_t *dims;
@@ -3010,7 +2978,7 @@ Mat_VarReadData73(mat_t *mat, matvar_t *matvar, void *data, int *start, int *str
30102978
if ( NULL == mat || NULL == matvar || NULL == data || NULL == start || NULL == stride ||
30112979
NULL == edge )
30122980
return MATIO_E_BAD_ARGUMENT;
3013-
else if ( NULL == matvar->internal->hdf5_name && 0 > matvar->internal->id )
2981+
else if ( matvar->internal->id < 0 )
30142982
return MATIO_E_FAIL_TO_IDENTIFY;
30152983

30162984
fid = *(hid_t *)mat->fp;
@@ -3041,15 +3009,9 @@ Mat_VarReadData73(mat_t *mat, matvar_t *matvar, void *data, int *start, int *str
30413009
case MAT_C_UINT16:
30423010
case MAT_C_INT8:
30433011
case MAT_C_UINT8:
3044-
if ( NULL != matvar->internal->hdf5_name ) {
3045-
ref_id = H5Dopen(fid, matvar->internal->hdf5_name, H5P_DEFAULT);
3046-
if ( ref_id == H5I_INVALID_HID ) {
3047-
Mat_Critical("Unexpected error from H5Dopen");
3048-
}
3049-
} else {
3050-
ref_id = matvar->internal->id;
3051-
H5Iinc_ref(ref_id);
3052-
}
3012+
ref_id = matvar->internal->id;
3013+
H5Iinc_ref(ref_id);
3014+
30533015
if ( 0 < matvar->internal->hdf5_ref ) {
30543016
dset_id = H5RDEREFERENCE(ref_id, H5R_OBJECT, &matvar->internal->hdf5_ref);
30553017
} else {
@@ -3101,7 +3063,7 @@ Mat_VarReadDataLinear73(mat_t *mat, matvar_t *matvar, void *data, int start, int
31013063

31023064
if ( NULL == mat || NULL == matvar || NULL == data )
31033065
return MATIO_E_BAD_ARGUMENT;
3104-
else if ( NULL == matvar->internal->hdf5_name && 0 > matvar->internal->id )
3066+
else if ( matvar->internal->id < 0 )
31053067
return MATIO_E_FAIL_TO_IDENTIFY;
31063068

31073069
fid = *(hid_t *)mat->fp;
@@ -3145,15 +3107,9 @@ Mat_VarReadDataLinear73(mat_t *mat, matvar_t *matvar, void *data, int start, int
31453107
}
31463108
free(dimp);
31473109

3148-
if ( NULL != matvar->internal->hdf5_name ) {
3149-
dset_id = H5Dopen(fid, matvar->internal->hdf5_name, H5P_DEFAULT);
3150-
if ( dset_id == H5I_INVALID_HID ) {
3151-
Mat_Critical("Unexpected error from H5Dopen");
3152-
}
3153-
} else {
3154-
dset_id = matvar->internal->id;
3155-
H5Iinc_ref(dset_id);
3156-
}
3110+
dset_id = matvar->internal->id;
3111+
H5Iinc_ref(dset_id);
3112+
31573113
dset_space = H5Dget_space(dset_id);
31583114
H5Sselect_elements(dset_space, H5S_SELECT_SET, (size_t)dset_edge, points);
31593115
free(points);
@@ -3280,7 +3236,6 @@ Mat_VarReadNextInfoIterate(hid_t id, const char *name, const H5L_info_t *info, v
32803236

32813237
dset_id = H5Gopen(id, name, H5P_DEFAULT);
32823238
err = Mat_H5ReadGroupInfo(mat, matvar, dset_id);
3283-
H5Gclose(dset_id);
32843239
if ( err ) {
32853240
Mat_VarFree(matvar);
32863241
return -1;

src/matio_private.h

-1
Original file line number | Diff line number | Diff line change
@@ -127,7 +127,6 @@ struct _mat_t
127127
struct matvar_internal
128128
{
129129
#if defined(MAT73) && MAT73
130-
char *hdf5_name; /**< Name */
131130
hobj_ref_t hdf5_ref; /**< Reference */
132131
hid_t id; /**< Id */
133132
#endif

0 commit comments

Comments
 (0)