diff --git a/MatFileHandler.Tests/MatFileReaderHdfTests.cs b/MatFileHandler.Tests/MatFileReaderHdfTests.cs index 9b086e4..3727358 100644 --- a/MatFileHandler.Tests/MatFileReaderHdfTests.cs +++ b/MatFileHandler.Tests/MatFileReaderHdfTests.cs @@ -1,5 +1,6 @@ using NUnit.Framework; using System.IO; +using System.Linq; using System.Numerics; namespace MatFileHandler.Tests @@ -165,6 +166,44 @@ namespace MatFileHandler.Tests CheckComplexLimits(array as IArrayOf>, CommonData.UInt64Limits); } + /// + /// Test reading a structure array. + /// + [Test] + public void TestStruct() + { + var matFile = ReadHdfTestFile("struct"); + var structure = matFile["struct_"].Value as IStructureArray; + Assert.That(structure, Is.Not.Null); + Assert.That(structure.FieldNames, Is.EquivalentTo(new[] { "x", "y" })); + var element = structure[0, 0]; + Assert.That(element.ContainsKey("x"), Is.True); + Assert.That(element.Count, Is.EqualTo(2)); + Assert.That(element.TryGetValue("x", out var _), Is.True); + Assert.That(element.TryGetValue("z", out var _), Is.False); + Assert.That(element.Keys, Has.Exactly(2).Items); + Assert.That(element.Values, Has.Exactly(2).Items); + var keys = element.Select(pair => pair.Key); + Assert.That(keys, Is.EquivalentTo(new[] { "x", "y" })); + + Assert.That((element["x"] as IArrayOf)?[0], Is.EqualTo(12.345)); + + Assert.That((structure["x", 0, 0] as IArrayOf)?[0], Is.EqualTo(12.345)); + Assert.That((structure["y", 0, 0] as ICharArray)?.String, Is.EqualTo("abc")); + Assert.That((structure["x", 1, 0] as ICharArray)?.String, Is.EqualTo("xyz")); + Assert.That(structure["y", 1, 0].IsEmpty, Is.True); + Assert.That((structure["x", 0, 1] as IArrayOf)?[0], Is.EqualTo(2.0)); + Assert.That((structure["y", 0, 1] as IArrayOf)?[0], Is.EqualTo(13.0)); + Assert.That(structure["x", 1, 1].IsEmpty, Is.True); + Assert.That((structure["y", 1, 1] as ICharArray)?[0, 0], Is.EqualTo('a')); + Assert.That(((structure["x", 0, 2] as ICellArray)?[0] as ICharArray)?.String, Is.EqualTo("x")); + Assert.That(((structure["x", 0, 2] as ICellArray)?[1] as ICharArray)?.String, Is.EqualTo("yz")); + Assert.That((structure["y", 0, 2] as IArrayOf)?.Dimensions, Is.EqualTo(new[] { 2, 3 })); + Assert.That((structure["y", 0, 2] as IArrayOf)?[0, 2], Is.EqualTo(3.0)); + Assert.That((structure["x", 1, 2] as IArrayOf)?[0], Is.EqualTo(1.5f)); + Assert.That(structure["y", 1, 2].IsEmpty, Is.True); + } + private static void CheckComplexLimits(IArrayOf> array, T[] limits) where T : struct { diff --git a/MatFileHandler.Tests/test-data/hdf/struct.mat b/MatFileHandler.Tests/test-data/hdf/struct.mat new file mode 100644 index 0000000..abe6e63 Binary files /dev/null and b/MatFileHandler.Tests/test-data/hdf/struct.mat differ diff --git a/MatFileHandler/HdfFileReader.cs b/MatFileHandler/HdfFileReader.cs index d8b348c..8181b2f 100644 --- a/MatFileHandler/HdfFileReader.cs +++ b/MatFileHandler/HdfFileReader.cs @@ -1,4 +1,5 @@ using System; +using System.Collections; using System.Collections.Generic; using System.Linq; using System.Numerics; @@ -8,6 +9,23 @@ using HDF.PInvoke; namespace MatFileHandler { + internal enum HdfMatlabClass + { + MEmpty, + MChar, + MInt8, + MUInt8, + MInt16, + MUInt16, + MInt32, + MUInt32, + MInt64, + MUInt64, + MSingle, + MDouble, + MCell, + } + internal class HdfArray : IArray { /// @@ -49,6 +67,25 @@ namespace MatFileHandler public bool IsEmpty => Dimensions.Length == 0; } + internal class HdfCellArray : HdfArray, ICellArray + { + public HdfCellArray(int[] dimensions, IEnumerable elements) + : base(dimensions) + { + Data = elements.ToArray(); + } + + /// + public IArray[] Data { get; } + + /// + public IArray this[params int[] indices] + { + get => Data[Dimensions.DimFlatten(indices)]; + set => Data[Dimensions.DimFlatten(indices)] = value; + } + } + /// /// A numerical array. /// @@ -174,6 +211,139 @@ namespace MatFileHandler private string StringData { get; set; } } + internal class HdfStructureArray : HdfArray, IStructureArray + { + public HdfStructureArray( + int[] dimensions, + Dictionary> fields) + : base(dimensions) + { + Fields = fields; + } + + /// + public IEnumerable FieldNames => Fields.Keys; + + /// + /// Gets null: not implemented. + /// + public IReadOnlyDictionary[] Data => null; + + /// + /// Gets a dictionary that maps field names to lists of values. + /// + internal Dictionary> Fields { get; } + + /// + public IArray this[string field, params int[] list] + { + get => Fields[field][Dimensions.DimFlatten(list)]; + set => Fields[field][Dimensions.DimFlatten(list)] = value; + } + + /// + IReadOnlyDictionary IArrayOf>.this[params int[] list] + { + get => ExtractStructure(Dimensions.DimFlatten(list)); + set => throw new NotSupportedException( + "Cannot set structure elements via this[params int[]] indexer. Use this[string, int[]] instead."); + } + + private IReadOnlyDictionary ExtractStructure(int i) + { + return new HdfStructureArrayElement(this, i); + } + + /// + /// Provides access to an element of a structure array by fields. + /// + internal class HdfStructureArrayElement : IReadOnlyDictionary + { + /// + /// Initializes a new instance of the class. + /// + /// Parent structure array. + /// Index in the structure array. + internal HdfStructureArrayElement(HdfStructureArray parent, int index) + { + Parent = parent; + Index = index; + } + + /// + /// Gets the number of fields. + /// + public int Count => Parent.Fields.Count; + + /// + /// Gets a list of all fields. + /// + public IEnumerable Keys => Parent.Fields.Keys; + + /// + /// Gets a list of all values. + /// + public IEnumerable Values => Parent.Fields.Values.Select(array => array[Index]); + + private HdfStructureArray Parent { get; } + + private int Index { get; } + + /// + /// Gets the value of a given field. + /// + /// Field name. + /// The corresponding value. + public IArray this[string key] => Parent.Fields[key][Index]; + + /// + /// Enumerates fieldstructure/value pairs of the dictionary. + /// + /// All field/value pairs in the structure. + public IEnumerator> GetEnumerator() + { + foreach (var field in Parent.Fields) + { + yield return new KeyValuePair(field.Key, field.Value[Index]); + } + } + + /// + /// Enumerates field/value pairs of the structure. + /// + /// All field/value pairs in the structure. + IEnumerator IEnumerable.GetEnumerator() + { + return GetEnumerator(); + } + + /// + /// Checks if the structure has a given field. + /// + /// Field name + /// True iff the structure has a given field. + public bool ContainsKey(string key) => Parent.Fields.ContainsKey(key); + + /// + /// Tries to get the value of a given field. + /// + /// Field name. + /// Value (or null if the field is not present). + /// Success status of the query. + public bool TryGetValue(string key, out IArray value) + { + var success = Parent.Fields.TryGetValue(key, out var array); + if (!success) + { + value = default(IArray); + return false; + } + value = array[Index]; + return true; + } + } + } + internal class HdfFileReader { private long fileId; @@ -219,6 +389,15 @@ namespace MatFileHandler variables.Add(new MatVariable(value, variableName, false)); break; case H5O.type_t.GROUP: + if (variableName == "#refs#") + { + return 0; + } + var groupId = H5G.open(group, variableName); + var groupValue = ReadGroup(groupId); + variables.Add(new MatVariable(groupValue, variableName, false)); + break; + default: throw new NotImplementedException(); } return 0; @@ -241,6 +420,7 @@ namespace MatFileHandler H5A.read(attributeId, classId, buf); var matlabClassNameBytes = new byte[(int)typeIdSize]; Marshal.Copy(buf, matlabClassNameBytes, 0, (int)typeIdSize); + Marshal.FreeHGlobal(buf); return Encoding.ASCII.GetString(matlabClassNameBytes); } @@ -254,30 +434,155 @@ namespace MatFileHandler return dims.Select(x => (int)x).ToArray(); } - private static ArrayType ArrayTypeFromMatlabClassName(string matlabClassName) + private static HdfMatlabClass ArrayTypeFromMatlabClassName(string matlabClassName) { switch (matlabClassName) { + case "canonical empty": + return HdfMatlabClass.MEmpty; case "char": - return ArrayType.MxChar; + return HdfMatlabClass.MChar; case "int8": - return ArrayType.MxInt8; + return HdfMatlabClass.MInt8; case "uint8": - return ArrayType.MxUInt8; + return HdfMatlabClass.MUInt8; case "int16": - return ArrayType.MxInt16; + return HdfMatlabClass.MInt16; case "uint16": - return ArrayType.MxUInt16; + return HdfMatlabClass.MUInt16; case "int32": - return ArrayType.MxInt32; + return HdfMatlabClass.MInt32; case "uint32": - return ArrayType.MxUInt32; + return HdfMatlabClass.MUInt32; case "int64": - return ArrayType.MxInt64; + return HdfMatlabClass.MInt64; case "uint64": - return ArrayType.MxUInt64; + return HdfMatlabClass.MUInt64; + case "single": + return HdfMatlabClass.MSingle; case "double": - return ArrayType.MxDouble; + return HdfMatlabClass.MDouble; + case "cell": + return HdfMatlabClass.MCell; + } + throw new NotImplementedException(); + } + + private static int GroupFieldNamesIterator(long group, IntPtr name, ref H5L.info_t info, IntPtr data) + { + var nameString = Marshal.PtrToStringAnsi(name); + H5O.info_t objectInfo = default(H5O.info_t); + H5O.get_info_by_name(group, nameString, ref objectInfo, H5P.DEFAULT); + return 0; + } + + private static IArray ReadGroup(long groupId) + { + var matlabClass = GetMatlabClassOfDataset(groupId); + if (matlabClass == "struct") + { + return ReadStruct(groupId); + } + throw new NotImplementedException(); + } + + private static string[] ReadFieldNames(long groupId) + { + // Try to read fields from MATLAB_fields. + var attrId = H5A.open_by_name(groupId, ".", "MATLAB_fields"); + if (attrId == 0) + { + throw new NotImplementedException(); + } + var spaceId = H5A.get_space(attrId); + var rank = H5S.get_simple_extent_ndims(spaceId); + var dims = new ulong[rank]; + H5S.get_simple_extent_dims(spaceId, dims, null); + Array.Reverse(dims); + var dimensions = dims.Select(x => (int)x).ToArray(); + var numberOfFields = dimensions.NumberOfElements(); + + var field_id = H5A.get_type(attrId); + + var fieldNamePointersSizeInBytes = numberOfFields * Marshal.SizeOf(default(H5T.hvl_t)); + var fieldNamesBuf = Marshal.AllocHGlobal(fieldNamePointersSizeInBytes); + H5A.read(attrId, field_id, fieldNamesBuf); + + var fieldNamePointers = new IntPtr[numberOfFields * 2]; + Marshal.Copy(fieldNamesBuf, fieldNamePointers, 0, numberOfFields * 2); + Marshal.FreeHGlobal(fieldNamesBuf); + var fieldNames = new string[numberOfFields]; + for (var i = 0; i < numberOfFields; i++) + { + var stringLength = fieldNamePointers[i * 2]; + var stringPointer = fieldNamePointers[i * 2 + 1]; + fieldNames[i] = Marshal.PtrToStringAnsi(stringPointer, (int)stringLength); + } + return fieldNames; + } + + private static H5O.type_t GetObjectType(long groupId, string fieldName) + { + var objectInfo = default(H5O.info_t); + H5O.get_info_by_name(groupId, fieldName, ref objectInfo); + return objectInfo.type; + } + + private static IArray ReadStruct(long groupId) + { + var fieldNames = ReadFieldNames(groupId); + var firstObjectType = GetObjectType(groupId, fieldNames[0]); + if (firstObjectType == H5O.type_t.DATASET) + { + var firstFieldId = H5D.open(groupId, fieldNames[0]); + var firstFieldTypeId = H5D.get_type(firstFieldId); + if (H5T.get_class(firstFieldTypeId) == H5T.class_t.REFERENCE) + { + if (H5A.exists_by_name(firstFieldId, ".", "MATLAB_class") != 0) + { + throw new NotImplementedException(); + } + else + { + var dimensions = GetDimensionsOfDataset(firstFieldId); + var numberOfElements = dimensions.NumberOfElements(); + var dictionary = new Dictionary>(); + foreach (var fieldName in fieldNames) + { + var fieldType = GetObjectType(groupId, fieldName); + dictionary[fieldName] = new List(); + switch (fieldType) + { + case H5O.type_t.DATASET: + var fieldId = H5D.open(groupId, fieldName); + var buf = Marshal.AllocHGlobal(Marshal.SizeOf(default(IntPtr)) * numberOfElements); + H5D.read(fieldId, H5T.STD_REF_OBJ, H5S.ALL, H5S.ALL, H5P.DEFAULT, buf); + for (var i = 0; i < numberOfElements; i++) + { + var fieldDataSet = H5R.dereference( + fieldId, + H5P.DEFAULT, + H5R.type_t.OBJECT, + buf + (i * Marshal.SizeOf(default(IntPtr)))); + var dataset = ReadDataset(fieldDataSet); + dictionary[fieldName].Add(dataset); + } + break; + default: + throw new NotImplementedException(); + } + } + return new HdfStructureArray(dimensions, dictionary); + } + } + else + { + throw new NotImplementedException(); + } + } + else + { + throw new NotImplementedException(); } throw new NotImplementedException(); } @@ -291,84 +596,107 @@ namespace MatFileHandler switch (arrayType) { - case ArrayType.MxChar: + case HdfMatlabClass.MEmpty: + return HdfArray.Empty(); + case HdfMatlabClass.MChar: return ReadCharArray(datasetId, dims); - case ArrayType.MxInt8: + case HdfMatlabClass.MInt8: return ReadNumericalArray(datasetId, dims, arrayType); - case ArrayType.MxUInt8: + case HdfMatlabClass.MUInt8: return ReadNumericalArray(datasetId, dims, arrayType); - case ArrayType.MxInt16: + case HdfMatlabClass.MInt16: return ReadNumericalArray(datasetId, dims, arrayType); - case ArrayType.MxUInt16: + case HdfMatlabClass.MUInt16: return ReadNumericalArray(datasetId, dims, arrayType); - case ArrayType.MxInt32: + case HdfMatlabClass.MInt32: return ReadNumericalArray(datasetId, dims, arrayType); - case ArrayType.MxUInt32: + case HdfMatlabClass.MUInt32: return ReadNumericalArray(datasetId, dims, arrayType); - case ArrayType.MxInt64: + case HdfMatlabClass.MInt64: return ReadNumericalArray(datasetId, dims, arrayType); - case ArrayType.MxUInt64: + case HdfMatlabClass.MUInt64: return ReadNumericalArray(datasetId, dims, arrayType); - case ArrayType.MxSingle: + case HdfMatlabClass.MSingle: return ReadNumericalArray(datasetId, dims, arrayType); - case ArrayType.MxDouble: + case HdfMatlabClass.MDouble: return ReadNumericalArray(datasetId, dims, arrayType); + case HdfMatlabClass.MCell: + return ReadCellArray(datasetId, dims); } throw new NotImplementedException($"Unknown array type: {arrayType}."); } - private static int SizeOfArrayElement(ArrayType arrayType) + private static IArray ReadCellArray(long datasetId, int[] dims) + { + var numberOfElements = dims.NumberOfElements(); + var buf = Marshal.AllocHGlobal(Marshal.SizeOf(default(IntPtr)) * numberOfElements); + H5D.read(datasetId, H5T.STD_REF_OBJ, H5S.ALL, H5S.ALL, H5P.DEFAULT, buf); + var elements = new IArray[numberOfElements]; + for (var i = 0; i < numberOfElements; i++) + { + var fieldDataSet = H5R.dereference( + datasetId, + H5P.DEFAULT, + H5R.type_t.OBJECT, + buf + (i * Marshal.SizeOf(default(IntPtr)))); + var dataset = ReadDataset(fieldDataSet); + elements[i] = dataset; + } + return new HdfCellArray(dims, elements); + } + + private static int SizeOfArrayElement(HdfMatlabClass arrayType) { switch (arrayType) { - case ArrayType.MxInt8: - case ArrayType.MxUInt8: + case HdfMatlabClass.MInt8: + case HdfMatlabClass.MUInt8: return 1; - case ArrayType.MxInt16: - case ArrayType.MxUInt16: + case HdfMatlabClass.MInt16: + case HdfMatlabClass.MUInt16: return 2; - case ArrayType.MxInt32: - case ArrayType.MxUInt32: - case ArrayType.MxSingle: + case HdfMatlabClass.MInt32: + case HdfMatlabClass.MUInt32: + case HdfMatlabClass.MSingle: return 4; - case ArrayType.MxInt64: - case ArrayType.MxUInt64: - case ArrayType.MxDouble: + case HdfMatlabClass.MInt64: + case HdfMatlabClass.MUInt64: + case HdfMatlabClass.MDouble: return 8; } throw new NotImplementedException(); } - private static long H5tTypeFromArrayType(ArrayType arrayType) + private static long H5tTypeFromHdfMatlabClass(HdfMatlabClass arrayType) { switch (arrayType) { - case ArrayType.MxInt8: + case HdfMatlabClass.MInt8: return H5T.NATIVE_INT8; - case ArrayType.MxUInt8: + case HdfMatlabClass.MUInt8: return H5T.NATIVE_UINT8; - case ArrayType.MxInt16: + case HdfMatlabClass.MInt16: return H5T.NATIVE_INT16; - case ArrayType.MxUInt16: + case HdfMatlabClass.MUInt16: return H5T.NATIVE_UINT16; - case ArrayType.MxInt32: + case HdfMatlabClass.MInt32: return H5T.NATIVE_INT32; - case ArrayType.MxUInt32: + case HdfMatlabClass.MUInt32: return H5T.NATIVE_UINT32; - case ArrayType.MxInt64: + case HdfMatlabClass.MInt64: return H5T.NATIVE_INT64; - case ArrayType.MxUInt64: + case HdfMatlabClass.MUInt64: return H5T.NATIVE_UINT64; - case ArrayType.MxSingle: + case HdfMatlabClass.MSingle: return H5T.NATIVE_FLOAT; - case ArrayType.MxDouble: + case HdfMatlabClass.MDouble: return H5T.NATIVE_DOUBLE; } throw new NotImplementedException(); } - private static T[] ConvertDataToProperType(byte[] bytes, ArrayType arrayType) + private static T[] ConvertDataToProperType(byte[] bytes, HdfMatlabClass arrayType) where T : struct { var length = bytes.Length; @@ -384,10 +712,11 @@ namespace MatFileHandler H5D.read(datasetId, elementType, H5S.ALL, H5S.ALL, H5P.DEFAULT, dataBuffer); var data = new byte[dataSize]; Marshal.Copy(dataBuffer, data, 0, dataSize); + Marshal.FreeHGlobal(dataBuffer); return data; } - private static IArray ReadNumericalArray(long datasetId, int[] dims, ArrayType arrayType) + private static IArray ReadNumericalArray(long datasetId, int[] dims, HdfMatlabClass arrayType) where T : struct { var numberOfElements = dims.NumberOfElements(); @@ -398,7 +727,7 @@ namespace MatFileHandler var isCompound = dataSetTypeClass == H5T.class_t.COMPOUND; if (isCompound) { - var h5Type = H5tTypeFromArrayType(arrayType); + var h5Type = H5tTypeFromHdfMatlabClass(arrayType); var h5Size = H5T.get_size(h5Type); var h5tComplexReal = H5T.create(H5T.class_t.COMPOUND, h5Size); H5T.insert(h5tComplexReal, "real", IntPtr.Zero, h5Type); @@ -408,7 +737,7 @@ namespace MatFileHandler H5T.insert(h5tComplexImaginary, "imag", IntPtr.Zero, h5Type); var imaginaryData = ReadDataset(datasetId, h5tComplexImaginary, dataSize); var convertedImaginaryData = ConvertDataToProperType(imaginaryData, arrayType); - if (arrayType == ArrayType.MxDouble) + if (arrayType == HdfMatlabClass.MDouble) { var complexData = (convertedRealData as double[]) @@ -429,7 +758,7 @@ namespace MatFileHandler { throw new Exception("Data size mismatch."); } - var data = ReadDataset(datasetId, H5tTypeFromArrayType(arrayType), dataSize); + var data = ReadDataset(datasetId, H5tTypeFromHdfMatlabClass(arrayType), dataSize); var convertedData = ConvertDataToProperType(data, arrayType); return new HdfNumericalArrayOf(dims, convertedData); }