diff --git a/MatFileHandler/Hdf/Attribute.cs b/MatFileHandler/Hdf/Attribute.cs index 374fe78..a72bf76 100644 --- a/MatFileHandler/Hdf/Attribute.cs +++ b/MatFileHandler/Hdf/Attribute.cs @@ -1,9 +1,10 @@ using System; +using System.Runtime.InteropServices; using HDF.PInvoke; namespace MatFileHandler.Hdf { - public struct Attribute : IDisposable + internal struct Attribute : IDisposable { public long Id { get; private set; } @@ -20,5 +21,30 @@ namespace MatFileHandler.Hdf Id = -1; } } + + public bool ReadBool() + { + using (var h = new MemoryHandle(sizeof(int))) + { + H5A.read(Id, H5T.NATIVE_INT, h.Handle); + var result = Marshal.ReadInt32(h.Handle); + return result != 0; + } + } + + public void ReadToHandle(MemoryHandle handle, Type type) + { + H5A.read(Id, type.Id, handle.Handle); + } + + public Type GetHdfType() + { + return new Type(H5A.get_type(Id)); + } + + public Space GetSpace() + { + return new Space(H5A.get_space(Id)); + } } } \ No newline at end of file diff --git a/MatFileHandler/Hdf/Class.cs b/MatFileHandler/Hdf/Class.cs new file mode 100644 index 0000000..d735e9c --- /dev/null +++ b/MatFileHandler/Hdf/Class.cs @@ -0,0 +1,46 @@ +using System; +using HDF.PInvoke; + +namespace MatFileHandler.Hdf +{ + internal struct Class : IEquatable + { + public Class(H5T.class_t c) + { + C = c; + } + + public static Class String => new Class(H5T.class_t.STRING); + + public static Class Reference => new Class(H5T.class_t.REFERENCE); + + public static Class Compound => new Class(H5T.class_t.COMPOUND); + + public H5T.class_t C { get; } + + public static bool operator ==(Class one, Class other) + { + return one.Equals(other); + } + + public static bool operator !=(Class one, Class other) + { + return !one.Equals(other); + } + + public bool Equals(Class other) + { + return C == other.C; + } + + public override bool Equals(object obj) + { + return obj is Class other && Equals(other); + } + + public override int GetHashCode() + { + return (int)C; + } + } +} \ No newline at end of file diff --git a/MatFileHandler/Hdf/Dataset.cs b/MatFileHandler/Hdf/Dataset.cs index 2da07ea..fcc4cca 100644 --- a/MatFileHandler/Hdf/Dataset.cs +++ b/MatFileHandler/Hdf/Dataset.cs @@ -3,10 +3,15 @@ using HDF.PInvoke; namespace MatFileHandler.Hdf { - public struct Dataset : IDisposable + internal struct Dataset : IDisposable { public long Id { get; private set; } + public Dataset(long datasetId) + { + Id = datasetId; + } + public Dataset(long groupId, string name) { Id = H5D.open(groupId, name); @@ -20,5 +25,35 @@ namespace MatFileHandler.Hdf Id = -1; } } + + public Attribute GetAttribute(string name) + { + return new Attribute(Id, name); + } + + public bool AttributeExists(string name) + { + return H5A.exists_by_name(Id, ".", name) != 0; + } + + public Type GetHdfType() + { + return new Type(H5D.get_type(Id)); + } + + public int GetStorageSize() + { + return (int)H5D.get_storage_size(Id); + } + + public Space GetHdfSpace() + { + return new Space(H5D.get_space(Id)); + } + + public void ReadToHandle(Type type, MemoryHandle handle) + { + H5D.read(Id, type.Id, H5S.ALL, H5S.ALL, H5P.DEFAULT, handle.Handle); + } } } \ No newline at end of file diff --git a/MatFileHandler/Hdf/Group.cs b/MatFileHandler/Hdf/Group.cs index 04b0c51..3812a1e 100644 --- a/MatFileHandler/Hdf/Group.cs +++ b/MatFileHandler/Hdf/Group.cs @@ -3,7 +3,7 @@ using HDF.PInvoke; namespace MatFileHandler.Hdf { - public struct Group : IDisposable + internal struct Group : IDisposable { public long Id { get; private set; } @@ -20,5 +20,15 @@ namespace MatFileHandler.Hdf Id = -1; } } + + public Attribute GetAttribute(string name) + { + return new Attribute(Id, name); + } + + public bool AttributeExists(string name) + { + return H5A.exists_by_name(Id, ".", name) != 0; + } } } \ No newline at end of file diff --git a/MatFileHandler/Hdf/ReferenceArray.cs b/MatFileHandler/Hdf/ReferenceArray.cs new file mode 100644 index 0000000..adfcbeb --- /dev/null +++ b/MatFileHandler/Hdf/ReferenceArray.cs @@ -0,0 +1,60 @@ +using System; +using System.Collections; +using System.Collections.Generic; +using System.Runtime.InteropServices; +using System.Text; +using HDF.PInvoke; + +namespace MatFileHandler.Hdf +{ + internal struct ReferenceArray : IDisposable, IEnumerable + { + public Dataset Dataset { get; } + + public int Size { get; } + + public MemoryHandle Buf { get; } + + public Dataset[] References { get; } + + public ReferenceArray(Dataset dataset, int size) + { + Dataset = dataset; + Size = size; + Buf = new MemoryHandle(Marshal.SizeOf(default(IntPtr)) * size); + Dataset.ReadToHandle(Type.Reference, Buf); + References = new Dataset[size]; + for (var i = 0; i < size; i++) + { + References[i] = + new Dataset(H5R.dereference( + dataset.Id, + H5P.DEFAULT, + H5R.type_t.OBJECT, + Buf.Handle + (i * Marshal.SizeOf(default(IntPtr))))); + } + } + + public void Dispose() + { + Buf?.Dispose(); + if (!(References is null)) + { + foreach (var reference in References) + { + reference.Dispose(); + } + } + } + + public IEnumerator GetEnumerator() + { + return ((IEnumerable)References).GetEnumerator(); + } + + IEnumerator IEnumerable.GetEnumerator() + { + return References.GetEnumerator(); + } + } +} diff --git a/MatFileHandler/Hdf/Space.cs b/MatFileHandler/Hdf/Space.cs new file mode 100644 index 0000000..87751e4 --- /dev/null +++ b/MatFileHandler/Hdf/Space.cs @@ -0,0 +1,28 @@ +using System.Linq; +using HDF.PInvoke; + +namespace MatFileHandler.Hdf +{ + internal struct Space + { + public Space(long id) + { + Id = id; + } + + public long Id { get; } + + public int GetRank() + { + return H5S.get_simple_extent_ndims(Id); + } + + public int[] GetDimensions() + { + var dims = new ulong[GetRank()]; + H5S.get_simple_extent_dims(Id, dims, null); + System.Array.Reverse(dims); + return dims.Select(x => (int)x).ToArray(); + } + } +} \ No newline at end of file diff --git a/MatFileHandler/Hdf/Type.cs b/MatFileHandler/Hdf/Type.cs new file mode 100644 index 0000000..8a36d7c --- /dev/null +++ b/MatFileHandler/Hdf/Type.cs @@ -0,0 +1,70 @@ +using System; +using HDF.PInvoke; + +namespace MatFileHandler.Hdf +{ + internal struct Type + { + public Type(long id) + { + Id = id; + } + + public long Id { get; } + + public Class GetClass() + { + return new Class(H5T.get_class(Id)); + } + + public int GetSize() + { + return (int)H5T.get_size(Id); + } + + public static Type NativeInt8 => new Type(H5T.NATIVE_INT8); + + public static Type NativeUInt8 => new Type(H5T.NATIVE_UINT8); + + public static Type NativeInt16 => new Type(H5T.NATIVE_INT16); + + public static Type NativeUInt16 => new Type(H5T.NATIVE_UINT16); + + public static Type NativeInt32 => new Type(H5T.NATIVE_INT32); + + public static Type NativeUInt32 => new Type(H5T.NATIVE_UINT32); + + public static Type NativeInt64 => new Type(H5T.NATIVE_INT64); + + public static Type NativeUInt64 => new Type(H5T.NATIVE_UINT64); + + public static Type NativeFloat => new Type(H5T.NATIVE_FLOAT); + + public static Type NativeDouble => new Type(H5T.NATIVE_DOUBLE); + + public static Type NativeInt => new Type(H5T.NATIVE_INT); + + public static Type NativeUInt => new Type(H5T.NATIVE_UINT); + + public static Type CS1 => new Type(H5T.C_S1); + + public static Type Reference => new Type(H5T.STD_REF_OBJ); + + public Type WithSize(int size) + { + var classId = H5T.copy(Id); + H5T.set_size(classId, (IntPtr)size); + return new Type(classId); + } + + public static Type CreateCompound(int size) + { + return new Type(H5T.create(H5T.class_t.COMPOUND, (IntPtr)size)); + } + + public void InsertField(string name, Type fieldType) + { + H5T.insert(Id, name, IntPtr.Zero, fieldType.Id); + } + } +} \ No newline at end of file diff --git a/MatFileHandler/HdfFileReader.cs b/MatFileHandler/HdfFileReader.cs index 50565fd..7ceaa77 100644 --- a/MatFileHandler/HdfFileReader.cs +++ b/MatFileHandler/HdfFileReader.cs @@ -6,13 +6,16 @@ using System.Runtime.InteropServices; using System.Text; using HDF.PInvoke; using MatFileHandler.Hdf; -using Array = MatFileHandler.Hdf.Array; -using Attribute = MatFileHandler.Hdf.Attribute; namespace MatFileHandler { internal class HdfFileReader { + private const string classAttributeName = "MATLAB_class"; + + private const string globalAttributeName = "MATLAB_global"; + + private const string sparseAttributeName = "MATLAB_sparse"; private long fileId; private List variables; @@ -25,8 +28,8 @@ namespace MatFileHandler internal IMatFile Read() { variables = new List(); - H5G.info_t group_info = default(H5G.info_t); - var result = H5G.get_info(fileId, ref group_info); + var group_info = default(H5G.info_t); + H5G.get_info(fileId, ref group_info); var numberOfVariables = group_info.nlinks; ulong idx = 0; @@ -40,94 +43,10 @@ namespace MatFileHandler VariableIterator, IntPtr.Zero); } + return new MatFile(variables); } - private bool ReadGlobalFlag(long datasetId) - { - if (H5A.exists_by_name(datasetId, ".", "MATLAB_global") != 0) - { - using (var globalAttribute = new Attribute(datasetId, "MATLAB_global")) - { - using (var h = new MemoryHandle(sizeof(int))) - { - H5A.read(globalAttribute.Id, H5T.NATIVE_INT, h.Handle); - var result = Marshal.ReadInt32(h.Handle); - return result != 0; - } - } - } - - return false; - } - - private int VariableIterator(long group, IntPtr name, ref H5L.info_t info, IntPtr op_data) - { - var variableName = Marshal.PtrToStringAnsi(name); - var object_info = default(H5O.info_t); - H5O.get_info_by_name(group, variableName, ref object_info); - switch (object_info.type) - { - case H5O.type_t.DATASET: - using (var dataset = new Dataset(group, variableName)) - { - var isGlobal = ReadGlobalFlag(dataset.Id); - var value = ReadDataset(dataset.Id); - variables.Add(new MatVariable(value, variableName, isGlobal)); - } - break; - case H5O.type_t.GROUP: - if (variableName == "#refs#") - { - return 0; - } - using (var subGroup = new Group(group, variableName)) - { - var isGlobal = ReadGlobalFlag(subGroup.Id); - var groupValue = ReadGroup(subGroup.Id); - variables.Add(new MatVariable(groupValue, variableName, isGlobal)); - } - break; - default: - throw new NotImplementedException(); - } - return 0; - } - - private static string GetMatlabClassOfDataset(long datasetId) - { - using (var attribute = new Attribute(datasetId, "MATLAB_class")) - { - var typeId = H5A.get_type(attribute.Id); - var cl = H5T.get_class(typeId); - if (cl != H5T.class_t.STRING) - { - throw new NotImplementedException(); - } - var classId = H5T.copy(H5T.C_S1); - var typeIdSize = (int)H5T.get_size(typeId); - H5T.set_size(classId, (IntPtr)typeIdSize); - var matlabClassNameBytes = new byte[typeIdSize]; - using (var buf = new MemoryHandle(typeIdSize)) - { - H5A.read(attribute.Id, classId, buf.Handle); - Marshal.Copy(buf.Handle, matlabClassNameBytes, 0, typeIdSize); - } - - return Encoding.ASCII.GetString(matlabClassNameBytes); - } - } - - private static int[] GetDimensionsOfDataset(long datasetId) - { - var spaceId = H5D.get_space(datasetId); - var rank = H5S.get_simple_extent_ndims(spaceId); - var dims = new ulong[rank]; - H5S.get_simple_extent_dims(spaceId, dims, null); - System.Array.Reverse(dims); - return dims.Select(x => (int)x).ToArray(); - } - private static MatlabClass ArrayTypeFromMatlabClassName(string matlabClassName) { switch (matlabClassName) @@ -161,173 +80,59 @@ namespace MatFileHandler case "cell": return MatlabClass.MCell; } + throw new NotImplementedException(); } - private static int GroupFieldNamesIterator(long group, IntPtr name, ref H5L.info_t info, IntPtr data) - { - var nameString = Marshal.PtrToStringAnsi(name); - H5O.info_t objectInfo = default(H5O.info_t); - H5O.get_info_by_name(group, nameString, ref objectInfo, H5P.DEFAULT); - return 0; - } - - private static IArray ReadSparseArray(long groupId, MatlabClass arrayType) + private static T[] ConvertDataToProperType(byte[] bytes, MatlabClass arrayType) where T : struct { - using (var sparseAttribute = new Attribute(groupId, "MATLAB_sparse")) - { - using (var numberOfRowsHandle = new MemoryHandle(sizeof(uint))) - { - H5A.read(sparseAttribute.Id, H5T.NATIVE_UINT, numberOfRowsHandle.Handle); - var numberOfRows = Marshal.ReadInt32(numberOfRowsHandle.Handle); - int[] rowIndex; - int[] columnIndex; - using (var irData = new Dataset(groupId, "ir")) - { - var ds = GetDimensionsOfDataset(irData.Id); - var numberOfIr = ds.NumberOfElements(); - var irBytes = ReadDataset(irData.Id, H5T.NATIVE_INT, numberOfIr * sizeof(int)); - rowIndex = new int[numberOfIr]; - Buffer.BlockCopy(irBytes, 0, rowIndex, 0, irBytes.Length); - } - using (var jcData = new Dataset(groupId, "jc")) - { - var ds = GetDimensionsOfDataset(jcData.Id); - var numberOfJc = ds.NumberOfElements(); - var jcBytes = ReadDataset(jcData.Id, H5T.NATIVE_INT, numberOfJc * sizeof(int)); - columnIndex = new int[numberOfJc]; - Buffer.BlockCopy(jcBytes, 0, columnIndex, 0, jcBytes.Length); - } + var length = bytes.Length; + var arrayElementSize = SizeOfArrayElement(arrayType); + var data = new T[length / arrayElementSize]; + Buffer.BlockCopy(bytes, 0, data, 0, length); + return data; + } - using (var data = new Dataset(groupId, "data")) - { - var ds = GetDimensionsOfDataset(data.Id); - var dims = new int[2]; - dims[0] = numberOfRows; - dims[1] = columnIndex.Length - 1; - var dataSize = ds.NumberOfElements() * SizeOfArrayElement(arrayType); - var storageSize = (int)H5D.get_storage_size(data.Id); - var dataSetType = H5D.get_type(data.Id); - var dataSetTypeClass = H5T.get_class(dataSetType); - var isCompound = dataSetTypeClass == H5T.class_t.COMPOUND; - if (isCompound) - { - var (convertedRealData, convertedImaginaryData) = ReadComplexData(data.Id, dataSize, arrayType); - if (arrayType == MatlabClass.MDouble) - { - var complexData = - (convertedRealData as double[]) - .Zip(convertedImaginaryData as double[], (x, y) => new Complex(x, y)) - .ToArray(); - var complexDataDictionary = DataExtraction.ConvertMatlabSparseToDictionary(rowIndex, columnIndex, j => complexData[j]); - return new SparseArrayOf(dims, complexDataDictionary); - } - else - { - var complexData = - convertedRealData - .Zip(convertedImaginaryData, (x, y) => new ComplexOf(x, y)) - .ToArray(); - var complexDataDictionary = DataExtraction.ConvertMatlabSparseToDictionary(rowIndex, columnIndex, j => complexData[j]); - return new SparseArrayOf>(dims, complexDataDictionary); - } - } - if (dataSize != storageSize) - { - throw new Exception("Data size mismatch."); - } - var d = ReadDataset(data.Id, H5tTypeFromHdfMatlabClass(arrayType), dataSize); - var elements = ConvertDataToProperType(d, arrayType); - var dataDictionary = DataExtraction.ConvertMatlabSparseToDictionary(rowIndex, columnIndex, j => elements[j]); - return new SparseArrayOf(dims, dataDictionary); - } - } + private static int[] GetDimensionsOfDataset(Dataset dataset) + { + return dataset.GetHdfSpace().GetDimensions(); + } + + private static string GetMatlabClassFromAttribute(Hdf.Attribute attribute) + { + var type = attribute.GetHdfType(); + var cl = type.GetClass(); + if (cl != Class.String) + { + throw new NotImplementedException(); + } + + var typeIdSize = type.GetSize(); + var copiedType = Hdf.Type.CS1.WithSize(type.GetSize()); + var matlabClassNameBytes = new byte[typeIdSize]; + using (var buf = new MemoryHandle(typeIdSize)) + { + attribute.ReadToHandle(buf, copiedType); + Marshal.Copy(buf.Handle, matlabClassNameBytes, 0, typeIdSize); + } + + return Encoding.ASCII.GetString(matlabClassNameBytes); + } + + private static string GetMatlabClassOfDataset(Dataset dataset) + { + using (var attribute = dataset.GetAttribute(classAttributeName)) + { + return GetMatlabClassFromAttribute(attribute); } } - private static IArray ReadGroup(long groupId) + private static string GetMatlabClassOfGroup(Group group) { - var matlabClass = GetMatlabClassOfDataset(groupId); - if (matlabClass == "struct") + using (var attribute = group.GetAttribute(classAttributeName)) { - return ReadStruct(groupId); - } - - if (H5A.exists_by_name(groupId, ".", "MATLAB_sparse") != 0) - { - var dims = new int[0]; - var arrayType = ArrayTypeFromMatlabClassName(matlabClass); - - switch (arrayType) - { - case MatlabClass.MEmpty: - return Array.Empty(); - case MatlabClass.MLogical: - return ReadSparseArray(groupId, arrayType); - case MatlabClass.MInt8: - return ReadSparseArray(groupId, arrayType); - case MatlabClass.MUInt8: - return ReadSparseArray(groupId, arrayType); - case MatlabClass.MInt16: - return ReadSparseArray(groupId, arrayType); - case MatlabClass.MUInt16: - return ReadSparseArray(groupId, arrayType); - case MatlabClass.MInt32: - return ReadSparseArray(groupId, arrayType); - case MatlabClass.MUInt32: - return ReadSparseArray(groupId, arrayType); - case MatlabClass.MInt64: - return ReadSparseArray(groupId, arrayType); - case MatlabClass.MUInt64: - return ReadSparseArray(groupId, arrayType); - case MatlabClass.MSingle: - return ReadSparseArray(groupId, arrayType); - case MatlabClass.MDouble: - return ReadSparseArray(groupId, arrayType); - default: - throw new NotSupportedException(); - } - } - - throw new NotImplementedException(); - } - - private static string[] ReadFieldNames(long groupId) - { - // Try to read fields from MATLAB_fields. - using (var attr = new Attribute(groupId, "MATLAB_fields")) - { - if (attr.Id == 0) - { - throw new NotImplementedException(); - } - var spaceId = H5A.get_space(attr.Id); - var rank = H5S.get_simple_extent_ndims(spaceId); - var dims = new ulong[rank]; - H5S.get_simple_extent_dims(spaceId, dims, null); - System.Array.Reverse(dims); - var dimensions = dims.Select(x => (int)x).ToArray(); - var numberOfFields = dimensions.NumberOfElements(); - - var field_id = H5A.get_type(attr.Id); - - var fieldNamePointersSizeInBytes = numberOfFields * Marshal.SizeOf(default(H5T.hvl_t)); - var fieldNamePointers = new IntPtr[numberOfFields * 2]; - using (var fieldNamesBuf = new MemoryHandle(fieldNamePointersSizeInBytes)) - { - H5A.read(attr.Id, field_id, fieldNamesBuf.Handle); - Marshal.Copy(fieldNamesBuf.Handle, fieldNamePointers, 0, numberOfFields * 2); - } - - var fieldNames = new string[numberOfFields]; - for (var i = 0; i < numberOfFields; i++) - { - var stringLength = fieldNamePointers[i * 2]; - var stringPointer = fieldNamePointers[(i * 2) + 1]; - fieldNames[i] = Marshal.PtrToStringAnsi(stringPointer, (int)stringLength); - } - return fieldNames; + return GetMatlabClassFromAttribute(attribute); } } @@ -338,6 +143,365 @@ namespace MatFileHandler return objectInfo.type; } + private static Hdf.Type H5tTypeFromHdfMatlabClass(MatlabClass arrayType) + { + switch (arrayType) + { + case MatlabClass.MInt8: + return Hdf.Type.NativeInt8; + case MatlabClass.MUInt8: + case MatlabClass.MLogical: + return Hdf.Type.NativeUInt8; + case MatlabClass.MInt16: + return Hdf.Type.NativeInt16; + case MatlabClass.MUInt16: + return Hdf.Type.NativeUInt16; + case MatlabClass.MInt32: + return Hdf.Type.NativeInt32; + case MatlabClass.MUInt32: + return Hdf.Type.NativeUInt32; + case MatlabClass.MInt64: + return Hdf.Type.NativeInt64; + case MatlabClass.MUInt64: + return Hdf.Type.NativeUInt64; + case MatlabClass.MSingle: + return Hdf.Type.NativeFloat; + case MatlabClass.MDouble: + return Hdf.Type.NativeDouble; + } + + throw new NotImplementedException(); + } + + private static IArray ReadCellArray(Dataset dataset, int[] dims) + { + var numberOfElements = dims.NumberOfElements(); + var elements = new IArray[numberOfElements]; + using (var array = new ReferenceArray(dataset, numberOfElements)) + { + var i = 0; + foreach (var reference in array) + { + elements[i++] = ReadDataset(reference); + } + } + + return new CellArray(dims, elements); + } + + private static IArray ReadCharArray(Dataset dataset, int[] dims) + { + var storageSize = dataset.GetStorageSize(); + var data = ReadDataset(dataset, Hdf.Type.NativeUInt16, storageSize); + var str = Encoding.Unicode.GetString(data); + return new CharArray(dims, str); + } + + private static (T[] real, T[] imaginary) ReadComplexData( + Dataset dataset, + int dataSize, + MatlabClass arrayType) + where T : struct + { + var h5Type = H5tTypeFromHdfMatlabClass(arrayType); + var h5Size = h5Type.GetSize(); + var h5tComplexReal = Hdf.Type.CreateCompound(h5Size); + h5tComplexReal.InsertField("real", h5Type); + var realData = ReadDataset(dataset, h5tComplexReal, dataSize); + var h5tComplexImaginary = Hdf.Type.CreateCompound(h5Size); + h5tComplexImaginary.InsertField("imag", h5Type); + var imaginaryData = ReadDataset(dataset, h5tComplexImaginary, dataSize); + var convertedRealData = ConvertDataToProperType(realData, arrayType); + var convertedImaginaryData = ConvertDataToProperType(imaginaryData, arrayType); + return (convertedRealData, convertedImaginaryData); + } + + private static IArray ReadDataset(Dataset dataset) + { + var dims = GetDimensionsOfDataset(dataset); + + var matlabClass = GetMatlabClassOfDataset(dataset); + var arrayType = ArrayTypeFromMatlabClassName(matlabClass); + + switch (arrayType) + { + case MatlabClass.MEmpty: + return Hdf.Array.Empty(); + case MatlabClass.MLogical: + return ReadNumericalArray(dataset, dims, arrayType); + case MatlabClass.MChar: + return ReadCharArray(dataset, dims); + case MatlabClass.MInt8: + return ReadNumericalArray(dataset, dims, arrayType); + case MatlabClass.MUInt8: + return ReadNumericalArray(dataset, dims, arrayType); + case MatlabClass.MInt16: + return ReadNumericalArray(dataset, dims, arrayType); + case MatlabClass.MUInt16: + return ReadNumericalArray(dataset, dims, arrayType); + case MatlabClass.MInt32: + return ReadNumericalArray(dataset, dims, arrayType); + case MatlabClass.MUInt32: + return ReadNumericalArray(dataset, dims, arrayType); + case MatlabClass.MInt64: + return ReadNumericalArray(dataset, dims, arrayType); + case MatlabClass.MUInt64: + return ReadNumericalArray(dataset, dims, arrayType); + case MatlabClass.MSingle: + return ReadNumericalArray(dataset, dims, arrayType); + case MatlabClass.MDouble: + return ReadNumericalArray(dataset, dims, arrayType); + case MatlabClass.MCell: + return ReadCellArray(dataset, dims); + } + + throw new NotImplementedException($"Unknown array type: {arrayType}."); + } + + private static byte[] ReadDataset(Dataset dataset, Hdf.Type elementType, int dataSize) + { + var data = new byte[dataSize]; + using (var dataBuffer = new MemoryHandle(dataSize)) + { + dataset.ReadToHandle(elementType, dataBuffer); + Marshal.Copy(dataBuffer.Handle, data, 0, dataSize); + } + + return data; + } + + private static string[] ReadFieldNames(long groupId) + { + // Try to read fields from MATLAB_fields. + using (var attr = new Hdf.Attribute(groupId, "MATLAB_fields")) + { + if (attr.Id == 0) + { + throw new NotImplementedException(); + } + + var dimensions = attr.GetSpace().GetDimensions(); + var numberOfFields = dimensions.NumberOfElements(); + + var fieldType = attr.GetHdfType(); + + var fieldNamePointersSizeInBytes = numberOfFields * Marshal.SizeOf(default(H5T.hvl_t)); + var fieldNamePointers = new IntPtr[numberOfFields * 2]; + using (var fieldNamesBuf = new MemoryHandle(fieldNamePointersSizeInBytes)) + { + attr.ReadToHandle(fieldNamesBuf, fieldType); + Marshal.Copy(fieldNamesBuf.Handle, fieldNamePointers, 0, numberOfFields * 2); + } + + var fieldNames = new string[numberOfFields]; + for (var i = 0; i < numberOfFields; i++) + { + var stringLength = fieldNamePointers[i * 2]; + var stringPointer = fieldNamePointers[(i * 2) + 1]; + fieldNames[i] = Marshal.PtrToStringAnsi(stringPointer, (int)stringLength); + } + + return fieldNames; + } + } + + private static IArray ReadGroup(Group group) + { + var matlabClass = GetMatlabClassOfGroup(group); + if (matlabClass == "struct") + { + return ReadStruct(group.Id); + } + + if (group.AttributeExists(sparseAttributeName)) + { + var dims = new int[0]; + var arrayType = ArrayTypeFromMatlabClassName(matlabClass); + + switch (arrayType) + { + case MatlabClass.MEmpty: + return Hdf.Array.Empty(); + case MatlabClass.MLogical: + return ReadSparseArray(group.Id, arrayType); + case MatlabClass.MInt8: + return ReadSparseArray(group.Id, arrayType); + case MatlabClass.MUInt8: + return ReadSparseArray(group.Id, arrayType); + case MatlabClass.MInt16: + return ReadSparseArray(group.Id, arrayType); + case MatlabClass.MUInt16: + return ReadSparseArray(group.Id, arrayType); + case MatlabClass.MInt32: + return ReadSparseArray(group.Id, arrayType); + case MatlabClass.MUInt32: + return ReadSparseArray(group.Id, arrayType); + case MatlabClass.MInt64: + return ReadSparseArray(group.Id, arrayType); + case MatlabClass.MUInt64: + return ReadSparseArray(group.Id, arrayType); + case MatlabClass.MSingle: + return ReadSparseArray(group.Id, arrayType); + case MatlabClass.MDouble: + return ReadSparseArray(group.Id, arrayType); + default: + throw new NotSupportedException(); + } + } + + throw new NotImplementedException(); + } + + private static IEnumerable> CombineComplexOfData( + IEnumerable realData, + IEnumerable imaginaryData) + where T : struct + { + return realData.Zip( + imaginaryData, + (x, y) => new ComplexOf(x, y)); + } + + private static IEnumerable CombineComplexData( + IEnumerable realData, + IEnumerable imaginaryData) + { + return realData.Zip( + imaginaryData, + (x, y) => new Complex(x, y)); + } + + private static IArray ReadNumericalArray(Dataset dataset, int[] dims, MatlabClass arrayType) + where T : struct + { + var numberOfElements = dims.NumberOfElements(); + var dataSize = numberOfElements * SizeOfArrayElement(arrayType); + var storageSize = dataset.GetStorageSize(); + var dataSetType = dataset.GetHdfType(); + var dataSetTypeClass = dataSetType.GetClass(); + var isCompound = dataSetTypeClass == Class.Compound; + if (isCompound) + { + var (convertedRealData, convertedImaginaryData) = ReadComplexData(dataset, dataSize, arrayType); + if (arrayType == MatlabClass.MDouble) + { + var complexData = + CombineComplexData( + convertedRealData as double[], + convertedImaginaryData as double[]) + .ToArray(); + return new NumericalArrayOf(dims, complexData); + } + else + { + var complexData = + CombineComplexOfData( + convertedRealData, + convertedImaginaryData) + .ToArray(); + return new NumericalArrayOf>(dims, complexData); + } + } + + if (dataSize != storageSize) + { + throw new Exception("Data size mismatch."); + } + + var data = ReadDataset(dataset, H5tTypeFromHdfMatlabClass(arrayType), dataSize); + var convertedData = ConvertDataToProperType(data, arrayType); + return new NumericalArrayOf(dims, convertedData); + } + + private static IArray ReadSparseArray(long groupId, MatlabClass arrayType) + where T : struct + { + using (var sparseAttribute = new Hdf.Attribute(groupId, sparseAttributeName)) + { + using (var numberOfRowsHandle = new MemoryHandle(sizeof(uint))) + { + sparseAttribute.ReadToHandle(numberOfRowsHandle, Hdf.Type.NativeUInt); + var numberOfRows = Marshal.ReadInt32(numberOfRowsHandle.Handle); + int[] rowIndex; + int[] columnIndex; + using (var irData = new Dataset(groupId, "ir")) + { + var ds = GetDimensionsOfDataset(irData); + var numberOfIr = ds.NumberOfElements(); + var irBytes = ReadDataset(irData, Hdf.Type.NativeInt, numberOfIr * sizeof(int)); + rowIndex = new int[numberOfIr]; + Buffer.BlockCopy(irBytes, 0, rowIndex, 0, irBytes.Length); + } + + using (var jcData = new Dataset(groupId, "jc")) + { + var ds = GetDimensionsOfDataset(jcData); + var numberOfJc = ds.NumberOfElements(); + var jcBytes = ReadDataset(jcData, Hdf.Type.NativeInt, numberOfJc * sizeof(int)); + columnIndex = new int[numberOfJc]; + Buffer.BlockCopy(jcBytes, 0, columnIndex, 0, jcBytes.Length); + } + + using (var data = new Dataset(groupId, "data")) + { + var ds = GetDimensionsOfDataset(data); + var dims = new int[2]; + dims[0] = numberOfRows; + dims[1] = columnIndex.Length - 1; + var dataSize = ds.NumberOfElements() * SizeOfArrayElement(arrayType); + var storageSize = data.GetStorageSize(); + var dataSetType = data.GetHdfType(); + var dataSetTypeClass = dataSetType.GetClass(); + var isCompound = dataSetTypeClass == Class.Compound; + if (isCompound) + { + var (convertedRealData, convertedImaginaryData) = + ReadComplexData(data, dataSize, arrayType); + if (arrayType == MatlabClass.MDouble) + { + var complexData = + CombineComplexData( + convertedRealData as double[], + convertedImaginaryData as double[]) + .ToArray(); + var complexDataDictionary = + DataExtraction.ConvertMatlabSparseToDictionary( + rowIndex, + columnIndex, + j => complexData[j]); + return new SparseArrayOf(dims, complexDataDictionary); + } + else + { + var complexData = + CombineComplexOfData( + convertedRealData, + convertedImaginaryData) + .ToArray(); + var complexDataDictionary = + DataExtraction.ConvertMatlabSparseToDictionary( + rowIndex, + columnIndex, + j => complexData[j]); + return new SparseArrayOf>(dims, complexDataDictionary); + } + } + + if (dataSize != storageSize) + { + throw new Exception("Data size mismatch."); + } + + var d = ReadDataset(data, H5tTypeFromHdfMatlabClass(arrayType), dataSize); + var elements = ConvertDataToProperType(d, arrayType); + var dataDictionary = + DataExtraction.ConvertMatlabSparseToDictionary(rowIndex, columnIndex, j => elements[j]); + return new SparseArrayOf(dims, dataDictionary); + } + } + } + } + private static IArray ReadStruct(long groupId) { var fieldNames = ReadFieldNames(groupId); @@ -346,16 +510,16 @@ namespace MatFileHandler { using (var firstField = new Dataset(groupId, fieldNames[0])) { - var firstFieldTypeId = H5D.get_type(firstField.Id); - if (H5T.get_class(firstFieldTypeId) == H5T.class_t.REFERENCE) + var firstFieldType = firstField.GetHdfType(); + if (firstFieldType.GetClass() == Class.Reference) { - if (H5A.exists_by_name(firstField.Id, ".", "MATLAB_class") != 0) + if (firstField.AttributeExists(classAttributeName)) { throw new NotImplementedException(); } else { - var dimensions = GetDimensionsOfDataset(firstField.Id); + var dimensions = GetDimensionsOfDataset(firstField); var numberOfElements = dimensions.NumberOfElements(); var dictionary = new Dictionary>(); foreach (var fieldName in fieldNames) @@ -367,26 +531,22 @@ namespace MatFileHandler case H5O.type_t.DATASET: using (var field = new Dataset(groupId, fieldName)) { - using (var buf = new MemoryHandle(Marshal.SizeOf(default(IntPtr)) * numberOfElements)) + using (var array = new ReferenceArray(field, numberOfElements)) { - H5D.read(field.Id, H5T.STD_REF_OBJ, H5S.ALL, H5S.ALL, H5P.DEFAULT, buf.Handle); - for (var i = 0; i < numberOfElements; i++) + foreach (var reference in array) { - var fieldDataSet = H5R.dereference( - field.Id, - H5P.DEFAULT, - H5R.type_t.OBJECT, - buf.Handle + (i * Marshal.SizeOf(default(IntPtr)))); - var dataset = ReadDataset(fieldDataSet); - dictionary[fieldName].Add(dataset); + var value = ReadDataset(reference); + dictionary[fieldName].Add(value); } } } + break; default: throw new NotImplementedException(); } } + return new StructureArray(dimensions, dictionary); } } @@ -400,72 +560,10 @@ namespace MatFileHandler { throw new NotImplementedException(); } + throw new NotImplementedException(); } - private static IArray ReadDataset(long datasetId) - { - var dims = GetDimensionsOfDataset(datasetId); - - var matlabClass = GetMatlabClassOfDataset(datasetId); - var arrayType = ArrayTypeFromMatlabClassName(matlabClass); - - switch (arrayType) - { - case MatlabClass.MEmpty: - return Array.Empty(); - case MatlabClass.MLogical: - return ReadNumericalArray(datasetId, dims, arrayType); - case MatlabClass.MChar: - return ReadCharArray(datasetId, dims); - case MatlabClass.MInt8: - return ReadNumericalArray(datasetId, dims, arrayType); - case MatlabClass.MUInt8: - return ReadNumericalArray(datasetId, dims, arrayType); - case MatlabClass.MInt16: - return ReadNumericalArray(datasetId, dims, arrayType); - case MatlabClass.MUInt16: - return ReadNumericalArray(datasetId, dims, arrayType); - case MatlabClass.MInt32: - return ReadNumericalArray(datasetId, dims, arrayType); - case MatlabClass.MUInt32: - return ReadNumericalArray(datasetId, dims, arrayType); - case MatlabClass.MInt64: - return ReadNumericalArray(datasetId, dims, arrayType); - case MatlabClass.MUInt64: - return ReadNumericalArray(datasetId, dims, arrayType); - case MatlabClass.MSingle: - return ReadNumericalArray(datasetId, dims, arrayType); - case MatlabClass.MDouble: - return ReadNumericalArray(datasetId, dims, arrayType); - case MatlabClass.MCell: - return ReadCellArray(datasetId, dims); - } - throw new NotImplementedException($"Unknown array type: {arrayType}."); - } - - private static IArray ReadCellArray(long datasetId, int[] dims) - { - var numberOfElements = dims.NumberOfElements(); - var elements = new IArray[numberOfElements]; - using (var buf = new MemoryHandle(Marshal.SizeOf(default(IntPtr)) * numberOfElements)) - { - H5D.read(datasetId, H5T.STD_REF_OBJ, H5S.ALL, H5S.ALL, H5P.DEFAULT, buf.Handle); - for (var i = 0; i < numberOfElements; i++) - { - var fieldDataSet = - H5R.dereference( - datasetId, - H5P.DEFAULT, - H5R.type_t.OBJECT, - buf.Handle + (i * Marshal.SizeOf(default(IntPtr)))); - var dataset = ReadDataset(fieldDataSet); - elements[i] = dataset; - } - } - return new CellArray(dims, elements); - } - private static int SizeOfArrayElement(MatlabClass arrayType) { switch (arrayType) @@ -490,119 +588,67 @@ namespace MatFileHandler throw new NotImplementedException(); } - private static long H5tTypeFromHdfMatlabClass(MatlabClass arrayType) + private bool ReadGlobalFlag(Group group) { - switch (arrayType) + if (!group.AttributeExists(globalAttributeName)) { - case MatlabClass.MInt8: - return H5T.NATIVE_INT8; - case MatlabClass.MUInt8: - case MatlabClass.MLogical: - return H5T.NATIVE_UINT8; - case MatlabClass.MInt16: - return H5T.NATIVE_INT16; - case MatlabClass.MUInt16: - return H5T.NATIVE_UINT16; - case MatlabClass.MInt32: - return H5T.NATIVE_INT32; - case MatlabClass.MUInt32: - return H5T.NATIVE_UINT32; - case MatlabClass.MInt64: - return H5T.NATIVE_INT64; - case MatlabClass.MUInt64: - return H5T.NATIVE_UINT64; - case MatlabClass.MSingle: - return H5T.NATIVE_FLOAT; - case MatlabClass.MDouble: - return H5T.NATIVE_DOUBLE; + return false; } - throw new NotImplementedException(); - } - private static T[] ConvertDataToProperType(byte[] bytes, MatlabClass arrayType) - where T : struct - { - var length = bytes.Length; - var arrayElementSize = SizeOfArrayElement(arrayType); - var data = new T[length / arrayElementSize]; - Buffer.BlockCopy(bytes, 0, data, 0, length); - return data; - } - - private static byte[] ReadDataset(long datasetId, long elementType, int dataSize) - { - var data = new byte[dataSize]; - using (var dataBuffer = new MemoryHandle(dataSize)) + using (var globalAttribute = group.GetAttribute(globalAttributeName)) { - H5D.read(datasetId, elementType, H5S.ALL, H5S.ALL, H5P.DEFAULT, dataBuffer.Handle); - Marshal.Copy(dataBuffer.Handle, data, 0, dataSize); + return globalAttribute.ReadBool(); } - return data; } - private static (T[] real, T[] imaginary) ReadComplexData( - long datasetId, - int dataSize, - MatlabClass arrayType) - where T : struct + private bool ReadGlobalFlag(Dataset dataset) { - var h5Type = H5tTypeFromHdfMatlabClass(arrayType); - var h5Size = H5T.get_size(h5Type); - var h5tComplexReal = H5T.create(H5T.class_t.COMPOUND, h5Size); - H5T.insert(h5tComplexReal, "real", IntPtr.Zero, h5Type); - var realData = ReadDataset(datasetId, h5tComplexReal, dataSize); - var convertedRealData = ConvertDataToProperType(realData, arrayType); - var h5tComplexImaginary = H5T.create(H5T.class_t.COMPOUND, h5Size); - H5T.insert(h5tComplexImaginary, "imag", IntPtr.Zero, h5Type); - var imaginaryData = ReadDataset(datasetId, h5tComplexImaginary, dataSize); - var convertedImaginaryData = ConvertDataToProperType(imaginaryData, arrayType); - return (convertedRealData, convertedImaginaryData); - } - - private static IArray ReadNumericalArray(long datasetId, int[] dims, MatlabClass arrayType) - where T : struct - { - var numberOfElements = dims.NumberOfElements(); - var dataSize = numberOfElements * SizeOfArrayElement(arrayType); - var storageSize = (int)H5D.get_storage_size(datasetId); - var dataSetType = H5D.get_type(datasetId); - var dataSetTypeClass = H5T.get_class(dataSetType); - var isCompound = dataSetTypeClass == H5T.class_t.COMPOUND; - if (isCompound) + if (!dataset.AttributeExists(globalAttributeName)) { - var (convertedRealData, convertedImaginaryData) = ReadComplexData(datasetId, dataSize, arrayType); - if (arrayType == MatlabClass.MDouble) - { - var complexData = - (convertedRealData as double[]) - .Zip(convertedImaginaryData as double[], (x, y) => new Complex(x, y)) - .ToArray(); - return new NumericalArrayOf(dims, complexData); - } - else - { - var complexData = - convertedRealData - .Zip(convertedImaginaryData, (x, y) => new ComplexOf(x, y)) - .ToArray(); - return new NumericalArrayOf>(dims, complexData); - } + return false; } - if (dataSize != storageSize) + + using (var globalAttribute = dataset.GetAttribute(globalAttributeName)) { - throw new Exception("Data size mismatch."); + return globalAttribute.ReadBool(); } - var data = ReadDataset(datasetId, H5tTypeFromHdfMatlabClass(arrayType), dataSize); - var convertedData = ConvertDataToProperType(data, arrayType); - return new NumericalArrayOf(dims, convertedData); } - private static IArray ReadCharArray(long datasetId, int[] dims) + private int VariableIterator(long group, IntPtr name, ref H5L.info_t info, IntPtr op_data) { - var storageSize = (int)H5D.get_storage_size(datasetId); - var data = ReadDataset(datasetId, H5T.NATIVE_UINT16, storageSize); - var str = Encoding.Unicode.GetString(data); - return new CharArray(dims, str); + var variableName = Marshal.PtrToStringAnsi(name); + var object_info = default(H5O.info_t); + H5O.get_info_by_name(group, variableName, ref object_info); + switch (object_info.type) + { + case H5O.type_t.DATASET: + using (var dataset = new Dataset(group, variableName)) + { + var isGlobal = ReadGlobalFlag(dataset); + var value = ReadDataset(dataset); + variables.Add(new MatVariable(value, variableName, isGlobal)); + } + + break; + case H5O.type_t.GROUP: + if (variableName == "#refs#") + { + return 0; + } + + using (var subGroup = new Group(group, variableName)) + { + var isGlobal = ReadGlobalFlag(subGroup); + var groupValue = ReadGroup(subGroup); + variables.Add(new MatVariable(groupValue, variableName, isGlobal)); + } + + break; + default: + throw new NotImplementedException(); + } + + return 0; } } -} +} \ No newline at end of file