Hdf refactoring

This commit is contained in:
Alexander Luzgarev 2019-03-17 18:50:57 +01:00
parent 199ab46f0c
commit ef73a380bb
8 changed files with 744 additions and 423 deletions

View File

@ -1,9 +1,10 @@
using System; using System;
using System.Runtime.InteropServices;
using HDF.PInvoke; using HDF.PInvoke;
namespace MatFileHandler.Hdf namespace MatFileHandler.Hdf
{ {
public struct Attribute : IDisposable internal struct Attribute : IDisposable
{ {
public long Id { get; private set; } public long Id { get; private set; }
@ -20,5 +21,30 @@ namespace MatFileHandler.Hdf
Id = -1; Id = -1;
} }
} }
/// <summary>
/// Reads the attribute as a native int and interprets any non-zero value as <c>true</c>.
/// </summary>
/// <returns><c>true</c> when the integer read from the attribute is non-zero.</returns>
public bool ReadBool()
{
    using (var buffer = new MemoryHandle(sizeof(int)))
    {
        // NOTE(review): the status returned by H5A.read is not inspected here.
        H5A.read(Id, H5T.NATIVE_INT, buffer.Handle);
        return Marshal.ReadInt32(buffer.Handle) != 0;
    }
}
/// <summary>
/// Reads the attribute's value into caller-supplied native memory.
/// </summary>
/// <param name="handle">Destination buffer; its native pointer is passed to the read call.</param>
/// <param name="type">HDF type whose id is used as the in-memory type of the read.</param>
public void ReadToHandle(MemoryHandle handle, Type type) =>
    H5A.read(Id, type.Id, handle.Handle);
/// <summary>
/// Gets the HDF datatype of this attribute.
/// </summary>
/// <returns>A <see cref="Type"/> wrapping the id returned by <c>H5A.get_type</c>.</returns>
public Type GetHdfType() => new Type(H5A.get_type(Id));
/// <summary>
/// Gets the dataspace of this attribute.
/// </summary>
/// <returns>A <see cref="Space"/> wrapping the id returned by <c>H5A.get_space</c>.</returns>
public Space GetSpace() => new Space(H5A.get_space(Id));
} }
} }

View File

@ -0,0 +1,46 @@
using System;
using HDF.PInvoke;
namespace MatFileHandler.Hdf
{
/// <summary>
/// Value wrapper around an HDF5 datatype class (<see cref="H5T.class_t"/>).
/// Two instances are equal exactly when they wrap the same enum value.
/// </summary>
internal struct Class : IEquatable<Class>
{
    /// <summary>Wraps the given HDF5 datatype class.</summary>
    /// <param name="c">Datatype class to wrap.</param>
    public Class(H5T.class_t c)
    {
        C = c;
    }

    /// <summary>Gets the wrapped HDF5 datatype class.</summary>
    public H5T.class_t C { get; }

    /// <summary>Gets the string datatype class.</summary>
    public static Class String => new Class(H5T.class_t.STRING);

    /// <summary>Gets the reference datatype class.</summary>
    public static Class Reference => new Class(H5T.class_t.REFERENCE);

    /// <summary>Gets the compound datatype class.</summary>
    public static Class Compound => new Class(H5T.class_t.COMPOUND);

    public static bool operator ==(Class one, Class other) => one.C == other.C;

    public static bool operator !=(Class one, Class other) => one.C != other.C;

    /// <inheritdoc />
    public bool Equals(Class other) => C == other.C;

    /// <inheritdoc />
    public override bool Equals(object obj) => obj is Class other && C == other.C;

    /// <summary>Uses the numeric value of the wrapped enum as the hash code.</summary>
    public override int GetHashCode() => (int)C;
}
}

View File

@ -3,10 +3,15 @@ using HDF.PInvoke;
namespace MatFileHandler.Hdf namespace MatFileHandler.Hdf
{ {
public struct Dataset : IDisposable internal struct Dataset : IDisposable
{ {
public long Id { get; private set; } public long Id { get; private set; }
/// <summary>
/// Wraps an existing dataset identifier; nothing is opened by this constructor.
/// </summary>
/// <param name="datasetId">Dataset identifier to store in <c>Id</c>.</param>
public Dataset(long datasetId) => Id = datasetId;
public Dataset(long groupId, string name) public Dataset(long groupId, string name)
{ {
Id = H5D.open(groupId, name); Id = H5D.open(groupId, name);
@ -20,5 +25,35 @@ namespace MatFileHandler.Hdf
Id = -1; Id = -1;
} }
} }
/// <summary>
/// Creates an <see cref="Attribute"/> for the named attribute of this dataset.
/// </summary>
/// <param name="name">Attribute name.</param>
/// <returns>The attribute wrapper constructed from this dataset's id and <paramref name="name"/>.</returns>
public Attribute GetAttribute(string name) => new Attribute(Id, name);
/// <summary>
/// Checks whether this dataset has an attribute with the given name.
/// </summary>
/// <param name="name">Attribute name to look for.</param>
/// <returns>
/// <c>true</c> only when the library reports that the attribute exists;
/// <c>false</c> when it does not exist or when the query itself fails.
/// </returns>
public bool AttributeExists(string name)
{
    // H5Aexists_by_name returns a tri-state: positive = exists, zero = absent,
    // negative = failure. A failed query must not be reported as "exists".
    return H5A.exists_by_name(Id, ".", name) > 0;
}
/// <summary>
/// Gets the HDF datatype of this dataset.
/// </summary>
/// <returns>A <see cref="Type"/> wrapping the id returned by <c>H5D.get_type</c>.</returns>
public Type GetHdfType() => new Type(H5D.get_type(Id));
/// <summary>
/// Gets the storage size reported by <c>H5D.get_storage_size</c>, converted to <c>int</c>.
/// </summary>
/// <returns>The storage size as a 32-bit integer.</returns>
public int GetStorageSize()
{
    // NOTE(review): the conversion is a plain cast, so a size that does not fit in
    // an int is presumably truncated rather than reported — confirm this is intended.
    var storageSize = H5D.get_storage_size(Id);
    return (int)storageSize;
}
/// <summary>
/// Gets the dataspace of this dataset.
/// </summary>
/// <returns>A <see cref="Space"/> wrapping the id returned by <c>H5D.get_space</c>.</returns>
public Space GetHdfSpace() => new Space(H5D.get_space(Id));
/// <summary>
/// Reads the entire dataset (all of the memory and file dataspace, default
/// transfer properties) into caller-supplied native memory.
/// </summary>
/// <param name="type">HDF type whose id is used as the in-memory element type.</param>
/// <param name="handle">Destination buffer; its native pointer is passed to the read call.</param>
public void ReadToHandle(Type type, MemoryHandle handle) =>
    H5D.read(Id, type.Id, H5S.ALL, H5S.ALL, H5P.DEFAULT, handle.Handle);
} }
} }

View File

@ -3,7 +3,7 @@ using HDF.PInvoke;
namespace MatFileHandler.Hdf namespace MatFileHandler.Hdf
{ {
public struct Group : IDisposable internal struct Group : IDisposable
{ {
public long Id { get; private set; } public long Id { get; private set; }
@ -20,5 +20,15 @@ namespace MatFileHandler.Hdf
Id = -1; Id = -1;
} }
} }
/// <summary>
/// Creates an <see cref="Attribute"/> for the named attribute of this group.
/// </summary>
/// <param name="name">Attribute name.</param>
/// <returns>The attribute wrapper constructed from this group's id and <paramref name="name"/>.</returns>
public Attribute GetAttribute(string name) => new Attribute(Id, name);
/// <summary>
/// Checks whether this group has an attribute with the given name.
/// </summary>
/// <param name="name">Attribute name to look for.</param>
/// <returns>
/// <c>true</c> only when the library reports that the attribute exists;
/// <c>false</c> when it does not exist or when the query itself fails.
/// </returns>
public bool AttributeExists(string name)
{
    // H5Aexists_by_name returns a tri-state: positive = exists, zero = absent,
    // negative = failure. A failed query must not be reported as "exists".
    return H5A.exists_by_name(Id, ".", name) > 0;
}
} }
} }

View File

@ -0,0 +1,60 @@
using System;
using System.Collections;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;
using HDF.PInvoke;
namespace MatFileHandler.Hdf
{
/// <summary>
/// Reads a dataset of HDF5 object references and dereferences each one into a
/// <see cref="Dataset"/>. Disposing releases the native buffer and every
/// dereferenced dataset; the source dataset itself is not disposed here.
/// </summary>
internal struct ReferenceArray : IDisposable, IEnumerable<Dataset>
{
    // An HDF5 object reference (hobj_ref_t) is a 64-bit file address, independent
    // of the process pointer size. Sizing the buffer by IntPtr would under-allocate
    // (and mis-stride) in a 32-bit process.
    private const int ReferenceSize = sizeof(ulong);

    /// <summary>Gets the dataset the references were read from.</summary>
    public Dataset Dataset { get; }

    /// <summary>Gets the number of references.</summary>
    public int Size { get; }

    /// <summary>Gets the native buffer holding the raw references.</summary>
    public MemoryHandle Buf { get; }

    /// <summary>Gets the dereferenced datasets, in storage order.</summary>
    public Dataset[] References { get; }

    /// <summary>
    /// Reads <paramref name="size"/> object references from <paramref name="dataset"/>
    /// and dereferences each of them.
    /// </summary>
    /// <param name="dataset">Dataset whose elements are object references.</param>
    /// <param name="size">Number of references stored in the dataset.</param>
    public ReferenceArray(Dataset dataset, int size)
    {
        Dataset = dataset;
        Size = size;
        Buf = new MemoryHandle(ReferenceSize * size);
        Dataset.ReadToHandle(Type.Reference, Buf);
        References = new Dataset[size];
        for (var i = 0; i < size; i++)
        {
            var referencePointer = Buf.Handle + (i * ReferenceSize);
            References[i] =
                new Dataset(H5R.dereference(
                    dataset.Id,
                    H5P.DEFAULT,
                    H5R.type_t.OBJECT,
                    referencePointer));
        }
    }

    /// <summary>
    /// Releases the native buffer and disposes every dereferenced dataset.
    /// </summary>
    public void Dispose()
    {
        Buf?.Dispose();
        if (!(References is null))
        {
            foreach (var reference in References)
            {
                reference.Dispose();
            }
        }
    }

    /// <inheritdoc />
    public IEnumerator<Dataset> GetEnumerator()
    {
        return ((IEnumerable<Dataset>)References).GetEnumerator();
    }

    /// <inheritdoc />
    IEnumerator IEnumerable.GetEnumerator()
    {
        return References.GetEnumerator();
    }
}
}

View File

@ -0,0 +1,28 @@
using System.Linq;
using HDF.PInvoke;
namespace MatFileHandler.Hdf
{
/// <summary>
/// Wrapper around an HDF5 dataspace identifier.
/// </summary>
internal struct Space
{
    /// <summary>Wraps the given dataspace identifier.</summary>
    /// <param name="id">Dataspace identifier.</param>
    public Space(long id)
    {
        Id = id;
    }

    /// <summary>Gets the wrapped dataspace identifier.</summary>
    public long Id { get; }

    /// <summary>
    /// Gets the number of dimensions as reported by <c>H5S.get_simple_extent_ndims</c>.
    /// </summary>
    /// <returns>The rank; the raw library value is returned unchanged.</returns>
    public int GetRank()
    {
        return H5S.get_simple_extent_ndims(Id);
    }

    /// <summary>
    /// Gets the extent of every dimension, in reversed order relative to how
    /// the library reports them.
    /// </summary>
    /// <returns>Dimension sizes as 32-bit integers.</returns>
    /// <exception cref="System.InvalidOperationException">The rank query failed.</exception>
    /// <exception cref="System.OverflowException">A dimension does not fit in an <c>int</c>.</exception>
    public int[] GetDimensions()
    {
        var rank = GetRank();
        if (rank < 0)
        {
            // A negative rank is the library's failure indicator; surface it
            // explicitly instead of failing on the array allocation below.
            throw new System.InvalidOperationException("Could not determine the rank of the HDF dataspace.");
        }

        var dims = new ulong[rank];
        H5S.get_simple_extent_dims(Id, dims, null);
        System.Array.Reverse(dims);

        // checked: a dimension larger than int.MaxValue must not be silently truncated.
        return dims.Select(x => checked((int)x)).ToArray();
    }
}
}

View File

@ -0,0 +1,70 @@
using System;
using HDF.PInvoke;
namespace MatFileHandler.Hdf
{
/// <summary>
/// Wrapper around an HDF5 datatype identifier, with shortcuts for the
/// predefined types used by the reader.
/// NOTE(review): nothing in this struct closes the wrapped id; ids produced by
/// <see cref="WithSize"/> and <see cref="CreateCompound"/> presumably need to be
/// released by the caller — confirm.
/// </summary>
internal struct Type
{
    /// <summary>Wraps the given datatype identifier.</summary>
    /// <param name="id">Datatype identifier.</param>
    public Type(long id)
    {
        Id = id;
    }

    /// <summary>Gets the wrapped datatype identifier.</summary>
    public long Id { get; }

    public static Type NativeInt8 => new Type(H5T.NATIVE_INT8);

    public static Type NativeUInt8 => new Type(H5T.NATIVE_UINT8);

    public static Type NativeInt16 => new Type(H5T.NATIVE_INT16);

    public static Type NativeUInt16 => new Type(H5T.NATIVE_UINT16);

    public static Type NativeInt32 => new Type(H5T.NATIVE_INT32);

    public static Type NativeUInt32 => new Type(H5T.NATIVE_UINT32);

    public static Type NativeInt64 => new Type(H5T.NATIVE_INT64);

    public static Type NativeUInt64 => new Type(H5T.NATIVE_UINT64);

    public static Type NativeFloat => new Type(H5T.NATIVE_FLOAT);

    public static Type NativeDouble => new Type(H5T.NATIVE_DOUBLE);

    public static Type NativeInt => new Type(H5T.NATIVE_INT);

    public static Type NativeUInt => new Type(H5T.NATIVE_UINT);

    public static Type CS1 => new Type(H5T.C_S1);

    public static Type Reference => new Type(H5T.STD_REF_OBJ);

    /// <summary>Creates a new compound datatype of the given total size.</summary>
    /// <param name="size">Size passed to <c>H5T.create</c>.</param>
    /// <returns>The newly created compound type.</returns>
    public static Type CreateCompound(int size) =>
        new Type(H5T.create(H5T.class_t.COMPOUND, (IntPtr)size));

    /// <summary>Gets the class of this datatype.</summary>
    public Class GetClass() => new Class(H5T.get_class(Id));

    /// <summary>Gets the size reported by <c>H5T.get_size</c>, converted to <c>int</c>.</summary>
    public int GetSize() => (int)H5T.get_size(Id);

    /// <summary>Copies this datatype and sets the size of the copy.</summary>
    /// <param name="size">Size to set on the copy.</param>
    /// <returns>The resized copy; this instance is left untouched.</returns>
    public Type WithSize(int size)
    {
        var copyId = H5T.copy(Id);
        H5T.set_size(copyId, (IntPtr)size);
        return new Type(copyId);
    }

    /// <summary>Inserts a member at offset zero into this (compound) datatype.</summary>
    /// <param name="name">Member name.</param>
    /// <param name="fieldType">Datatype of the member.</param>
    public void InsertField(string name, Type fieldType) =>
        H5T.insert(Id, name, IntPtr.Zero, fieldType.Id);
}
}

View File

@ -6,13 +6,16 @@ using System.Runtime.InteropServices;
using System.Text; using System.Text;
using HDF.PInvoke; using HDF.PInvoke;
using MatFileHandler.Hdf; using MatFileHandler.Hdf;
using Array = MatFileHandler.Hdf.Array;
using Attribute = MatFileHandler.Hdf.Attribute;
namespace MatFileHandler namespace MatFileHandler
{ {
internal class HdfFileReader internal class HdfFileReader
{ {
private const string classAttributeName = "MATLAB_class";
private const string globalAttributeName = "MATLAB_global";
private const string sparseAttributeName = "MATLAB_sparse";
private long fileId; private long fileId;
private List<IVariable> variables; private List<IVariable> variables;
@ -25,8 +28,8 @@ namespace MatFileHandler
internal IMatFile Read() internal IMatFile Read()
{ {
variables = new List<IVariable>(); variables = new List<IVariable>();
H5G.info_t group_info = default(H5G.info_t); var group_info = default(H5G.info_t);
var result = H5G.get_info(fileId, ref group_info); H5G.get_info(fileId, ref group_info);
var numberOfVariables = group_info.nlinks; var numberOfVariables = group_info.nlinks;
ulong idx = 0; ulong idx = 0;
@ -40,94 +43,10 @@ namespace MatFileHandler
VariableIterator, VariableIterator,
IntPtr.Zero); IntPtr.Zero);
} }
return new MatFile(variables); return new MatFile(variables);
} }
/// <summary>
/// Reads the "MATLAB_global" attribute of an object, if present.
/// </summary>
/// <param name="datasetId">Identifier of the object carrying the attribute.</param>
/// <returns>
/// <c>true</c> when the attribute exists and holds a non-zero integer;
/// <c>false</c> when it is absent, zero, or its existence could not be queried.
/// </returns>
private bool ReadGlobalFlag(long datasetId)
{
    // exists_by_name is tri-state (positive / zero / negative-on-failure);
    // only a positive result means the attribute can be opened.
    if (H5A.exists_by_name(datasetId, ".", "MATLAB_global") <= 0)
    {
        return false;
    }

    using (var globalAttribute = new Attribute(datasetId, "MATLAB_global"))
    using (var h = new MemoryHandle(sizeof(int)))
    {
        H5A.read(globalAttribute.Id, H5T.NATIVE_INT, h.Handle);
        return Marshal.ReadInt32(h.Handle) != 0;
    }
}
/// <summary>
/// Link-iteration callback: reads one top-level object of the file and appends
/// it to <c>variables</c>. Datasets are read directly, groups are read through
/// <c>ReadGroup</c>, and the group named "#refs#" is ignored.
/// </summary>
/// <param name="group">Identifier of the group being iterated.</param>
/// <param name="name">Pointer to the ANSI name of the current link.</param>
/// <param name="info">Link information supplied by the iterator (unused).</param>
/// <param name="op_data">User data supplied by the iterator (unused).</param>
/// <returns>Always 0.</returns>
/// <exception cref="NotImplementedException">The object is neither a dataset nor a group.</exception>
private int VariableIterator(long group, IntPtr name, ref H5L.info_t info, IntPtr op_data)
{
    var variableName = Marshal.PtrToStringAnsi(name);
    var objectInfo = default(H5O.info_t);
    H5O.get_info_by_name(group, variableName, ref objectInfo);

    if (objectInfo.type == H5O.type_t.DATASET)
    {
        using (var dataset = new Dataset(group, variableName))
        {
            var isGlobal = ReadGlobalFlag(dataset.Id);
            var value = ReadDataset(dataset.Id);
            variables.Add(new MatVariable(value, variableName, isGlobal));
        }
    }
    else if (objectInfo.type == H5O.type_t.GROUP)
    {
        if (variableName != "#refs#")
        {
            using (var subGroup = new Group(group, variableName))
            {
                var isGlobal = ReadGlobalFlag(subGroup.Id);
                var groupValue = ReadGroup(subGroup.Id);
                variables.Add(new MatVariable(groupValue, variableName, isGlobal));
            }
        }
    }
    else
    {
        throw new NotImplementedException();
    }

    return 0;
}
/// <summary>
/// Reads the "MATLAB_class" string attribute of an object.
/// </summary>
/// <param name="datasetId">Identifier of the object carrying the attribute.</param>
/// <returns>The attribute bytes decoded as ASCII.</returns>
/// <exception cref="NotImplementedException">The attribute is not of string class.</exception>
private static string GetMatlabClassOfDataset(long datasetId)
{
    using (var attribute = new Attribute(datasetId, "MATLAB_class"))
    {
        var typeId = H5A.get_type(attribute.Id);
        try
        {
            if (H5T.get_class(typeId) != H5T.class_t.STRING)
            {
                throw new NotImplementedException();
            }

            var typeIdSize = (int)H5T.get_size(typeId);

            // Fixed-length C string type, resized to match the stored attribute.
            var classId = H5T.copy(H5T.C_S1);
            try
            {
                H5T.set_size(classId, (IntPtr)typeIdSize);
                var matlabClassNameBytes = new byte[typeIdSize];
                using (var buf = new MemoryHandle(typeIdSize))
                {
                    H5A.read(attribute.Id, classId, buf.Handle);
                    Marshal.Copy(buf.Handle, matlabClassNameBytes, 0, typeIdSize);
                }

                return Encoding.ASCII.GetString(matlabClassNameBytes);
            }
            finally
            {
                // Ids returned by H5T.copy must be released explicitly.
                H5T.close(classId);
            }
        }
        finally
        {
            // Ids returned by H5A.get_type must be released explicitly.
            H5T.close(typeId);
        }
    }
}
/// <summary>
/// Gets the dimensions of a dataset, in reversed order relative to how the
/// library reports them.
/// </summary>
/// <param name="datasetId">Dataset identifier.</param>
/// <returns>Dimension sizes as 32-bit integers.</returns>
private static int[] GetDimensionsOfDataset(long datasetId)
{
    var spaceId = H5D.get_space(datasetId);
    try
    {
        var rank = H5S.get_simple_extent_ndims(spaceId);
        var dims = new ulong[rank];
        H5S.get_simple_extent_dims(spaceId, dims, null);
        System.Array.Reverse(dims);
        return dims.Select(x => (int)x).ToArray();
    }
    finally
    {
        // The dataspace id returned by H5D.get_space must be released explicitly.
        H5S.close(spaceId);
    }
}
private static MatlabClass ArrayTypeFromMatlabClassName(string matlabClassName) private static MatlabClass ArrayTypeFromMatlabClassName(string matlabClassName)
{ {
switch (matlabClassName) switch (matlabClassName)
@ -161,173 +80,59 @@ namespace MatFileHandler
case "cell": case "cell":
return MatlabClass.MCell; return MatlabClass.MCell;
} }
throw new NotImplementedException(); throw new NotImplementedException();
} }
private static int GroupFieldNamesIterator(long group, IntPtr name, ref H5L.info_t info, IntPtr data) private static T[] ConvertDataToProperType<T>(byte[] bytes, MatlabClass arrayType)
{
var nameString = Marshal.PtrToStringAnsi(name);
H5O.info_t objectInfo = default(H5O.info_t);
H5O.get_info_by_name(group, nameString, ref objectInfo, H5P.DEFAULT);
return 0;
}
private static IArray ReadSparseArray<T>(long groupId, MatlabClass arrayType)
where T : struct where T : struct
{ {
using (var sparseAttribute = new Attribute(groupId, "MATLAB_sparse")) var length = bytes.Length;
{ var arrayElementSize = SizeOfArrayElement(arrayType);
using (var numberOfRowsHandle = new MemoryHandle(sizeof(uint))) var data = new T[length / arrayElementSize];
{ Buffer.BlockCopy(bytes, 0, data, 0, length);
H5A.read(sparseAttribute.Id, H5T.NATIVE_UINT, numberOfRowsHandle.Handle); return data;
var numberOfRows = Marshal.ReadInt32(numberOfRowsHandle.Handle);
int[] rowIndex;
int[] columnIndex;
using (var irData = new Dataset(groupId, "ir"))
{
var ds = GetDimensionsOfDataset(irData.Id);
var numberOfIr = ds.NumberOfElements();
var irBytes = ReadDataset(irData.Id, H5T.NATIVE_INT, numberOfIr * sizeof(int));
rowIndex = new int[numberOfIr];
Buffer.BlockCopy(irBytes, 0, rowIndex, 0, irBytes.Length);
}
using (var jcData = new Dataset(groupId, "jc"))
{
var ds = GetDimensionsOfDataset(jcData.Id);
var numberOfJc = ds.NumberOfElements();
var jcBytes = ReadDataset(jcData.Id, H5T.NATIVE_INT, numberOfJc * sizeof(int));
columnIndex = new int[numberOfJc];
Buffer.BlockCopy(jcBytes, 0, columnIndex, 0, jcBytes.Length);
} }
using (var data = new Dataset(groupId, "data")) private static int[] GetDimensionsOfDataset(Dataset dataset)
{ {
var ds = GetDimensionsOfDataset(data.Id); return dataset.GetHdfSpace().GetDimensions();
var dims = new int[2];
dims[0] = numberOfRows;
dims[1] = columnIndex.Length - 1;
var dataSize = ds.NumberOfElements() * SizeOfArrayElement(arrayType);
var storageSize = (int)H5D.get_storage_size(data.Id);
var dataSetType = H5D.get_type(data.Id);
var dataSetTypeClass = H5T.get_class(dataSetType);
var isCompound = dataSetTypeClass == H5T.class_t.COMPOUND;
if (isCompound)
{
var (convertedRealData, convertedImaginaryData) = ReadComplexData<T>(data.Id, dataSize, arrayType);
if (arrayType == MatlabClass.MDouble)
{
var complexData =
(convertedRealData as double[])
.Zip(convertedImaginaryData as double[], (x, y) => new Complex(x, y))
.ToArray();
var complexDataDictionary = DataExtraction.ConvertMatlabSparseToDictionary(rowIndex, columnIndex, j => complexData[j]);
return new SparseArrayOf<Complex>(dims, complexDataDictionary);
}
else
{
var complexData =
convertedRealData
.Zip(convertedImaginaryData, (x, y) => new ComplexOf<T>(x, y))
.ToArray();
var complexDataDictionary = DataExtraction.ConvertMatlabSparseToDictionary(rowIndex, columnIndex, j => complexData[j]);
return new SparseArrayOf<ComplexOf<T>>(dims, complexDataDictionary);
}
}
if (dataSize != storageSize)
{
throw new Exception("Data size mismatch.");
}
var d = ReadDataset(data.Id, H5tTypeFromHdfMatlabClass(arrayType), dataSize);
var elements = ConvertDataToProperType<T>(d, arrayType);
var dataDictionary = DataExtraction.ConvertMatlabSparseToDictionary(rowIndex, columnIndex, j => elements[j]);
return new SparseArrayOf<T>(dims, dataDictionary);
}
}
}
} }
private static IArray ReadGroup(long groupId) private static string GetMatlabClassFromAttribute(Hdf.Attribute attribute)
{ {
var matlabClass = GetMatlabClassOfDataset(groupId); var type = attribute.GetHdfType();
if (matlabClass == "struct") var cl = type.GetClass();
if (cl != Class.String)
{ {
return ReadStruct(groupId);
}
if (H5A.exists_by_name(groupId, ".", "MATLAB_sparse") != 0)
{
var dims = new int[0];
var arrayType = ArrayTypeFromMatlabClassName(matlabClass);
switch (arrayType)
{
case MatlabClass.MEmpty:
return Array.Empty();
case MatlabClass.MLogical:
return ReadSparseArray<bool>(groupId, arrayType);
case MatlabClass.MInt8:
return ReadSparseArray<sbyte>(groupId, arrayType);
case MatlabClass.MUInt8:
return ReadSparseArray<byte>(groupId, arrayType);
case MatlabClass.MInt16:
return ReadSparseArray<short>(groupId, arrayType);
case MatlabClass.MUInt16:
return ReadSparseArray<ushort>(groupId, arrayType);
case MatlabClass.MInt32:
return ReadSparseArray<int>(groupId, arrayType);
case MatlabClass.MUInt32:
return ReadSparseArray<uint>(groupId, arrayType);
case MatlabClass.MInt64:
return ReadSparseArray<long>(groupId, arrayType);
case MatlabClass.MUInt64:
return ReadSparseArray<ulong>(groupId, arrayType);
case MatlabClass.MSingle:
return ReadSparseArray<float>(groupId, arrayType);
case MatlabClass.MDouble:
return ReadSparseArray<double>(groupId, arrayType);
default:
throw new NotSupportedException();
}
}
throw new NotImplementedException(); throw new NotImplementedException();
} }
private static string[] ReadFieldNames(long groupId) var typeIdSize = type.GetSize();
var copiedType = Hdf.Type.CS1.WithSize(type.GetSize());
var matlabClassNameBytes = new byte[typeIdSize];
using (var buf = new MemoryHandle(typeIdSize))
{ {
// Try to read fields from MATLAB_fields. attribute.ReadToHandle(buf, copiedType);
using (var attr = new Attribute(groupId, "MATLAB_fields")) Marshal.Copy(buf.Handle, matlabClassNameBytes, 0, typeIdSize);
{
if (attr.Id == 0)
{
throw new NotImplementedException();
}
var spaceId = H5A.get_space(attr.Id);
var rank = H5S.get_simple_extent_ndims(spaceId);
var dims = new ulong[rank];
H5S.get_simple_extent_dims(spaceId, dims, null);
System.Array.Reverse(dims);
var dimensions = dims.Select(x => (int)x).ToArray();
var numberOfFields = dimensions.NumberOfElements();
var field_id = H5A.get_type(attr.Id);
var fieldNamePointersSizeInBytes = numberOfFields * Marshal.SizeOf(default(H5T.hvl_t));
var fieldNamePointers = new IntPtr[numberOfFields * 2];
using (var fieldNamesBuf = new MemoryHandle(fieldNamePointersSizeInBytes))
{
H5A.read(attr.Id, field_id, fieldNamesBuf.Handle);
Marshal.Copy(fieldNamesBuf.Handle, fieldNamePointers, 0, numberOfFields * 2);
} }
var fieldNames = new string[numberOfFields]; return Encoding.ASCII.GetString(matlabClassNameBytes);
for (var i = 0; i < numberOfFields; i++)
{
var stringLength = fieldNamePointers[i * 2];
var stringPointer = fieldNamePointers[(i * 2) + 1];
fieldNames[i] = Marshal.PtrToStringAnsi(stringPointer, (int)stringLength);
} }
return fieldNames;
private static string GetMatlabClassOfDataset(Dataset dataset)
{
using (var attribute = dataset.GetAttribute(classAttributeName))
{
return GetMatlabClassFromAttribute(attribute);
}
}
private static string GetMatlabClassOfGroup(Group group)
{
using (var attribute = group.GetAttribute(classAttributeName))
{
return GetMatlabClassFromAttribute(attribute);
} }
} }
@ -338,6 +143,365 @@ namespace MatFileHandler
return objectInfo.type; return objectInfo.type;
} }
/// <summary>
/// Maps a numeric or logical MATLAB class to the native HDF type used to read it.
/// Logical values share the unsigned 8-bit type.
/// </summary>
/// <param name="arrayType">MATLAB class of the array.</param>
/// <returns>The matching native HDF type.</returns>
/// <exception cref="NotImplementedException">The class has no mapping here.</exception>
private static Hdf.Type H5tTypeFromHdfMatlabClass(MatlabClass arrayType)
{
    switch (arrayType)
    {
        case MatlabClass.MLogical:
        case MatlabClass.MUInt8:
            return Hdf.Type.NativeUInt8;
        case MatlabClass.MInt8:
            return Hdf.Type.NativeInt8;
        case MatlabClass.MUInt16:
            return Hdf.Type.NativeUInt16;
        case MatlabClass.MInt16:
            return Hdf.Type.NativeInt16;
        case MatlabClass.MUInt32:
            return Hdf.Type.NativeUInt32;
        case MatlabClass.MInt32:
            return Hdf.Type.NativeInt32;
        case MatlabClass.MUInt64:
            return Hdf.Type.NativeUInt64;
        case MatlabClass.MInt64:
            return Hdf.Type.NativeInt64;
        case MatlabClass.MSingle:
            return Hdf.Type.NativeFloat;
        case MatlabClass.MDouble:
            return Hdf.Type.NativeDouble;
        default:
            throw new NotImplementedException();
    }
}
/// <summary>
/// Reads a cell array: the dataset holds one object reference per cell, and
/// each referenced dataset is read recursively.
/// </summary>
/// <param name="dataset">Dataset containing the references.</param>
/// <param name="dims">Dimensions of the cell array.</param>
/// <returns>A <see cref="CellArray"/> with the cells in storage order.</returns>
private static IArray ReadCellArray(Dataset dataset, int[] dims)
{
    var cellCount = dims.NumberOfElements();
    var cells = new IArray[cellCount];
    using (var references = new ReferenceArray(dataset, cellCount))
    {
        var index = 0;
        foreach (var referenced in references)
        {
            cells[index] = ReadDataset(referenced);
            index++;
        }
    }

    return new CellArray(dims, cells);
}
/// <summary>
/// Reads a character array: the dataset is read as unsigned 16-bit values and
/// the resulting bytes are decoded as UTF-16 (<see cref="Encoding.Unicode"/>).
/// </summary>
/// <param name="dataset">Dataset holding the characters.</param>
/// <param name="dims">Dimensions of the character array.</param>
/// <returns>A <see cref="CharArray"/> with the decoded text.</returns>
private static IArray ReadCharArray(Dataset dataset, int[] dims)
{
    // The read buffer is sized by the dataset's storage size.
    // NOTE(review): presumably this equals the in-memory size only for
    // uncompressed uint16 storage — confirm.
    var bytes = ReadDataset(dataset, Hdf.Type.NativeUInt16, dataset.GetStorageSize());
    return new CharArray(dims, Encoding.Unicode.GetString(bytes));
}
/// <summary>
/// Reads the real and imaginary parts of a compound ("real"/"imag") dataset
/// separately, each through a one-member compound type.
/// </summary>
/// <typeparam name="T">Managed element type of each part.</typeparam>
/// <param name="dataset">Dataset holding the complex values.</param>
/// <param name="dataSize">Size in bytes of one part (element count times element size).</param>
/// <param name="arrayType">MATLAB class of the elements.</param>
/// <returns>The converted real and imaginary parts.</returns>
private static (T[] real, T[] imaginary) ReadComplexData<T>(
    Dataset dataset,
    int dataSize,
    MatlabClass arrayType)
    where T : struct
{
    var h5Type = H5tTypeFromHdfMatlabClass(arrayType);
    var h5Size = h5Type.GetSize();

    // Reads one named member of the compound dataset as raw bytes.
    byte[] ReadComponent(string fieldName)
    {
        var componentType = Hdf.Type.CreateCompound(h5Size);
        try
        {
            componentType.InsertField(fieldName, h5Type);
            return ReadDataset(dataset, componentType, dataSize);
        }
        finally
        {
            // Compound types created here are ours to release; without this
            // every complex read would leave two type ids open.
            H5T.close(componentType.Id);
        }
    }

    var realData = ReadComponent("real");
    var imaginaryData = ReadComponent("imag");
    var convertedRealData = ConvertDataToProperType<T>(realData, arrayType);
    var convertedImaginaryData = ConvertDataToProperType<T>(imaginaryData, arrayType);
    return (convertedRealData, convertedImaginaryData);
}
/// <summary>
/// Reads a dataset as a MATLAB array, choosing the reader from the dataset's
/// MATLAB class attribute.
/// </summary>
/// <param name="dataset">Dataset to read.</param>
/// <returns>The array read from the dataset.</returns>
/// <exception cref="NotImplementedException">The array type has no reader here.</exception>
private static IArray ReadDataset(Dataset dataset)
{
    var dims = GetDimensionsOfDataset(dataset);
    var className = GetMatlabClassOfDataset(dataset);
    var arrayType = ArrayTypeFromMatlabClassName(className);
    switch (arrayType)
    {
        case MatlabClass.MEmpty:
            return Hdf.Array.Empty();
        case MatlabClass.MChar:
            return ReadCharArray(dataset, dims);
        case MatlabClass.MCell:
            return ReadCellArray(dataset, dims);
        case MatlabClass.MLogical:
            return ReadNumericalArray<bool>(dataset, dims, arrayType);
        case MatlabClass.MInt8:
            return ReadNumericalArray<sbyte>(dataset, dims, arrayType);
        case MatlabClass.MUInt8:
            return ReadNumericalArray<byte>(dataset, dims, arrayType);
        case MatlabClass.MInt16:
            return ReadNumericalArray<short>(dataset, dims, arrayType);
        case MatlabClass.MUInt16:
            return ReadNumericalArray<ushort>(dataset, dims, arrayType);
        case MatlabClass.MInt32:
            return ReadNumericalArray<int>(dataset, dims, arrayType);
        case MatlabClass.MUInt32:
            return ReadNumericalArray<uint>(dataset, dims, arrayType);
        case MatlabClass.MInt64:
            return ReadNumericalArray<long>(dataset, dims, arrayType);
        case MatlabClass.MUInt64:
            return ReadNumericalArray<ulong>(dataset, dims, arrayType);
        case MatlabClass.MSingle:
            return ReadNumericalArray<float>(dataset, dims, arrayType);
        case MatlabClass.MDouble:
            return ReadNumericalArray<double>(dataset, dims, arrayType);
        default:
            throw new NotImplementedException($"Unknown array type: {arrayType}.");
    }
}
/// <summary>
/// Reads the raw contents of a dataset into a managed byte array.
/// </summary>
/// <param name="dataset">Dataset to read.</param>
/// <param name="elementType">In-memory HDF type the data is read as.</param>
/// <param name="dataSize">Number of bytes to allocate and copy out.</param>
/// <returns>A new array of <paramref name="dataSize"/> bytes.</returns>
private static byte[] ReadDataset(Dataset dataset, Hdf.Type elementType, int dataSize)
{
    var bytes = new byte[dataSize];
    using (var nativeBuffer = new MemoryHandle(dataSize))
    {
        // Read into native memory first, then copy into the managed array.
        dataset.ReadToHandle(elementType, nativeBuffer);
        Marshal.Copy(nativeBuffer.Handle, bytes, 0, dataSize);
    }

    return bytes;
}
/// <summary>
/// Reads the field names of a struct group from its "MATLAB_fields" attribute.
/// Each attribute element is read as a (length, pointer) pair and the pointed-to
/// bytes are decoded as an ANSI string of that length.
/// </summary>
/// <param name="groupId">Identifier of the struct group.</param>
/// <returns>The field names in attribute order.</returns>
/// <exception cref="NotImplementedException">The attribute id is 0.</exception>
private static string[] ReadFieldNames(long groupId)
{
    // Try to read fields from MATLAB_fields.
    using (var attr = new Hdf.Attribute(groupId, "MATLAB_fields"))
    {
        // NOTE(review): only an id of exactly 0 is rejected; confirm what id a
        // missing attribute actually produces.
        if (attr.Id == 0)
        {
            throw new NotImplementedException();
        }

        var fieldCount = attr.GetSpace().GetDimensions().NumberOfElements();
        var fieldType = attr.GetHdfType();

        // Two pointer-sized slots per field: length first, then data pointer.
        var slotCount = fieldCount * 2;
        var slots = new IntPtr[slotCount];
        var bufferSize = fieldCount * Marshal.SizeOf(default(H5T.hvl_t));
        using (var nativeBuffer = new MemoryHandle(bufferSize))
        {
            attr.ReadToHandle(nativeBuffer, fieldType);
            Marshal.Copy(nativeBuffer.Handle, slots, 0, slotCount);
        }

        var fieldNames = new string[fieldCount];
        for (var field = 0; field < fieldCount; field++)
        {
            var length = slots[field * 2];
            var pointer = slots[(field * 2) + 1];
            fieldNames[field] = Marshal.PtrToStringAnsi(pointer, (int)length);
        }

        return fieldNames;
    }
}
/// <summary>
/// Reads a group as a MATLAB array. A group is either a struct (class
/// "struct") or a sparse array (marked by the sparse attribute).
/// </summary>
/// <param name="group">Group to read.</param>
/// <returns>The structure or sparse array stored in the group.</returns>
/// <exception cref="NotImplementedException">The group is neither a struct nor sparse.</exception>
/// <exception cref="NotSupportedException">The sparse element class is not supported.</exception>
private static IArray ReadGroup(Group group)
{
    var matlabClass = GetMatlabClassOfGroup(group);
    if (matlabClass == "struct")
    {
        return ReadStruct(group.Id);
    }

    if (!group.AttributeExists(sparseAttributeName))
    {
        throw new NotImplementedException();
    }

    var arrayType = ArrayTypeFromMatlabClassName(matlabClass);
    switch (arrayType)
    {
        case MatlabClass.MEmpty:
            return Hdf.Array.Empty();
        case MatlabClass.MLogical:
            return ReadSparseArray<bool>(group.Id, arrayType);
        case MatlabClass.MInt8:
            return ReadSparseArray<sbyte>(group.Id, arrayType);
        case MatlabClass.MUInt8:
            return ReadSparseArray<byte>(group.Id, arrayType);
        case MatlabClass.MInt16:
            return ReadSparseArray<short>(group.Id, arrayType);
        case MatlabClass.MUInt16:
            return ReadSparseArray<ushort>(group.Id, arrayType);
        case MatlabClass.MInt32:
            return ReadSparseArray<int>(group.Id, arrayType);
        case MatlabClass.MUInt32:
            return ReadSparseArray<uint>(group.Id, arrayType);
        case MatlabClass.MInt64:
            return ReadSparseArray<long>(group.Id, arrayType);
        case MatlabClass.MUInt64:
            return ReadSparseArray<ulong>(group.Id, arrayType);
        case MatlabClass.MSingle:
            return ReadSparseArray<float>(group.Id, arrayType);
        case MatlabClass.MDouble:
            return ReadSparseArray<double>(group.Id, arrayType);
        default:
            throw new NotSupportedException();
    }
}
/// <summary>
/// Pairs real and imaginary parts element by element into
/// <see cref="ComplexOf{T}"/> values. The result is lazily evaluated and ends
/// with the shorter input.
/// </summary>
/// <typeparam name="T">Element type of both parts.</typeparam>
/// <param name="realData">Real parts.</param>
/// <param name="imaginaryData">Imaginary parts.</param>
/// <returns>The combined complex values.</returns>
private static IEnumerable<ComplexOf<T>> CombineComplexOfData<T>(
    IEnumerable<T> realData,
    IEnumerable<T> imaginaryData)
    where T : struct =>
    realData.Zip(imaginaryData, (re, im) => new ComplexOf<T>(re, im));
/// <summary>
/// Pairs real and imaginary parts element by element into <see cref="Complex"/>
/// values. The result is lazily evaluated and ends with the shorter input.
/// </summary>
/// <param name="realData">Real parts.</param>
/// <param name="imaginaryData">Imaginary parts.</param>
/// <returns>The combined complex values.</returns>
private static IEnumerable<Complex> CombineComplexData(
    IEnumerable<double> realData,
    IEnumerable<double> imaginaryData) =>
    realData.Zip(imaginaryData, (re, im) => new Complex(re, im));
/// <summary>
/// Reads a numerical (or logical) dataset. Compound datasets are read as
/// complex arrays; anything else is read as a plain array of <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Managed element type matching <paramref name="arrayType"/>.</typeparam>
/// <param name="dataset">Dataset to read.</param>
/// <param name="dims">Dimensions of the array.</param>
/// <param name="arrayType">MATLAB class of the elements.</param>
/// <returns>The numerical array read from the dataset.</returns>
/// <exception cref="Exception">A non-compound dataset's storage size differs from the expected size.</exception>
private static IArray ReadNumericalArray<T>(Dataset dataset, int[] dims, MatlabClass arrayType)
    where T : struct
{
    var expectedSize = dims.NumberOfElements() * SizeOfArrayElement(arrayType);
    var storedSize = dataset.GetStorageSize();

    // NOTE(review): the type obtained here is never closed in this method.
    if (dataset.GetHdfType().GetClass() == Class.Compound)
    {
        var (realPart, imaginaryPart) = ReadComplexData<T>(dataset, expectedSize, arrayType);
        if (arrayType != MatlabClass.MDouble)
        {
            var genericComplex = CombineComplexOfData(realPart, imaginaryPart).ToArray();
            return new NumericalArrayOf<ComplexOf<T>>(dims, genericComplex);
        }

        // Double-precision complex data maps to the built-in Complex type.
        var doubleComplex =
            CombineComplexData(realPart as double[], imaginaryPart as double[]).ToArray();
        return new NumericalArrayOf<Complex>(dims, doubleComplex);
    }

    // The size check applies to the non-compound path only.
    if (expectedSize != storedSize)
    {
        throw new Exception("Data size mismatch.");
    }

    var rawBytes = ReadDataset(dataset, H5tTypeFromHdfMatlabClass(arrayType), expectedSize);
    return new NumericalArrayOf<T>(dims, ConvertDataToProperType<T>(rawBytes, arrayType));
}
/// <summary>
/// Reads a sparse array stored as a group with "ir" (row indices), "jc"
/// (column offsets) and "data" datasets; the row count comes from the sparse
/// attribute and the column count is the length of "jc" minus one.
/// </summary>
/// <typeparam name="T">Managed element type matching <paramref name="arrayType"/>.</typeparam>
/// <param name="groupId">Identifier of the group holding the sparse array.</param>
/// <param name="arrayType">MATLAB class of the elements.</param>
/// <returns>The sparse array (real or complex) read from the group.</returns>
/// <exception cref="Exception">A non-compound "data" dataset's storage size differs from the expected size.</exception>
private static IArray ReadSparseArray<T>(long groupId, MatlabClass arrayType)
    where T : struct
{
    // Reads a whole index dataset of this group as native ints.
    int[] ReadIndices(string datasetName)
    {
        using (var indexDataset = new Dataset(groupId, datasetName))
        {
            var count = GetDimensionsOfDataset(indexDataset).NumberOfElements();
            var raw = ReadDataset(indexDataset, Hdf.Type.NativeInt, count * sizeof(int));
            var indices = new int[count];
            Buffer.BlockCopy(raw, 0, indices, 0, raw.Length);
            return indices;
        }
    }

    using (var sparseAttribute = new Hdf.Attribute(groupId, sparseAttributeName))
    using (var rowCountBuffer = new MemoryHandle(sizeof(uint)))
    {
        sparseAttribute.ReadToHandle(rowCountBuffer, Hdf.Type.NativeUInt);
        var rowCount = Marshal.ReadInt32(rowCountBuffer.Handle);

        var rowIndex = ReadIndices("ir");
        var columnIndex = ReadIndices("jc");

        using (var data = new Dataset(groupId, "data"))
        {
            var valueCount = GetDimensionsOfDataset(data).NumberOfElements();
            var dims = new[] { rowCount, columnIndex.Length - 1 };
            var dataSize = valueCount * SizeOfArrayElement(arrayType);
            var storageSize = data.GetStorageSize();

            if (data.GetHdfType().GetClass() == Class.Compound)
            {
                var (realPart, imaginaryPart) = ReadComplexData<T>(data, dataSize, arrayType);
                if (arrayType != MatlabClass.MDouble)
                {
                    var genericComplex = CombineComplexOfData<T>(realPart, imaginaryPart).ToArray();
                    var genericDictionary =
                        DataExtraction.ConvertMatlabSparseToDictionary(
                            rowIndex,
                            columnIndex,
                            j => genericComplex[j]);
                    return new SparseArrayOf<ComplexOf<T>>(dims, genericDictionary);
                }

                // Double-precision complex data maps to the built-in Complex type.
                var doubleComplex =
                    CombineComplexData(realPart as double[], imaginaryPart as double[]).ToArray();
                var doubleDictionary =
                    DataExtraction.ConvertMatlabSparseToDictionary(
                        rowIndex,
                        columnIndex,
                        j => doubleComplex[j]);
                return new SparseArrayOf<Complex>(dims, doubleDictionary);
            }

            // The size check applies to the non-compound path only.
            if (dataSize != storageSize)
            {
                throw new Exception("Data size mismatch.");
            }

            var rawValues = ReadDataset(data, H5tTypeFromHdfMatlabClass(arrayType), dataSize);
            var values = ConvertDataToProperType<T>(rawValues, arrayType);
            var valueDictionary =
                DataExtraction.ConvertMatlabSparseToDictionary(rowIndex, columnIndex, j => values[j]);
            return new SparseArrayOf<T>(dims, valueDictionary);
        }
    }
}
private static IArray ReadStruct(long groupId) private static IArray ReadStruct(long groupId)
{ {
var fieldNames = ReadFieldNames(groupId); var fieldNames = ReadFieldNames(groupId);
@ -346,16 +510,16 @@ namespace MatFileHandler
{ {
using (var firstField = new Dataset(groupId, fieldNames[0])) using (var firstField = new Dataset(groupId, fieldNames[0]))
{ {
var firstFieldTypeId = H5D.get_type(firstField.Id); var firstFieldType = firstField.GetHdfType();
if (H5T.get_class(firstFieldTypeId) == H5T.class_t.REFERENCE) if (firstFieldType.GetClass() == Class.Reference)
{ {
if (H5A.exists_by_name(firstField.Id, ".", "MATLAB_class") != 0) if (firstField.AttributeExists(classAttributeName))
{ {
throw new NotImplementedException(); throw new NotImplementedException();
} }
else else
{ {
var dimensions = GetDimensionsOfDataset(firstField.Id); var dimensions = GetDimensionsOfDataset(firstField);
var numberOfElements = dimensions.NumberOfElements(); var numberOfElements = dimensions.NumberOfElements();
var dictionary = new Dictionary<string, List<IArray>>(); var dictionary = new Dictionary<string, List<IArray>>();
foreach (var fieldName in fieldNames) foreach (var fieldName in fieldNames)
@ -367,26 +531,22 @@ namespace MatFileHandler
case H5O.type_t.DATASET: case H5O.type_t.DATASET:
using (var field = new Dataset(groupId, fieldName)) using (var field = new Dataset(groupId, fieldName))
{ {
using (var buf = new MemoryHandle(Marshal.SizeOf(default(IntPtr)) * numberOfElements)) using (var array = new ReferenceArray(field, numberOfElements))
{ {
H5D.read(field.Id, H5T.STD_REF_OBJ, H5S.ALL, H5S.ALL, H5P.DEFAULT, buf.Handle); foreach (var reference in array)
for (var i = 0; i < numberOfElements; i++)
{ {
var fieldDataSet = H5R.dereference( var value = ReadDataset(reference);
field.Id, dictionary[fieldName].Add(value);
H5P.DEFAULT,
H5R.type_t.OBJECT,
buf.Handle + (i * Marshal.SizeOf(default(IntPtr))));
var dataset = ReadDataset(fieldDataSet);
dictionary[fieldName].Add(dataset);
} }
} }
} }
break; break;
default: default:
throw new NotImplementedException(); throw new NotImplementedException();
} }
} }
return new StructureArray(dimensions, dictionary); return new StructureArray(dimensions, dictionary);
} }
} }
@ -400,72 +560,10 @@ namespace MatFileHandler
{ {
throw new NotImplementedException(); throw new NotImplementedException();
} }
throw new NotImplementedException(); throw new NotImplementedException();
} }
/// <summary>
/// Reads a dataset, given by its identifier, as a MATLAB array, choosing the
/// reader from the dataset's MATLAB class attribute.
/// </summary>
/// <param name="datasetId">Dataset identifier.</param>
/// <returns>The array read from the dataset.</returns>
/// <exception cref="NotImplementedException">The array type has no reader here.</exception>
private static IArray ReadDataset(long datasetId)
{
    var dims = GetDimensionsOfDataset(datasetId);
    var className = GetMatlabClassOfDataset(datasetId);
    var arrayType = ArrayTypeFromMatlabClassName(className);
    switch (arrayType)
    {
        case MatlabClass.MEmpty:
            return Array.Empty();
        case MatlabClass.MChar:
            return ReadCharArray(datasetId, dims);
        case MatlabClass.MCell:
            return ReadCellArray(datasetId, dims);
        case MatlabClass.MLogical:
            return ReadNumericalArray<bool>(datasetId, dims, arrayType);
        case MatlabClass.MInt8:
            return ReadNumericalArray<sbyte>(datasetId, dims, arrayType);
        case MatlabClass.MUInt8:
            return ReadNumericalArray<byte>(datasetId, dims, arrayType);
        case MatlabClass.MInt16:
            return ReadNumericalArray<short>(datasetId, dims, arrayType);
        case MatlabClass.MUInt16:
            return ReadNumericalArray<ushort>(datasetId, dims, arrayType);
        case MatlabClass.MInt32:
            return ReadNumericalArray<int>(datasetId, dims, arrayType);
        case MatlabClass.MUInt32:
            return ReadNumericalArray<uint>(datasetId, dims, arrayType);
        case MatlabClass.MInt64:
            return ReadNumericalArray<long>(datasetId, dims, arrayType);
        case MatlabClass.MUInt64:
            return ReadNumericalArray<ulong>(datasetId, dims, arrayType);
        case MatlabClass.MSingle:
            return ReadNumericalArray<float>(datasetId, dims, arrayType);
        case MatlabClass.MDouble:
            return ReadNumericalArray<double>(datasetId, dims, arrayType);
        default:
            throw new NotImplementedException($"Unknown array type: {arrayType}.");
    }
}
/// <summary>
/// Reads a cell array: the dataset holds one object reference per cell, and
/// each referenced dataset is read recursively.
/// </summary>
/// <param name="datasetId">Identifier of the dataset containing the references.</param>
/// <param name="dims">Dimensions of the cell array.</param>
/// <returns>A <see cref="CellArray"/> with the cells in storage order.</returns>
private static IArray ReadCellArray(long datasetId, int[] dims)
{
    // An HDF5 object reference is a 64-bit file address regardless of the
    // process pointer size, so the buffer must not be sized by IntPtr.
    const int referenceSize = sizeof(ulong);

    var numberOfElements = dims.NumberOfElements();
    var elements = new IArray[numberOfElements];
    using (var buf = new MemoryHandle(referenceSize * numberOfElements))
    {
        H5D.read(datasetId, H5T.STD_REF_OBJ, H5S.ALL, H5S.ALL, H5P.DEFAULT, buf.Handle);
        for (var i = 0; i < numberOfElements; i++)
        {
            var fieldDataSet =
                H5R.dereference(
                    datasetId,
                    H5P.DEFAULT,
                    H5R.type_t.OBJECT,
                    buf.Handle + (i * referenceSize));
            try
            {
                elements[i] = ReadDataset(fieldDataSet);
            }
            finally
            {
                // Every dereferenced object id must be released, otherwise one
                // id per cell stays open.
                H5D.close(fieldDataSet);
            }
        }
    }

    return new CellArray(dims, elements);
}
private static int SizeOfArrayElement(MatlabClass arrayType) private static int SizeOfArrayElement(MatlabClass arrayType)
{ {
switch (arrayType) switch (arrayType)
@ -490,119 +588,67 @@ namespace MatFileHandler
throw new NotImplementedException(); throw new NotImplementedException();
} }
private static long H5tTypeFromHdfMatlabClass(MatlabClass arrayType) private bool ReadGlobalFlag(Group group)
{ {
switch (arrayType) if (!group.AttributeExists(globalAttributeName))
{ {
case MatlabClass.MInt8: return false;
return H5T.NATIVE_INT8;
case MatlabClass.MUInt8:
case MatlabClass.MLogical:
return H5T.NATIVE_UINT8;
case MatlabClass.MInt16:
return H5T.NATIVE_INT16;
case MatlabClass.MUInt16:
return H5T.NATIVE_UINT16;
case MatlabClass.MInt32:
return H5T.NATIVE_INT32;
case MatlabClass.MUInt32:
return H5T.NATIVE_UINT32;
case MatlabClass.MInt64:
return H5T.NATIVE_INT64;
case MatlabClass.MUInt64:
return H5T.NATIVE_UINT64;
case MatlabClass.MSingle:
return H5T.NATIVE_FLOAT;
case MatlabClass.MDouble:
return H5T.NATIVE_DOUBLE;
} }
using (var globalAttribute = group.GetAttribute(globalAttributeName))
{
return globalAttribute.ReadBool();
}
}
/// <summary>
/// Reads the "global" flag attached to a dataset.
/// </summary>
/// <param name="dataset">Dataset to inspect.</param>
/// <returns>
/// The boolean value of the attribute named by <c>globalAttributeName</c>,
/// or false when the dataset has no such attribute.
/// </returns>
private bool ReadGlobalFlag(Dataset dataset)
{
    if (dataset.AttributeExists(globalAttributeName))
    {
        using (var flagAttribute = dataset.GetAttribute(globalAttributeName))
        {
            return flagAttribute.ReadBool();
        }
    }

    // No attribute present: the variable is not marked as global.
    return false;
}
private int VariableIterator(long group, IntPtr name, ref H5L.info_t info, IntPtr op_data)
{
var variableName = Marshal.PtrToStringAnsi(name);
var object_info = default(H5O.info_t);
H5O.get_info_by_name(group, variableName, ref object_info);
switch (object_info.type)
{
case H5O.type_t.DATASET:
using (var dataset = new Dataset(group, variableName))
{
var isGlobal = ReadGlobalFlag(dataset);
var value = ReadDataset(dataset);
variables.Add(new MatVariable(value, variableName, isGlobal));
}
break;
case H5O.type_t.GROUP:
if (variableName == "#refs#")
{
return 0;
}
using (var subGroup = new Group(group, variableName))
{
var isGlobal = ReadGlobalFlag(subGroup);
var groupValue = ReadGroup(subGroup);
variables.Add(new MatVariable(groupValue, variableName, isGlobal));
}
break;
default:
throw new NotImplementedException(); throw new NotImplementedException();
} }
private static T[] ConvertDataToProperType<T>(byte[] bytes, MatlabClass arrayType) return 0;
where T : struct
{
var length = bytes.Length;
var arrayElementSize = SizeOfArrayElement(arrayType);
var data = new T[length / arrayElementSize];
Buffer.BlockCopy(bytes, 0, data, 0, length);
return data;
}
/// <summary>
/// Reads the whole dataset into a managed byte array.
/// </summary>
/// <param name="datasetId">Identifier of the dataset to read.</param>
/// <param name="elementType">Identifier of the in-memory HDF5 datatype to read the elements as.</param>
/// <param name="dataSize">Number of bytes to allocate and copy.</param>
/// <returns>A new array of <paramref name="dataSize"/> bytes copied from the native read buffer.</returns>
private static byte[] ReadDataset(long datasetId, long elementType, int dataSize)
{
    var managedBytes = new byte[dataSize];
    using (var nativeBuffer = new MemoryHandle(dataSize))
    {
        var destination = nativeBuffer.Handle;

        // Let HDF5 fill the unmanaged buffer, then copy it into managed memory.
        H5D.read(datasetId, elementType, H5S.ALL, H5S.ALL, H5P.DEFAULT, destination);
        Marshal.Copy(destination, managedBytes, 0, dataSize);
        return managedBytes;
    }
}
/// <summary>
/// Reads the real and imaginary parts of a compound (complex) dataset.
/// Each part is read separately through a one-member compound datatype
/// whose single member is named "real" or "imag".
/// </summary>
/// <typeparam name="T">Element type of each component.</typeparam>
/// <param name="datasetId">Identifier of the dataset to read.</param>
/// <param name="dataSize">Size in bytes of one component's data.</param>
/// <param name="arrayType">Matlab class that determines the HDF5 element type.</param>
/// <returns>The real and imaginary components as separate arrays.</returns>
private static (T[] real, T[] imaginary) ReadComplexData<T>(
    long datasetId,
    int dataSize,
    MatlabClass arrayType)
    where T : struct
{
    var h5Type = H5tTypeFromHdfMatlabClass(arrayType);
    var h5Size = H5T.get_size(h5Type);

    // Reads the compound member with the given name as an array of T.
    T[] ReadComponent(string memberName)
    {
        var componentType = H5T.create(H5T.class_t.COMPOUND, h5Size);
        try
        {
            H5T.insert(componentType, memberName, IntPtr.Zero, h5Type);
            var rawBytes = ReadDataset(datasetId, componentType, dataSize);
            return ConvertDataToProperType<T>(rawBytes, arrayType);
        }
        finally
        {
            // The datatype created above is owned by this method; release it so
            // that reading many complex arrays does not leak datatype identifiers.
            if (componentType >= 0)
            {
                H5T.close(componentType);
            }
        }
    }

    var convertedRealData = ReadComponent("real");
    var convertedImaginaryData = ReadComponent("imag");
    return (convertedRealData, convertedImaginaryData);
}
/// <summary>
/// Reads a numerical dataset, producing either a plain numerical array or,
/// when the dataset's datatype class is compound, a complex array.
/// </summary>
/// <typeparam name="T">Element type matching <paramref name="arrayType"/>.</typeparam>
/// <param name="datasetId">Identifier of the dataset to read.</param>
/// <param name="dims">Dimensions of the array.</param>
/// <param name="arrayType">Matlab class of the array elements.</param>
/// <returns>The array read from the dataset.</returns>
/// <exception cref="Exception">
/// Thrown for a non-compound dataset whose storage size differs from the expected data size.
/// </exception>
private static IArray ReadNumericalArray<T>(long datasetId, int[] dims, MatlabClass arrayType)
    where T : struct
{
    var numberOfElements = dims.NumberOfElements();
    var dataSize = numberOfElements * SizeOfArrayElement(arrayType);

    bool isCompound;
    var dataSetType = H5D.get_type(datasetId);
    try
    {
        isCompound = H5T.get_class(dataSetType) == H5T.class_t.COMPOUND;
    }
    finally
    {
        // The datatype identifier is only needed to query its class; release it
        // right away instead of leaking it.
        if (dataSetType >= 0)
        {
            H5T.close(dataSetType);
        }
    }

    if (isCompound)
    {
        var (convertedRealData, convertedImaginaryData) = ReadComplexData<T>(datasetId, dataSize, arrayType);
        if (arrayType == MatlabClass.MDouble)
        {
            // Double-precision complex data is exposed as System.Numerics.Complex.
            var complexData =
                (convertedRealData as double[])
                .Zip(convertedImaginaryData as double[], (x, y) => new Complex(x, y))
                .ToArray();
            return new NumericalArrayOf<Complex>(dims, complexData);
        }
        else
        {
            var complexData =
                convertedRealData
                    .Zip(convertedImaginaryData, (x, y) => new ComplexOf<T>(x, y))
                    .ToArray();
            return new NumericalArrayOf<ComplexOf<T>>(dims, complexData);
        }
    }

    // The storage size is only compared for non-compound data, so query it here.
    var storageSize = (int)H5D.get_storage_size(datasetId);
    if (dataSize != storageSize)
    {
        throw new Exception("Data size mismatch.");
    }

    var data = ReadDataset(datasetId, H5tTypeFromHdfMatlabClass(arrayType), dataSize);
    var convertedData = ConvertDataToProperType<T>(data, arrayType);
    return new NumericalArrayOf<T>(dims, convertedData);
}
private static IArray ReadCharArray(long datasetId, int[] dims)
{
var storageSize = (int)H5D.get_storage_size(datasetId);
var data = ReadDataset(datasetId, H5T.NATIVE_UINT16, storageSize);
var str = Encoding.Unicode.GetString(data);
return new CharArray(dims, str);
} }
} }
} }