diff --git a/MatFileHandler.Tests/MatFileReaderHdfTests.cs b/MatFileHandler.Tests/MatFileReaderHdfTests.cs
new file mode 100644
index 0000000..cdf2085
--- /dev/null
+++ b/MatFileHandler.Tests/MatFileReaderHdfTests.cs
@@ -0,0 +1,66 @@
+using NUnit.Framework;
+using System.IO;
+
+namespace MatFileHandler.Tests
+{
+    /// <summary>
+    /// Tests of reading character arrays from HDF-based .mat test files.
+    /// </summary>
+    [TestFixture]
+    public class MatFileReaderHdfTests
+    {
+        private const string TestDirectory = "test-data";
+
+        /// <summary>
+        /// Test reading an ASCII-encoded string.
+        /// </summary>
+        [Test]
+        public void TestAscii()
+        {
+            var asciiChars = ReadHdfTestFile("ascii")["s"].Value as ICharArray;
+
+            Assert.That(asciiChars, Is.Not.Null);
+            Assert.That(asciiChars.Dimensions, Is.EqualTo(new[] { 1, 3 }));
+            Assert.That(asciiChars.String, Is.EqualTo("abc"));
+            Assert.That(asciiChars[2], Is.EqualTo('c'));
+        }
+
+        /// <summary>
+        /// Test reading a Unicode string.
+        /// </summary>
+        [Test]
+        public void TestUnicode()
+        {
+            var unicodeChars = ReadHdfTestFile("unicode")["s"].Value as ICharArray;
+
+            Assert.That(unicodeChars, Is.Not.Null);
+            Assert.That(unicodeChars.Dimensions, Is.EqualTo(new[] { 1, 2 }));
+            Assert.That(unicodeChars.String, Is.EqualTo("必フ"));
+            Assert.That(unicodeChars[0], Is.EqualTo('必'));
+            Assert.That(unicodeChars[1], Is.EqualTo('フ'));
+        }
+
+        /// <summary>
+        /// Test reading a wide Unicode string.
+        /// </summary>
+        [Test]
+        public void TestUnicodeWide()
+        {
+            var wideChars = ReadHdfTestFile("unicode-wide")["s"].Value as ICharArray;
+
+            Assert.That(wideChars, Is.Not.Null);
+            Assert.That(wideChars.Dimensions, Is.EqualTo(new[] { 1, 2 }));
+            Assert.That(wideChars.String, Is.EqualTo("🍆"));
+        }
+
+        // Creates a MatTestDataFactory for the path TestDirectory/factoryName.
+        private static AbstractTestDataFactory GetTests(string factoryName)
+        {
+            var factoryPath = Path.Combine(TestDirectory, factoryName);
+            return new MatTestDataFactory(factoryPath);
+        }
+
+        // Looks up a test file by name in the "hdf" test data set.
+        private IMatFile ReadHdfTestFile(string testName) => GetTests("hdf")[testName];
+    }
+}
diff --git a/MatFileHandler.Tests/test-data/hdf/ascii.mat b/MatFileHandler.Tests/test-data/hdf/ascii.mat
new file mode 100644
index 0000000..4bf17a7
Binary files /dev/null and b/MatFileHandler.Tests/test-data/hdf/ascii.mat differ
diff --git a/MatFileHandler.Tests/test-data/hdf/unicode-wide.mat b/MatFileHandler.Tests/test-data/hdf/unicode-wide.mat
new file mode 100644
index 0000000..6eaacd7
Binary files /dev/null and b/MatFileHandler.Tests/test-data/hdf/unicode-wide.mat differ
diff --git a/MatFileHandler.Tests/test-data/hdf/unicode.mat b/MatFileHandler.Tests/test-data/hdf/unicode.mat
new file mode 100644
index 0000000..e4583d5
Binary files /dev/null and b/MatFileHandler.Tests/test-data/hdf/unicode.mat differ
diff --git a/MatFileHandler/HdfFileReader.cs b/MatFileHandler/HdfFileReader.cs
new file mode 100644
index 0000000..512535b
--- /dev/null
+++ b/MatFileHandler/HdfFileReader.cs
@@ -0,0 +1,259 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Numerics;
+using System.Runtime.InteropServices;
+using System.Text;
+using HDF.PInvoke;
+
+namespace MatFileHandler
+{
+    /// <summary>
+    /// Character array read from an HDF file; its characters are kept in a string.
+    /// </summary>
+    public class HdfCharArray : ICharArray
+    {
+        /// <summary>
+        /// Initializes a new instance of the <see cref="HdfCharArray"/> class.
+        /// </summary>
+        /// <param name="dimensions">Dimensions of the array.</param>
+        /// <param name="data">Characters of the array, as one string.</param>
+        public HdfCharArray(int[] dimensions, string data)
+        {
+            Dimensions = dimensions;
+            StringData = data;
+        }
+
+        /// <summary>
+        /// Gets a value indicating whether the array has no dimensions.
+        /// </summary>
+        public bool IsEmpty => Dimensions.Length == 0;
+
+        /// <summary>
+        /// Gets the dimensions of the array.
+        /// </summary>
+        public int[] Dimensions { get; }
+
+        /// <summary>
+        /// Gets the number of elements computed from <see cref="Dimensions"/>.
+        /// </summary>
+        public int Count => Dimensions.NumberOfElements();
+
+        /// <summary>
+        /// Converts the characters to doubles holding their UTF-16 code unit values.
+        /// </summary>
+        /// <returns>One double per character.</returns>
+        public double[] ConvertToDoubleArray()
+        {
+            // Convert.ToDouble(char) always throws InvalidCastException, so cast instead.
+            return StringData.Select(c => (double)c).ToArray();
+        }
+
+        /// <summary>
+        /// Converts the characters to complex numbers with zero imaginary part.
+        /// </summary>
+        /// <returns>One complex number per character.</returns>
+        public Complex[] ConvertToComplexArray()
+        {
+            return ConvertToDoubleArray().Select(x => new Complex(x, 0.0)).ToArray();
+        }
+
+        /// <summary>
+        /// Gets a copy of the characters as an array.
+        /// </summary>
+        public char[] Data => StringData.ToCharArray();
+
+        /// <summary>
+        /// Gets or sets the character at the given indices.
+        /// </summary>
+        /// <param name="list">Indices, flattened via <see cref="Dimensions"/>.</param>
+        public char this[params int[] list]
+        {
+            get => StringData[Dimensions.DimFlatten(list)];
+            set
+            {
+                var chars = StringData.ToCharArray();
+                chars[Dimensions.DimFlatten(list)] = value;
+
+                // char[].ToString() returns the type name, not the text.
+                StringData = new string(chars);
+            }
+        }
+
+        /// <summary>
+        /// Gets the characters as a string.
+        /// </summary>
+        public string String => StringData;
+
+        private string StringData { get; set; }
+    }
+
+    /// <summary>
+    /// Reads variables from an already opened HDF file.
+    /// </summary>
+    internal class HdfFileReader
+    {
+        private readonly long fileId;
+
+        private List variables;
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="HdfFileReader"/> class.
+        /// </summary>
+        /// <param name="fileId">Identifier of the HDF file to read.</param>
+        internal HdfFileReader(long fileId)
+        {
+            this.fileId = fileId;
+        }
+
+        /// <summary>
+        /// Iterates over the links found at the file identifier and reads each one as a variable.
+        /// </summary>
+        /// <returns>File holding the variables read.</returns>
+        /// <exception cref="InvalidOperationException">An HDF library call reported a failure.</exception>
+        internal IMatFile Read()
+        {
+            variables = new List();
+            var groupInfo = default(H5G.info_t);
+            if (H5G.get_info(fileId, ref groupInfo) < 0)
+            {
+                throw new InvalidOperationException("Couldn't read group info from the HDF file.");
+            }
+
+            var numberOfVariables = groupInfo.nlinks;
+            ulong idx = 0;
+            while (idx < numberOfVariables)
+            {
+                var status = H5L.iterate(
+                    fileId,
+                    H5.index_t.NAME,
+                    H5.iter_order_t.NATIVE,
+                    ref idx,
+                    VariableIterator,
+                    IntPtr.Zero);
+
+                // Without this check a failing call that does not advance idx would loop forever.
+                if (status < 0)
+                {
+                    throw new InvalidOperationException("Couldn't iterate over links in the HDF file.");
+                }
+            }
+
+            return new MatFile(variables);
+        }
+
+        // Link callback for H5L.iterate: reads a dataset into a variable; groups are not supported yet.
+        // Always returns 0 so that iteration continues with the next link.
+        // NOTE(review): exceptions thrown here cross the native H5L.iterate frame - confirm that is safe.
+        private int VariableIterator(long group, IntPtr name, ref H5L.info_t info, IntPtr op_data)
+        {
+            var variableName = Marshal.PtrToStringAnsi(name);
+            var objectInfo = default(H5O.info_t);
+            H5O.get_info_by_name(group, variableName, ref objectInfo);
+            switch (objectInfo.type)
+            {
+                case H5O.type_t.DATASET:
+                    var datasetId = H5D.open(group, variableName);
+                    try
+                    {
+                        var value = ReadDataset(datasetId);
+                        variables.Add(new MatVariable(value, variableName, false));
+                    }
+                    finally
+                    {
+                        // Release the dataset handle even when reading fails.
+                        H5D.close(datasetId);
+                    }
+
+                    break;
+                case H5O.type_t.GROUP:
+                    throw new NotImplementedException();
+            }
+
+            return 0;
+        }
+
+        // Reads the "MATLAB_class" string attribute of the dataset.
+        private static string GetMatlabClassOfDataset(long datasetId)
+        {
+            var attributeId = H5A.open_by_name(datasetId, ".", "MATLAB_class");
+            var typeId = H5A.get_type(attributeId);
+            var classId = H5T.copy(H5T.C_S1);
+            var buf = IntPtr.Zero;
+            try
+            {
+                if (H5T.get_class(typeId) != H5T.class_t.STRING)
+                {
+                    throw new NotImplementedException();
+                }
+
+                var typeIdSize = H5T.get_size(typeId);
+                H5T.set_size(classId, typeIdSize);
+                buf = Marshal.AllocHGlobal(typeIdSize);
+                H5A.read(attributeId, classId, buf);
+                var matlabClassNameBytes = new byte[(int)typeIdSize];
+                Marshal.Copy(buf, matlabClassNameBytes, 0, (int)typeIdSize);
+                return Encoding.ASCII.GetString(matlabClassNameBytes);
+            }
+            finally
+            {
+                // Release the native buffer and every handle opened above, on all paths.
+                Marshal.FreeHGlobal(buf);
+                H5T.close(classId);
+                H5T.close(typeId);
+                H5A.close(attributeId);
+            }
+        }
+
+        // Returns the dataset's extent with the order of dimensions reversed.
+        private static int[] GetDimensionsOfDataset(long datasetId)
+        {
+            var spaceId = H5D.get_space(datasetId);
+            try
+            {
+                var rank = H5S.get_simple_extent_ndims(spaceId);
+                var dims = new ulong[rank];
+                H5S.get_simple_extent_dims(spaceId, dims, null);
+                Array.Reverse(dims);
+                return dims.Select(x => (int)x).ToArray();
+            }
+            finally
+            {
+                H5S.close(spaceId);
+            }
+        }
+
+        // Reads a dataset according to its MATLAB class; only "char" is supported.
+        private static IArray ReadDataset(long datasetId)
+        {
+            var dims = GetDimensionsOfDataset(datasetId);
+            var matlabClass = GetMatlabClassOfDataset(datasetId);
+            if (matlabClass == "char")
+            {
+                return ReadCharArray(datasetId, dims);
+            }
+
+            throw new NotImplementedException();
+        }
+
+        // Reads the dataset as 16-bit code units and decodes them as UTF-16.
+        private static IArray ReadCharArray(long datasetId, int[] dims)
+        {
+            // Size the buffer by what H5D.read writes (one UInt16 per element); the storage
+            // size can be smaller than that, e.g. for compressed datasets.
+            var byteCount = dims.Aggregate(1, (product, dim) => product * dim) * sizeof(ushort);
+            var dataBuffer = Marshal.AllocHGlobal(byteCount);
+            try
+            {
+                H5D.read(datasetId, H5T.NATIVE_UINT16, H5S.ALL, H5S.ALL, H5P.DEFAULT, dataBuffer);
+                var data = new byte[byteCount];
+                Marshal.Copy(dataBuffer, data, 0, byteCount);
+                return new HdfCharArray(dims, Encoding.Unicode.GetString(data));
+            }
+            finally
+            {
+                Marshal.FreeHGlobal(dataBuffer);
+            }
+        }
+    }
+}
diff --git a/MatFileHandler/Header.cs b/MatFileHandler/Header.cs
index 06b68ce..0e7db2b 100755
--- a/MatFileHandler/Header.cs
+++ b/MatFileHandler/Header.cs
@@ -13,13 +13,16 @@ namespace MatFileHandler
///
internal class Header
{
- private Header(string text, long subsystemDataOffset, int version)
+ private Header(byte[] rawBytes, string text, long subsystemDataOffset, int version)
{
+ RawBytes = rawBytes;
Text = text;
SubsystemDataOffset = subsystemDataOffset;
Version = version;
}
+ public byte[] RawBytes { get; }
+
///
/// Gets the header text.
///
@@ -55,7 +58,7 @@ namespace MatFileHandler
platform = platform.Remove(length);
}
var text = $"MATLAB 5.0 MAT-file, Platform: {platform}, Created on: {dateTime}{padding}";
- return new Header(text, 0, 256);
+ return new Header(null, text, 0, 256);
}
///
@@ -65,18 +68,26 @@ namespace MatFileHandler
/// The header read.
public static Header Read(BinaryReader reader)
{
- var textBytes = reader.ReadBytes(116);
- var text = System.Text.Encoding.UTF8.GetString(textBytes);
- var subsystemDataOffsetBytes = reader.ReadBytes(8);
- var subsystemDataOffset = BitConverter.ToInt64(subsystemDataOffsetBytes, 0);
- var version = reader.ReadInt16();
- var endian = reader.ReadInt16();
- var isLittleEndian = endian == 19785;
- if (!isLittleEndian)
+ var rawBytes = reader.ReadBytes(128);
+ using (var stream = new MemoryStream(rawBytes))
{
- throw new NotSupportedException("Big-endian files are not supported.");
+ using (var newReader = new BinaryReader(stream))
+ {
+ var textBytes = newReader.ReadBytes(116);
+ var text = System.Text.Encoding.UTF8.GetString(textBytes);
+ var subsystemDataOffsetBytes = newReader.ReadBytes(8);
+ var subsystemDataOffset = BitConverter.ToInt64(subsystemDataOffsetBytes, 0);
+ var version = newReader.ReadInt16();
+ var endian = newReader.ReadInt16();
+ var isLittleEndian = endian == 19785;
+ if (!isLittleEndian)
+ {
+ throw new NotSupportedException("Big-endian files are not supported.");
+ }
+
+ return new Header(rawBytes, text, subsystemDataOffset, version);
+ }
}
- return new Header(text, subsystemDataOffset, version);
}
private static string GetOperatingSystem()
diff --git a/MatFileHandler/MatFileHdfReader.cs b/MatFileHandler/MatFileHdfReader.cs
new file mode 100644
index 0000000..285b75b
--- /dev/null
+++ b/MatFileHandler/MatFileHdfReader.cs
@@ -0,0 +1,63 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Runtime.InteropServices;
+using System.Text;
+using HDF.PInvoke;
+
+namespace MatFileHandler
+{
+    /// <summary>
+    /// Reads HDF-based .mat files by passing their bytes to the HDF library as a file image.
+    /// </summary>
+    internal static class MatFileHdfReader
+    {
+        /// <summary>
+        /// Reads an HDF file whose header has already been read from the stream.
+        /// </summary>
+        /// <param name="header">Header already read; its raw bytes are prepended to the rest of the stream.</param>
+        /// <param name="stream">Stream holding the remainder of the file.</param>
+        /// <returns>File read from the combined bytes.</returns>
+        internal static IMatFile ContinueReadingHdfFile(Header header, Stream stream)
+        {
+            using (var memoryStream = new MemoryStream())
+            {
+                var rawHeader = header.RawBytes;
+                memoryStream.Write(rawHeader, 0, rawHeader.Length);
+                stream.CopyTo(memoryStream);
+                return ReadFromByteArray(memoryStream.ToArray());
+            }
+        }
+
+        // Opens the bytes as an HDF file image, reads the file and releases the native resources.
+        private static IMatFile ReadFromByteArray(byte[] bytes)
+        {
+            var fileAccessPropertyList = H5P.create(H5P.FILE_ACCESS);
+            var image = Marshal.AllocCoTaskMem(bytes.Length);
+            long fileId = -1;
+            try
+            {
+                H5P.set_fapl_core(fileAccessPropertyList, IntPtr.Add(IntPtr.Zero, 1024), 0);
+                Marshal.Copy(bytes, 0, image, bytes.Length);
+                H5P.set_file_image(fileAccessPropertyList, image, IntPtr.Add(IntPtr.Zero, bytes.Length));
+
+                // NOTE(review): a failed open (negative id) is still handed to HdfFileReader unchecked.
+                fileId = H5F.open(Guid.NewGuid().ToString(), H5F.ACC_RDONLY, fileAccessPropertyList);
+                return new HdfFileReader(fileId).Read();
+            }
+            finally
+            {
+                if (fileId >= 0)
+                {
+                    // Clear the cache while the identifier is still open, then close the file.
+                    H5F.clear_elink_file_cache(fileId);
+                    H5F.close(fileId);
+                }
+
+                // Release the property list and the unmanaged image buffer on every path.
+                H5P.close(fileAccessPropertyList);
+                Marshal.FreeCoTaskMem(image);
+            }
+        }
+    }
+}
diff --git a/MatFileHandler/MatFileLevel5Reader.cs b/MatFileHandler/MatFileLevel5Reader.cs
new file mode 100644
index 0000000..fc7a81e
--- /dev/null
+++ b/MatFileHandler/MatFileLevel5Reader.cs
@@ -0,0 +1,91 @@
+using System.Collections.Generic;
+using System.IO;
+
+namespace MatFileHandler
+{
+    /// <summary>
+    /// Reader for Level 5 .mat files.
+    /// </summary>
+    internal static class MatFileLevel5Reader
+    {
+        /// <summary>
+        /// Read a sequence of raw variables from .mat file.
+        /// </summary>
+        /// <param name="reader">Reader.</param>
+        /// <param name="subsystemDataOffset">Offset of subsystem data in the file;
+        /// we need it because we may encounter it during reading, and
+        /// the subsystem data should be parsed in a special way.</param>
+        /// <param name="subsystemData">
+        /// Link to the current file's subsystem data structure; initially it has dummy value
+        /// which will be replaced after we parse the whole subsystem data.</param>
+        /// <returns>List of "raw" variables; the actual variables are constructed from them later.</returns>
+        internal static List ReadRawVariables(BinaryReader reader, long subsystemDataOffset, SubsystemData subsystemData)
+        {
+            var rawVariables = new List();
+            var elementReader = new DataElementReader(subsystemData);
+            try
+            {
+                // Keep reading elements until the reader runs out of data.
+                for (; ; )
+                {
+                    var startPosition = reader.BaseStream.Position;
+                    var element = elementReader.Read(reader);
+                    if (startPosition != subsystemDataOffset)
+                    {
+                        rawVariables.Add(new RawVariable(startPosition, element));
+                        continue;
+                    }
+
+                    // The element at the subsystem offset is parsed and stored, not listed.
+                    var subsystemElement = element as IArrayOf;
+                    subsystemData.Set(ReadSubsystemData(subsystemElement.Data, subsystemData));
+                }
+            }
+            catch (EndOfStreamException)
+            {
+                // Reaching the end of the stream is the normal way to finish.
+            }
+
+            return rawVariables;
+        }
+
+        /// <summary>
+        /// Read raw variables from a .mat file.
+        /// </summary>
+        /// <param name="reader">Binary reader.</param>
+        /// <param name="subsystemDataOffset">Offset to the subsystem data to use (read from the file header).</param>
+        /// <returns>Raw variables read.</returns>
+        internal static List ReadRawVariables(BinaryReader reader, long subsystemDataOffset)
+        {
+            return ReadRawVariables(reader, subsystemDataOffset, new SubsystemData());
+        }
+
+        /// <summary>
+        /// Reads the remainder of a Level 5 file after its header and builds variables from the arrays found.
+        /// </summary>
+        /// <param name="header">Header already read from the file.</param>
+        /// <param name="reader">Reader to read the rest of the file from.</param>
+        /// <returns>File containing one variable per raw variable that is a <see cref="MatArray"/>.</returns>
+        internal static IMatFile ContinueReadingLevel5File(Header header, BinaryReader reader)
+        {
+            var variables = new List();
+            foreach (var rawVariable in ReadRawVariables(reader, header.SubsystemDataOffset))
+            {
+                // Data elements that are not arrays are skipped.
+                if (rawVariable.DataElement is MatArray array)
+                {
+                    var isGlobal = array.Flags.Variable.HasFlag(Variable.IsGlobal);
+                    variables.Add(new MatVariable(array, array.Name, isGlobal));
+                }
+            }
+
+            return new MatFile(variables);
+        }
+
+        // Parses subsystem data from its raw bytes.
+        private static SubsystemData ReadSubsystemData(byte[] bytes, SubsystemData subsystemData)
+        {
+            return SubsystemDataReader.Read(bytes, subsystemData);
+        }
+    }
+}
diff --git a/MatFileHandler/MatFileReader.cs b/MatFileHandler/MatFileReader.cs
index 0050272..9a1b77e 100755
--- a/MatFileHandler/MatFileReader.cs
+++ b/MatFileHandler/MatFileReader.cs
@@ -34,89 +34,23 @@ namespace MatFileHandler
}
}
- ///
- /// Read a sequence of raw variables from .mat file.
- ///
- /// Reader.
- /// Offset of subsystem data in the file;
- /// we need it because we may encounter it during reading, and
- /// the subsystem data should be parsed in a special way.
- ///
- /// Link to the current file's subsystem data structure; initially it has dummy value
- /// which will be replaced after we parse the whole subsystem data.
- /// List of "raw" variables; the actual variables are constructed from them later.
- internal static List ReadRawVariables(BinaryReader reader, long subsystemDataOffset, SubsystemData subsystemData)
- {
- var variables = new List();
- var dataElementReader = new DataElementReader(subsystemData);
- while (true)
- {
- try
- {
- var position = reader.BaseStream.Position;
- var dataElement = dataElementReader.Read(reader);
- if (position == subsystemDataOffset)
- {
- var subsystemDataElement = dataElement as IArrayOf;
- var newSubsystemData = ReadSubsystemData(subsystemDataElement.Data, subsystemData);
- subsystemData.Set(newSubsystemData);
- }
- else
- {
- variables.Add(new RawVariable(position, dataElement));
- }
- }
- catch (EndOfStreamException)
- {
- break;
- }
- }
-
- return variables;
- }
-
- ///
- /// Read raw variables from a .mat file.
- ///
- /// Binary reader.
- /// Offset to the subsystem data to use (read from the file header).
- /// Raw variables read.
- internal static List ReadRawVariables(BinaryReader reader, long subsystemDataOffset)
- {
- var subsystemData = new SubsystemData();
- return ReadRawVariables(reader, subsystemDataOffset, subsystemData);
- }
-
- private static IMatFile Read(BinaryReader reader)
+ private IMatFile Read(BinaryReader reader)
{
var header = ReadHeader(reader);
- var rawVariables = ReadRawVariables(reader, header.SubsystemDataOffset);
- var variables = new List();
- foreach (var variable in rawVariables)
+ switch (header.Version)
{
- var array = variable.DataElement as MatArray;
- if (array is null)
- {
- continue;
- }
-
- variables.Add(new MatVariable(
- array,
- array.Name,
- array.Flags.Variable.HasFlag(Variable.IsGlobal)));
+ case 256:
+ return MatFileLevel5Reader.ContinueReadingLevel5File(header, reader);
+ case 512:
+ return MatFileHdfReader.ContinueReadingHdfFile(header, reader.BaseStream);
+ default:
+ throw new NotSupportedException($"Unknown file format.");
}
-
- return new MatFile(variables);
}
private static Header ReadHeader(BinaryReader reader)
{
return Header.Read(reader);
}
-
- private static SubsystemData ReadSubsystemData(byte[] bytes, SubsystemData subsystemData)
- {
- return SubsystemDataReader.Read(bytes, subsystemData);
- }
}
}
\ No newline at end of file
diff --git a/MatFileHandler/SubsystemDataReader.cs b/MatFileHandler/SubsystemDataReader.cs
index 9b1b6fc..9799549 100644
--- a/MatFileHandler/SubsystemDataReader.cs
+++ b/MatFileHandler/SubsystemDataReader.cs
@@ -29,7 +29,7 @@ namespace MatFileHandler
using (var reader = new BinaryReader(stream))
{
reader.ReadBytes(8);
- rawVariables = MatFileReader.ReadRawVariables(reader, -1, subsystemData);
+ rawVariables = MatFileLevel5Reader.ReadRawVariables(reader, -1, subsystemData);
}
}