Support char arrays
This commit is contained in:
parent
0e14434bae
commit
93be86d526
61
MatFileHandler.Tests/MatFileReaderHdfTests.cs
Normal file
61
MatFileHandler.Tests/MatFileReaderHdfTests.cs
Normal file
@ -0,0 +1,61 @@
|
||||
using NUnit.Framework;
|
||||
using System.IO;
|
||||
|
||||
namespace MatFileHandler.Tests
|
||||
{
|
||||
/// <summary>
/// Tests for reading HDF-based (Level 7.3) .mat files.
/// </summary>
[TestFixture]
public class MatFileReaderHdfTests
{
    private const string TestDirectory = "test-data";

    /// <summary>
    /// Checks that a plain ASCII string is read as a 1x3 char array.
    /// </summary>
    [Test]
    public void TestAscii()
    {
        var file = ReadHdfTestFile("ascii");
        var chars = file["s"].Value as ICharArray;

        Assert.That(chars, Is.Not.Null);
        Assert.That(chars.Dimensions, Is.EqualTo(new[] { 1, 3 }));
        Assert.That(chars.String, Is.EqualTo("abc"));
        Assert.That(chars[2], Is.EqualTo('c'));
    }

    /// <summary>
    /// Checks that a string of two BMP (non-ASCII) characters is read correctly.
    /// </summary>
    [Test]
    public void TestUnicode()
    {
        var file = ReadHdfTestFile("unicode");
        var chars = file["s"].Value as ICharArray;

        Assert.That(chars, Is.Not.Null);
        Assert.That(chars.Dimensions, Is.EqualTo(new[] { 1, 2 }));
        Assert.That(chars.String, Is.EqualTo("必フ"));
        Assert.That(chars[0], Is.EqualTo('必'));
        Assert.That(chars[1], Is.EqualTo('フ'));
    }

    /// <summary>
    /// Checks that a character outside the BMP is read as two UTF-16 code units.
    /// </summary>
    [Test]
    public void TestUnicodeWide()
    {
        var file = ReadHdfTestFile("unicode-wide");
        var chars = file["s"].Value as ICharArray;

        Assert.That(chars, Is.Not.Null);
        Assert.That(chars.Dimensions, Is.EqualTo(new[] { 1, 2 }));
        Assert.That(chars.String, Is.EqualTo("🍆"));
    }

    // Builds the test data factory rooted at the given sub-directory of the test data folder.
    private static AbstractTestDataFactory<IMatFile> GetTests(string factoryName)
    {
        var factoryPath = Path.Combine(TestDirectory, factoryName);
        return new MatTestDataFactory(factoryPath);
    }

    // Fetches the named file from the "hdf" test data set.
    private IMatFile ReadHdfTestFile(string testName) => GetTests("hdf")[testName];
}
|
||||
}
|
BIN
MatFileHandler.Tests/test-data/hdf/ascii.mat
Normal file
BIN
MatFileHandler.Tests/test-data/hdf/ascii.mat
Normal file
Binary file not shown.
BIN
MatFileHandler.Tests/test-data/hdf/unicode-wide.mat
Normal file
BIN
MatFileHandler.Tests/test-data/hdf/unicode-wide.mat
Normal file
Binary file not shown.
BIN
MatFileHandler.Tests/test-data/hdf/unicode.mat
Normal file
BIN
MatFileHandler.Tests/test-data/hdf/unicode.mat
Normal file
Binary file not shown.
156
MatFileHandler/HdfFileReader.cs
Normal file
156
MatFileHandler/HdfFileReader.cs
Normal file
@ -0,0 +1,156 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Numerics;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Text;
|
||||
using HDF.PInvoke;
|
||||
|
||||
namespace MatFileHandler
|
||||
{
|
||||
/// <summary>
/// Character array read from an HDF-based .mat file.
/// The contents are kept as a string, one UTF-16 code unit per array element.
/// </summary>
public class HdfCharArray : ICharArray
{
    /// <summary>
    /// Initializes a new instance of the <see cref="HdfCharArray"/> class.
    /// </summary>
    /// <param name="dimensions">Dimensions of the array.</param>
    /// <param name="data">Array contents as a string.</param>
    public HdfCharArray(int[] dimensions, string data)
    {
        Dimensions = dimensions;
        StringData = data;
    }

    /// <summary>
    /// Gets a value indicating whether the array has no dimensions.
    /// </summary>
    public bool IsEmpty => Dimensions.Length == 0;

    /// <summary>
    /// Gets the dimensions of the array.
    /// </summary>
    public int[] Dimensions { get; }

    /// <summary>
    /// Gets the total number of elements, as computed from <see cref="Dimensions"/>.
    /// </summary>
    public int Count => Dimensions.NumberOfElements();

    /// <summary>
    /// Converts every character to its UTF-16 code unit value as a double.
    /// </summary>
    /// <returns>Array of code unit values.</returns>
    public double[] ConvertToDoubleArray()
    {
        // Convert.ToDouble(char) always throws InvalidCastException,
        // so use the built-in numeric conversion instead.
        return StringData.Select(c => (double)c).ToArray();
    }

    /// <summary>
    /// Converts every character to a complex number with zero imaginary part.
    /// </summary>
    /// <returns>Array of complex values.</returns>
    public Complex[] ConvertToComplexArray()
    {
        return ConvertToDoubleArray().Select(x => new Complex(x, 0.0)).ToArray();
    }

    /// <summary>
    /// Gets a copy of the array contents as characters.
    /// </summary>
    public char[] Data => StringData.ToCharArray();

    /// <summary>
    /// Gets or sets a single character addressed by its multidimensional index.
    /// </summary>
    /// <param name="list">Indices of the element.</param>
    public char this[params int[] list]
    {
        get => StringData[Dimensions.DimFlatten(list)];
        set
        {
            var chars = StringData.ToCharArray();
            chars[Dimensions.DimFlatten(list)] = value;

            // char[].ToString() returns the type name ("System.Char[]"),
            // so the string has to be rebuilt through the constructor.
            StringData = new string(chars);
        }
    }

    /// <summary>
    /// Gets the array contents as a string.
    /// </summary>
    public string String => StringData;

    private string StringData { get; set; }
}
|
||||
|
||||
/// <summary>
/// Reads variables out of an already opened HDF5-based .mat file.
/// Only top-level datasets whose MATLAB class is "char" are currently supported.
/// </summary>
internal class HdfFileReader
{
    private long fileId;

    private List<IVariable> variables;

    /// <summary>
    /// Initializes a new instance of the <see cref="HdfFileReader"/> class.
    /// </summary>
    /// <param name="fileId">Identifier of the opened HDF5 file; the caller keeps ownership of it.</param>
    internal HdfFileReader(long fileId)
    {
        this.fileId = fileId;
    }

    /// <summary>
    /// Reads all top-level variables of the file.
    /// </summary>
    /// <returns>The .mat file contents.</returns>
    /// <exception cref="InvalidOperationException">Iterating over the file's links failed.</exception>
    /// <exception cref="NotImplementedException">The file contains an unsupported object.</exception>
    internal IMatFile Read()
    {
        variables = new List<IVariable>();
        H5G.info_t group_info = default(H5G.info_t);
        H5G.get_info(fileId, ref group_info);
        var numberOfVariables = group_info.nlinks;

        ulong idx = 0;
        while (idx < numberOfVariables)
        {
            var status = H5L.iterate(
                fileId,
                H5.index_t.NAME,
                H5.iter_order_t.NATIVE,
                ref idx,
                VariableIterator,
                IntPtr.Zero);
            if (status < 0)
            {
                // A failed iteration is not guaranteed to advance idx;
                // bail out instead of looping forever.
                throw new InvalidOperationException("Failed to iterate over the variables of the HDF file.");
            }
        }

        return new MatFile(variables);
    }

    // Callback for H5L.iterate: reads one link of the root group into the variable list.
    private int VariableIterator(long group, IntPtr name, ref H5L.info_t info, IntPtr op_data)
    {
        var variableName = Marshal.PtrToStringAnsi(name);
        var object_info = default(H5O.info_t);
        H5O.get_info_by_name(group, variableName, ref object_info);
        switch (object_info.type)
        {
            case H5O.type_t.DATASET:
                var datasetId = H5D.open(group, variableName);
                try
                {
                    var value = ReadDataset(datasetId);
                    variables.Add(new MatVariable(value, variableName, false));
                }
                finally
                {
                    H5D.close(datasetId);
                }

                break;
            case H5O.type_t.GROUP:
                throw new NotImplementedException();
        }

        return 0;
    }

    // Reads the "MATLAB_class" string attribute of a dataset.
    private static string GetMatlabClassOfDataset(long datasetId)
    {
        var attributeId = H5A.open_by_name(datasetId, ".", "MATLAB_class");
        try
        {
            var typeId = H5A.get_type(attributeId);
            try
            {
                var cl = H5T.get_class(typeId);
                if (cl != H5T.class_t.STRING)
                {
                    throw new NotImplementedException();
                }

                var classId = H5T.copy(H5T.C_S1);
                try
                {
                    var typeIdSize = H5T.get_size(typeId);
                    H5T.set_size(classId, typeIdSize);
                    var buf = Marshal.AllocHGlobal(typeIdSize);
                    try
                    {
                        H5A.read(attributeId, classId, buf);
                        var matlabClassNameBytes = new byte[(int)typeIdSize];
                        Marshal.Copy(buf, matlabClassNameBytes, 0, (int)typeIdSize);
                        return Encoding.ASCII.GetString(matlabClassNameBytes);
                    }
                    finally
                    {
                        Marshal.FreeHGlobal(buf);
                    }
                }
                finally
                {
                    H5T.close(classId);
                }
            }
            finally
            {
                H5T.close(typeId);
            }
        }
        finally
        {
            H5A.close(attributeId);
        }
    }

    // Returns the dataset extents in reversed order relative to what HDF5 reports.
    private static int[] GetDimensionsOfDataset(long datasetId)
    {
        var spaceId = H5D.get_space(datasetId);
        try
        {
            var rank = H5S.get_simple_extent_ndims(spaceId);
            var dims = new ulong[rank];
            H5S.get_simple_extent_dims(spaceId, dims, null);
            Array.Reverse(dims);
            return dims.Select(x => (int)x).ToArray();
        }
        finally
        {
            H5S.close(spaceId);
        }
    }

    // Dispatches on the MATLAB class of the dataset; only "char" is implemented.
    private static IArray ReadDataset(long datasetId)
    {
        var dims = GetDimensionsOfDataset(datasetId);

        var matlabClass = GetMatlabClassOfDataset(datasetId);

        if (matlabClass == "char")
        {
            return ReadCharArray(datasetId, dims);
        }

        throw new NotImplementedException();
    }

    // Reads the dataset as 16-bit code units and decodes them as UTF-16.
    private static IArray ReadCharArray(long datasetId, int[] dims)
    {
        // H5D.read writes one NATIVE_UINT16 per element into the buffer, so the buffer
        // must be sized from the element count. The storage size is the on-disk size,
        // which differs for compressed datasets or other file datatypes.
        var sizeInBytes = dims.NumberOfElements() * sizeof(ushort);
        var data = new byte[sizeInBytes];
        var dataBuffer = Marshal.AllocHGlobal(sizeInBytes);
        try
        {
            H5D.read(datasetId, H5T.NATIVE_UINT16, H5S.ALL, H5S.ALL, H5P.DEFAULT, dataBuffer);
            Marshal.Copy(dataBuffer, data, 0, sizeInBytes);
        }
        finally
        {
            Marshal.FreeHGlobal(dataBuffer);
        }

        var str = Encoding.Unicode.GetString(data);
        return new HdfCharArray(dims, str);
    }
}
|
||||
}
|
@ -13,13 +13,16 @@ namespace MatFileHandler
|
||||
/// </summary>
|
||||
internal class Header
|
||||
{
|
||||
private Header(string text, long subsystemDataOffset, int version)
|
||||
private Header(byte[] rawBytes, string text, long subsystemDataOffset, int version)
|
||||
{
|
||||
RawBytes = rawBytes;
|
||||
Text = text;
|
||||
SubsystemDataOffset = subsystemDataOffset;
|
||||
Version = version;
|
||||
}
|
||||
|
||||
public byte[] RawBytes { get; }
|
||||
|
||||
/// <summary>
|
||||
/// Gets the header text.
|
||||
/// </summary>
|
||||
@ -55,7 +58,7 @@ namespace MatFileHandler
|
||||
platform = platform.Remove(length);
|
||||
}
|
||||
var text = $"MATLAB 5.0 MAT-file, Platform: {platform}, Created on: {dateTime}{padding}";
|
||||
return new Header(text, 0, 256);
|
||||
return new Header(null, text, 0, 256);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@ -65,18 +68,26 @@ namespace MatFileHandler
|
||||
/// <returns>The header read.</returns>
|
||||
public static Header Read(BinaryReader reader)
|
||||
{
|
||||
var textBytes = reader.ReadBytes(116);
|
||||
var rawBytes = reader.ReadBytes(128);
|
||||
using (var stream = new MemoryStream(rawBytes))
|
||||
{
|
||||
using (var newReader = new BinaryReader(stream))
|
||||
{
|
||||
var textBytes = newReader.ReadBytes(116);
|
||||
var text = System.Text.Encoding.UTF8.GetString(textBytes);
|
||||
var subsystemDataOffsetBytes = reader.ReadBytes(8);
|
||||
var subsystemDataOffsetBytes = newReader.ReadBytes(8);
|
||||
var subsystemDataOffset = BitConverter.ToInt64(subsystemDataOffsetBytes, 0);
|
||||
var version = reader.ReadInt16();
|
||||
var endian = reader.ReadInt16();
|
||||
var version = newReader.ReadInt16();
|
||||
var endian = newReader.ReadInt16();
|
||||
var isLittleEndian = endian == 19785;
|
||||
if (!isLittleEndian)
|
||||
{
|
||||
throw new NotSupportedException("Big-endian files are not supported.");
|
||||
}
|
||||
return new Header(text, subsystemDataOffset, version);
|
||||
|
||||
return new Header(rawBytes, text, subsystemDataOffset, version);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static string GetOperatingSystem()
|
||||
|
41
MatFileHandler/MatFileHdfReader.cs
Normal file
41
MatFileHandler/MatFileHdfReader.cs
Normal file
@ -0,0 +1,41 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Text;
|
||||
using HDF.PInvoke;
|
||||
|
||||
namespace MatFileHandler
|
||||
{
|
||||
/// <summary>
/// Entry point for reading HDF5-based .mat files from a stream.
/// </summary>
internal static class MatFileHdfReader
{
    /// <summary>
    /// Continues reading a file whose header has already been consumed from the stream.
    /// The raw header bytes are put back in front of the remaining stream contents,
    /// and the resulting bytes are handed to HDF5 as an in-memory file image.
    /// </summary>
    /// <param name="header">Header that was read from the beginning of the stream.</param>
    /// <param name="stream">Stream positioned right after the header.</param>
    /// <returns>The .mat file contents.</returns>
    internal static IMatFile ContinueReadingHdfFile(Header header, Stream stream)
    {
        using (var memoryStream = new MemoryStream())
        {
            memoryStream.Write(header.RawBytes, 0, header.RawBytes.Length);
            stream.CopyTo(memoryStream);
            return ReadFromByteArray(memoryStream.ToArray());
        }
    }

    // Opens the bytes as an in-memory HDF5 file (core driver + file image) and reads it.
    private static IMatFile ReadFromByteArray(byte[] bytes)
    {
        var fileAccessPropertyList = H5P.create(H5P.FILE_ACCESS);
        var ptr = IntPtr.Zero;
        try
        {
            H5P.set_fapl_core(fileAccessPropertyList, new IntPtr(1024), 0);
            ptr = Marshal.AllocCoTaskMem(bytes.Length);
            Marshal.Copy(bytes, 0, ptr, bytes.Length);
            H5P.set_file_image(fileAccessPropertyList, ptr, new IntPtr(bytes.Length));

            // The file name is never opened on disk; a unique name avoids clashes between images.
            var fileId = H5F.open(Guid.NewGuid().ToString(), H5F.ACC_RDONLY, fileAccessPropertyList);
            try
            {
                var hdfFileReader = new HdfFileReader(fileId);
                return hdfFileReader.Read();
            }
            finally
            {
                // The cache must be cleared while the identifier is still valid.
                H5F.clear_elink_file_cache(fileId);
                H5F.close(fileId);
            }
        }
        finally
        {
            if (ptr != IntPtr.Zero)
            {
                Marshal.FreeCoTaskMem(ptr);
            }

            H5P.close(fileAccessPropertyList);
        }
    }
}
|
||||
}
|
88
MatFileHandler/MatFileLevel5Reader.cs
Normal file
88
MatFileHandler/MatFileLevel5Reader.cs
Normal file
@ -0,0 +1,88 @@
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
|
||||
namespace MatFileHandler
|
||||
{
|
||||
/// <summary>
/// Reader for the body of Level 5 .mat files (everything after the header).
/// </summary>
internal static class MatFileLevel5Reader
{
    /// <summary>
    /// Read data elements from the reader until the end of the stream is hit.
    /// </summary>
    /// <param name="reader">Reader.</param>
    /// <param name="subsystemDataOffset">Stream position of the subsystem data element;
    /// the element found at this position is parsed as subsystem data
    /// instead of being returned as a variable.</param>
    /// <param name="subsystemData">
    /// Subsystem data holder shared with the element reader; it is updated in place
    /// once the subsystem data element has been parsed.</param>
    /// <returns>List of "raw" variables; the actual variables are constructed from them later.</returns>
    internal static List<RawVariable> ReadRawVariables(BinaryReader reader, long subsystemDataOffset, SubsystemData subsystemData)
    {
        var result = new List<RawVariable>();
        var elementReader = new DataElementReader(subsystemData);
        try
        {
            for (; ; )
            {
                var startPosition = reader.BaseStream.Position;
                var element = elementReader.Read(reader);
                if (startPosition != subsystemDataOffset)
                {
                    result.Add(new RawVariable(startPosition, element));
                    continue;
                }

                var subsystemBytes = element as IArrayOf<byte>;
                var parsedSubsystemData = ReadSubsystemData(subsystemBytes.Data, subsystemData);
                subsystemData.Set(parsedSubsystemData);
            }
        }
        catch (EndOfStreamException)
        {
            // Running out of data is how the end of the variable sequence is detected.
        }

        return result;
    }

    /// <summary>
    /// Read raw variables from a .mat file, starting with fresh subsystem data.
    /// </summary>
    /// <param name="reader">Binary reader.</param>
    /// <param name="subsystemDataOffset">Offset to the subsystem data to use (read from the file header).</param>
    /// <returns>Raw variables read.</returns>
    internal static List<RawVariable> ReadRawVariables(BinaryReader reader, long subsystemDataOffset)
    {
        return ReadRawVariables(reader, subsystemDataOffset, new SubsystemData());
    }

    /// <summary>
    /// Read the rest of a Level 5 file after its header has been consumed.
    /// </summary>
    /// <param name="header">Header that was already read from the reader.</param>
    /// <param name="reader">Binary reader positioned right after the header.</param>
    /// <returns>File containing every raw variable whose data element is a <see cref="MatArray"/>.</returns>
    internal static IMatFile ContinueReadingLevel5File(Header header, BinaryReader reader)
    {
        var matVariables = new List<IVariable>();
        foreach (var raw in ReadRawVariables(reader, header.SubsystemDataOffset))
        {
            // Elements that are not arrays are skipped.
            if (raw.DataElement is MatArray matArray)
            {
                var isGlobal = matArray.Flags.Variable.HasFlag(Variable.IsGlobal);
                matVariables.Add(new MatVariable(matArray, matArray.Name, isGlobal));
            }
        }

        return new MatFile(matVariables);
    }

    // Thin wrapper over SubsystemDataReader.Read.
    private static SubsystemData ReadSubsystemData(byte[] bytes, SubsystemData subsystemData)
        => SubsystemDataReader.Read(bytes, subsystemData);
}
|
||||
}
|
@ -34,89 +34,23 @@ namespace MatFileHandler
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Read a sequence of raw variables from .mat file.
|
||||
/// </summary>
|
||||
/// <param name="reader">Reader.</param>
|
||||
/// <param name="subsystemDataOffset">Offset of subsystem data in the file;
|
||||
/// we need it because we may encounter it during reading, and
|
||||
/// the subsystem data should be parsed in a special way.</param>
|
||||
/// <param name="subsystemData">
|
||||
/// Link to the current file's subsystem data structure; initially it has dummy value
|
||||
/// which will be replaced after we parse the whole subsystem data.</param>
|
||||
/// <returns>List of "raw" variables; the actual variables are constructed from them later.</returns>
|
||||
internal static List<RawVariable> ReadRawVariables(BinaryReader reader, long subsystemDataOffset, SubsystemData subsystemData)
|
||||
{
|
||||
var variables = new List<RawVariable>();
|
||||
var dataElementReader = new DataElementReader(subsystemData);
|
||||
while (true)
|
||||
{
|
||||
try
|
||||
{
|
||||
var position = reader.BaseStream.Position;
|
||||
var dataElement = dataElementReader.Read(reader);
|
||||
if (position == subsystemDataOffset)
|
||||
{
|
||||
var subsystemDataElement = dataElement as IArrayOf<byte>;
|
||||
var newSubsystemData = ReadSubsystemData(subsystemDataElement.Data, subsystemData);
|
||||
subsystemData.Set(newSubsystemData);
|
||||
}
|
||||
else
|
||||
{
|
||||
variables.Add(new RawVariable(position, dataElement));
|
||||
}
|
||||
}
|
||||
catch (EndOfStreamException)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return variables;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Read raw variables from a .mat file.
|
||||
/// </summary>
|
||||
/// <param name="reader">Binary reader.</param>
|
||||
/// <param name="subsystemDataOffset">Offset to the subsystem data to use (read from the file header).</param>
|
||||
/// <returns>Raw variables read.</returns>
|
||||
internal static List<RawVariable> ReadRawVariables(BinaryReader reader, long subsystemDataOffset)
|
||||
{
|
||||
var subsystemData = new SubsystemData();
|
||||
return ReadRawVariables(reader, subsystemDataOffset, subsystemData);
|
||||
}
|
||||
|
||||
private static IMatFile Read(BinaryReader reader)
|
||||
private IMatFile Read(BinaryReader reader)
|
||||
{
|
||||
var header = ReadHeader(reader);
|
||||
var rawVariables = ReadRawVariables(reader, header.SubsystemDataOffset);
|
||||
var variables = new List<IVariable>();
|
||||
foreach (var variable in rawVariables)
|
||||
switch (header.Version)
|
||||
{
|
||||
var array = variable.DataElement as MatArray;
|
||||
if (array is null)
|
||||
{
|
||||
continue;
|
||||
case 256:
|
||||
return MatFileLevel5Reader.ContinueReadingLevel5File(header, reader);
|
||||
case 512:
|
||||
return MatFileHdfReader.ContinueReadingHdfFile(header, reader.BaseStream);
|
||||
default:
|
||||
throw new NotSupportedException($"Unknown file format.");
|
||||
}
|
||||
|
||||
variables.Add(new MatVariable(
|
||||
array,
|
||||
array.Name,
|
||||
array.Flags.Variable.HasFlag(Variable.IsGlobal)));
|
||||
}
|
||||
|
||||
return new MatFile(variables);
|
||||
}
|
||||
|
||||
private static Header ReadHeader(BinaryReader reader)
|
||||
{
|
||||
return Header.Read(reader);
|
||||
}
|
||||
|
||||
private static SubsystemData ReadSubsystemData(byte[] bytes, SubsystemData subsystemData)
|
||||
{
|
||||
return SubsystemDataReader.Read(bytes, subsystemData);
|
||||
}
|
||||
}
|
||||
}
|
@ -29,7 +29,7 @@ namespace MatFileHandler
|
||||
using (var reader = new BinaryReader(stream))
|
||||
{
|
||||
reader.ReadBytes(8);
|
||||
rawVariables = MatFileReader.ReadRawVariables(reader, -1, subsystemData);
|
||||
rawVariables = MatFileLevel5Reader.ReadRawVariables(reader, -1, subsystemData);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user