From cfe51d57ae89dc9a94a9a1edce05bee0323f7317 Mon Sep 17 00:00:00 2001 From: Alexander Luzgarev Date: Sat, 16 Mar 2019 14:30:43 +0100 Subject: [PATCH] Support sparse arrays --- MatFileHandler.Tests/MatFileReaderHdfTests.cs | 55 +++++++ MatFileHandler.Tests/test-data/hdf/sparse.mat | Bin 0 -> 3744 bytes .../test-data/hdf/sparse_complex.mat | Bin 0 -> 3728 bytes .../test-data/hdf/sparse_logical.mat | Bin 0 -> 3776 bytes MatFileHandler/DataElementConverter.cs | 21 +-- MatFileHandler/DataExtraction.cs | 17 +++ MatFileHandler/Hdf/SparseArrayOf.cs | 85 +++++++++++ MatFileHandler/HdfFileReader.cs | 141 ++++++++++++++++-- 8 files changed, 290 insertions(+), 29 deletions(-) create mode 100644 MatFileHandler.Tests/test-data/hdf/sparse.mat create mode 100644 MatFileHandler.Tests/test-data/hdf/sparse_complex.mat create mode 100644 MatFileHandler.Tests/test-data/hdf/sparse_logical.mat create mode 100644 MatFileHandler/Hdf/SparseArrayOf.cs diff --git a/MatFileHandler.Tests/MatFileReaderHdfTests.cs b/MatFileHandler.Tests/MatFileReaderHdfTests.cs index b192168..d182732 100644 --- a/MatFileHandler.Tests/MatFileReaderHdfTests.cs +++ b/MatFileHandler.Tests/MatFileReaderHdfTests.cs @@ -204,6 +204,26 @@ namespace MatFileHandler.Tests Assert.That(structure["y", 1, 2].IsEmpty, Is.True); } + /// + /// Test reading a sparse array. + /// + [Test] + public void TestSparse() + { + var matFile = ReadHdfTestFile("sparse"); + var sparseArray = matFile["sparse_"].Value as ISparseArrayOf; + Assert.That(sparseArray, Is.Not.Null); + Assert.That(sparseArray.Dimensions, Is.EqualTo(new[] { 4, 5 })); + Assert.That(sparseArray.Data[(1, 1)], Is.EqualTo(1.0)); + Assert.That(sparseArray[1, 1], Is.EqualTo(1.0)); + Assert.That(sparseArray[1, 2], Is.EqualTo(2.0)); + Assert.That(sparseArray[2, 1], Is.EqualTo(3.0)); + Assert.That(sparseArray[2, 3], Is.EqualTo(4.0)); + Assert.That(sparseArray[0, 4], Is.EqualTo(0.0)); + Assert.That(sparseArray[3, 0], Is.EqualTo(0.0)); + Assert.That(sparseArray[3, 4], Is.EqualTo(0.0)); + } + /// /// Test reading a logical array. /// @@ -222,6 +242,41 @@ namespace MatFileHandler.Tests Assert.That(logicalArray[1, 2], Is.True); } + /// + /// Test reading a sparse complex array. + /// + [Test] + public void TextSparseComplex() + { + var matFile = ReadHdfTestFile("sparse_complex"); + var array = matFile["sparse_complex"].Value; + var sparseArray = array as ISparseArrayOf; + Assert.That(sparseArray, Is.Not.Null); + Assert.That(sparseArray[0, 0], Is.EqualTo(-1.5 + (2.5 * Complex.ImaginaryOne))); + Assert.That(sparseArray[1, 0], Is.EqualTo(2 - (3 * Complex.ImaginaryOne))); + Assert.That(sparseArray[0, 1], Is.EqualTo(Complex.Zero)); + Assert.That(sparseArray[1, 1], Is.EqualTo(0.5 + (1.0 * Complex.ImaginaryOne))); + } + + /// + /// Test reading a sparse logical array. + /// + [Test] + public void TestSparseLogical() + { + var matFile = ReadHdfTestFile("sparse_logical"); + var array = matFile["sparse_logical"].Value; + var sparseArray = array as ISparseArrayOf; + Assert.That(sparseArray, Is.Not.Null); + Assert.That(sparseArray.Data[(0, 0)], Is.True); + Assert.That(sparseArray[0, 0], Is.True); + Assert.That(sparseArray[0, 1], Is.True); + Assert.That(sparseArray[0, 2], Is.False); + Assert.That(sparseArray[1, 0], Is.False); + Assert.That(sparseArray[1, 1], Is.True); + Assert.That(sparseArray[1, 2], Is.True); + } + private static void CheckComplexLimits(IArrayOf> array, T[] limits) where T : struct { diff --git a/MatFileHandler.Tests/test-data/hdf/sparse.mat b/MatFileHandler.Tests/test-data/hdf/sparse.mat new file mode 100644 index 0000000000000000000000000000000000000000..2a5f98a54c01da2ed8693aadd1f1136cd1d665bd GIT binary patch literal 3744 zcmeHJ&2AGh5FTgSvg)5wxFAk*?4d-vkV2bF6WUa%L`npa;DA(?WUB_Hq}mPd5X6N? z;K(EJ033J>9szFLipMk4(hY^s3#l5fc4x*OkL}N&XI-x}A66dV{lZ0mudFkHoOy;KZK!5ZEP!gZ`|ZLeb5 zI*1P=3=2Vk1^itBExDb#suTHt5O{0%Y+?4+tQ+0;VG8m*ksOO-DIXTk+H3*55XFcb zi2Nx14bTH{Q|gx_e%7z~xLO74spF}XZt!)QHD47X5^xhtm`ss3ikQ? zyXOF!A=!6azI-kS6)t7I0i8QNy}umIo$KNn^qhT?|HGfXFMq4@5FLAnmyDG1P`RPf zv+d-%gc~RPF3v&mSV%R@$7KhS2Pj~D+hD~nONGe1lf(QQh}(FGf5&Uob9;)DYz63f O&Dw;-FizunPJaSP&W45n literal 0 HcmV?d00001 diff --git a/MatFileHandler.Tests/test-data/hdf/sparse_complex.mat b/MatFileHandler.Tests/test-data/hdf/sparse_complex.mat new file mode 100644 index 0000000000000000000000000000000000000000..36681e8cd7d6dcfe1575dcc956f62e5d27748156 GIT binary patch literal 3728 zcmeHK&2AGh5FRHjtoo-EBvf&lV-F?Tg|w8tG@%VjB~l_92@XhQNwy?tL#o{%apYJe z4t)$Bfg2ooj6MPvByPp_%rtb<(3A^>#;e^K+cRT(=JVKFE!7{E?&ID39jun>w|3i6 zxP-MR=$g|SVafW45#%Z!|FcUV%qxU8qdxs`QJZ0 z1CUJEm$?WW;Xf<48d#*8&d{mJs$0`TMQn%cr%!wi{9gLs?-jhc79|Fc*Q(;dV&?Yk`RTYw**$=*bf?J{+~ mQ2lU?=r>CTagWq}##j8eFtY#te82J#*8eiC!FgZI{rVG(H;_jF literal 0 HcmV?d00001 diff --git a/MatFileHandler.Tests/test-data/hdf/sparse_logical.mat b/MatFileHandler.Tests/test-data/hdf/sparse_logical.mat new file mode 100644 index 0000000000000000000000000000000000000000..e16235027ba9924c3dcb985c1cade4fa11ba82c1 GIT binary patch literal 3776 zcmeHJ&2AGh5FRHDEG4Z%xl}#Pv4;|ELR(5s32jg+krL5Ja6k&yW=n&%4cgrUk5D8I zJ@N=VMvpuK$G%2SV0&f+*-&UM5Vdg>k3E03KYzBj-RM4RJi$lhhuCg(9~=y#a1A?= zP7j9hQ4KrWFL(Dg*Vk}64s{y#ad=$AeVt-k$5`3K%6hH3RjY1b)vr9p*3Q!nOoGGk zNMoh!`&h=`6_AqKZA)_|{|^Em&7LjHugtsAJrCxfL=(x8_|D~QakNi8fH&MwEKgW| z;dll12e8cgdz?S(*L~4!g7K8`L~}yVYUmuB=Yg553H#g*jieGA_NI_%E902krqqSu77R)GCpHn#( z=)tNRFAwOXt(?I~rW~(}%u75=>vM7O={3M98K$hJ^JGz=@`1jbE}f;kQ>S+?b#Vqc zpA5sFov(pac;_)GCgB~05MtxJBYobm%WznY^Xl_dqbIohi}NTz$hj10_2alr=Xv}A D3)GAA literal 0 HcmV?d00001 diff --git a/MatFileHandler/DataElementConverter.cs b/MatFileHandler/DataElementConverter.cs index 1a1e252..d4e34b7 100755 --- a/MatFileHandler/DataElementConverter.cs +++ b/MatFileHandler/DataElementConverter.cs @@ -40,7 +40,7 @@ namespace MatFileHandler throw new HandlerException("Couldn't read sparse array."); } var dataDictionary = - ConvertMatlabSparseToDictionary( + DataExtraction.ConvertMatlabSparseToDictionary( rowIndex, columnIndex, j => new Complex(realParts[j], imaginaryParts[j])); @@ -82,7 +82,7 @@ namespace MatFileHandler throw new HandlerException("Couldn't read sparse array."); } var dataDictionary = - ConvertMatlabSparseToDictionary(rowIndex, columnIndex, j => elements[j]); + DataExtraction.ConvertMatlabSparseToDictionary(rowIndex, columnIndex, j => elements[j]); return new MatSparseArrayOf(flags, dimensions, name, dataDictionary); } @@ -225,22 +225,5 @@ namespace MatFileHandler data, new string(data.Select(x => (char)x).ToArray())); } - - private static Dictionary<(int, int), T> ConvertMatlabSparseToDictionary( - int[] rowIndex, - int[] columnIndex, - Func get) - { - var result = new Dictionary<(int, int), T>(); - for (var column = 0; column < columnIndex.Length - 1; column++) - { - for (var j = columnIndex[column]; j < columnIndex[column + 1]; j++) - { - var row = rowIndex[j]; - result[(row, column)] = get(j); - } - } - return result; - } } } \ No newline at end of file diff --git a/MatFileHandler/DataExtraction.cs b/MatFileHandler/DataExtraction.cs index d5d9edd..d79ed1e 100755 --- a/MatFileHandler/DataExtraction.cs +++ b/MatFileHandler/DataExtraction.cs @@ -362,5 +362,22 @@ namespace MatFileHandler throw new HandlerException( $"Expected data element that would be convertible to uint64, found {element.GetType()}."); } + + public static Dictionary<(int, int), T> ConvertMatlabSparseToDictionary( + int[] rowIndex, + int[] columnIndex, + Func get) + { + var result = new Dictionary<(int, int), T>(); + for (var column = 0; column < columnIndex.Length - 1; column++) + { + for (var j = columnIndex[column]; j < columnIndex[column + 1]; j++) + { + var row = rowIndex[j]; + result[(row, column)] = get(j); + } + } + return result; + } } } \ No newline at end of file diff --git a/MatFileHandler/Hdf/SparseArrayOf.cs b/MatFileHandler/Hdf/SparseArrayOf.cs new file mode 100644 index 0000000..0600106 --- /dev/null +++ b/MatFileHandler/Hdf/SparseArrayOf.cs @@ -0,0 +1,85 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Numerics; +using System.Text; + +namespace MatFileHandler.Hdf +{ + /// + /// Sparse array. + /// + /// Element type. + /// Possible values of T: Double, Complex, Boolean. + internal class SparseArrayOf : Array, ISparseArrayOf + where T : struct + { + /// + /// Initializes a new instance of the class. + /// + /// Dimensions of the array. + /// Array contents. + public SparseArrayOf( + int[] dimensions, + Dictionary<(int, int), T> data) + : base(dimensions) + { + DataDictionary = data; + } + + /// + T[] IArrayOf.Data => + Enumerable.Range(0, Dimensions[0] * Dimensions[1]) + .Select(i => this[i]) + .ToArray(); + + /// + public IReadOnlyDictionary<(int, int), T> Data => DataDictionary; + + private Dictionary<(int, int), T> DataDictionary { get; } + + /// + public T this[params int[] list] + { + get + { + var rowAndColumn = GetRowAndColumn(list); + return DataDictionary.ContainsKey(rowAndColumn) ? DataDictionary[rowAndColumn] : default(T); + } + set => DataDictionary[GetRowAndColumn(list)] = value; + } + + /// + /// Tries to convert the array to an array of Double values. + /// + /// Array of values of the array, converted to Double, or null if the conversion is not possible. + public override double[] ConvertToDoubleArray() + { + var data = ((IArrayOf)this).Data; + return data as double[] ?? data.Select(x => Convert.ToDouble(x)).ToArray(); + } + + /// + /// Tries to convert the array to an array of Complex values. + /// + /// Array of values of the array, converted to Complex, or null if the conversion is not possible. + public override Complex[] ConvertToComplexArray() + { + var data = ((IArrayOf)this).Data; + return data as Complex[] ?? ConvertToDoubleArray().Select(x => new Complex(x, 0.0)).ToArray(); + } + + private (int row, int column) GetRowAndColumn(int[] indices) + { + switch (indices.Length) + { + case 1: + return (indices[0] % Dimensions[0], indices[0] / Dimensions[0]); + case 2: + return (indices[0], indices[1]); + default: + throw new NotSupportedException("Invalid index for sparse array."); + } + } + } +} diff --git a/MatFileHandler/HdfFileReader.cs b/MatFileHandler/HdfFileReader.cs index 4ac40c0..f2fb596 100644 --- a/MatFileHandler/HdfFileReader.cs +++ b/MatFileHandler/HdfFileReader.cs @@ -152,6 +152,80 @@ namespace MatFileHandler return 0; } + private static IArray ReadSparseArray(long groupId, MatlabClass arrayType) + where T : struct + { + using (var sparseAttribute = new Attribute(groupId, "MATLAB_sparse")) + { + using (var numberOfRowsHandle = new MemoryHandle(sizeof(uint))) + { + H5A.read(sparseAttribute.Id, H5T.NATIVE_UINT, numberOfRowsHandle.Handle); + var numberOfRows = Marshal.ReadInt32(numberOfRowsHandle.Handle); + int[] rowIndex; + int[] columnIndex; + using (var irData = new Dataset(groupId, "ir")) + { + var ds = GetDimensionsOfDataset(irData.Id); + var numberOfIr = ds.NumberOfElements(); + var irBytes = ReadDataset(irData.Id, H5T.NATIVE_INT, numberOfIr * sizeof(int)); + rowIndex = new int[numberOfIr]; + Buffer.BlockCopy(irBytes, 0, rowIndex, 0, irBytes.Length); + } + using (var jcData = new Dataset(groupId, "jc")) + { + var ds = GetDimensionsOfDataset(jcData.Id); + var numberOfJc = ds.NumberOfElements(); + var jcBytes = ReadDataset(jcData.Id, H5T.NATIVE_INT, numberOfJc * sizeof(int)); + columnIndex = new int[numberOfJc]; + Buffer.BlockCopy(jcBytes, 0, columnIndex, 0, jcBytes.Length); + } + + using (var data = new Dataset(groupId, "data")) + { + var ds = GetDimensionsOfDataset(data.Id); + var dims = new int[2]; + dims[0] = numberOfRows; + dims[1] = columnIndex.Length - 1; + var dataSize = ds.NumberOfElements() * SizeOfArrayElement(arrayType); + var storageSize = (int)H5D.get_storage_size(data.Id); + var dataSetType = H5D.get_type(data.Id); + var dataSetTypeClass = H5T.get_class(dataSetType); + var isCompound = dataSetTypeClass == H5T.class_t.COMPOUND; + if (isCompound) + { + var (convertedRealData, convertedImaginaryData) = ReadComplexData(data.Id, dataSize, arrayType); + if (arrayType == MatlabClass.MDouble) + { + var complexData = + (convertedRealData as double[]) + .Zip(convertedImaginaryData as double[], (x, y) => new Complex(x, y)) + .ToArray(); + var complexDataDictionary = DataExtraction.ConvertMatlabSparseToDictionary(rowIndex, columnIndex, j => complexData[j]); + return new SparseArrayOf(dims, complexDataDictionary); + } + else + { + var complexData = + convertedRealData + .Zip(convertedImaginaryData, (x, y) => new ComplexOf(x, y)) + .ToArray(); + var complexDataDictionary = DataExtraction.ConvertMatlabSparseToDictionary(rowIndex, columnIndex, j => complexData[j]); + return new SparseArrayOf>(dims, complexDataDictionary); + } + } + if (dataSize != storageSize) + { + throw new Exception("Data size mismatch."); + } + var d = ReadDataset(data.Id, H5tTypeFromHdfMatlabClass(arrayType), dataSize); + var elements = ConvertDataToProperType(d, arrayType); + var dataDictionary = DataExtraction.ConvertMatlabSparseToDictionary(rowIndex, columnIndex, j => elements[j]); + return new SparseArrayOf(dims, dataDictionary); + } + } + } + } + private static IArray ReadGroup(long groupId) { var matlabClass = GetMatlabClassOfDataset(groupId); @@ -159,6 +233,43 @@ namespace MatFileHandler { return ReadStruct(groupId); } + + if (H5A.exists_by_name(groupId, ".", "MATLAB_sparse") != 0) + { + var dims = new int[0]; + var arrayType = ArrayTypeFromMatlabClassName(matlabClass); + + switch (arrayType) + { + case MatlabClass.MEmpty: + return Array.Empty(); + case MatlabClass.MLogical: + return ReadSparseArray(groupId, arrayType); + case MatlabClass.MInt8: + return ReadSparseArray(groupId, arrayType); + case MatlabClass.MUInt8: + return ReadSparseArray(groupId, arrayType); + case MatlabClass.MInt16: + return ReadSparseArray(groupId, arrayType); + case MatlabClass.MUInt16: + return ReadSparseArray(groupId, arrayType); + case MatlabClass.MInt32: + return ReadSparseArray(groupId, arrayType); + case MatlabClass.MUInt32: + return ReadSparseArray(groupId, arrayType); + case MatlabClass.MInt64: + return ReadSparseArray(groupId, arrayType); + case MatlabClass.MUInt64: + return ReadSparseArray(groupId, arrayType); + case MatlabClass.MSingle: + return ReadSparseArray(groupId, arrayType); + case MatlabClass.MDouble: + return ReadSparseArray(groupId, arrayType); + default: + throw new NotSupportedException(); + } + } + throw new NotImplementedException(); } @@ -409,6 +520,25 @@ namespace MatFileHandler return data; } + private static (T[] real, T[] imaginary) ReadComplexData( + long datasetId, + int dataSize, + MatlabClass arrayType) + where T : struct + { + var h5Type = H5tTypeFromHdfMatlabClass(arrayType); + var h5Size = H5T.get_size(h5Type); + var h5tComplexReal = H5T.create(H5T.class_t.COMPOUND, h5Size); + H5T.insert(h5tComplexReal, "real", IntPtr.Zero, h5Type); + var realData = ReadDataset(datasetId, h5tComplexReal, dataSize); + var convertedRealData = ConvertDataToProperType(realData, arrayType); + var h5tComplexImaginary = H5T.create(H5T.class_t.COMPOUND, h5Size); + H5T.insert(h5tComplexImaginary, "imag", IntPtr.Zero, h5Type); + var imaginaryData = ReadDataset(datasetId, h5tComplexImaginary, dataSize); + var convertedImaginaryData = ConvertDataToProperType(imaginaryData, arrayType); + return (convertedRealData, convertedImaginaryData); + } + private static IArray ReadNumericalArray(long datasetId, int[] dims, MatlabClass arrayType) where T : struct { @@ -420,16 +550,7 @@ namespace MatFileHandler var isCompound = dataSetTypeClass == H5T.class_t.COMPOUND; if (isCompound) { - var h5Type = H5tTypeFromHdfMatlabClass(arrayType); - var h5Size = H5T.get_size(h5Type); - var h5tComplexReal = H5T.create(H5T.class_t.COMPOUND, h5Size); - H5T.insert(h5tComplexReal, "real", IntPtr.Zero, h5Type); - var realData = ReadDataset(datasetId, h5tComplexReal, dataSize); - var convertedRealData = ConvertDataToProperType(realData, arrayType); - var h5tComplexImaginary = H5T.create(H5T.class_t.COMPOUND, h5Size); - H5T.insert(h5tComplexImaginary, "imag", IntPtr.Zero, h5Type); - var imaginaryData = ReadDataset(datasetId, h5tComplexImaginary, dataSize); - var convertedImaginaryData = ConvertDataToProperType(imaginaryData, arrayType); + var (convertedRealData, convertedImaginaryData) = ReadComplexData(datasetId, dataSize, arrayType); if (arrayType == MatlabClass.MDouble) { var complexData =