Support sparse arrays

This commit is contained in:
Alexander Luzgarev 2019-03-16 14:30:43 +01:00
parent f727c6d430
commit cfe51d57ae
8 changed files with 290 additions and 29 deletions

View File

@ -204,6 +204,26 @@ namespace MatFileHandler.Tests
Assert.That(structure["y", 1, 2].IsEmpty, Is.True); Assert.That(structure["y", 1, 2].IsEmpty, Is.True);
} }
/// <summary>
/// Test reading a sparse array.
/// </summary>
[Test]
public void TestSparse()
{
var matFile = ReadHdfTestFile("sparse");
var sparseArray = matFile["sparse_"].Value as ISparseArrayOf<double>;
Assert.That(sparseArray, Is.Not.Null);
Assert.That(sparseArray.Dimensions, Is.EqualTo(new[] { 4, 5 }));
Assert.That(sparseArray.Data[(1, 1)], Is.EqualTo(1.0));
Assert.That(sparseArray[1, 1], Is.EqualTo(1.0));
Assert.That(sparseArray[1, 2], Is.EqualTo(2.0));
Assert.That(sparseArray[2, 1], Is.EqualTo(3.0));
Assert.That(sparseArray[2, 3], Is.EqualTo(4.0));
Assert.That(sparseArray[0, 4], Is.EqualTo(0.0));
Assert.That(sparseArray[3, 0], Is.EqualTo(0.0));
Assert.That(sparseArray[3, 4], Is.EqualTo(0.0));
}
/// <summary> /// <summary>
/// Test reading a logical array. /// Test reading a logical array.
/// </summary> /// </summary>
@ -222,6 +242,41 @@ namespace MatFileHandler.Tests
Assert.That(logicalArray[1, 2], Is.True); Assert.That(logicalArray[1, 2], Is.True);
} }
/// <summary>
/// Test reading a sparse complex array.
/// </summary>
[Test]
public void TextSparseComplex()
{
var matFile = ReadHdfTestFile("sparse_complex");
var array = matFile["sparse_complex"].Value;
var sparseArray = array as ISparseArrayOf<Complex>;
Assert.That(sparseArray, Is.Not.Null);
Assert.That(sparseArray[0, 0], Is.EqualTo(-1.5 + (2.5 * Complex.ImaginaryOne)));
Assert.That(sparseArray[1, 0], Is.EqualTo(2 - (3 * Complex.ImaginaryOne)));
Assert.That(sparseArray[0, 1], Is.EqualTo(Complex.Zero));
Assert.That(sparseArray[1, 1], Is.EqualTo(0.5 + (1.0 * Complex.ImaginaryOne)));
}
/// <summary>
/// Test reading a sparse logical array.
/// </summary>
[Test]
public void TestSparseLogical()
{
var matFile = ReadHdfTestFile("sparse_logical");
var array = matFile["sparse_logical"].Value;
var sparseArray = array as ISparseArrayOf<bool>;
Assert.That(sparseArray, Is.Not.Null);
Assert.That(sparseArray.Data[(0, 0)], Is.True);
Assert.That(sparseArray[0, 0], Is.True);
Assert.That(sparseArray[0, 1], Is.True);
Assert.That(sparseArray[0, 2], Is.False);
Assert.That(sparseArray[1, 0], Is.False);
Assert.That(sparseArray[1, 1], Is.True);
Assert.That(sparseArray[1, 2], Is.True);
}
private static void CheckComplexLimits<T>(IArrayOf<ComplexOf<T>> array, T[] limits) private static void CheckComplexLimits<T>(IArrayOf<ComplexOf<T>> array, T[] limits)
where T : struct where T : struct
{ {

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -40,7 +40,7 @@ namespace MatFileHandler
throw new HandlerException("Couldn't read sparse array."); throw new HandlerException("Couldn't read sparse array.");
} }
var dataDictionary = var dataDictionary =
ConvertMatlabSparseToDictionary( DataExtraction.ConvertMatlabSparseToDictionary(
rowIndex, rowIndex,
columnIndex, columnIndex,
j => new Complex(realParts[j], imaginaryParts[j])); j => new Complex(realParts[j], imaginaryParts[j]));
@ -82,7 +82,7 @@ namespace MatFileHandler
throw new HandlerException("Couldn't read sparse array."); throw new HandlerException("Couldn't read sparse array.");
} }
var dataDictionary = var dataDictionary =
ConvertMatlabSparseToDictionary(rowIndex, columnIndex, j => elements[j]); DataExtraction.ConvertMatlabSparseToDictionary(rowIndex, columnIndex, j => elements[j]);
return new MatSparseArrayOf<T>(flags, dimensions, name, dataDictionary); return new MatSparseArrayOf<T>(flags, dimensions, name, dataDictionary);
} }
@ -225,22 +225,5 @@ namespace MatFileHandler
data, data,
new string(data.Select(x => (char)x).ToArray())); new string(data.Select(x => (char)x).ToArray()));
} }
private static Dictionary<(int, int), T> ConvertMatlabSparseToDictionary<T>(
int[] rowIndex,
int[] columnIndex,
Func<int, T> get)
{
var result = new Dictionary<(int, int), T>();
for (var column = 0; column < columnIndex.Length - 1; column++)
{
for (var j = columnIndex[column]; j < columnIndex[column + 1]; j++)
{
var row = rowIndex[j];
result[(row, column)] = get(j);
}
}
return result;
}
} }
} }

View File

@ -362,5 +362,22 @@ namespace MatFileHandler
throw new HandlerException( throw new HandlerException(
$"Expected data element that would be convertible to uint64, found {element.GetType()}."); $"Expected data element that would be convertible to uint64, found {element.GetType()}.");
} }
public static Dictionary<(int, int), T> ConvertMatlabSparseToDictionary<T>(
int[] rowIndex,
int[] columnIndex,
Func<int, T> get)
{
var result = new Dictionary<(int, int), T>();
for (var column = 0; column < columnIndex.Length - 1; column++)
{
for (var j = columnIndex[column]; j < columnIndex[column + 1]; j++)
{
var row = rowIndex[j];
result[(row, column)] = get(j);
}
}
return result;
}
} }
} }

View File

@ -0,0 +1,85 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Numerics;
using System.Text;
namespace MatFileHandler.Hdf
{
/// <summary>
/// Sparse array.
/// </summary>
/// <typeparam name="T">Element type.</typeparam>
/// <remarks>Possible values of T: Double, Complex, Boolean.</remarks>
internal class SparseArrayOf<T> : Array, ISparseArrayOf<T>
where T : struct
{
/// <summary>
/// Initializes a new instance of the <see cref="SparseArrayOf{T}"/> class.
/// </summary>
/// <param name="dimensions">Dimensions of the array.</param>
/// <param name="data">Array contents.</param>
public SparseArrayOf(
int[] dimensions,
Dictionary<(int, int), T> data)
: base(dimensions)
{
DataDictionary = data;
}
/// <inheritdoc />
T[] IArrayOf<T>.Data =>
Enumerable.Range(0, Dimensions[0] * Dimensions[1])
.Select(i => this[i])
.ToArray();
/// <inheritdoc />
public IReadOnlyDictionary<(int, int), T> Data => DataDictionary;
private Dictionary<(int, int), T> DataDictionary { get; }
/// <inheritdoc />
public T this[params int[] list]
{
get
{
var rowAndColumn = GetRowAndColumn(list);
return DataDictionary.ContainsKey(rowAndColumn) ? DataDictionary[rowAndColumn] : default(T);
}
set => DataDictionary[GetRowAndColumn(list)] = value;
}
/// <summary>
/// Tries to convert the array to an array of Double values.
/// </summary>
/// <returns>Array of values of the array, converted to Double, or null if the conversion is not possible.</returns>
public override double[] ConvertToDoubleArray()
{
var data = ((IArrayOf<T>)this).Data;
return data as double[] ?? data.Select(x => Convert.ToDouble(x)).ToArray();
}
/// <summary>
/// Tries to convert the array to an array of Complex values.
/// </summary>
/// <returns>Array of values of the array, converted to Complex, or null if the conversion is not possible.</returns>
public override Complex[] ConvertToComplexArray()
{
var data = ((IArrayOf<T>)this).Data;
return data as Complex[] ?? ConvertToDoubleArray().Select(x => new Complex(x, 0.0)).ToArray();
}
private (int row, int column) GetRowAndColumn(int[] indices)
{
switch (indices.Length)
{
case 1:
return (indices[0] % Dimensions[0], indices[0] / Dimensions[0]);
case 2:
return (indices[0], indices[1]);
default:
throw new NotSupportedException("Invalid index for sparse array.");
}
}
}
}

View File

@ -152,6 +152,80 @@ namespace MatFileHandler
return 0; return 0;
} }
private static IArray ReadSparseArray<T>(long groupId, MatlabClass arrayType)
where T : struct
{
using (var sparseAttribute = new Attribute(groupId, "MATLAB_sparse"))
{
using (var numberOfRowsHandle = new MemoryHandle(sizeof(uint)))
{
H5A.read(sparseAttribute.Id, H5T.NATIVE_UINT, numberOfRowsHandle.Handle);
var numberOfRows = Marshal.ReadInt32(numberOfRowsHandle.Handle);
int[] rowIndex;
int[] columnIndex;
using (var irData = new Dataset(groupId, "ir"))
{
var ds = GetDimensionsOfDataset(irData.Id);
var numberOfIr = ds.NumberOfElements();
var irBytes = ReadDataset(irData.Id, H5T.NATIVE_INT, numberOfIr * sizeof(int));
rowIndex = new int[numberOfIr];
Buffer.BlockCopy(irBytes, 0, rowIndex, 0, irBytes.Length);
}
using (var jcData = new Dataset(groupId, "jc"))
{
var ds = GetDimensionsOfDataset(jcData.Id);
var numberOfJc = ds.NumberOfElements();
var jcBytes = ReadDataset(jcData.Id, H5T.NATIVE_INT, numberOfJc * sizeof(int));
columnIndex = new int[numberOfJc];
Buffer.BlockCopy(jcBytes, 0, columnIndex, 0, jcBytes.Length);
}
using (var data = new Dataset(groupId, "data"))
{
var ds = GetDimensionsOfDataset(data.Id);
var dims = new int[2];
dims[0] = numberOfRows;
dims[1] = columnIndex.Length - 1;
var dataSize = ds.NumberOfElements() * SizeOfArrayElement(arrayType);
var storageSize = (int)H5D.get_storage_size(data.Id);
var dataSetType = H5D.get_type(data.Id);
var dataSetTypeClass = H5T.get_class(dataSetType);
var isCompound = dataSetTypeClass == H5T.class_t.COMPOUND;
if (isCompound)
{
var (convertedRealData, convertedImaginaryData) = ReadComplexData<T>(data.Id, dataSize, arrayType);
if (arrayType == MatlabClass.MDouble)
{
var complexData =
(convertedRealData as double[])
.Zip(convertedImaginaryData as double[], (x, y) => new Complex(x, y))
.ToArray();
var complexDataDictionary = DataExtraction.ConvertMatlabSparseToDictionary(rowIndex, columnIndex, j => complexData[j]);
return new SparseArrayOf<Complex>(dims, complexDataDictionary);
}
else
{
var complexData =
convertedRealData
.Zip(convertedImaginaryData, (x, y) => new ComplexOf<T>(x, y))
.ToArray();
var complexDataDictionary = DataExtraction.ConvertMatlabSparseToDictionary(rowIndex, columnIndex, j => complexData[j]);
return new SparseArrayOf<ComplexOf<T>>(dims, complexDataDictionary);
}
}
if (dataSize != storageSize)
{
throw new Exception("Data size mismatch.");
}
var d = ReadDataset(data.Id, H5tTypeFromHdfMatlabClass(arrayType), dataSize);
var elements = ConvertDataToProperType<T>(d, arrayType);
var dataDictionary = DataExtraction.ConvertMatlabSparseToDictionary(rowIndex, columnIndex, j => elements[j]);
return new SparseArrayOf<T>(dims, dataDictionary);
}
}
}
}
private static IArray ReadGroup(long groupId) private static IArray ReadGroup(long groupId)
{ {
var matlabClass = GetMatlabClassOfDataset(groupId); var matlabClass = GetMatlabClassOfDataset(groupId);
@ -159,6 +233,43 @@ namespace MatFileHandler
{ {
return ReadStruct(groupId); return ReadStruct(groupId);
} }
if (H5A.exists_by_name(groupId, ".", "MATLAB_sparse") != 0)
{
var dims = new int[0];
var arrayType = ArrayTypeFromMatlabClassName(matlabClass);
switch (arrayType)
{
case MatlabClass.MEmpty:
return Array.Empty();
case MatlabClass.MLogical:
return ReadSparseArray<bool>(groupId, arrayType);
case MatlabClass.MInt8:
return ReadSparseArray<sbyte>(groupId, arrayType);
case MatlabClass.MUInt8:
return ReadSparseArray<byte>(groupId, arrayType);
case MatlabClass.MInt16:
return ReadSparseArray<short>(groupId, arrayType);
case MatlabClass.MUInt16:
return ReadSparseArray<ushort>(groupId, arrayType);
case MatlabClass.MInt32:
return ReadSparseArray<int>(groupId, arrayType);
case MatlabClass.MUInt32:
return ReadSparseArray<uint>(groupId, arrayType);
case MatlabClass.MInt64:
return ReadSparseArray<long>(groupId, arrayType);
case MatlabClass.MUInt64:
return ReadSparseArray<ulong>(groupId, arrayType);
case MatlabClass.MSingle:
return ReadSparseArray<float>(groupId, arrayType);
case MatlabClass.MDouble:
return ReadSparseArray<double>(groupId, arrayType);
default:
throw new NotSupportedException();
}
}
throw new NotImplementedException(); throw new NotImplementedException();
} }
@ -409,6 +520,25 @@ namespace MatFileHandler
return data; return data;
} }
private static (T[] real, T[] imaginary) ReadComplexData<T>(
long datasetId,
int dataSize,
MatlabClass arrayType)
where T : struct
{
var h5Type = H5tTypeFromHdfMatlabClass(arrayType);
var h5Size = H5T.get_size(h5Type);
var h5tComplexReal = H5T.create(H5T.class_t.COMPOUND, h5Size);
H5T.insert(h5tComplexReal, "real", IntPtr.Zero, h5Type);
var realData = ReadDataset(datasetId, h5tComplexReal, dataSize);
var convertedRealData = ConvertDataToProperType<T>(realData, arrayType);
var h5tComplexImaginary = H5T.create(H5T.class_t.COMPOUND, h5Size);
H5T.insert(h5tComplexImaginary, "imag", IntPtr.Zero, h5Type);
var imaginaryData = ReadDataset(datasetId, h5tComplexImaginary, dataSize);
var convertedImaginaryData = ConvertDataToProperType<T>(imaginaryData, arrayType);
return (convertedRealData, convertedImaginaryData);
}
private static IArray ReadNumericalArray<T>(long datasetId, int[] dims, MatlabClass arrayType) private static IArray ReadNumericalArray<T>(long datasetId, int[] dims, MatlabClass arrayType)
where T : struct where T : struct
{ {
@ -420,16 +550,7 @@ namespace MatFileHandler
var isCompound = dataSetTypeClass == H5T.class_t.COMPOUND; var isCompound = dataSetTypeClass == H5T.class_t.COMPOUND;
if (isCompound) if (isCompound)
{ {
var h5Type = H5tTypeFromHdfMatlabClass(arrayType); var (convertedRealData, convertedImaginaryData) = ReadComplexData<T>(datasetId, dataSize, arrayType);
var h5Size = H5T.get_size(h5Type);
var h5tComplexReal = H5T.create(H5T.class_t.COMPOUND, h5Size);
H5T.insert(h5tComplexReal, "real", IntPtr.Zero, h5Type);
var realData = ReadDataset(datasetId, h5tComplexReal, dataSize);
var convertedRealData = ConvertDataToProperType<T>(realData, arrayType);
var h5tComplexImaginary = H5T.create(H5T.class_t.COMPOUND, h5Size);
H5T.insert(h5tComplexImaginary, "imag", IntPtr.Zero, h5Type);
var imaginaryData = ReadDataset(datasetId, h5tComplexImaginary, dataSize);
var convertedImaginaryData = ConvertDataToProperType<T>(imaginaryData, arrayType);
if (arrayType == MatlabClass.MDouble) if (arrayType == MatlabClass.MDouble)
{ {
var complexData = var complexData =