From 4f01492ab82afa833813bef0783344c333cd57bc Mon Sep 17 00:00:00 2001 From: MarkPflug Date: Tue, 5 Sep 2023 10:06:24 -0700 Subject: [PATCH] Fix .NET framework xlsx reading bug. Fix RowFieldCount for empty rows. --- docs/ReleaseNotes.md | 4 +++ .../ExcelDataReaderTests.cs | 21 +++++++++++++ .../ExcelDataWriterTests.cs | 13 +++++--- .../ExternalDataTests.cs | 2 ++ .../Sylvan.Data.Excel.Tests.csproj | 2 +- .../Sylvan.Data.Excel.csproj | 2 +- .../Xlsb/XlsbWorkbookReader.cs | 17 ++++++++-- .../Sylvan.Data.Excel/Xlsx/SheetNameTable.cs | 3 +- .../Xlsx/XlsxWorkbookReader.cs | 31 +++++++++++++------ 9 files changed, 75 insertions(+), 20 deletions(-) diff --git a/docs/ReleaseNotes.md b/docs/ReleaseNotes.md index a5b1a42..77297cf 100644 --- a/docs/ReleaseNotes.md +++ b/docs/ReleaseNotes.md @@ -1,5 +1,9 @@ # Sylvan.Data.Excel Release Notes +_0.4.15_ +- Fix a bug that prevented .xlsx reader from working on .NET Framework versions. +- Fix a bug where FieldRowCount would be incorrect on empty rows. + _0.4.14_ - Add `CompressionLevel` as a configuration option for ExcelDataWriter. Changes the default compression level to `Fastest`, which produces diff --git a/source/Sylvan.Data.Excel.Tests/ExcelDataReaderTests.cs b/source/Sylvan.Data.Excel.Tests/ExcelDataReaderTests.cs index 4d10a7e..ae44ed0 100644 --- a/source/Sylvan.Data.Excel.Tests/ExcelDataReaderTests.cs +++ b/source/Sylvan.Data.Excel.Tests/ExcelDataReaderTests.cs @@ -206,6 +206,27 @@ public void Gap() Assert.False(edr.Read()); } + [Fact] + public void GapRowFieldCount() + { + var file = GetFile("Gap"); + using var edr = ExcelDataReader.Create(file, noHeaders); + for (int i = 0; i < 41; i++) + { + Assert.True(edr.Read()); + + if (i % 10 == 0) + { + Assert.Equal(1, edr.RowFieldCount); + } + else + { + Assert.Equal(0, edr.RowFieldCount); + } + } + Assert.False(edr.Read()); + } + [Fact] public void MultiSheet() { diff --git a/source/Sylvan.Data.Excel.Tests/ExcelDataWriterTests.cs b/source/Sylvan.Data.Excel.Tests/ExcelDataWriterTests.cs index 9848608..ba6447e 100644 --- a/source/Sylvan.Data.Excel.Tests/ExcelDataWriterTests.cs +++ b/source/Sylvan.Data.Excel.Tests/ExcelDataWriterTests.cs @@ -22,7 +22,7 @@ protected override string GetFile(string name) return string.Format(FileFormat, name); } } - +#if NETCOREAPP1_0_OR_GREATER public class XlsbDataWriterTests : ExcelDataWriterTests { const string FileFormat = "{0}.xlsb"; @@ -34,6 +34,7 @@ protected override string GetFile(string name) return string.Format(FileFormat, name); } } +#endif public abstract class ExcelDataWriterTests { @@ -434,7 +435,11 @@ public void GuidData() i => { var b = new byte[16]; - Array.Fill(b, (byte)(i | i << 4)); + for (int x = 0; x < b.Length; x++) + { + b[i] = (byte)(i | i << 4); + + } return new { Name = "Id " + i, @@ -583,7 +588,7 @@ public override long GetChars(int ordinal, long dataOffset, char[] buffer, int b var dat = col1[row]; var count = Math.Min(length, dat.Length - (int)dataOffset); - dat.AsSpan((int)dataOffset, count).CopyTo(buffer.AsSpan(bufferOffset)); + Array.Copy(dat, dataOffset, buffer, bufferOffset, count); return count; } @@ -696,4 +701,4 @@ public override bool NextResult() } #endregion -} \ No newline at end of file +} diff --git a/source/Sylvan.Data.Excel.Tests/ExternalDataTests.cs b/source/Sylvan.Data.Excel.Tests/ExternalDataTests.cs index ed1899c..285eef9 100644 --- a/source/Sylvan.Data.Excel.Tests/ExternalDataTests.cs +++ b/source/Sylvan.Data.Excel.Tests/ExternalDataTests.cs @@ -18,7 +18,9 @@ public class ExternalDataTests public ExternalDataTests(ITestOutputHelper o) { this.o = o; +#if NETCOREAPP1_0_OR_GREATER Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); +#endif } public static IEnumerable GetInputs() diff --git a/source/Sylvan.Data.Excel.Tests/Sylvan.Data.Excel.Tests.csproj b/source/Sylvan.Data.Excel.Tests/Sylvan.Data.Excel.Tests.csproj index ed310c0..8d0d52f 100644 --- a/source/Sylvan.Data.Excel.Tests/Sylvan.Data.Excel.Tests.csproj +++ b/source/Sylvan.Data.Excel.Tests/Sylvan.Data.Excel.Tests.csproj @@ -1,7 +1,7 @@  - net6.0 + net6.0;net48 Sylvan.Data.Excel false true diff --git a/source/Sylvan.Data.Excel/Sylvan.Data.Excel.csproj b/source/Sylvan.Data.Excel/Sylvan.Data.Excel.csproj index fac4b46..d3bdeac 100644 --- a/source/Sylvan.Data.Excel/Sylvan.Data.Excel.csproj +++ b/source/Sylvan.Data.Excel/Sylvan.Data.Excel.csproj @@ -3,7 +3,7 @@ net6.0;netstandard2.1;netstandard2.0 latest - 0.4.14 + 0.4.15 A cross-platform .NET library for reading Excel data files. excel;xls;xlsx;xlsb;datareader enable diff --git a/source/Sylvan.Data.Excel/Xlsb/XlsbWorkbookReader.cs b/source/Sylvan.Data.Excel/Xlsb/XlsbWorkbookReader.cs index 80126aa..5217379 100644 --- a/source/Sylvan.Data.Excel/Xlsb/XlsbWorkbookReader.cs +++ b/source/Sylvan.Data.Excel/Xlsb/XlsbWorkbookReader.cs @@ -260,7 +260,11 @@ public override bool Read() { if (rowIndex <= parsedRowIndex) { - if (curFieldCount >= 0) + if (rowIndex < parsedRowIndex) + { + this.rowFieldCount = 0; + } + else { this.rowFieldCount = curFieldCount; this.curFieldCount = -1; @@ -272,19 +276,26 @@ public override bool Read() { var c = ParseRowValues(); if (c < 0) + { + this.rowFieldCount = 0; return false; + } if (c == 0) { continue; } + if (rowIndex < parsedRowIndex) + { + this.curFieldCount = c; + this.rowFieldCount = 0; + return true; + } return true; } } else if (state == State.Initialized) { - //this.rowFieldCount = this.curFieldCount; - //this.curFieldCount = 0; // after initizialization, the first record would already be in the buffer // if hasRows is true. if (hasRows) diff --git a/source/Sylvan.Data.Excel/Xlsx/SheetNameTable.cs b/source/Sylvan.Data.Excel/Xlsx/SheetNameTable.cs index 17890ab..460fdf9 100644 --- a/source/Sylvan.Data.Excel/Xlsx/SheetNameTable.cs +++ b/source/Sylvan.Data.Excel/Xlsx/SheetNameTable.cs @@ -7,7 +7,8 @@ namespace Sylvan.Data.Excel.Xlsx; // these name tables avoid having to compute hashes on the // most common element names, and ensures that the strings // map the the const string so that equality will be ref -// equality to the const values in our code. +// equality to the const values in our code. NOTE: cannot use +// ReferenceEquals however, because this doesn't apply in NET4.8 sealed class SheetNameTable : NameTable { public override string Add(char[] key, int start, int len) diff --git a/source/Sylvan.Data.Excel/Xlsx/XlsxWorkbookReader.cs b/source/Sylvan.Data.Excel/Xlsx/XlsxWorkbookReader.cs index 72785ff..1900a4b 100644 --- a/source/Sylvan.Data.Excel/Xlsx/XlsxWorkbookReader.cs +++ b/source/Sylvan.Data.Excel/Xlsx/XlsxWorkbookReader.cs @@ -36,11 +36,11 @@ sealed class XlsxWorkbookReader : ExcelDataReader int rowIndex; int parsedRowIndex = -1; + // the number of fields in the parsedRowIndex. int curFieldCount = -1; public override ExcelWorkbookType WorkbookType => ExcelWorkbookType.ExcelXml; - const string DefaultWorkbookPartName = "xl/workbook.xml"; public XlsxWorkbookReader(Stream iStream, ExcelDataReaderOptions opts) : base(iStream, opts) @@ -320,7 +320,7 @@ bool NextRow() } else { - this.parsedRowIndex++; + this.parsedRowIndex = -1; } reader.MoveToElement(); return true; @@ -377,7 +377,11 @@ public override bool Read() { if (rowIndex <= parsedRowIndex) { - if (curFieldCount >= 0) + if (rowIndex < parsedRowIndex) + { + this.rowFieldCount = 0; + } + else { this.rowFieldCount = curFieldCount; this.curFieldCount = -1; @@ -389,8 +393,15 @@ public override bool Read() var c = ParseRowValues(); if (c == 0) { + // handles trailing empty rows. continue; } + if (rowIndex < parsedRowIndex) + { + this.curFieldCount = c; + this.rowFieldCount = 0; + return true; + } return true; } } @@ -439,7 +450,7 @@ int ParseRowValues() while (reader.MoveToNextAttribute()) { var n = reader.Name; - if (ReferenceEquals(n, "r")) + if (n == "r") { len = reader.ReadValueChunk(valueBuffer, 0, valueBuffer.Length); if (CellPosition.TryParse(valueBuffer.AsSpan().ToParsable(0, len), out var pos)) @@ -448,13 +459,13 @@ int ParseRowValues() } } else - if (ReferenceEquals(n, "t")) + if (n == "t") { len = reader.ReadValueChunk(valueBuffer, 0, valueBuffer.Length); type = GetCellType(valueBuffer, len); } else - if (ReferenceEquals(n, "s")) + if (n == "s") { len = reader.ReadValueChunk(valueBuffer, 0, valueBuffer.Length); if (!TryParse(valueBuffer.AsSpan().ToParsable(0, len), out xfIdx)) @@ -679,7 +690,7 @@ static bool ReadToFollowing(XmlReader reader, string localName) { while (reader.Read()) { - if (reader.NodeType == XmlNodeType.Element && ReferenceEquals(localName, reader.LocalName)) + if (reader.NodeType == XmlNodeType.Element && localName == reader.LocalName) { return true; } @@ -692,7 +703,7 @@ static bool ReadToNextSibling(XmlReader reader, string localName) while (SkipSubtree(reader)) { XmlNodeType nodeType = reader.NodeType; - if (nodeType == XmlNodeType.Element && ReferenceEquals(localName, reader.LocalName)) + if (nodeType == XmlNodeType.Element && localName == reader.LocalName) { return true; } @@ -721,7 +732,7 @@ static bool ReadToDescendant(XmlReader reader, string localName) } while (reader.Read() && reader.Depth > num) { - if (reader.NodeType == XmlNodeType.Element && ReferenceEquals(localName, reader.LocalName)) + if (reader.NodeType == XmlNodeType.Element && localName == reader.LocalName) { return true; } @@ -813,7 +824,7 @@ void LoadSharedStrings(ZipArchiveEntry? entry) while (reader.Read()) { - if (reader.NodeType == XmlNodeType.Element && ReferenceEquals(reader.LocalName, "si")) + if (reader.NodeType == XmlNodeType.Element && reader.LocalName == "si") { var str = ReadString(reader); sstList.Add(str);