Skip to content

Commit

Permalink
Xls label fix (#183)
Browse files Browse the repository at this point in the history
* Fix label (pre-biff8) reading
* Add support for reading the "Book" entry used by Excel 95 (xl95) files.
* Fix reading label (String) values in biff5 (xl95)
* Fix reading rich strings in xl95.
  • Loading branch information
MarkPflug authored Aug 14, 2024
1 parent 15d3f4b commit e458b57
Show file tree
Hide file tree
Showing 6 changed files with 127 additions and 82 deletions.
3 changes: 3 additions & 0 deletions docs/ReleaseNotes.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Sylvan.Data.Excel Release Notes

_0.4.25_
- Fix some issues with reading Excel 95 .xls files.

_0.4.24_
- Fix for reading certain .xls files.
- Handle writing NaN and infinity values.
Expand Down
28 changes: 27 additions & 1 deletion source/Sylvan.Data.Excel.Tests/ExternalDataTests.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#if NETCOREAPP3_0_OR_GREATER

using Sylvan.Data.Csv;
using System;
using System.Collections.Generic;
using System.Diagnostics;
Expand Down Expand Up @@ -231,7 +232,6 @@ public void GetValue(string path)
GetErrorAsNull = true
};
var edr = ExcelDataReader.Create(path, opts);

do
{
while (edr.Read())
Expand All @@ -243,6 +243,32 @@ public void GetValue(string path)
}
} while (edr.NextResult());
}

/// <summary>
/// Reads every worksheet of the given Excel file and writes each one to a CSV file
/// named "{filename}-{worksheet name}.csv".
/// </summary>
/// <param name="filename">
/// Path of the Excel file, relative to the path returned by GetRootPath().
/// A null value makes the test a no-op.
/// </param>
[Theory]
[MemberData(nameof(GetExcelFiles))]
public void ToCsv(string filename)
{
    if (filename == null) return;

    var root = GetRootPath();
    var path = Path.Combine(root, filename);

    var opts = new ExcelDataReaderOptions
    {
        Schema = ExcelSchema.NoHeaders,
        GetErrorAsNull = true
    };

    // Dispose the reader when the test ends so the input file is released.
    using var edr = ExcelDataReader.Create(path, opts);

    do
    {
        var outPath = $"{filename}-{edr.WorksheetName}.csv";
        var dir = Path.GetDirectoryName(outPath);

        // GetDirectoryName yields null or an empty string when outPath has no
        // directory part; CreateDirectory rejects both, so only call it when needed.
        if (!string.IsNullOrEmpty(dir))
        {
            Directory.CreateDirectory(dir);
        }

        // Write to the path computed above instead of rebuilding the same string.
        using var w = CsvDataWriter.Create(outPath);
        w.Write(edr.AsVariableField(e => e.RowFieldCount));
    } while (edr.NextResult());
}
}

#endif
1 change: 1 addition & 0 deletions source/Sylvan.Data.Excel/ExcelDataReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,7 @@ public sealed override object GetValue(int ordinal)
var kind = fmt?.Kind ?? FormatKind.Number;
switch (kind)
{
case FormatKind.String:
case FormatKind.Number:
var doubleValue = GetDouble(ordinal);
unchecked
Expand Down
2 changes: 1 addition & 1 deletion source/Sylvan.Data.Excel/Sylvan.Data.Excel.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<PropertyGroup>
<TargetFrameworks>net6.0;netstandard2.1;netstandard2.0</TargetFrameworks>
<LangVersion>latest</LangVersion>
<VersionPrefix>0.4.24</VersionPrefix>
<VersionPrefix>0.4.25</VersionPrefix>
<Description>A cross-platform .NET library for reading Excel data files.</Description>
<PackageTags>excel;xls;xlsx;xlsb;datareader</PackageTags>
<Nullable>enable</Nullable>
Expand Down
115 changes: 48 additions & 67 deletions source/Sylvan.Data.Excel/Xls/XlsWorkbookReader+RecordReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -115,57 +115,7 @@ public int ReadInt32()
{
return ReadByte() | ReadByte() << 8 | ReadByte() << 16 | ReadByte() << 24;
}

/// <summary>
/// Reads a string that is prefixed with a 16-bit character count and an option byte,
/// then skips the trailing data that the option byte announces.
/// </summary>
/// <returns>The decoded string.</returns>
/// <exception cref="InvalidDataException">
/// The character count is negative, or more data is required and the next record
/// is missing or is not a Continue record.
/// </exception>
public string ReadString16()
{
    // When the current record is exhausted, the string must start in a Continue record.
    bool atRecordEnd = bufferPos >= recordOff + recordLen;
    if (atRecordEnd && !(NextRecord() && Type == RecordType.Continue))
    {
        throw new InvalidDataException();
    }

    // The length is expressed in characters, not bytes.
    int charCount = ReadInt16();
    if (charCount < 0)
    {
        throw new InvalidDataException();
    }

    byte flags = ReadByte();
    bool isCompressed = (flags & 0x01) == 0;
    bool isAsian = (flags & 0x04) != 0;
    bool isRich = (flags & 0x08) != 0;

    // The rich count (16-bit) precedes the asian count (32-bit) when both are present.
    int richEntries = isRich ? ReadInt16() : 0;
    int asianBytes = isAsian ? ReadInt32() : 0;

    var text = ReadStringBuffer(charCount, isCompressed);

    // Skip the trailing data: four bytes per rich entry plus the asian byte count.
    // The skip may span several Continue records.
    var pending = richEntries * 4 + asianBytes;
    while (pending > 0)
    {
        var available = recordOff + recordLen - bufferPos;
        var step = Math.Min(pending, available);
        pending -= step;
        bufferPos += step;
        Assert();

        if (pending > 0 && !(NextRecord() && Type == RecordType.Continue))
        {
            throw new InvalidDataException();
        }
    }

    return text;
}


// Encoding for code page 1252, created once and shared by all uses in this type.
static readonly Encoding Encoding1252 = Encoding.GetEncoding(1252);

Expand Down Expand Up @@ -238,20 +188,44 @@ internal string ReadStringBuffer(int charCount, bool compressed)

/// <summary>
/// Reads a length-prefixed string: the length is a single byte when
/// <paramref name="lenSize"/> is 1 and a 16-bit value otherwise, and the characters
/// are then read through ReadStringBuffer with the compressed flag set.
/// </summary>
/// <param name="lenSize">Size of the length prefix; 1 selects a one-byte prefix, any other value a 16-bit prefix.</param>
/// <returns>The string that was read.</returns>
public string ReadByteString(int lenSize)
{
// NOTE(review): this view shows the removed and the added diff lines together,
// which is why "len" is declared twice and there are two return statements.
int len;
if (lenSize == 1)
len = ReadByte();
else
len = ReadInt16();

ReadStringBuffer(len, true);
var str = new string(strBuffer, 0, len);
return str;
int len =
lenSize == 1
? ReadByte()
: ReadInt16();

return ReadStringBuffer(len, true);
}

/// <summary>
/// Reads a string whose character count is stored in a single byte, delegating the
/// rest of the decoding to ReadString.
/// </summary>
/// <returns>The string that was read.</returns>
public string ReadString8()
{
// NOTE(review): the first "len" declaration is the removed diff line; the lines
// after it are the added replacement.
int len = ReadByte();
// Move to the following Continue record first if the current record is exhausted.
MaybeContinueString();
var len = ReadByte();
return ReadString(len);
}

/// <summary>
/// Reads a string whose character count is stored as a 16-bit value, delegating the
/// rest of the decoding to ReadString.
/// </summary>
/// <returns>The string that was read.</returns>
public string ReadString16()
{
    // Move to the following Continue record first if the current record is exhausted.
    MaybeContinueString();
    return ReadString(ReadInt16());
}

/// <summary>
/// Advances to the next record when the read position has reached the end of the
/// current record, and requires that record to be a Continue record.
/// </summary>
/// <exception cref="InvalidDataException">
/// The current record is exhausted and there is no next record, or the next record
/// is not a Continue record.
/// </exception>
void MaybeContinueString()
{
    // Nothing to do while the current record still has unread data.
    bool dataRemains = bufferPos < recordOff + recordLen;
    if (dataRemains)
    {
        return;
    }

    bool continued = NextRecord() && Type == RecordType.Continue;
    if (!continued)
    {
        throw new InvalidDataException();
    }
}

public string ReadString(int len)
{
if (len < 0)
{
throw new InvalidDataException();
}
byte options = ReadByte();

bool compressed = (options & 0x01) == 0;
Expand All @@ -268,14 +242,21 @@ public string ReadString8()

var str = ReadStringBuffer(len, compressed);

for (int i = 0; i < richCount; i++)
{
ReadInt32();
}
var remain = richCount * 4 + asianCount;

for (int i = 0; i < asianCount; i++)
while (remain > 0)
{
ReadByte();
var avail = recordOff + recordLen - bufferPos;
var c = Math.Min(remain, avail);
remain -= c;
bufferPos += c;
Assert();
if (remain > 0)
{
var next = NextRecord();
if (!next || Type != RecordType.Continue)
throw new InvalidDataException();
}
}

return str;
Expand Down
60 changes: 47 additions & 13 deletions source/Sylvan.Data.Excel/Xls/XlsWorkbookReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ public XlsSheetInfo(string name, int offset, bool hidden) : base(name, hidden)
internal XlsWorkbookReader(Stream stream, ExcelDataReaderOptions options) : base(stream, options)
{
var pkg = new Ole2Package(stream);
var part = pkg.GetEntry("Workbook\0");
var part =
pkg.GetEntry("Workbook\0") ??
pkg.GetEntry("Book\0");

if (part == null)
throw new InvalidDataException();
var ps = part.Open();
Expand Down Expand Up @@ -254,15 +257,10 @@ int ParseXF()
void ParseFormat()
{
int ifmt = reader.ReadInt16();
string str;
if (biffVersion == 0x0500)
{
str = reader.ReadByteString(1);
}
else
{
str = reader.ReadString16();
}
string str =
biffVersion == 0x0500
? reader.ReadByteString(1)
: reader.ReadString16();

if (formats.ContainsKey(ifmt))
{
Expand Down Expand Up @@ -310,7 +308,38 @@ void ParseLabel()
int rowIdx = reader.ReadUInt16();
int colIdx = reader.ReadUInt16();
int xfIdx = reader.ReadUInt16();
string str = reader.ReadByteString(2);
int len = reader.ReadInt16();
if (len > 255) throw new InvalidDataException();
bool compressed = true;
if (biffVersion == 0x0500)
{
// apparently there are no flags in this version
}
else
{
byte flags = reader.ReadByte();
compressed = (flags & 1) == 0;
}

var str = reader.ReadStringBuffer(len, compressed);
SetRowData(colIdx, new FieldInfo(str));
}

/// <summary>
/// Parses an RString record: reads the cell position and the string value, skips the
/// formatting information that follows the text, and stores the string as the value
/// of the cell's column in the current row data.
/// </summary>
/// <exception cref="InvalidDataException">The record declares a negative string length.</exception>
void ParseRString()
{
    // The row and XF indexes are not used here, but they must still be read so the
    // reader advances past them.
    reader.ReadUInt16(); // row index
    int colIdx = reader.ReadUInt16();
    reader.ReadUInt16(); // XF index

    int len = reader.ReadInt16();
    // Reject a corrupt (negative) length up front rather than passing it on to
    // ReadStringBuffer.
    if (len < 0)
    {
        throw new InvalidDataException();
    }

    var str = reader.ReadStringBuffer(len, true);

    // consume the formatting info: a one-byte count followed by that many 16-bit entries
    int formatCount = reader.ReadByte();
    for (int i = 0; i < formatCount; i++)
    {
        reader.ReadUInt16();
    }

    SetRowData(colIdx, new FieldInfo(str));
}

Expand Down Expand Up @@ -470,7 +499,10 @@ int NextRow()
}
else
{
throw new InvalidDataException();
peekRow = (ushort)(rowIndex + 1);
pendingRow = peekRow;
return 0;
//throw new InvalidDataException();
}
}
break;
Expand Down Expand Up @@ -516,10 +548,12 @@ int NextRow()
case RecordType.Formula:
ParseFormula();
break;
case RecordType.RString:
ParseRString();
break;
case RecordType.Blank:
case RecordType.BoolErr:
case RecordType.MulBlank:
case RecordType.RString:
break;
case RecordType.Array:
case RecordType.SharedFmla:
Expand Down

0 comments on commit e458b57

Please sign in to comment.