Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MM-55972/MM-59809 Simplify parsing of table delimiters and improve how whitespace in tables is handled #19

Merged
merged 2 commits into from
Aug 26, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 59 additions & 14 deletions dist/commonmark.js
Original file line number Diff line number Diff line change
Expand Up @@ -14134,12 +14134,8 @@

var reLineEnding = /\r\n|\n|\r/;

var reTableDelimiter = /^[ \t]{0,3}((?:\|[ \t]*)?:?-+:?[ \t]*(?:\|(?:[ \t]*:?-+:?[ \t]*)?)*\|?)$/;

var reTableRow = /^(\|?)(?:(?:\\\||[^|])*\|?)+$/;

var reTablePipeSpaceEnding = /\|\s+$/;

var MAX_AUTOCOMPLETED_CELLS = 1000;

// Returns true if string contains only space characters.
Expand All @@ -14159,10 +14155,6 @@
}
};

var trimSpacesAfterPipe = function(ln) {
return ln.replace(reTablePipeSpaceEnding,"|");
};

// DOC PARSER

// These are methods of a Parser object, defined below.
Expand Down Expand Up @@ -14854,9 +14846,12 @@
// table
function(parser, container) {
if (container.type !== "document") {
// @hmhealey We should be able to have a table inside of a list item or block quote to match GitHub
// but I'm not sure if the mobile app can render that, so let's not bother with it for now
return 0;
}


if (parser.indented) {
return 0;
}
Expand All @@ -14866,20 +14861,32 @@
return 0;
}

const nextColumn = measureNonspaceColumn(parser.nextLine);
if (nextColumn - parser.column >= CODE_INDENT || parser.column - nextColumn >= CODE_INDENT) {
// the delimiter row must be on the same indentation level as the header row
return 0;
}

parser.advanceNextNonspace();

// check for a delimiter first since it's stricter than the header row
const nextLine = trimSpacesAfterPipe(parser.nextLine);
var delimiterMatch = reTableDelimiter.exec(nextLine);
if (!delimiterMatch || delimiterMatch[0].indexOf("|") === -1) {
const nextLine = parser.nextLine.trim();
const delimiterMatch = reTableRow.exec(nextLine);
if (!delimiterMatch) {
return 0;
}

const currentLine = trimSpacesAfterPipe(parser.currentLine);
var headerMatch = reTableRow.exec(currentLine.slice(parser.nextNonspace));
const delimiterCells = parseDelimiterRow(delimiterMatch[0]);
if (!delimiterCells) {
return 0;
}

const currentLine = parser.currentLine.slice(parser.nextNonspace).trim();
var headerMatch = reTableRow.exec(currentLine);
if (!headerMatch) {
return 0;
}

var delimiterCells = parseTableCells(delimiterMatch[1]);
var headerCells = parseTableCells(headerMatch[0]);

if (delimiterCells.length !== headerCells.length) {
Expand Down Expand Up @@ -14939,6 +14946,23 @@
}
];

const reValidTableDelimiter = /^:?-+:?$/;
const parseDelimiterRow = function(row) {
if (row.indexOf("|") === -1) {
return null;
}

const cells = parseTableCells(row).map((cell) => cell.trim());

for (const cell of cells) {
if (!reValidTableDelimiter.test(cell)) {
return null;
}
}

return cells;
};

var parseTableCells = function(row) {
// remove starting pipe to make life easier
row = row.replace(/^\|/, "");
Expand Down Expand Up @@ -15028,6 +15052,27 @@
this.indented = this.indent >= CODE_INDENT;
};

function measureNonspaceColumn(line) {
// This code is copied from findNextNonspace above
var i = 0;
var cols = 0;
var c;

while ((c = line.charAt(i)) !== "") {
if (c === " ") {
i++;
cols++;
} else if (c === "\t") {
i++;
cols += 4 - (cols % 4);
} else {
break;
}
}

return cols;
}

// Analyze a line of text and update the document appropriately.
// We parse markdown text by calling this on each line of input,
// then finalizing the document.
Expand Down
2 changes: 1 addition & 1 deletion dist/commonmark.min.js

Large diffs are not rendered by default.

73 changes: 59 additions & 14 deletions lib/blocks.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,8 @@ var reSetextHeadingLine = /^(?:=+|-+)[ \t]*$/;

var reLineEnding = /\r\n|\n|\r/;

var reTableDelimiter = /^[ \t]{0,3}((?:\|[ \t]*)?:?-+:?[ \t]*(?:\|(?:[ \t]*:?-+:?[ \t]*)?)*\|?)$/;

var reTableRow = /^(\|?)(?:(?:\\\||[^|])*\|?)+$/;

var reTablePipeSpaceEnding = /\|\s+$/;

var MAX_AUTOCOMPLETED_CELLS = 1000;

// Returns true if string contains only space characters.
Expand All @@ -80,10 +76,6 @@ var peek = function(ln, pos) {
}
};

var trimSpacesAfterPipe = function(ln) {
return ln.replace(reTablePipeSpaceEnding,"|");
}

// DOC PARSER

// These are methods of a Parser object, defined below.
Expand Down Expand Up @@ -775,9 +767,12 @@ var blockStarts = [
// table
function(parser, container) {
if (container.type !== "document") {
// @hmhealey We should be able to have a table inside of a list item or block quote to match GitHub
// but I'm not sure if the mobile app can render that, so let's not bother with it for now
return 0;
}


if (parser.indented) {
return 0;
}
Expand All @@ -787,20 +782,32 @@ var blockStarts = [
return 0;
}

const nextColumn = measureNonspaceColumn(parser.nextLine);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ensuring that the second line of the table is near the same indentation level as the first line was previously part of reTableDelimiter, but it needs to be done manually now

if (nextColumn - parser.column >= CODE_INDENT || parser.column - nextColumn >= CODE_INDENT) {
hmhealey marked this conversation as resolved.
Show resolved Hide resolved
// the delimiter row must be on the same indentation level as the header row
return 0;
}

parser.advanceNextNonspace();

// check for a delimiter first since it's stricter than the header row
const nextLine = trimSpacesAfterPipe(parser.nextLine);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

trimSpacesAfterPipe was added as a fix for https://mattermost.atlassian.net/browse/MM-13516. Before the change I commented on above, we could only trim the trailing whitespace before passing the line into reTableDelimiter. With the whitespace handled separately, we can just use a regular old .trim to get rid of that whitespace

var delimiterMatch = reTableDelimiter.exec(nextLine);
if (!delimiterMatch || delimiterMatch[0].indexOf("|") === -1) {
const nextLine = parser.nextLine.trim();
const delimiterMatch = reTableRow.exec(nextLine);
if (!delimiterMatch) {
return 0;
}

const currentLine = trimSpacesAfterPipe(parser.currentLine);
var headerMatch = reTableRow.exec(currentLine.slice(parser.nextNonspace));
const delimiterCells = parseDelimiterRow(delimiterMatch[0]);
if (!delimiterCells) {
return 0;
}

const currentLine = parser.currentLine.slice(parser.nextNonspace).trim();
var headerMatch = reTableRow.exec(currentLine);
if (!headerMatch) {
return 0;
}

var delimiterCells = parseTableCells(delimiterMatch[1]);
var headerCells = parseTableCells(headerMatch[0]);

if (delimiterCells.length !== headerCells.length) {
Expand Down Expand Up @@ -860,6 +867,23 @@ var blockStarts = [
}
];

const reValidTableDelimiter = /^:?-+:?$/;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the delimiter row to be valid, all of the cells in it must be made up of a string of dashes with a colon before or after to change the alignment of that row. They can start and end with whitespace, but that's removed before checking with the regex

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All the other regexes are between line 14095-14137. Should we put this one there too?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good call. I started defining the regexes with their corresponding functions in lib/inlines.js, but I guess I never did that for lib/blocks.js

const parseDelimiterRow = function(row) {
if (row.indexOf("|") === -1) {
return null;
}

const cells = parseTableCells(row).map((cell) => cell.trim());

for (const cell of cells) {
if (!reValidTableDelimiter.test(cell)) {
return null;
}
}

return cells;
}

var parseTableCells = function(row) {
// remove starting pipe to make life easier
row = row.replace(/^\|/, "");
Expand Down Expand Up @@ -949,6 +973,27 @@ var findNextNonspace = function() {
this.indented = this.indent >= CODE_INDENT;
};

function measureNonspaceColumn(line) {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could've extracted this from the above method, but I'm hesitant about modifying the existing functions as much as possible to avoid future conflicts, so I just copied this out

// This code is copied from findNextNonspace above
var i = 0;
var cols = 0;
var c;

while ((c = line.charAt(i)) !== "") {
if (c === " ") {
i++;
hmhealey marked this conversation as resolved.
Show resolved Hide resolved
cols++;
} else if (c === "\t") {
i++;
cols += 4 - (cols % 4);
hmhealey marked this conversation as resolved.
Show resolved Hide resolved
} else {
break;
}
}

return cols;
}

// Analyze a line of text and update the document appropriately.
// We parse markdown text by calling this on each line of input,
// then finalizing the document.
Expand Down
Loading
Loading