Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: rewrite renderer parser #58

Merged
merged 5 commits into from
May 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions src/aliasRegex.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,28 @@
export const allowedAliasCharacters = "\\w\\-\\_\\+\\*\\(\\)\\!#&åô’çéãí“”,";
export const startOfURL = "https?\\S*";
import asciiAliases from "../data/asciiAliases";
import flatten from "lodash.flatten";

import { escapeStringToBeUsedInRegExp } from "./utils";

const allowedAliasCharacters = "\\w\\-\\_\\+\\*\\(\\)\\!#&åô’çéãí“”,";
const startOfURL = "https?\\S*";

// Collect, for every alias name, its ascii spellings escaped for RegExp use.
const escapedAsciiGroups = [];
for (const aliasName of Object.keys(asciiAliases)) {
  escapedAsciiGroups.push(
    asciiAliases[aliasName].map(escapeStringToBeUsedInRegExp)
  );
}

// Descending order places a longer spelling ahead of any spelling that is its
// prefix, so the alternation tries the most specific match first.
const names = flatten(escapedAsciiGroups)
  .sort()
  .reverse()
  .join("|");

// Alternation of edge-case patterns (currently only the start of a URL).
const edgeCases = [startOfURL].join("|");

/**
 * Builds a new RegExp, created with the "g" flag on every call, for locating
 * alias candidates in a string.
 *
 * NOTE(review): this block is taken from a rendered diff and shows two
 * `return` statements. As written, only the first one can execute; the second
 * appears to be its replacement — confirm against the merged file.
 *
 * @returns {RegExp} a fresh global RegExp instance.
 */
function getAliasesRegex() {
// Pattern: optional `startOfURL` group, then ":" + one or more allowed
// alias characters + ":".
return new RegExp(`(${startOfURL})?:([${allowedAliasCharacters}]+):`, "g");
// Pattern: optional `edgeCases` group, then either one of `names` or a
// single ":", then an optional group of zero or more allowed alias
// characters followed by ":".
return new RegExp(
// edge cases will be skipped
// match both (and later distinguish between)
// * ascii aliases like :o
// * full emoji like :open_mouth:
`(${edgeCases})?(${names}|:)([${allowedAliasCharacters}]*:)?`,
"g"
);
}

export default getAliasesRegex;
26 changes: 0 additions & 26 deletions src/asciiRegex.js

This file was deleted.

109 changes: 38 additions & 71 deletions src/renderer.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import classnames from "classnames";
import replace from "string-replace-to-array";
import emojiRegex from "emoji-regex";

import asciiRegex from "./asciiRegex";
import aliasRegex from "./aliasRegex";

import normalizeProtocol from "./normalizeProtocol";
Expand All @@ -13,8 +12,6 @@ import unicodeToCodepoint from "./unicodeToCodepoint";
import aliases from "../data/aliases";
import asciiAliases from "../data/asciiAliases";

const asciiAliasesRegex = asciiRegex();
const aliasesRegex = aliasRegex();
const unicodeEmojiRegex = emojiRegex();

// using em's we can ensure size matches surrounding font
Expand All @@ -25,6 +22,14 @@ const style = {
verticalAlign: "-0.1em",
};

// Lookup from an ascii spelling to the value stored in `aliases` under the
// alias name that spelling belongs to. A lone ":" maps to itself.
const asciiToAlias = { ":": ":" };

Object.keys(asciiAliases).forEach(aliasName => {
  const replacement = aliases[aliasName];
  for (const asciiForm of asciiAliases[aliasName]) {
    asciiToAlias[asciiForm] = replacement;
  }
});

export function toArray(text, options = {}) {
const protocol = normalizeProtocol(options.protocol);

Expand Down Expand Up @@ -60,80 +65,42 @@ export function toArray(text, options = {}) {
);
}

function replaceAsciiAliases(...match) {
const asciiAliasKeys = Object.keys(asciiAliases);

const fullMatch = match[0];
const edgeCase = match[1];
const asciiAlias = match[2];
const maybeBiggerAliasCharacters = match[3];

for (let i in asciiAliasKeys) {
const alias = asciiAliasKeys[i];
const data = asciiAliases[alias];

if (data.includes(asciiAlias)) {
const isEdgeCase = edgeCase !== undefined;

if (isEdgeCase) {
return fullMatch; // do nothing
}

const isMaybePartOfBiggerAlias =
maybeBiggerAliasCharacters !== undefined;

if (!isMaybePartOfBiggerAlias) {
return aliases[alias]; // replace with unicode
} else if (fullMatch[0] === ":") {
const fullMatchContent = fullMatch.slice(1, -1); // remove ":" at the beginning and end
const isPartOfBiggerAlias = aliases[fullMatchContent] !== undefined; // ":" + fullMatchContent + ":" alias doesn't exist

if (isPartOfBiggerAlias) {
return fullMatch; // do nothing
}
function replaceAliases(text) {
const regex = aliasRegex();
const textWithEmoji = [];
let match, pos = 0;

while (match = regex.exec(text)) {
const [edgeCase, asciiAlias, fullEmoji] = match.slice(1, 4);
// possible full emoji like :open_mouth:
const emoji = aliases[(asciiAlias + fullEmoji).slice(1, -1)];
if (match.index > pos) {
// text between matches
textWithEmoji.push(text.slice(pos, match.index));
}
if (edgeCase) {
// verbatim matched text
textWithEmoji.push(match[0]);
} else if (asciiAlias[0] === ":" && fullEmoji && emoji) {
// full emoji
textWithEmoji.push(emoji);
} else {
// ascii alias or ":"
textWithEmoji.push(asciiToAlias[asciiAlias]);
if (fullEmoji) {
// false positive, "go back" and don't skip that substring
regex.lastIndex -= fullEmoji.length;
}

return `${aliases[alias]}${maybeBiggerAliasCharacters}`; // also return matched characters afterwards to handle them in next iteration
}
}
}

function replaceAliases(...match) {
const fullMatch = match[0];
const edgeCase = match[1];
const alias = match[2];

if (edgeCase) {
return fullMatch;
}

const aliasEmoji = aliases[alias];

return aliasEmoji || fullMatch;
}

// We need to execute several times `string.replace` for cases for such as ":):)"
// As we are forced to match ":):" to check if it's a normal alias, the second colon is consumed and cannot match again
function replaceAllAsciiAliases(textWithAsciiAliases) {
let previousTextWithoutAsciiAliases = null;
let textWithoutAsciiAliases = textWithAsciiAliases;

while (previousTextWithoutAsciiAliases !== textWithoutAsciiAliases) {
previousTextWithoutAsciiAliases = textWithoutAsciiAliases;
textWithoutAsciiAliases = textWithoutAsciiAliases.replace(
asciiAliasesRegex,
replaceAsciiAliases
);
pos = regex.lastIndex;
}

return textWithoutAsciiAliases;
// text after last match (if any)
textWithEmoji.push(text.slice(pos));
return textWithEmoji.join("");
}

let replacedText = text;
replacedText = replacedText.replace(aliasesRegex, replaceAliases);
replacedText = replaceAllAsciiAliases(replacedText);
replacedText = replacedText.replace(aliasesRegex, replaceAliases);
return replace(replacedText, unicodeEmojiRegex, replaceUnicodeEmoji);
return replace(replaceAliases(text), unicodeEmojiRegex, replaceUnicodeEmoji);
}

export default function Emoji({
Expand Down