diff --git a/README.md b/README.md index b069d40..30759ee 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [![Join the chat at https://gitter.im/hexenq/kuroshiro](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/hexenq/kuroshiro) [![License](https://img.shields.io/github/license/lassjs/lass.svg)](LICENSE) -kuroshiro is a Japanese language library for converting Japanese sentence to Hiragana, Katakana or Romaji with furigana and okurigana modes supported. +kuroshiro is a Japanese language library for converting Japanese sentences to Hiragana, Katakana or Romaji with furigana, okurigana, and raw modes supported. *Read this in other languages: [English](README.md), [日本語](README.jp.md), [简体中文](README.zh-cn.md), [繁體中文](README.zh-tw.md).* @@ -17,7 +17,7 @@ You can check the demo [here](https://kuroshiro.org/#demo). ## Feature - Japanese Sentence => Hiragana, Katakana or Romaji -- Furigana and okurigana supported +- Furigana, okurigana, and raw supported - 🆕Multiple morphological analyzers supported - 🆕Multiple romanization systems supported - Useful Japanese utils @@ -132,7 +132,7 @@ __Arguments__ | Options | Type | Default | Description | |---|---|---|---| | to | String | "hiragana" | Target syllabary [`hiragana`, `katakana`, `romaji`] | -| mode | String | "normal" | Convert mode [`normal`, `spaced`, `okurigana`, `furigana`] | +| mode | String | "normal" | Convert mode [`normal`, `spaced`, `okurigana`, `furigana`, `raw`] | | romajiSystem* | String | "hepburn" | Romanization system [`nippon`, `passport`, `hepburn`] | | delimiter_start | String | "(" | Delimiter(Start) | | delimiter_end | String | ")" | Delimiter(End) | @@ -159,6 +159,28 @@ await kuroshiro.convert("感じ取れたら手を繋ごう、重なるのは人 // result: 感(かん)じ取(と)れたら手(て)を繋(つな)ごう、重(かさ)なるのは人生(じんせい)のライン and レミリア最高(さいこう)! ``` +```js +// raw +await kuroshiro.convert("感じ取れたら手を繋ごう、重なるのは人生のライン and レミリア最高!", {mode:"raw", to:"hiragana"}); +// result: +// [ +// { text: '感', reading: 'かん' }, +// { text: 'じ' }, +// { text: '取', reading: 'と' }, +// { text: 'れたら' }, +// { text: '手', reading: 'て' }, +// { text: 'を' }, +// { text: '繋', reading: 'つな' }, +// { text: 'ごう、' }, +// { text: '重', reading: 'かさ' }, +// { text: 'なるのは' }, +// { text: '人生', reading: 'じんせい' }, +// { text: 'のライン and レミリア' }, +// { text: '最高', reading: 'さいこう' }, +// { text: '!' } +// ] +``` +
// furigana await kuroshiro.convert("感じ取れたら手を繋ごう、重なるのは人生のライン and レミリア最高!", {mode:"furigana", to:"hiragana"}); diff --git a/src/core.js b/src/core.js index 74d1222..400f488 100644 --- a/src/core.js +++ b/src/core.js @@ -63,13 +63,19 @@ class Kuroshiro { * @param {string} str Given String * @param {Object} [options] Settings Object * @param {string} [options.to="hiragana"] Target syllabary ["hiragana"|"katakana"|"romaji"] - * @param {string} [options.mode="normal"] Convert mode ["normal"|"spaced"|"okurigana"|"furigana"] + * @param {string} [options.mode="normal"] Convert mode ["normal"|"spaced"|"okurigana"|"furigana"|"raw"] * @param {string} [options.romajiSystem="hepburn"] Romanization System ["nippon"|"passport"|"hepburn"] * @param {string} [options.delimiter_start="("] Delimiter(Start) * @param {string} [options.delimiter_end=")"] Delimiter(End) * @returns {Promise} Promise object represents the result of conversion */ async convert(str, options) { + function flushPendingText(rawResult, pendingText) { + if (pendingText) { + rawResult.push({ text: pendingText }); + } + } + options = options || {}; options.to = options.to || "hiragana"; options.mode = options.mode || "normal"; @@ -82,7 +88,7 @@ class Kuroshiro { throw new Error("Invalid Target Syllabary."); } - if (["normal", "spaced", "okurigana", "furigana"].indexOf(options.mode) === -1) { + if (["normal", "spaced", "okurigana", "furigana", "raw"].indexOf(options.mode) === -1) { throw new Error("Invalid Conversion Mode."); } @@ -164,7 +170,7 @@ class Kuroshiro { throw new Error("Unknown option.to param"); } } - else if (options.mode === "okurigana" || options.mode === "furigana") { + else if (options.mode === "okurigana" || options.mode === "furigana" || options.mode === "raw") { const notations = []; // [basic, basic_type[1=kanji,2=kana,3=others], notation, pronunciation] for (let i = 0; i < tokens.length; i++) { const strType = getStrType(tokens[i].surface_form); @@ -225,71 +231,123 @@ class Kuroshiro { throw new Error("Unknown strType"); } } - let result = ""; + const rawResult = []; + let stringResult = ""; switch (options.to) { case "katakana": if (options.mode === "okurigana") { for (let n0 = 0; n0 < notations.length; n0++) { if (notations[n0][1] !== 1) { - result += notations[n0][0]; + stringResult += notations[n0][0]; } else { - result += notations[n0][0] + options.delimiter_start + toRawKatakana(notations[n0][2]) + options.delimiter_end; + stringResult += notations[n0][0] + options.delimiter_start + toRawKatakana(notations[n0][2]) + options.delimiter_end; } } } - else { // furigana + else if (options.mode === "furigana") { for (let n1 = 0; n1 < notations.length; n1++) { if (notations[n1][1] !== 1) { - result += notations[n1][0]; + stringResult += notations[n1][0]; } else { - result += `${notations[n1][0]}`; + stringResult += `${notations[n1][0]}`; } } } - return result; + else { // raw + let pendingText = ""; + for (let n6 = 0; n6 < notations.length; n6++) { + if (notations[n6][1] !== 1) { + pendingText += notations[n6][0]; + } + else { + flushPendingText(rawResult, pendingText); + pendingText = ""; + rawResult.push({ + text: notations[n6][0], + reading: toRawKatakana(notations[n6][2]) + }); + } + } + flushPendingText(rawResult, pendingText); + return rawResult; + } + return stringResult; case "romaji": if (options.mode === "okurigana") { for (let n2 = 0; n2 < notations.length; n2++) { if (notations[n2][1] !== 1) { - result += notations[n2][0]; + stringResult += notations[n2][0]; } else { - result += notations[n2][0] + options.delimiter_start + toRawRomaji(notations[n2][3], options.romajiSystem) + options.delimiter_end; + stringResult += notations[n2][0] + options.delimiter_start + toRawRomaji(notations[n2][3], options.romajiSystem) + options.delimiter_end; } } } - else { // furigana - result += ""; + else if (options.mode === "furigana") { + stringResult += ""; for (let n3 = 0; n3 < notations.length; n3++) { - result += `${notations[n3][0]}`; + stringResult += `${notations[n3][0]}`; + } + stringResult += ""; + } + else { // raw + let pendingText = ""; + for (let n7 = 0; n7 < notations.length; n7++) { + if (notations[n7][1] !== 1) { + pendingText += notations[n7][0]; + } + else { + flushPendingText(rawResult, pendingText); + pendingText = ""; + rawResult.push({ + text: notations[n7][0], + reading: toRawRomaji(notations[n7][3], options.romajiSystem) + }); + } } - result += ""; + flushPendingText(rawResult, pendingText); + return rawResult; } - return result; + return stringResult; case "hiragana": if (options.mode === "okurigana") { for (let n4 = 0; n4 < notations.length; n4++) { if (notations[n4][1] !== 1) { - result += notations[n4][0]; + stringResult += notations[n4][0]; } else { - result += notations[n4][0] + options.delimiter_start + notations[n4][2] + options.delimiter_end; + stringResult += notations[n4][0] + options.delimiter_start + notations[n4][2] + options.delimiter_end; } } } - else { // furigana + else if (options.mode === "furigana") { for (let n5 = 0; n5 < notations.length; n5++) { if (notations[n5][1] !== 1) { - result += notations[n5][0]; + stringResult += notations[n5][0]; + } + else { + stringResult += `${notations[n5][0]}`; + } + } + } + else { // raw + let pendingText = ""; + for (let n8 = 0; n8 < notations.length; n8++) { + if (notations[n8][1] !== 1) { + pendingText += notations[n8][0]; } else { - result += `${notations[n5][0]}`; + flushPendingText(rawResult, pendingText); + pendingText = ""; + rawResult.push({ text: notations[n8][0], reading: notations[n8][2] }); } } + flushPendingText(rawResult, pendingText); + return rawResult; } - return result; + return stringResult; default: throw new Error("Invalid Target Syllabary."); } diff --git a/test/node.spec.js b/test/node.spec.js index d08bb04..5668d38 100644 --- a/test/node.spec.js +++ b/test/node.spec.js @@ -290,4 +290,19 @@ describe("Kuroshiro Node Funtional Test", () => { const result = await kuroshiro.convert(ori, { mode: "furigana", to: "romaji" }); expect(result).toEqual("感じ取れたら手を繋ごう、重なるのは人生のライン and レミリア最高!"); }); + it("Kanji to Hiragana with raw", async () => { + const ori = EXAMPLE_TEXT; + const result = await kuroshiro.convert(ori, { mode: "raw", to: "hiragana" }); + expect(result).toEqual(JSON.parse("[{\"text\":\"感\",\"reading\":\"かん\"},{\"text\":\"じ\"},{\"text\":\"取\",\"reading\":\"と\"},{\"text\":\"れたら\"},{\"text\":\"手\",\"reading\":\"て\"},{\"text\":\"を\"},{\"text\":\"繋\",\"reading\":\"つな\"},{\"text\":\"ごう、\"},{\"text\":\"重\",\"reading\":\"かさ\"},{\"text\":\"なるのは\"},{\"text\":\"人生\",\"reading\":\"じんせい\"},{\"text\":\"のライン and レミリア\"},{\"text\":\"最高\",\"reading\":\"さいこう\"},{\"text\":\"!\"}]")); + }); + it("Kanji to Katakana with raw", async () => { + const ori = EXAMPLE_TEXT; + const result = await kuroshiro.convert(ori, { mode: "raw", to: "katakana" }); + expect(result).toEqual(JSON.parse("[{\"text\":\"感\",\"reading\":\"カン\"},{\"text\":\"じ\"},{\"text\":\"取\",\"reading\":\"ト\"},{\"text\":\"れたら\"},{\"text\":\"手\",\"reading\":\"テ\"},{\"text\":\"を\"},{\"text\":\"繋\",\"reading\":\"ツナ\"},{\"text\":\"ごう、\"},{\"text\":\"重\",\"reading\":\"カサ\"},{\"text\":\"なるのは\"},{\"text\":\"人生\",\"reading\":\"ジンセイ\"},{\"text\":\"のライン and レミリア\"},{\"text\":\"最高\",\"reading\":\"サイコウ\"},{\"text\":\"!\"}]")); + }); + it("Kanji to Romaji with raw", async () => { + const ori = EXAMPLE_TEXT; + const result = await kuroshiro.convert(ori, { mode: "raw", to: "romaji" }); + expect(result).toEqual(JSON.parse("[{\"text\":\"感\",\"reading\":\"kan\"},{\"text\":\"じ\"},{\"text\":\"取\",\"reading\":\"to\"},{\"text\":\"れたら\"},{\"text\":\"手\",\"reading\":\"te\"},{\"text\":\"を\"},{\"text\":\"繋\",\"reading\":\"tsuna\"},{\"text\":\"ごう、\"},{\"text\":\"重\",\"reading\":\"kasa\"},{\"text\":\"なるのは\"},{\"text\":\"人生\",\"reading\":\"jinsei\"},{\"text\":\"のライン and レミリア\"},{\"text\":\"最高\",\"reading\":\"saikō\"},{\"text\":\"!\"}]")); + }); });