Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(locales): support haitian creole #998

Merged
merged 4 commits into from
Nov 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,10 @@ Currently, we only support enhanced parsing for the following languages:
```bash
Chinese Mandarin (Simplified), Chinese Mandarin (Traditional)
Dutch
Enlish
English
French
German
Haitian Creole
Italian
Japanese
Korean
Expand Down
2 changes: 1 addition & 1 deletion crowdin.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
'pull_request_title': 'feat(localize): updated translation from Crowdin'
'pull_request_title': 'fix(locales): updated translation from Crowdin'
'pull_request_labels': ['crowdin']

files:
Expand Down
4 changes: 4 additions & 0 deletions src/browser/utils.browser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import * as path from 'path-browserify';

import CH from '../locales/cmn-Hant/text.json';
import CHS from '../locales/ch-CHS/text.json';
import CR from '../locales/ht-HT/text.json';
import E from '../locales/en/text.json';
import F from '../locales/fr-FR/text.json';
import FI from '../locales/fi-FI/text.json';
Expand All @@ -15,6 +16,7 @@ import O from '../locales/nl-NL/text.json';
import P from '../locales/pl-PL/text.json';
import PGW from '../locales/wes-PGW/text.json';
import S from '../locales/es-ES/text.json';
import ST from '../locales/et-EE/text.json';
import SV from '../locales/sl-SI/text.json';
import SW from '../locales/sw-KE/text.json';
import T from '../locales/pt-BR/text.json';
Expand All @@ -40,6 +42,7 @@ window.jw_epub_parser = {
languages: {
CH,
CHS,
CR,
E,
F,
FI,
Expand All @@ -53,6 +56,7 @@ window.jw_epub_parser = {
P,
PGW,
S,
ST,
SV,
SW,
T,
Expand Down
23 changes: 19 additions & 4 deletions src/common/date_parser.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { JWEPUBParserError } from '../classes/error.js';
import { WDateParsing, WDateParsingResult, LangRegExp, MWBDateParsingResult, MWBDateParsing } from '../types/index.js';
import { getMonthNames } from './language_rules.js';
import overrides from './override.js';
rhahao marked this conversation as resolved.
Show resolved Hide resolved

const dateRangeSeparator = `\\s? bis |[-–—]| do | — | – \\s?`;
const wordWithDiacritics = `\\p{L}+|\\p{L}+\\p{M}*`;
Expand Down Expand Up @@ -75,6 +76,7 @@ const mwbDatePatterns: LangRegExp = {
P: new RegExp(mwbDatePatternP, 'giu'),
PGW: new RegExp(mwbDatePatternE, 'giu'),
S: new RegExp(mwbDatePatternS, 'giu'),
ST: new RegExp(mwbDatePatternX, 'giu'),
SV: new RegExp(mwbDatePatternX, 'giu'),
SW: new RegExp(mwbDatePatternE, 'giu'),
T: new RegExp(mwbDatePatternT, 'giu'),
Expand Down Expand Up @@ -255,6 +257,7 @@ const wDatePatterns: LangRegExp = {
P: new RegExp(wDatePatternP, 'giu'),
PGW: new RegExp(wDatePatternE, 'giu'),
S: new RegExp(wDatePatternS, 'giu'),
ST: new RegExp(wDatePatternX, 'giu'),
SV: new RegExp(wDatePatternX, 'giu'),
SW: new RegExp(wDatePatternE, 'giu'),
T: new RegExp(wDatePatternT, 'giu'),
Expand Down Expand Up @@ -340,22 +343,34 @@ const wDateParsing: WDateParsing = {
// #endregion

export const extractWTStudyDate = (src: string, lang: string) => {
const srcClean = src
src = src
.trim()
.replace(' ', ' ')
.replace('​', '')
.replace('⁠', '')
.replace(/\u200F/g, '');

let finalSrc = src;

const overrideLang = overrides[lang];

if (overrideLang) {
const overrideSrc = overrideLang[src];

if (overrideSrc) {
finalSrc = overrideSrc;
}
}

const datePattern = wDatePatterns[lang] || wDatePatterns.common;

const match = srcClean.match(datePattern);
const match = finalSrc.match(datePattern);

if (!match) {
throw new JWEPUBParserError('wtstudy', `Parsing failed for Watchtower Study Date. The input was: ${src}`);
throw new JWEPUBParserError('wtstudy', `Parsing failed for Watchtower Study Date. The input was: ${finalSrc}`);
}

const groups = Array.from(datePattern.exec(srcClean)!);
const groups = Array.from(datePattern.exec(finalSrc)!);

const parseDataFunc = wDateParsing[lang] || wDateParsing.common;

Expand Down
13 changes: 13 additions & 0 deletions src/common/override.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { Override } from '../types';

const overrides: Override = {
PGW: {
'Fine-Fine Things Wey You See for Bible': 'Fine-Fine Things Wey You See for Bible: (10 min.)',
},
ST: {
'Seda artiklit uuritakse vahemikus 28. oktoobrist 3.novembrini 2024.':
'Seda artiklit uuritakse vahemikus 28. oktoobrist 3. novembrini 2024.',
},
};

export default overrides;
19 changes: 14 additions & 5 deletions src/common/parsing_rules.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { JWEPUBParserError } from '../classes/error.js';
import { LangRegExp } from '../types/index.js';
import { getPartMinutesSeparatorVariations } from './language_rules.js';
import overrides from './override.js';

export const extractSongNumber = (src: string) => {
const parseNum = src.match(/(\d+)/);
Expand All @@ -19,8 +20,16 @@ export const extractSongNumber = (src: string) => {
export const extractSourceEnhanced = (src: string, lang: string) => {
const variations = getPartMinutesSeparatorVariations(lang);

if (src === 'Fine-Fine Things Wey You See for Bible' && lang === 'PGW') {
src = 'Fine-Fine Things Wey You See for Bible: (10 min.)';
let finalSrc = src;

const overrideLang = overrides[lang];

if (overrideLang) {
const overrideSrc = overrideLang[src];

if (overrideSrc) {
finalSrc = overrideSrc;
}
}

// separate minutes from title
Expand All @@ -45,13 +54,13 @@ export const extractSourceEnhanced = (src: string, lang: string) => {

const langPattern = firstPattern[lang] || firstPattern.common;

const matchFirstPattern = src.match(langPattern);
const matchFirstPattern = finalSrc.match(langPattern);

if (!matchFirstPattern) {
throw new JWEPUBParserError('jw-epub-parser', `Parsing failed. The input was: ${src}`);
throw new JWEPUBParserError('jw-epub-parser', `Parsing failed. The input was: ${finalSrc}`);
}

const groupsFirstPattern = Array.from(langPattern.exec(src)!);
const groupsFirstPattern = Array.from(langPattern.exec(finalSrc)!);

const fulltitle = groupsFirstPattern.at(1)!.trim();
const time = +groupsFirstPattern.at(2)!.trim();
Expand Down
26 changes: 13 additions & 13 deletions src/locales/et-EE/text.json
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
{
"januaryVariations": "Jaanuar|jaanuarini|jaanuarist",
"februaryVariations": "Veebruar|veebruarini|veebruarist",
"marchVariations": "Märts|märtsini|märtsist",
"aprilVariations": "Aprill|aprillini|aprillist",
"mayVariations": "Mai|maini|maist",
"juneVariations": "Juuni|juunini",
"julyVariations": "Juuli|juulini|juulist",
"augustVariations": "August|augustini|augustist",
"septemberVariations": "September|septembrini|septembrist",
"octoberVariations": "Oktoober|oktoobrini|oktoobrist",
"novemberVariations": "November|novembrini|novembrist",
"decemberVariations": "Detsember|detsembrini|detsembrist",
"partMinutesSeparatorVariations": "min"
"januaryVariations": "Jaanuar|jaanuarini|jaanuarist",
"februaryVariations": "Veebruar|veebruarini|veebruarist",
"marchVariations": "Märts|märtsini|märtsist",
"aprilVariations": "Aprill|aprillini|aprillist",
"mayVariations": "Mai|maini|maist",
"juneVariations": "Juuni|juunini",
"julyVariations": "Juuli|juulini|juulist",
"augustVariations": "August|augustini|augustist",
"septemberVariations": "September|septembrini|septembrist",
"octoberVariations": "Oktoober|oktoobrini|oktoobrist",
"novemberVariations": "November|novembrini|novembrist",
"decemberVariations": "Detsember|detsembrini|detsembrist",
"partMinutesSeparatorVariations": "min"
}
2 changes: 2 additions & 0 deletions src/locales/languages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,6 @@ export default [
{ locale: 'wes-PGW', code: 'PGW' },
{ locale: 'sl-SI', code: 'SV' },
{ locale: 'sw-KE', code: 'SW' },
{ locale: 'et-EE', code: 'ST' },
{ locale: 'ht-HT', code: 'CR' },
];
4 changes: 4 additions & 0 deletions src/node/utils.node.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { readFile } from 'fs/promises';

import CH from '../locales/cmn-Hant/text.json' assert { type: 'json' };
import CHS from '../locales/ch-CHS/text.json' assert { type: 'json' };
import CR from '../locales/ht-HT/text.json' assert { type: 'json' };
import E from '../locales/en/text.json' assert { type: 'json' };
import F from '../locales/fr-FR/text.json' assert { type: 'json' };
import FI from '../locales/fi-FI/text.json' assert { type: 'json' };
Expand All @@ -16,6 +17,7 @@ import O from '../locales/nl-NL/text.json' assert { type: 'json' };
import P from '../locales/pl-PL/text.json' assert { type: 'json' };
import PGW from '../locales/wes-PGW/text.json' assert { type: 'json' };
import S from '../locales/es-ES/text.json' assert { type: 'json' };
import ST from '../locales/et-EE/text.json' assert { type: 'json' };
import SV from '../locales/sl-SI/text.json' assert { type: 'json' };
import SW from '../locales/sw-KE/text.json' assert { type: 'json' };
import T from '../locales/pt-BR/text.json' assert { type: 'json' };
Expand All @@ -39,6 +41,7 @@ global.jw_epub_parser = {
languages: {
CH,
CHS,
CR,
E,
F,
FI,
Expand All @@ -52,6 +55,7 @@ global.jw_epub_parser = {
P,
PGW,
S,
ST,
SV,
SW,
T,
Expand Down
6 changes: 6 additions & 0 deletions src/types/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,9 @@ export type MWBDateParsingResult = [month: string, date: string];
export type MWBDateParsing = {
[lang: string]: (groups: string[]) => MWBDateParsingResult;
};

export type Override = {
[language: string]: {
[src: string]: string;
};
};
3 changes: 2 additions & 1 deletion test/enhancedParsing/list.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,6 @@
{ "language": "Z", "issue": "202411" },
{ "language": "PGW", "issue": "202411" },
{ "language": "SV", "issue": "202411" },
{ "language": "SW", "issue": "202411" }
{ "language": "SW", "issue": "202411" },
{ "language": "CR", "issue": "202411" }
]
Loading