diff --git a/lib/scripts/mtw.js b/lib/scripts/mtw.js index f27d563..42bfb3a 100644 --- a/lib/scripts/mtw.js +++ b/lib/scripts/mtw.js @@ -149,7 +149,14 @@ export class ContentScript { paragraphs = document.getElementsByTagName('p'); console.log('Getting words from all ' + paragraphs.length + ' paragraphs'); for (var i = 0; i < paragraphs.length; i++) { - var words = paragraphs[i].innerText.split(/\s|,|[.()]|\d/g); + var words = paragraphs[i].innerText; + if(this.clkTest(words)){ + words = words.replace(/\d|\s|[()]/g,'').split('').filter(v=>v!=''); + } + else{ + words = words.split(/\s|,|[.()]|\d/g); + } + // console.log(words); for (var j = 0; j < words.length; j++) { for (var b = ngramMin; b <= ngramMax; b++) { var word = words.slice(j, j + b).join(' '); @@ -179,11 +186,16 @@ export class ContentScript { var blackListReg = new RegExp(userBlacklistedWords); var punctuationReg = new RegExp(/[\.,\/#\!\$%\^&\*;:{}=\\\_`~()\?@\d\+\-]/g); var countedWordsList = this.shuffle(this.toList(countedWords, (word, count) => { - return !!word && word.length >= 2 && // no words that are too short - word !== '' && !/\d/.test(word) && // no empty words - word.charAt(0) !== word.charAt(0).toUpperCase() && // no proper nouns - !blackListReg.test(word.toLowerCase()) && // no blacklisted words - !punctuationReg.test(word.toLowerCase()); // no punctuation marks + if(this.clkTest(word)) + + return !!word && word !== '' && !/\d/.test(word) && // no empty words + !blackListReg.test(word.toLowerCase()) && // no blacklisted words + !punctuationReg.test(word.toLowerCase()); // no punctuation marksreturn !!word && word.length >= 2 && // no words that are too short + else + return word !== '' && !/\d/.test(word) && // no empty words + word.charAt(0) !== word.charAt(0).toUpperCase() && // no proper nouns + !blackListReg.test(word.toLowerCase()) && // no blacklisted words + !punctuationReg.test(word.toLowerCase()); // no punctuation marks })); var targetLength = Math.floor((Object.keys(countedWordsList).length * translationProbability) / 100); return this.toMap(countedWordsList.slice(0, targetLength - 1)); @@ -280,7 +292,6 @@ export class ContentScript { } } }); - if (Object.keys(filteredTMap).length !== 0) { var paragraphs = document.getElementsByTagName('p'); if (this.oneWordTranslation) { @@ -294,6 +305,8 @@ export class ContentScript { } } + // console.log(filteredTMap); + // Add event listener to each word for toggle var translatedWords = document.querySelectorAll('.mtwTranslatedWord, .mtwTranslatedWorde, .mtwTranslatedWordn, .mtwTranslatedWordh'); for (let i = 0; i < translatedWords.length; i++) { @@ -395,11 +408,33 @@ export class ContentScript { deepHTMLReplacement(node, tMap, iTMap) { var badTags = ['TEXTAREA', 'INPUT', 'SCRIPT', 'CODE', 'A', 'SPAN']; if (node.nodeType === Node.TEXT_NODE) { - var newNodeValue = this.replaceAll(node.nodeValue, tMap); + var newNodeValue; + if(this.targetLanguage == "zh"){ + newNodeValue = this.replaceAll(node.nodeValue, tMap); + } + else{ + if(this.clkTest(node.nodeValue)){ + newNodeValue = this.replaceAllClk(node.nodeValue, tMap); + } + else{ + newNodeValue = this.replaceAll(node.nodeValue, tMap); + } + } if (newNodeValue !== node.nodeValue) { node.nodeValue = newNodeValue; var parent = node.parentNode; - parent.innerHTML = this.replaceAll(parent.innerHTML, iTMap); + if(this.targetLanguage == "zh"){ + parent.innerHTML = this.replaceAll(parent.innerHTML, iTMap); + } + else{ + if(this.clkTest(node.nodeValue)){ + parent.innerHTML = this.replaceAllClk(parent.innerHTML, iTMap); + } + else{ + parent.innerHTML = this.replaceAll(parent.innerHTML, iTMap); + } + } + // parent.innerHTML = this.replaceAll(parent.innerHTML, iTMap); } } else if (node.nodeType === Node.ELEMENT_NODE && badTags.indexOf(node.tagName) <= -1) { var innerNodes = node.childNodes; @@ -434,6 +469,40 @@ export class ContentScript { return ' ' + m + ' '; } }); + + if (/^\s*$/.test(newText)) { + return text; + } + return newText; + } + + + /** + * Returns text replaced with translations (only for CLK Languages) (if any) + * otherwise returns the same text + * @param {string} text - source text + * @param {Object} translationMap - translations for source words + * @returns {string} text - text with translations + */ + replaceAllClk(text, translationMap) { + var rExp = ''; + var sortedSourceWords = Object.keys(translationMap) + .sort((w1, w2) => { + return w2.length - w1.length; + }); + sortedSourceWords.forEach((sourceWord) => { + rExp += '(' + this.escapeRegExp(sourceWord) + ')|'; + }); + rExp = rExp.substring(0, rExp.length - 1); + var regExp = new RegExp(rExp, 'gm'); + var newText = text.replace(regExp, (m) => { + if (translationMap[m] !== null) { + return ' ' + translationMap[m] + ' '; + } else { + return ' ' + m + ' '; + } + }); + if (/^\s*$/.test(newText)) { return text; } @@ -572,7 +641,7 @@ export class ContentScript { sendError(message) { if(message == '') - message = 'Could Not Connect To Translator Service'; + message = 'Could not connect to '+this.translator+' Service .\nIt may be temporarily unavailable or you may be experiencing internet connection problems '; var date = new Date(); @@ -597,6 +666,15 @@ export class ContentScript { }); } + + + clkTest(str){ + var clk_main = new RegExp("[\u4E00-\u9FFF]"); + var clk_extension = new RegExp("[\u3400-\u4DBF]"); + var clk_strokes = new RegExp("[\u31C0-\u31EF]"); + var clk_symbols_punctuation = new RegExp("[\u3000-\u303F]"); + return (clk_main.test(str)||clk_extension.test(str)||clk_strokes.test(str)||clk_symbols_punctuation.test(str)); + } } var MTWTranslator = new ContentScript(); diff --git a/lib/views/includes/troubleshooting.html b/lib/views/includes/troubleshooting.html index 01a9a83..76496c2 100644 --- a/lib/views/includes/troubleshooting.html +++ b/lib/views/includes/troubleshooting.html @@ -1,3 +1,4 @@ +
{{logMessage.message}}
-