Skip to content

Commit

Permalink
Merge pull request #291 from ndaidong/7.0.3
Browse files Browse the repository at this point in the history
v7.0.3
  • Loading branch information
ndaidong authored Sep 16, 2022
2 parents 864ae92 + 57716fb commit da8df03
Show file tree
Hide file tree
Showing 10 changed files with 203 additions and 166 deletions.
118 changes: 59 additions & 59 deletions dist/article-parser.browser.js

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions dist/article-parser.browser.js.map

Large diffs are not rendered by default.

162 changes: 81 additions & 81 deletions dist/cjs/article-parser.js

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions dist/cjs/article-parser.js.map

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/cjs/package.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"name": "article-parser-cjs",
"version": "7.0.2",
"version": "7.0.3",
"main": "./article-parser.js"
}
21 changes: 12 additions & 9 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"version": "7.0.2",
"version": "7.0.3",
"name": "article-parser",
"description": "To extract main article from given URL",
"homepage": "https://ndaidong.github.io/article-parser-demo/",
Expand All @@ -9,6 +9,10 @@
},
"author": "@ndaidong",
"main": "./src/main.js",
"exports": {
"import": "./src/main.js",
"require": "./dist/cjs/article-parser.js"
},
"browser": {
"linkedom": "./src/browser/linkedom.js",
"./main.js": "./dist/article-parser.browser.js"
Expand All @@ -30,13 +34,12 @@
"dependencies": {
"@mozilla/readability": "^0.4.2",
"axios": "^0.27.2",
"bellajs": "^11.0.5",
"html-crush": "^5.1.3",
"linkedom": "^0.14.12",
"sanitize-html": "^2.7.1",
"bellajs": "^11.0.7",
"html-crush": "^5.1.6",
"linkedom": "^0.14.14",
"sanitize-html": "^2.7.2",
"string-comparison": "^1.1.0",
"tldts": "^5.7.90",
"urlpattern-polyfill": "^5.0.6"
"urlpattern-polyfill": "^5.0.9"
},
"standard": {
"ignore": [
Expand All @@ -46,8 +49,8 @@
"devDependencies": {
"@types/sanitize-html": "^2.6.2",
"cross-env": "^7.0.3",
"esbuild": "^0.15.6",
"jest": "^29.0.1",
"esbuild": "^0.15.7",
"jest": "^29.0.3",
"nock": "^13.2.9",
"standard": "^17.0.0"
},
Expand Down
39 changes: 34 additions & 5 deletions src/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { clone, copies } from 'bellajs'

const requestOptions = {
headers: {
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0',
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:104.0) Gecko/20100101 Firefox/104.0',
accept: 'text/html; charset=utf-8',
'accept-encoding': 'deflate,zlib,gzip'
},
Expand All @@ -15,20 +15,49 @@ const requestOptions = {
}

// Allow-lists of tags, per-tag attributes and iframe domains.
// Presumably handed to sanitize-html (the svg note below names that library) — confirm against the caller.
// NOTE(review): this span is a rendered diff without +/- markers; where the same key appears twice
// in a row, the first looks like the removed line and the second like the added one — confirm
// against the repository before treating it as a single object literal.
const sanitizeHtmlOptions = {
allowedTags: ['h1', 'h2', 'h3', 'h4', 'h5', 'u', 'b', 'i', 'em', 'strong', 'small', 'sup', 'sub', 'div', 'span', 'p', 'article', 'blockquote', 'section', 'details', 'summary', 'pre', 'code', 'ul', 'ol', 'li', 'dd', 'dl', 'table', 'th', 'tr', 'td', 'thead', 'tbody', 'tfood', 'fieldset', 'legend', 'figure', 'figcaption', 'img', 'picture', 'video', 'audio', 'source', 'iframe', 'progress', 'br', 'p', 'hr', 'label', 'abbr', 'a', 'svg'],
allowedTags: [
'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
'u', 'b', 'i', 'em', 'strong', 'small', 'sup', 'sub',
'div', 'span', 'p', 'article', 'blockquote', 'section',
'details', 'summary',
'pre', 'code',
'ul', 'ol', 'li', 'dd', 'dl',
// NOTE(review): 'tfood' is not an HTML element name; presumably 'tfoot' was intended — verify.
'table', 'th', 'tr', 'td', 'thead', 'tbody', 'tfood',
'fieldset', 'legend',
'figure', 'figcaption', 'img', 'picture',
'video', 'audio', 'source',
'iframe',
'progress',
// NOTE(review): 'p' is already listed above alongside 'div' and 'span'; this second entry is redundant.
'br', 'p', 'hr',
'label',
'abbr',
'a',
'svg'
],
// Attributes kept per tag; each key is a tag name and each value the attribute names allowed on it.
allowedAttributes: {
h1: ['id'],
h2: ['id'],
h3: ['id'],
h4: ['id'],
h5: ['id'],
h6: ['id'],
a: ['href', 'target', 'title'],
abbr: ['title'],
progress: ['value', 'max'],
img: ['src', 'srcset', 'alt', 'width', 'height', 'style', 'title'],
img: ['src', 'srcset', 'alt', 'title'],
picture: ['media', 'srcset'],
video: ['controls', 'width', 'height', 'autoplay', 'muted', 'loop', 'src'],
audio: ['controls'],
audio: ['controls', 'width', 'height', 'autoplay', 'muted', 'loop', 'src'],
source: ['src', 'srcset', 'data-srcset', 'type', 'media', 'sizes'],
iframe: ['src', 'frameborder', 'height', 'width', 'scrolling', 'allow'],
svg: ['width', 'height'] // sanitize-html does not support svg fully yet
},
// Domains whose iframes are allowed; how subdomains are matched is decided by the consumer — TODO confirm.
allowedIframeDomains: ['youtube.com', 'twitter.com', 'facebook.com', 'vimeo.com']
allowedIframeDomains: [
'youtube.com', 'vimeo.com', 'odysee.com',
'soundcloud.com', 'audius.co',
'github.com', 'codepen.com',
'twitter.com', 'facebook.com', 'instagram.com'
]
}

/**
Expand Down
2 changes: 1 addition & 1 deletion src/config.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ test('Testing setRequestOptions/getRequestOptions methods', () => {
const actual = getRequestOptions()
const expectedHeader = {
authorization: 'bearer <token>',
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0',
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:104.0) Gecko/20100101 Firefox/104.0',
accept: 'text/html; charset=utf-8',
'accept-encoding': 'deflate,zlib,gzip'
}
Expand Down
5 changes: 5 additions & 0 deletions src/utils/linker.js
Original file line number Diff line number Diff line change
Expand Up @@ -143,3 +143,8 @@ export const chooseBestUrl = (candidates = [], title = '') => {
value: shortestUrl
}).value
}

/**
 * Get the host of a URL with one leading `www.` label removed.
 *
 * @param {string} url - absolute URL to inspect
 * @returns {string} the URL's host (including `:port` when the URL carries a
 *   non-default port) without a leading `www.`
 * @throws {TypeError} when `url` cannot be parsed by the `URL` constructor
 */
export const getDomain = (url) => {
  const { host } = new URL(url)
  // Anchor the match to the start of the host: a plain string pattern would
  // strip the first `www.` found anywhere, e.g. `awww.example.com` would
  // become `aexample.com` and `static.www.example.com` -> `static.example.com`.
  return host.replace(/^www\./, '')
}
4 changes: 2 additions & 2 deletions src/utils/parseFromHtml.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
// utils -> parseFromHtml

import { stripTags, truncate, unique, pipe } from 'bellajs'
import { getDomain } from 'tldts'

import { cleanify, cleanAndMinify as cleanAndMinifyHtml } from './html.js'

Expand All @@ -10,7 +9,8 @@ import {
purify as purifyUrl,
absolutify as absolutifyUrl,
normalize as normalizeUrls,
chooseBestUrl
chooseBestUrl,
getDomain
} from './linker.js'

import extractMetaData from './extractMetaData.js'
Expand Down

0 comments on commit da8df03

Please sign in to comment.