Skip to content

Commit

Permalink
merge AuthorityAngels scrapers, move standalone out of folder
Browse files Browse the repository at this point in the history
  • Loading branch information
feederbox826 committed Dec 18, 2024
1 parent 21dc33c commit a4b9fc8
Show file tree
Hide file tree
Showing 8 changed files with 284 additions and 577 deletions.
Original file line number Diff line number Diff line change
@@ -1,42 +1,42 @@
# Stash XPath scraper definition for scenes on authorityangels.com.
name: Authority Angels
sceneByURL:
  - action: scrapeXPath
    # https://authorityangels.com/updates/<scene-id>
    url:
      - authorityangels.com/updates/
    scraper: sceneScraper
xPathScrapers:
  sceneScraper:
    scene:
      Title:
        selector: //div[2]/div/div/h1/text()
      Details:
        selector: //div[2]/div/div/div[3]/p[2]/text()
      Performers:
        Name:
          selector: //div[2]/div/div/div[4]/ul/li/a
      Date:
        selector: //div[2]/div/div/div[1]/ul/li[3]/text()
        postProcess:
          # Layout string describing how the scraped date text is written.
          - parseDate: Jan 02, 2006
      Studio:
        Name:
          fixed: 'Authority Angels'
      Image:
        selector: //iframe[@class='cloudflare-player']/@src
        postProcess:
          # The iframe src embeds the thumbnail URL among other text; keep
          # only the thumbnail URL (capture group 2).
          - replace:
              - regex: '^(.*?)(https://authorityangels\.com/content/thumbs/\d+/[\w/-]+\.png)(.*)$'
                with: $2
      Tags:
        Name:
          selector: //div[2]/div/div/div[5]/ul/li/a
driver:
  # Cookie sent with requests to the site.
  # NOTE(review): presumably dismisses the site's warning popup - confirm.
  cookies:
    - CookieURL: "https://authorityangels.com"
      Cookies:
        - Name: "warningpopup"
          Domain: "authorityangels.com"
          Value: "true"
          Path: "/"
# Stash XPath scraper definition for scenes on authorityangels.com.
name: Authority Angels
sceneByURL:
  - action: scrapeXPath
    # https://authorityangels.com/updates/<scene-id>
    url:
      - authorityangels.com/updates/
    scraper: sceneScraper
xPathScrapers:
  sceneScraper:
    scene:
      Title:
        selector: //div[2]/div/div/h1/text()
      Details:
        selector: //div[2]/div/div/div[3]/p[2]/text()
      Performers:
        Name:
          selector: //div[2]/div/div/div[4]/ul/li/a
      Date:
        selector: //div[2]/div/div/div[1]/ul/li[3]/text()
        postProcess:
          # Layout string describing how the scraped date text is written.
          - parseDate: Jan 02, 2006
      Studio:
        Name:
          fixed: 'Authority Angels'
      Image:
        selector: //iframe[@class='cloudflare-player']/@src
        postProcess:
          # The iframe src embeds the thumbnail URL among other text; keep
          # only the thumbnail URL (capture group 2).
          - replace:
              - regex: '^(.*?)(https://authorityangels\.com/content/thumbs/\d+/[\w/-]+\.png)(.*)$'
                with: $2
      Tags:
        Name:
          selector: //div[2]/div/div/div[5]/ul/li/a
driver:
  # Cookie sent with requests to the site.
  # NOTE(review): presumably dismisses the site's warning popup - confirm.
  cookies:
    - CookieURL: "https://authorityangels.com"
      Cookies:
        - Name: "warningpopup"
          Domain: "authorityangels.com"
          Value: "true"
          Path: "/"
# Last Updated December 15, 2024
92 changes: 46 additions & 46 deletions scrapers/CruelBrats/CruelBrats.yml → scrapers/CruelBrats.yml
Original file line number Diff line number Diff line change
@@ -1,47 +1,47 @@
# Stash XPath scraper definition for scenes on cruelbrats.com.
name: CruelBrats
sceneByURL:
  - action: scrapeXPath
    url:
      - cruelbrats.com/
    scraper: sceneScraper
xPathScrapers:
  sceneScraper:
    scene:
      Title:
        selector: //div[@class="info-box-pink"]//h7/text()
        postProcess:
          # Title-case the heading: upper-case the first letter of each
          # space-separated word and lower-case the rest. Written as a
          # literal block so the script keeps its line breaks instead of
          # being folded into a single plain scalar.
          - javascript: |-
              if (value && value.length) {
                let words = value.split(' ');
                for (let i = 0; i < words.length; i++) {
                  words[i] = words[i].charAt(0).toUpperCase() + words[i].substring(1).toLowerCase();
                }
                return words.join(' ');
              }
      Details: //div[@class="info-box-white"]//h3/text()[1]
      Performers:
        Name:
          selector: //div[7]/div/div/h7
          postProcess:
            # Remove "MORE CONTENT FROM" (any case) and the single character
            # that follows it.
            - replace:
                - regex: '(?i)MORE CONTENT FROM.'
                  with: ""
      Date:
        selector: //div[5]//h3/text()[2]
        postProcess:
          # Replace "Added: <DD Mon YYYY><rest>" with just the captured date,
          # then parse it with the matching layout.
          - replace:
              - regex: 'Added:\s(\d{2}\s\w{3}\s\d{4})(.*)'
                with: "$1"
          - parseDate: "02 Jan 2006"
      Studio:
        Name:
          fixed: 'Cruel Brats'
      Tags:
        Name: //div[5]//div/h3//a/text()
      Image:
        selector: //div[2]/div/video[contains(@poster,'images/Backgrounds/')]/@poster
        postProcess:
          # Prefix the poster value with the site root.
          # NOTE(review): presumably @poster is a relative path - confirm.
          - replace:
              - regex: ^
                with: https://cruelbrats.com/
# Stash XPath scraper definition for scenes on cruelbrats.com.
name: CruelBrats
sceneByURL:
  - action: scrapeXPath
    url:
      - cruelbrats.com/
    scraper: sceneScraper
xPathScrapers:
  sceneScraper:
    scene:
      Title:
        selector: //div[@class="info-box-pink"]//h7/text()
        postProcess:
          # Title-case the heading: upper-case the first letter of each
          # space-separated word and lower-case the rest. Written as a
          # literal block so the script keeps its line breaks instead of
          # being folded into a single plain scalar.
          - javascript: |-
              if (value && value.length) {
                let words = value.split(' ');
                for (let i = 0; i < words.length; i++) {
                  words[i] = words[i].charAt(0).toUpperCase() + words[i].substring(1).toLowerCase();
                }
                return words.join(' ');
              }
      Details: //div[@class="info-box-white"]//h3/text()[1]
      Performers:
        Name:
          selector: //div[7]/div/div/h7
          postProcess:
            # Remove "MORE CONTENT FROM" (any case) and the single character
            # that follows it.
            - replace:
                - regex: '(?i)MORE CONTENT FROM.'
                  with: ""
      Date:
        selector: //div[5]//h3/text()[2]
        postProcess:
          # Replace "Added: <DD Mon YYYY><rest>" with just the captured date,
          # then parse it with the matching layout.
          - replace:
              - regex: 'Added:\s(\d{2}\s\w{3}\s\d{4})(.*)'
                with: "$1"
          - parseDate: "02 Jan 2006"
      Studio:
        Name:
          fixed: 'Cruel Brats'
      Tags:
        Name: //div[5]//div/h3//a/text()
      Image:
        selector: //div[2]/div/video[contains(@poster,'images/Backgrounds/')]/@poster
        postProcess:
          # Prefix the poster value with the site root.
          # NOTE(review): presumably @poster is a relative path - confirm.
          - replace:
              - regex: ^
                with: https://cruelbrats.com/
# Last Updated October 06, 2024
Loading

0 comments on commit a4b9fc8

Please sign in to comment.