Skip to content

Commit

Permalink
Merge pull request #2136 from jsdb46/master
Browse files Browse the repository at this point in the history
Add support for RealityStudio sites, cruelbrats, authorityangels and update clubdom
  • Loading branch information
feederbox826 authored Dec 18, 2024
2 parents 5a66a6b + a4b9fc8 commit 20cbd0b
Show file tree
Hide file tree
Showing 6 changed files with 302 additions and 3 deletions.
42 changes: 42 additions & 0 deletions scrapers/AuthorityAngels.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# XPath scene scraper for authorityangels.com.
name: Authority Angels
sceneByURL:
  - action: scrapeXPath
    # Matches https://authorityangels.com/updates/<scene-id>
    url:
      - authorityangels.com/updates/
    scraper: sceneScraper
xPathScrapers:
  sceneScraper:
    scene:
      # NOTE(review): most selectors below are positional (div[2]/div/div/...),
      # so they depend on the current page layout - re-check after site changes.
      Title:
        selector: //div[2]/div/div/h1/text()
      Details:
        selector: //div[2]/div/div/div[3]/p[2]/text()
      Performers:
        Name:
          selector: //div[2]/div/div/div[4]/ul/li/a
      Date:
        selector: //div[2]/div/div/div[1]/ul/li[3]/text()
        postProcess:
          # Go reference-time layout, e.g. "Dec 15, 2024".
          - parseDate: "Jan 02, 2006"
      Studio:
        Name:
          fixed: Authority Angels
      Image:
        # The thumbnail URL is embedded inside the player iframe's src;
        # the regex keeps only that URL (capture group 2).
        selector: //iframe[@class='cloudflare-player']/@src
        postProcess:
          - replace:
              - regex: '^(.*?)(https://authorityangels\.com/content/thumbs/\d+/[\w/-]+\.png)(.*)$'
                with: "$2"
      Tags:
        Name:
          selector: //div[2]/div/div/div[5]/ul/li/a
driver:
  cookies:
    # Sends warningpopup=true with requests to the site.
    # NOTE(review): presumably dismisses an entry warning popup - confirm.
    - CookieURL: https://authorityangels.com
      Cookies:
        - Name: warningpopup
          Domain: authorityangels.com
          Value: "true"
          Path: /
# Last Updated December 15, 2024
4 changes: 2 additions & 2 deletions scrapers/ClubDom.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ xPathScrapers:
Details:
selector: //strong[contains(.,"Description:")]/following-sibling::text()
Image:
selector: //div[@class="videoplayer"]/img//@src0_1x
selector: //div[@class='videoplayer']/img/@src0_3x | //div[@class='videoplayer']/img/@src0_2x | //div[@class='videoplayer']/img/@src0_1x
postProcess:
- replace:
- regex: ^
Expand Down Expand Up @@ -87,4 +87,4 @@ xPathScrapers:
- replace:
- regex: ^
with: "https:"
# Last Updated August 16, 2024
# Last Updated December 15, 2024
47 changes: 47 additions & 0 deletions scrapers/CruelBrats.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# XPath scene scraper for cruelbrats.com.
name: CruelBrats
sceneByURL:
  - action: scrapeXPath
    url:
      - cruelbrats.com/
    scraper: sceneScraper
xPathScrapers:
  sceneScraper:
    scene:
      Title:
        selector: //div[@class="info-box-pink"]//h7/text()
        postProcess:
          # Title-cases every space-separated word of the scraped title.
          # Written as a literal block scalar so the script keeps its line
          # breaks, and it always returns a string: an empty value is
          # returned unchanged instead of falling through without a return.
          - javascript: |
              if (value && value.length) {
                let words = value.split(' ');
                for (let i = 0; i < words.length; i++) {
                  words[i] = words[i].charAt(0).toUpperCase() + words[i].substring(1).toLowerCase();
                }
                return words.join(' ');
              }
              return value;
      Details: //div[@class="info-box-white"]//h3/text()[1]
      Performers:
        Name:
          selector: //div[7]/div/div/h7
          postProcess:
            # Strips the case-insensitive "MORE CONTENT FROM" prefix plus the
            # single character that follows it.
            - replace:
                - regex: '(?i)MORE CONTENT FROM.'
                  with: ""
      Date:
        selector: //div[5]//h3/text()[2]
        postProcess:
          # Keeps only the "DD Mon YYYY" part that follows "Added:".
          - replace:
              - regex: 'Added:\s(\d{2}\s\w{3}\s\d{4})(.*)'
                with: "$1"
          - parseDate: "02 Jan 2006"
      Studio:
        Name:
          fixed: 'Cruel Brats'
      Tags:
        Name: //div[5]//div/h3//a/text()
      Image:
        selector: //div[2]/div/video[contains(@poster,'images/Backgrounds/')]/@poster
        postProcess:
          # Prefixes the poster path with the site root to form a full URL.
          - replace:
              - regex: ^
                with: https://cruelbrats.com/
# Last Updated October 06, 2024
121 changes: 121 additions & 0 deletions scrapers/PurePass.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
# XPath scene scraper shared by the Pure Pass network sites.
# Two page layouts are supported, each with its own scraper:
#   /scenes/  -> purepass_scenes
#   /updates/ -> purepass_updates
name: Pure Pass
sceneByURL:
  # https://www.<site>.com/scenes/<scene-name>_vids.html
  - action: scrapeXPath
    url:
      - amateurcfnm.com/scenes/
      - cfnmgames.com/scenes/
      - girlsabuseguys.com/scenes/
      - ladyvoyeurs.com/scenes/
      - littledick.club/scenes/
      - purecfnm.com/scenes/
    scraper: purepass_scenes
  # https://www.<site>.com/updates/<Scene-name>.html
  - action: scrapeXPath
    url:
      - amateurcfnm.com/updates/
      - cfnmgames.com/updates/
      - girlsabuseguys.com/updates/
      - ladyvoyeurs.com/updates/
      - littledick.club/updates/
      - purecfnm.com/updates/
    scraper: purepass_updates
xPathScrapers:
  purepass_scenes:
    scene:
      Title:
        selector: //div[3]/div/div[2]/div[1]/span/text()
      Details:
        selector: //div[3]/div/div[4]/span[2]/text()
      Performers:
        Name:
          selector: //div[3]/div/div[4]/span[1]/a
      Date:
        selector: //div[3]/div/div[4]/div[1]/div/div[1]/text()
        postProcess:
          - parseDate: "January 02, 2006"
      Studio:
        Name:
          # The page's <base href> identifies the site; map it to a studio name.
          selector: //base/@href
          postProcess:
            - map:
                https://www.amateurcfnm.com/: Amateur CFNM
                https://www.cfnmgames.com/: CFNM Games
                https://www.girlsabuseguys.com/: Girls Abuse Guys
                https://www.ladyvoyeurs.com/: Lady Voyeurs
                https://littledick.club/: Little Dick Club
                https://www.purecfnm.com/: Pure CFNM
      Tags:
        Name:
          selector: //div[3]/div/div[4]/div[2]/a
      Image:
        # Follows the selected link and reads the image from the linked page.
        # NOTE(review): this is the field-level subScraper form, while the
        # Tags field of purepass_updates uses the postProcess form - confirm
        # the nesting is as intended.
        selector: //div[3]/div/div[2]/div[2]/div[1]/div[3]/a/@href
        subScraper:
          selector: //base/@href | //*[contains(@id,"set-target")]/@data-src0_3x | //base/@href | //*[contains(@id,"set-target")]/@data-src0_2x | //base/@href | //*[contains(@id,"set-target")]/@data-src0_1x
          concat: "__SEP__"
          postProcess:
            # Glue the base URL and the thumbnail paths together ...
            - replace:
                - regex: __SEP__/
                  with: ""
            # ... then keep only the first complete thumbnail URL.
            - replace:
                - regex: (https://[^/]+(?:/tour)?/content//contentthumbs/\d+/\d+/[\w\d-]+-\dx\.jpg)((?:(?:tour/)?content//contentthumbs/\d+/\d+/[\w\d-]+-\dx\.jpg)*)
                  with: "$1"
  purepass_updates:
    common:
      $title: //div[3]/div[1]/div[2]/div[2]/div[1]/div/span[1]/text()
    scene:
      Title:
        selector: //div[3]/div[1]/div[2]/div[2]/div[1]/div/span[1]/text()
      Details:
        selector: //div[3]/div[1]/div[2]/div[2]/div[1]/div/span[4]/text()
      Performers:
        Name:
          selector: //div[3]/div[1]/div[2]/div[2]/div[1]/div/span[2]/a
      Date:
        selector: //div[3]/div[1]/div[2]/div[2]/div[1]/div/span[3]
        postProcess:
          - parseDate: "January 02, 2006"
      Studio:
        Name:
          # The page's <base href> identifies the site; map it to a studio name.
          selector: //base/@href
          postProcess:
            - map:
                https://www.amateurcfnm.com/: Amateur CFNM
                https://www.cfnmgames.com/: CFNM Games
                https://www.girlsabuseguys.com/: Girls Abuse Guys
                https://www.ladyvoyeurs.com/: Lady Voyeurs
                https://littledick.club/: Little Dick Club
                https://www.purecfnm.com/: Pure CFNM
      Image:
        selector: //base/@href | //*[contains(@id,"set-target")]/@data-src0_3x | //base/@href | //*[contains(@id,"set-target")]/@data-src0_2x | //base/@href | //*[contains(@id,"set-target")]/@data-src0_1x
        concat: "__SEP__"
        postProcess:
          # Glue the base URL and the thumbnail paths together ...
          - replace:
              - regex: __SEP__/
                with: ""
          # ... then keep only the first complete thumbnail URL.
          - replace:
              - regex: (https://[^/]+(?:/tour)?/content//contentthumbs/\d+/\d+/[\w\d-]+-\dx\.jpg)((?:(?:tour/)?content//contentthumbs/\d+/\d+/[\w\d-]+-\dx\.jpg)*)
                with: "$1"
      # Tags are not taken from the /updates/ page itself: the steps below
      # build the matching /scenes/<scene-name>_vids.html URL from the base
      # href and the title, then sub-scrape the tags from that page.
      # Because of the sub-scrape, only one tag is returned; this is kept in
      # case the stash scraper is updated in the future. To get all tags,
      # scrape directly with https://www.<site>.com/scenes/<scene-name>_vids.html
      Tags:
        Name:
          selector: //base/@href | $title
          concat: "__SEP__"
          postProcess:
            # <base href> + "scenes/" + title
            - replace:
                - regex: __SEP__
                  with: "scenes/"
            # spaces become dashes
            - replace:
                - regex: ' '
                  with: '-'
            # apostrophes are dropped
            - replace:
                - regex: \'
                  with: ''
            # append the scene-page suffix
            - replace:
                - regex: '$'
                  with: '_vids.html'
            - subScraper:
                selector: //div[3]/div/div[4]/div[2]/a

# Last Updated December 17, 2024
77 changes: 77 additions & 0 deletions scrapers/RealityStudio.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# XPath scene scraper for the RealityStudio sites.
# Most fields are carved out of the text of the "galleryname" div.
# NOTE(review): judging by the regexes, that text presumably looks like
# "#<code> <performer>, <performer> <MM/DD/YY>" - confirm against a live page.
name: RealityStudio
sceneByURL:
  - action: scrapeXPath
    url:
      - cumcountdown.com/gallery.html
      - femaleworship.com/gallery.html
      - goddesskitra.com/gallery.html
      - menareslaves.com/gallery.html
      - subbygirls.com/gallery.html
    scraper: sceneScraper
xPathScrapers:
  sceneScraper:
    scene:
      Title:
        selector: //div[@id='galleryname']/text()
        postProcess:
          # Drop the trailing MM/DD/YY date.
          - replace:
              - regex: '\d{2}/\d{2}/\d{2}$'
                with: ''
          # Collapse any run of whitespace (including non-breaking spaces,
          # U+00A0) into one space, rather than matching one exact run of
          # invisible characters.
          - replace:
              - regex: '[\s\x{00A0}]+'
                with: ' '
      Details:
        selector: //div[@id='galleryname']/text()
        postProcess:
          # Drop the trailing date, then the leading "#<digits>" code.
          - replace:
              - regex: '\d{2}/\d{2}/\d{2}$'
                with: ''
          - replace:
              - regex: '^#\d+'
                with: ""
      Performers:
        Name:
          selector: //div[@id='galleryname'][1]/text()
          postProcess:
            # Drop the trailing date, then the leading "#<digits>" code;
            # what remains is split into individual names.
            - replace:
                - regex: '\d{2}/\d{2}/\d{2}$'
                  with: ''
            - replace:
                - regex: '^#\d+'
                  with: ""
          split: ', '
      Date:
        selector: //div[@id='galleryname']/text()
        postProcess:
          # Capture the trailing MM/DD/YY directly. Stripping letter runs to
          # isolate the date leaves residue (hyphens, apostrophes, digits,
          # "&", accented letters) for many names, which breaks parseDate.
          - replace:
              - regex: '(?s)^.*?(\d{2}/\d{2}/\d{2})\s*$'
                with: $1
          - parseDate: 01/02/06
      Studio:
        Name: //title/text()
      Image:
        selector: //div[@id="footer"]/p/img/@src | //*[@id="gallerycontainer"]/center/div[5]/img/@src
        concat: " "
        postProcess:
          # Removes everything from "images/" up to the last whitespace in
          # the space-joined result.
          - replace:
              - regex: (images\/.+\s)
                with: ""
      Code:
        selector: //div[@id='galleryname'][1]/text()
        postProcess:
          # Keep only the leading "#<digits>" code, whatever characters the
          # performer names contain.
          - replace:
              - regex: '(?s)^(#\d+).*$'
                with: $1
          # If the text did not start with a code, return nothing rather
          # than the whole gallery name.
          - replace:
              - regex: '(?s)^[^#].*$'
                with: ""
      Tags:
        Name:
          selector: //*[@id="galleryname"][2]/text()
          split: ', '
driver:
  useCDP: true
# Last Updated December 17, 2024
14 changes: 13 additions & 1 deletion scrapers/StasyQVR.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,18 @@ sceneByURL:
url:
- stasyqvr.com/virtualreality
scraper: sceneScraper
sceneByFragment:
action: scrapeXPath
queryURL: https://stasyqvr.com/virtualreality/scene/id/{filename}
queryURLReplace:
filename:
- regex: ^(StasyQVR)_([^_]+)_(\d+)_(\d+)_([A-Z]+)_(\d+)\.([a-zA-Z0-9]+)$ # official site file naming scheme: StasyQVR_example scene name_2880_999_LR_180.mp4 # StasyQVR_scene name_resolution_code_LR_180.mp4
with: $4-$2
- regex: " "
with: '_'
- regex: "'"
with: ''
scraper: sceneScraper
xPathScrapers:
sceneScraper:
scene:
Expand All @@ -30,4 +42,4 @@ xPathScrapers:
- replace:
- regex: '^background-image: url\(|\);$'
with: ""
# Last Updated October 20, 2023
# Last Updated December 17, 2024

0 comments on commit 20cbd0b

Please sign in to comment.