diff --git a/scrapers/AuthorityAngels.yml b/scrapers/AuthorityAngels.yml new file mode 100644 index 000000000..1b41e4b47 --- /dev/null +++ b/scrapers/AuthorityAngels.yml @@ -0,0 +1,42 @@ +name: Authority Angels +sceneByURL: + - action: scrapeXPath + # https://authorityangels.com/updates/ + url: + - authorityangels.com/updates/ + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: + selector: //div[2]/div/div/h1/text() + Details: + selector: //div[2]/div/div/div[3]/p[2]/text() + Performers: + Name: + selector: //div[2]/div/div/div[4]/ul/li/a + Date: + selector: //div[2]/div/div/div[1]/ul/li[3]/text() + postProcess: + - parseDate: Jan 02, 2006 + Studio: + Name: + fixed: 'Authority Angels' + Image: + selector: //iframe[@class='cloudflare-player']/@src + postProcess: + - replace: + - regex: '^(.*?)(https://authorityangels\.com/content/thumbs/\d+/[\w/-]+\.png)(.*)$' + with: $2 + Tags: + Name: + selector: //div[2]/div/div/div[5]/ul/li/a +driver: + cookies: + - CookieURL: "https://authorityangels.com" + Cookies: + - Name: "warningpopup" + Domain: "authorityangels.com" + Value: "true" + Path: "/" +# Last Updated December 15, 2024 \ No newline at end of file diff --git a/scrapers/ClubDom.yml b/scrapers/ClubDom.yml index daa542216..8555f118e 100644 --- a/scrapers/ClubDom.yml +++ b/scrapers/ClubDom.yml @@ -40,7 +40,7 @@ xPathScrapers: Details: selector: //strong[contains(.,"Description:")]/following-sibling::text() Image: - selector: //div[@class="videoplayer"]/img//@src0_1x + selector: //div[@class='videoplayer']/img/@src0_3x | //div[@class='videoplayer']/img/@src0_2x | //div[@class='videoplayer']/img/@src0_1x postProcess: - replace: - regex: ^ @@ -87,4 +87,4 @@ xPathScrapers: - replace: - regex: ^ with: "https:" -# Last Updated August 16, 2024 \ No newline at end of file +# Last Updated December 15, 2024 diff --git a/scrapers/CruelBrats.yml b/scrapers/CruelBrats.yml new file mode 100644 index 000000000..df6cdaaba --- /dev/null +++ 
b/scrapers/CruelBrats.yml @@ -0,0 +1,47 @@ +name: CruelBrats +sceneByURL: + - action: scrapeXPath + url: + - cruelbrats.com/ + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: + selector: //div[@class="info-box-pink"]//h7/text() + postProcess: + - javascript: + if (value && value.length) { + let words = value.split(' '); + for (let i = 0; i < words.length; i++) { + words[i] = words[i].charAt(0).toUpperCase() + words[i].substring(1).toLowerCase(); + } + return words.join(' '); + } + Details: //div[@class="info-box-white"]//h3/text()[1] + Performers: + Name: + selector: //div[7]/div/div/h7 + postProcess: + - replace: + - regex: '(?i)MORE CONTENT FROM.' + with: "" + Date: + selector: //div[5]//h3/text()[2] + postProcess: + - replace: + - regex: 'Added:\s(\d{2}\s\w{3}\s\d{4})(.*)' + with: "$1" + - parseDate: "02 Jan 2006" + Studio: + Name: + fixed: 'Cruel Brats' + Tags: + Name: //div[5]//div/h3//a/text() + Image: + selector: //div[2]/div/video[contains(@poster,'images/Backgrounds/')]/@poster + postProcess: + - replace: + - regex: ^ + with: https://cruelbrats.com/ +# Last Updated October 06, 2024 \ No newline at end of file diff --git a/scrapers/PurePass.yml b/scrapers/PurePass.yml new file mode 100644 index 000000000..78669cb92 --- /dev/null +++ b/scrapers/PurePass.yml @@ -0,0 +1,121 @@ +name: Pure Pass +sceneByURL: + - action: scrapeXPath + # https://www.{site}.com/scenes/{scene}_vids.html + url: + - amateurcfnm.com/scenes/ + - cfnmgames.com/scenes/ + - girlsabuseguys.com/scenes/ + - ladyvoyeurs.com/scenes/ + - littledick.club/scenes/ + - purecfnm.com/scenes/ + scraper: purepass_scenes + - action: scrapeXPath + # https://www.{site}.com/updates/{scene}.html + url: + - amateurcfnm.com/updates/ + - cfnmgames.com/updates/ + - girlsabuseguys.com/updates/ + - ladyvoyeurs.com/updates/ + - littledick.club/updates/ + - purecfnm.com/updates/ + scraper: purepass_updates +xPathScrapers: + purepass_scenes: + scene: + Title: + selector: //div[3]/div/div[2]/div[1]/span/text() + 
Details: + selector: //div[3]/div/div[4]/span[2]/text() + Performers: + Name: + selector: //div[3]/div/div[4]/span[1]/a + Date: + selector: //div[3]/div/div[4]/div[1]/div/div[1]/text() + postProcess: + - parseDate: January 02, 2006 + Studio: + Name: + selector: //base/@href + postProcess: + - map: + https://www.amateurcfnm.com/: Amateur CFNM + https://www.cfnmgames.com/: CFNM Games + https://www.girlsabuseguys.com/: Girls Abuse Guys + https://www.ladyvoyeurs.com/: Lady Voyeurs + https://littledick.club/: Little Dick Club + https://www.purecfnm.com/: Pure CFNM + Tags: + Name: + selector: //div[3]/div/div[4]/div[2]/a + Image: + selector: //div[3]/div/div[2]/div[2]/div[1]/div[3]/a/@href + subScraper: + selector: //base/@href | //*[contains(@id,"set-target")]/@data-src0_3x | //base/@href | //*[contains(@id,"set-target")]/@data-src0_2x | //base/@href | //*[contains(@id,"set-target")]/@data-src0_1x + concat: __SEP__ + postProcess: + - replace: + - regex: __SEP__/ + with: "" + - replace: + - regex: (https://[^/]+(?:/tour)?/content//contentthumbs/\d+/\d+/[\w\d-]+-\dx\.jpg)((?:(?:tour/)?content//contentthumbs/\d+/\d+/[\w\d-]+-\dx\.jpg)*) + with: $1 + purepass_updates: + common: + $title: //div[3]/div[1]/div[2]/div[2]/div[1]/div/span[1]/text() + scene: + Title: + selector: //div[3]/div[1]/div[2]/div[2]/div[1]/div/span[1]/text() + Details: + selector: //div[3]/div[1]/div[2]/div[2]/div[1]/div/span[4]/text() + Performers: + Name: + selector: //div[3]/div[1]/div[2]/div[2]/div[1]/div/span[2]/a + Date: + selector: //div[3]/div[1]/div[2]/div[2]/div[1]/div/span[3] + postProcess: + - parseDate: January 02, 2006 + Studio: + Name: + selector: //base/@href + postProcess: + - map: + https://www.amateurcfnm.com/: Amateur CFNM + https://www.cfnmgames.com/: CFNM Games + https://www.girlsabuseguys.com/: Girls Abuse Guys + https://www.ladyvoyeurs.com/: Lady Voyeurs + https://littledick.club/: Little Dick Club + https://www.purecfnm.com/: Pure CFNM + Image: + selector: //base/@href | 
//*[contains(@id,"set-target")]/@data-src0_3x | //base/@href | //*[contains(@id,"set-target")]/@data-src0_2x | //base/@href | //*[contains(@id,"set-target")]/@data-src0_1x + concat: __SEP__ + postProcess: + - replace: + - regex: __SEP__/ + with: "" + - replace: + - regex: (https://[^/]+(?:/tour)?/content//contentthumbs/\d+/\d+/[\w\d-]+-\dx\.jpg)((?:(?:tour/)?content//contentthumbs/\d+/\d+/[\w\d-]+-\dx\.jpg)*) + with: $1 + # Scraping tags using /updates/ url Only returns one tag sadly due to subscraping, keeping it in case stash scraper gets updated in the future, + # if you want all tags, scrape directly using https://www..com/scenes/_vids.html + Tags: + Name: + selector: //base/@href | $title + concat: "__SEP__" + postProcess: + - replace: + - regex: __SEP__ + with: "scenes/" + - replace: + - regex: ' ' + with: '-' + - replace: + - regex: \' + with: '' + - replace: + - regex: '$' + with: '_vids.html' + - subScraper: + selector: //div[3]/div/div[4]/div[2]/a + +# Last Updated December 17, 2024 \ No newline at end of file diff --git a/scrapers/RealityStudio.yml b/scrapers/RealityStudio.yml new file mode 100644 index 000000000..2050de6cb --- /dev/null +++ b/scrapers/RealityStudio.yml @@ -0,0 +1,77 @@ +name: RealityStudio +sceneByURL: + - action: scrapeXPath + url: + - cumcountdown.com/gallery.html + - femaleworship.com/gallery.html + - goddesskitra.com/gallery.html + - menareslaves.com/gallery.html + - subbygirls.com/gallery.html + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: + selector: //div[@id='galleryname']/text() + postProcess: + - replace: + - regex: '\d{2}/\d{2}/\d{2}$' + with: '' + - replace: + - regex: '     ' + with: ' ' + Details: + selector: //div[@id='galleryname']/text() + postProcess: + - replace: + - regex: '\d{2}/\d{2}/\d{2}$' + with: '' + - replace: + - regex: '^#\d+' + with: "" + Performers: + Name: + selector: //div[@id='galleryname'][1]/text() + postProcess: + - replace: + - regex: '\d{2}/\d{2}/\d{2}$' + with: '' + - 
replace: + - regex: '^#\d+' + with: "" + split: ', ' + Date: + selector: //div[@id='galleryname']/text() + postProcess: + - replace: + - regex: '([A-Za-z]+(?:\s[A-Za-z]+)?(?:,\s*[A-Za-z]+(?:\s[A-Za-z]+)?)*)' + with: "" + - replace: + - regex: '^#\d+' + with: "" + - parseDate: 01/02/06 + Studio: + Name: //title/text() + Image: + selector: //div[@id="footer"]/p/img/@src | //*[@id="gallerycontainer"]/center/div[5]/img/@src + concat: " " + postProcess: + - replace: + - regex: (images\/.+\s) + with: "" + Code: + selector: //div[@id='galleryname'][1]/text() + postProcess: + - replace: + - regex: '([A-Za-z]+(?:\s[A-Za-z]+)?(?:,\s*[A-Za-z]+(?:\s[A-Za-z]+)?)*)' + with: "" + - replace: + - regex: '\d{2}/\d{2}/\d{2}$' + with: "" + Tags: + Name: + selector: //*[@id="galleryname"][2]/text() + split: ', ' +driver: + useCDP: true +# Last Updated December 17, 2024 \ No newline at end of file diff --git a/scrapers/StasyQVR.yml b/scrapers/StasyQVR.yml index c87765f2d..d66230b12 100644 --- a/scrapers/StasyQVR.yml +++ b/scrapers/StasyQVR.yml @@ -4,6 +4,18 @@ sceneByURL: url: - stasyqvr.com/virtualreality scraper: sceneScraper +sceneByFragment: + action: scrapeXPath + queryURL: https://stasyqvr.com/virtualreality/scene/id/{filename} + queryURLReplace: + filename: + - regex: ^(StasyQVR)_([^_]+)_(\d+)_(\d+)_([A-Z]+)_(\d+)\.([a-zA-Z0-9]+)$ # official site file naming scheme: StasyQVR_example scene name_2880_999_LR_180.mp4 # StasyQVR_scene name_resolution_code_LR_180.mp4 + with: $4-$2 + - regex: " " + with: '_' + - regex: "'" + with: '' + scraper: sceneScraper xPathScrapers: sceneScraper: scene: @@ -30,4 +42,4 @@ xPathScrapers: - replace: - regex: '^background-image: url\(|\);$' with: "" -# Last Updated October 20, 2023 \ No newline at end of file +# Last Updated December 17, 2024