Merge pull request #2136 from jsdb46/master

Add support for RealityStudio sites, cruelbrats, authorityangels and update clubdom
stashapp · Dec 18, 2024 · 20cbd0b · 20cbd0b
2 parents 5a66a6b + a4b9fc8
commit 20cbd0b
Show file tree

Hide file tree

Showing 6 changed files with 302 additions and 3 deletions.
diff --git a/scrapers/AuthorityAngels.yml b/scrapers/AuthorityAngels.yml
@@ -0,0 +1,42 @@
+name: 'Authority Angels'
+sceneByURL:
+  # Scene pages look like https://authorityangels.com/updates/<scene-id>
+  - scraper: sceneScraper
+    action: scrapeXPath
+    url:
+      - 'authorityangels.com/updates/'
+xPathScrapers:
+  sceneScraper:
+    scene:
+      Studio:  # fixed value, not read from the page
+        Name:
+          fixed: "Authority Angels"
+      Title:
+        selector: '//div[2]/div/div/h1/text()'
+      Date:
+        selector: '//div[2]/div/div/div[1]/ul/li[3]/text()'
+        postProcess:
+          - parseDate: 'Jan 02, 2006'
+      Details:
+        selector: '//div[2]/div/div/div[3]/p[2]/text()'
+      Performers:
+        Name:
+          selector: '//div[2]/div/div/div[4]/ul/li/a'
+      Tags:
+        Name:
+          selector: '//div[2]/div/div/div[5]/ul/li/a'
+      Image:  # the regex keeps only group 2: the /content/thumbs/...png URL found inside the iframe src
+        selector: "//iframe[@class='cloudflare-player']/@src"
+        postProcess:
+          - replace:
+              - regex: '^(.*?)(https://authorityangels\.com/content/thumbs/\d+/[\w/-]+\.png)(.*)$'
+                with: '$2'
+driver:
+  cookies:  # NOTE(review): presumably pre-dismisses the site's warning popup — confirm
+    - CookieURL: 'https://authorityangels.com'
+      Cookies:
+        - Name: 'warningpopup'
+          Value: 'true'
+          Domain: 'authorityangels.com'
+          Path: '/'
+# Last Updated December 15, 2024
diff --git a/scrapers/ClubDom.yml b/scrapers/ClubDom.yml
@@ -40,7 +40,7 @@ xPathScrapers:
       Details:
         selector: //strong[contains(.,"Description:")]/following-sibling::text()
       Image:
-        selector: //div[@class="videoplayer"]/img//@src0_1x
+        selector: //div[@class='videoplayer']/img/@src0_3x | //div[@class='videoplayer']/img/@src0_2x | //div[@class='videoplayer']/img/@src0_1x
         postProcess:
           - replace:
               - regex: ^
@@ -87,4 +87,4 @@ xPathScrapers:
           - replace:
               - regex: ^
                 with: "https:"
-# Last Updated August 16, 2024
+# Last Updated December 15, 2024
diff --git a/scrapers/CruelBrats.yml b/scrapers/CruelBrats.yml
@@ -0,0 +1,47 @@
+name: 'CruelBrats'
+sceneByURL:
+  - scraper: sceneScraper  # defined under xPathScrapers
+    action: scrapeXPath
+    url:
+      - 'cruelbrats.com/'  # domain only, no path component
+xPathScrapers:
+  sceneScraper:
+    scene:
+      Title:
+        selector: //div[@class="info-box-pink"]//h7/text()
+        postProcess:
+          - javascript: |  # literal block keeps the script's line breaks; title-cases each space-separated word
+              if (value && value.length) {
+                let words = value.split(' ');
+                for (let i = 0; i < words.length; i++) {
+                  words[i] = words[i].charAt(0).toUpperCase() + words[i].substring(1).toLowerCase();
+                }
+                return words.join(' ');
+              }
+      Details: //div[@class="info-box-white"]//h3/text()[1]
+      Performers:
+        Name:
+          selector: //div[7]/div/div/h7
+          postProcess:
+            - replace:  # strip "MORE CONTENT FROM" (any case) and the single character following it
+                - regex: '(?i)MORE CONTENT FROM.'
+                  with: ""
+      Date:
+        selector: //div[5]//h3/text()[2]
+        postProcess:
+          - replace:  # keep only the "DD Mon YYYY" part that follows "Added:"
+              - regex: 'Added:\s(\d{2}\s\w{3}\s\d{4})(.*)'
+                with: "$1"
+          - parseDate: "02 Jan 2006"
+      Studio:
+        Name:
+          fixed: 'Cruel Brats'
+      Tags:
+        Name: //div[5]//div/h3//a/text()
+      Image:
+        selector: //div[2]/div/video[contains(@poster,'images/Backgrounds/')]/@poster
+        postProcess:
+          - replace:  # "^" matches the empty start of the value, so this prepends the site origin
+              - regex: ^
+                with: https://cruelbrats.com/
+# Last Updated October 06, 2024
diff --git a/scrapers/PurePass.yml b/scrapers/PurePass.yml
@@ -0,0 +1,121 @@
+name: 'Pure Pass'
+sceneByURL:
+  # Scene pages: https://www.<site>.com/scenes/<scene-name>_vids.html
+  - scraper: purepass_scenes
+    action: scrapeXPath
+    url:
+      - 'amateurcfnm.com/scenes/'
+      - 'cfnmgames.com/scenes/'
+      - 'girlsabuseguys.com/scenes/'
+      - 'ladyvoyeurs.com/scenes/'
+      - 'littledick.club/scenes/'
+      - 'purecfnm.com/scenes/'
+  # Update pages: https://www.<site>.com/updates/<Scene-name>.html
+  - scraper: purepass_updates
+    action: scrapeXPath
+    url:
+      - 'amateurcfnm.com/updates/'
+      - 'cfnmgames.com/updates/'
+      - 'girlsabuseguys.com/updates/'
+      - 'ladyvoyeurs.com/updates/'
+      - 'littledick.club/updates/'
+      - 'purecfnm.com/updates/'
+xPathScrapers:
+  purepass_scenes:
+    scene:
+      Studio:  # the page's <base href> is mapped to a studio name
+        Name:
+          selector: '//base/@href'
+          postProcess:
+            - map:
+                https://www.amateurcfnm.com/: 'Amateur CFNM'
+                https://www.cfnmgames.com/: 'CFNM Games'
+                https://www.girlsabuseguys.com/: 'Girls Abuse Guys'
+                https://www.ladyvoyeurs.com/: 'Lady Voyeurs'
+                https://littledick.club/: 'Little Dick Club'
+                https://www.purecfnm.com/: 'Pure CFNM'
+      Title:
+        selector: '//div[3]/div/div[2]/div[1]/span/text()'
+      Date:
+        selector: '//div[3]/div/div[4]/div[1]/div/div[1]/text()'
+        postProcess:
+          - parseDate: 'January 02, 2006'
+      Details:
+        selector: '//div[3]/div/div[4]/span[2]/text()'
+      Performers:
+        Name:
+          selector: '//div[3]/div/div[4]/span[1]/a'
+      Tags:
+        Name:
+          selector: '//div[3]/div/div[4]/div[2]/a'
+      Image:  # the href selected here is passed to the sub-scraper, whose selector results are joined and trimmed to one image URL
+        selector: '//div[3]/div/div[2]/div[2]/div[1]/div[3]/a/@href'
+        subScraper:
+          selector: '//base/@href | //*[contains(@id,"set-target")]/@data-src0_3x | //base/@href | //*[contains(@id,"set-target")]/@data-src0_2x | //base/@href | //*[contains(@id,"set-target")]/@data-src0_1x'
+          concat: '__SEP__'
+          postProcess:
+            - replace:  # remove the separator together with the slash that follows it
+                - regex: '__SEP__/'
+                  with: ''
+            - replace:  # keep group 1 (the first full thumbnail URL); drop any further thumbnail paths appended after it
+                - regex: '(https://[^/]+(?:/tour)?/content//contentthumbs/\d+/\d+/[\w\d-]+-\dx\.jpg)((?:(?:tour/)?content//contentthumbs/\d+/\d+/[\w\d-]+-\dx\.jpg)*)'
+                  with: '$1'
+  purepass_updates:
+    common:
+      $title: '//div[3]/div[1]/div[2]/div[2]/div[1]/div/span[1]/text()'
+    scene:
+      Studio:  # the page's <base href> is mapped to a studio name
+        Name:
+          selector: '//base/@href'
+          postProcess:
+            - map:
+                https://www.amateurcfnm.com/: 'Amateur CFNM'
+                https://www.cfnmgames.com/: 'CFNM Games'
+                https://www.girlsabuseguys.com/: 'Girls Abuse Guys'
+                https://www.ladyvoyeurs.com/: 'Lady Voyeurs'
+                https://littledick.club/: 'Little Dick Club'
+                https://www.purecfnm.com/: 'Pure CFNM'
+      Title:
+        selector: '//div[3]/div[1]/div[2]/div[2]/div[1]/div/span[1]/text()'
+      Date:
+        selector: '//div[3]/div[1]/div[2]/div[2]/div[1]/div/span[3]'
+        postProcess:
+          - parseDate: 'January 02, 2006'
+      Details:
+        selector: '//div[3]/div[1]/div[2]/div[2]/div[1]/div/span[4]/text()'
+      Performers:
+        Name:
+          selector: '//div[3]/div[1]/div[2]/div[2]/div[1]/div/span[2]/a'
+      Image:  # selector results are joined with __SEP__, then trimmed down to a single image URL
+        selector: '//base/@href | //*[contains(@id,"set-target")]/@data-src0_3x | //base/@href | //*[contains(@id,"set-target")]/@data-src0_2x | //base/@href | //*[contains(@id,"set-target")]/@data-src0_1x'
+        concat: '__SEP__'
+        postProcess:
+          - replace:  # remove the separator together with the slash that follows it
+              - regex: '__SEP__/'
+                with: ''
+          - replace:  # keep group 1 (the first full thumbnail URL) only
+              - regex: '(https://[^/]+(?:/tour)?/content//contentthumbs/\d+/\d+/[\w\d-]+-\dx\.jpg)((?:(?:tour/)?content//contentthumbs/\d+/\d+/[\w\d-]+-\dx\.jpg)*)'
+                with: '$1'
+      # Tags scraped through an /updates/ URL come back as a single tag because of the sub-scraper; kept in case stash improves this.
+      # For the full tag list, scrape https://www.<site>.com/scenes/<scene-name>_vids.html directly.
+      Tags:
+        Name:
+          selector: '//base/@href | $title'
+          concat: '__SEP__'
+          postProcess:
+            - replace:  # <base href> + "scenes/" + title
+                - regex: '__SEP__'
+                  with: 'scenes/'
+            - replace:  # spaces become dashes
+                - regex: ' '
+                  with: '-'
+            - replace:  # apostrophes are dropped
+                - regex: \'
+                  with: ''
+            - replace:  # "$" matches the end of the value, so this appends the suffix
+                - regex: '$'
+                  with: '_vids.html'
+            - subScraper:
+                selector: '//div[3]/div/div[4]/div[2]/a'
+
+# Last Updated December 17, 2024
diff --git a/scrapers/RealityStudio.yml b/scrapers/RealityStudio.yml
@@ -0,0 +1,77 @@
+name: 'RealityStudio'
+sceneByURL:
+  - scraper: sceneScraper
+    action: scrapeXPath
+    url:  # each entry targets the site's gallery.html page
+      - 'cumcountdown.com/gallery.html'
+      - 'femaleworship.com/gallery.html'
+      - 'goddesskitra.com/gallery.html'
+      - 'menareslaves.com/gallery.html'
+      - 'subbygirls.com/gallery.html'
+xPathScrapers:
+  sceneScraper:
+    scene:  # NOTE(review): the regexes below imply #galleryname text shaped like "#<number> <names> NN/NN/NN" — confirm
+      Title:
+        selector: "//div[@id='galleryname']/text()"
+        postProcess:
+          - replace:  # strip a trailing NN/NN/NN date
+              - regex: '\d{2}/\d{2}/\d{2}$'
+                with: ''
+          - replace:  # collapse each run of five spaces into one
+              - regex: '     '
+                with: ' '
+      Details:
+        selector: "//div[@id='galleryname']/text()"
+        postProcess:
+          - replace:  # strip a trailing NN/NN/NN date
+              - regex: '\d{2}/\d{2}/\d{2}$'
+                with: ''
+          - replace:  # strip a leading "#<digits>"
+              - regex: '^#\d+'
+                with: ''
+      Performers:
+        Name:
+          selector: "//div[@id='galleryname'][1]/text()"
+          split: ', '
+          postProcess:
+            - replace:  # strip a trailing NN/NN/NN date
+                - regex: '\d{2}/\d{2}/\d{2}$'
+                  with: ''
+            - replace:  # strip a leading "#<digits>"
+                - regex: '^#\d+'
+                  with: ''
+      Date:
+        selector: "//div[@id='galleryname']/text()"
+        postProcess:
+          - replace:  # remove the comma-separated runs of letters
+              - regex: '([A-Za-z]+(?:\s[A-Za-z]+)?(?:,\s*[A-Za-z]+(?:\s[A-Za-z]+)?)*)'
+                with: ''
+          - replace:  # strip a leading "#<digits>"
+              - regex: '^#\d+'
+                with: ''
+          - parseDate: '01/02/06'
+      Studio:
+        Name: '//title/text()'
+      Image:  # both src values are joined with a space, then the "images/..." part up to the last whitespace is removed
+        selector: '//div[@id="footer"]/p/img/@src | //*[@id="gallerycontainer"]/center/div[5]/img/@src'
+        concat: ' '
+        postProcess:
+          - replace:
+              - regex: '(images\/.+\s)'
+                with: ''
+      Code:
+        selector: "//div[@id='galleryname'][1]/text()"
+        postProcess:
+          - replace:  # remove the comma-separated runs of letters
+              - regex: '([A-Za-z]+(?:\s[A-Za-z]+)?(?:,\s*[A-Za-z]+(?:\s[A-Za-z]+)?)*)'
+                with: ''
+          - replace:  # strip a trailing NN/NN/NN date
+              - regex: '\d{2}/\d{2}/\d{2}$'
+                with: ''
+      Tags:
+        Name:
+          selector: '//*[@id="galleryname"][2]/text()'
+          split: ', '
+driver:
+  useCDP: true
+# Last Updated December 17, 2024
diff --git a/scrapers/StasyQVR.yml b/scrapers/StasyQVR.yml
@@ -4,6 +4,18 @@ sceneByURL:
     url:
       - stasyqvr.com/virtualreality
     scraper: sceneScraper
+sceneByFragment:
+  scraper: sceneScraper
+  action: scrapeXPath
+  queryURL: 'https://stasyqvr.com/virtualreality/scene/id/{filename}'
+  queryURLReplace:
+    filename:  # official file naming: StasyQVR_<scene name>_<resolution>_<code>_LR_180.mp4, e.g. StasyQVR_example scene name_2880_999_LR_180.mp4
+      - regex: '^(StasyQVR)_([^_]+)_(\d+)_(\d+)_([A-Z]+)_(\d+)\.([a-zA-Z0-9]+)$'
+        with: '$4-$2'  # group 4, a dash, then group 2
+      - regex: ' '
+        with: '_'
+      - regex: "'"
+        with: ''
 xPathScrapers:
   sceneScraper:
     scene:
@@ -30,4 +42,4 @@ xPathScrapers:
           - replace:
               - regex: '^background-image: url\(|\);$'
                 with: ""
-# Last Updated October 20, 2023
+# Last Updated December 17, 2024