Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Chry007 committed Jan 10, 2019
0 parents commit 0cb7db2
Showing 1 changed file with 30 additions and 0 deletions.
30 changes: 30 additions & 0 deletions scraper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from lxml import html, etree
import requests

# Listing URL of the first result page; later pages append "<offset>-2".
BASE_URL = 'https://www.freelance.de/Projekte/K/IT-Entwicklung-Projekte/'
# Prefix that turns the relative project hrefs into absolute links.
SITE_ROOT = "https://www.freelance.de"
# Sentence the site shows once a page has no more projects; ends the crawl.
NO_RESULTS_MARKER = "Es wurden leider keine Projekte für Ihre Suchanfrage gefunden."
# Month names (as spelled on the site) a project must mention to be printed.
TARGET_MONTHS = ("April", "Mai", "Juni", "Juli")
# The offset in the page URL grows by this amount per page.
PAGE_STEP = 20
# Seconds to wait for the server; requests waits forever without a timeout.
REQUEST_TIMEOUT = 30


def mentions_target_month(text, months=TARGET_MONTHS):
    """Return True if *text* contains any of the given month names.

    Args:
        text: The text found next to a panel's clock icon, or None when the
            icon has no trailing text.
        months: Month names to look for (plain substring match).

    Returns:
        True when at least one month name occurs in *text*; False for None
        or empty text.
    """
    if not text:
        return False
    return any(month in text for month in months)


def page_url(offset):
    """Return the listing URL for the page starting at result *offset*."""
    if offset == 0:
        return BASE_URL
    return BASE_URL + str(offset) + '-2'


def print_matching_projects(tree):
    """Print title, absolute link and time text of every matching project.

    A project panel matches when the text following its clock icon mentions
    one of TARGET_MONTHS. Panels without a clock icon or without a project
    link are skipped instead of raising.

    Args:
        tree: Parsed lxml HTML tree of one listing page.
    """
    panels = tree.xpath("//div[contains(@class, 'panel-body single-profile clearfix')]")
    for panel in panels:
        # Relative XPath (".//") keeps the search inside this panel, so the
        # panel does not have to be serialised and re-parsed.
        clock_icons = panel.xpath(".//i[contains(@class, 'fa fa-clock-o')]")
        if not clock_icons:
            continue

        time_text = clock_icons[0].tail
        if not mentions_target_month(time_text):
            continue

        links = panel.xpath(".//a[contains(@id, 'project_link_')]")
        if not links:
            continue

        link = links[0]
        print(link.text, SITE_ROOT + link.attrib.get('href', ''), time_text)


def main():
    """Walk the listing pages in order and print the matching projects.

    Stops at the first page that contains NO_RESULTS_MARKER, or at the first
    HTTP error response (which would otherwise never contain the marker and
    keep the loop running forever).
    """
    offset = 0
    page_number = 1

    page = requests.get(page_url(offset), timeout=REQUEST_TIMEOUT)
    while page.ok and NO_RESULTS_MARKER not in page.content.decode("utf-8"):
        print("Page: " + str(page_number))
        print_matching_projects(html.fromstring(page.content))

        offset += PAGE_STEP
        page_number += 1
        page = requests.get(page_url(offset), timeout=REQUEST_TIMEOUT)


if __name__ == "__main__":
    main()



0 comments on commit 0cb7db2

Please sign in to comment.