forked from RubensZimbres/Repo-2017
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Web-scrap.py
40 lines (36 loc) · 1.48 KB
/
Web-scrap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
# --- Indeed job-listing scraper -------------------------------------------
# Fetches "computer science" job postings for each city and writes one row
# per posting to jobs_report.csv (columns: city, job_title, company_name,
# location, summary).

max_results = 20                       # upper bound on the `start` offset per city
city_set = ['Los+Angeles', 'Austin']   # '+' is the URL encoding Indeed expects for spaces
columns = ["city", "job_title", "company_name", "location", "summary"]

records = []  # one fixed-length list per posting (was misleadingly named `df`)
for city in city_set:
    # Indeed paginates 10 results per page; stepping `start` by 1 (as the
    # original did) re-fetches overlapping pages and collects duplicates.
    for start in range(0, max_results, 10):
        page = requests.get(
            'https://www.indeed.com/jobs?q=computer+science&l='
            + city + '&start=' + str(start)
        )
        time.sleep(1)  # throttle so we don't hammer the server
        soup = BeautifulSoup(page.text, "lxml")
        for div in soup.find_all(name="div", attrs={"class": "row"}):
            # Extract exactly one value per column. The original appended a
            # variable number of fields per row (zero or many matches per
            # tag), producing ragged rows that break the DataFrame column
            # assignment whenever a posting lacks a location or summary.
            title_tag = div.find(name="a", attrs={"data-tn-element": "jobTitle"})
            # Fall back to the sponsored-result source span when there is
            # no plain company span (same fallback as the original).
            company_tag = (
                div.find(name="span", attrs={"class": "company"})
                or div.find(name="span", attrs={"class": "result-link-source"})
            )
            location_tag = div.find(name="span", attrs={"class": "location"})
            summary_tag = div.find(name="div", attrs={"class": "summary"})
            records.append([
                city,
                title_tag["title"] if title_tag else None,
                company_tag.text.strip() if company_tag else None,
                location_tag.text.strip() if location_tag else None,
                summary_tag.text.strip() if summary_tag else None,
            ])

# Every row now has exactly len(columns) fields, so the constructor can
# take the column names directly instead of assigning them afterwards.
df00 = pd.DataFrame(records, columns=columns)
df00.to_csv("jobs_report.csv", index=False)