amazon.py
import csv

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager


class CrawledArticle:
    """Container for one scraped title/price pair."""

    def __init__(self, title, price):
        self.title = title
        self.price = price
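
# As a sketch, the same container could use the standard library's dataclass
# decorator instead of a hand-written __init__ (purely optional):
#
# from dataclasses import dataclass
#
# @dataclass
# class CrawledArticle:
#     title: str
#     price: str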


class Bot:
    def article(self, name):
        count = 1           # index of the result card within the current page
        page = 1            # current search-results page
        pageIncrement = 10  # result cards scraped per page
        maxRetrieves = 30   # stop after this many results in total

        # Open the CSV up front so each result is written as soon as it is
        # scraped; newline='' prevents blank rows on Windows.
        file = open("result.csv", "w", newline="")
        writer = csv.writer(file)
        writer.writerow(["title", "price"])

        url = 'https://www.amazon.com/s?k=' + name + '&page=' + str(page)
        result = []

        options = Options()
        # Keep the browser window open after the script finishes.
        options.add_experimental_option('detach', True)
        # Headless mode is off by default; the deprecated options.headless
        # flag is no longer needed. Selenium 4 also no longer accepts the
        # driver path positionally, so it is wrapped in a Service object.
        browser = webdriver.Chrome(service=Service(ChromeDriverManager().install()),
                                   options=options)
        browser.maximize_window()
        # Applies to this and all subsequent page loads.
        browser.set_page_load_timeout(10)
        browser.get(url)
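
        # Sketch of an explicit wait before scraping, rather than relying on
        # the page-load timeout alone. The "s-main-slot" class is an assumption
        # about Amazon's current result-grid markup, not verified here:
        # from selenium.webdriver.support.ui import WebDriverWait
        # from selenium.webdriver.support import expected_conditions as EC
        # WebDriverWait(browser, 10).until(
        #     EC.presence_of_element_located((By.CSS_SELECTOR, 'div.s-main-slot')))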
        while True:
            try:
                if pageIncrement * page > maxRetrieves:
                    break
                if count > pageIncrement:
                    count = 1
                    page += 1
                # Absolute XPaths are brittle and tied to Amazon's current markup.
                xPathTitle = ('/html/body/div[1]/div[2]/div[1]/div[1]/div/span[3]/div[2]/div['
                              + str(count) + ']/div/div/div/div/div/div[2]/div/div/div[1]/h2/a/span')
                title = browser.find_element(By.XPATH, xPathTitle)
                xPathLink = ('//*[@id="search"]/div[1]/div[1]/div/span[3]/div[2]/div['
                             + str(count) + ']/div/div/div/div/div/div[2]/div/div/div[1]/h2/a')
                link = browser.find_element(By.XPATH, xPathLink)
                titleText = title.get_attribute("innerHTML")
                # Open the product page to read its price.
                link.click()
                xPathPrice = ('/html/body/div[1]/div[2]/div[10]/div[6]/div[1]/div[4]/div/div/div'
                              '/form/div/div/div/div/div[2]/div[1]/div/span/span[1]')
                price = browser.find_element(By.XPATH, xPathPrice)
                priceText = price.get_attribute("innerHTML")
                # Return to the search results before scraping the next card.
                url = 'https://www.amazon.com/s?k=' + name + '&page=' + str(page)
                browser.get(url)
                info = CrawledArticle(titleText, priceText)
                result.append(info)
                count += 1
                writer.writerow([titleText, priceText])
            except Exception as e:
                # A card whose layout does not match the XPaths above is skipped.
                print('Exception', e)
                count += 1
                if pageIncrement * page > maxRetrieves:
                    break
                if count > pageIncrement:
                    count = 1
                    page += 1
                url = 'https://www.amazon.com/s?k=' + name + '&page=' + str(page)
                browser.get(url)

        # Close the CSV so buffered rows are flushed to disk.
        file.close()
        browser.close()
        return result


fetcher = Bot()
fetcher.article('iPhone 13')

# Alternative: buffer the results and write them to CSV in a single pass.
# with open('results.csv', 'w', newline='', encoding='utf-8') as csvfile:
#     articleWriter = csv.writer(csvfile, delimiter=';', quotechar='"', quoting=csv.QUOTE_MINIMAL)
#     for article in fetcher.article('iPhone 13'):
#         articleWriter.writerow([article.title, article.price])
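
# A more change-tolerant approach, as a sketch: locate result cards via
# Amazon's data-component-type attribute rather than absolute XPaths. The
# attribute and the inner selectors are assumptions about the current markup
# and may need adjusting:
#
# def titles_on_page(browser):
#     cards = browser.find_elements(
#         By.CSS_SELECTOR, 'div[data-component-type="s-search-result"]')
#     return [card.find_element(By.CSS_SELECTOR, 'h2 a span').text
#             for card in cards]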