forked from pzaino/thecrowler
-
Notifications
You must be signed in to change notification settings - Fork 0
/
config.default
59 lines (52 loc) · 1.86 KB
/
config.default
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# This is a sample configuration file for the CROWler: a generic configuration
# that should work for most small to medium deployments. You can use it as a
# starting point for your own configuration file.
# Connection settings for the CROWler's PostgreSQL database.
#
# The ${...} placeholders are presumably replaced with environment variable
# values when this file is loaded (NOTE(review): confirm against the config
# loader). They are single-quoted so that a substituted value containing YAML
# specials (" #", ": ", a leading "*", "&", "!" or "[", or an empty value) is
# still read as one plain string instead of being truncated, mistyped, or
# turned into null. A value that itself contains a single quote must have it
# doubled ('').
database:
  type: postgres
  host: '${POSTGRES_DB_HOST}'
  port: 5432
  user: '${CROWLER_DB_USER}'
  password: '${CROWLER_DB_PASSWORD}'
  dbname: SitesIndex
  # NOTE(review): "disable" presumably turns off TLS for the database
  # connection; confirm this is acceptable for your network before deploying.
  sslmode: disable
# The CROWler calls a website's entry-point URLs "sources", as in "sources of information".
# Crawling engine settings.
# The interval and delay values are expressions kept as strings; they are
# quoted to make the string type explicit. NOTE(review): the units of the
# numeric settings (presumably seconds/minutes) are not stated here; confirm
# against the CROWler documentation.
crawler:
  # Presumably captures a screenshot of each crawled source; verify.
  source_screenshot: true
  interval: 'random(random(5,15), random(45,75))'
  workers: 5
  delay: 'random(3,75)'
  timeout: 10
  maintenance: 60
# Settings for the CROWler API service.
api:
  port: 8080
  # Network interface IP the API listens on. 0.0.0.0 means "respond on all
  # available IPs"; replace it with the IP of the interface you want to use.
  host: '0.0.0.0'
  timeout: 10
  # Enables the API console: extra endpoints to check the system status and
  # to add/remove/update CROWler sources (i.e. website entry-point URLs).
  enable_console: true
  return_404: false
# Selenium instances available to the crawler (one list item per instance).
selenium:
  - # Browser type used by this Selenium instance: chrome or firefox.
    type: chrome
    # Leave empty if you have deployed CROWLER_VDIs.
    path: ""
    # Port the Selenium instance listens on.
    port: 4444
    # Network name or IP of your crowler_vdi container.
    host: crowler_vdi
    # Set to false when using CROWLER_VDIs.
    use_service: false
    # Set to disable when the CROWLER_VDIs are reached locally
    # (intranet / VPN / private LAN).
    sslmode: disable
# Where collected images are stored: storage backend type and target path.
image_storage:
  type: local
  path: '/app/data/images'
# Network information collection. Each sub-section has its own on/off switch
# ("enabled") and its own "timeout".
# NOTE(review): timeout units are presumably seconds; confirm against the
# CROWler documentation.
network_info:
  netlookup:
    enabled: true
    timeout: 15

  dns:
    enabled: true
    timeout: 15

  whois:
    enabled: true
    timeout: 15

  # This section is given a much larger timeout than the others.
  service_scout:
    enabled: true
    timeout: 1200

  # The only section switched off in this sample configuration.
  geolocation:
    enabled: false
    timeout: 15
# Debug verbosity level. NOTE(review): 0 is presumably the least verbose
# setting; confirm the accepted range against the CROWler documentation.
debug_level: 0