forked from desecsecurity/parsing_html_bash
-
Notifications
You must be signed in to change notification settings - Fork 0
/
parsing_html.sh
executable file
·300 lines (254 loc) · 10.4 KB
/
parsing_html.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
#!/usr/bin/env bash
################################################################################
# Titulo : Parsing HTML #
# Versao : 1.7 #
# Data : 16/10/2019 #
# Homepage : https://www.desecsecurity.com #
# Tested on : MacOS/Linux #
################################################################################
# ==============================================================================
# Constants
# ==============================================================================
# ANSI color sequences. They are stored as literal backslash text (single
# quotes) and only turn into real escape characters when they pass through a
# printf format string or `echo -e`.
RED='\033[31;1m'
GREEN='\033[32;1m'
BLUE='\033[34;1m'
YELLOW='\033[33;1m'
RED_BLINK='\033[31;5;1m'
END='\033[m'
# Copies of the first two command-line arguments. They are captured here, at
# top level, because inside a function $1/$2 refer to that function's own
# arguments and the script's values would no longer be reachable.
ARG01=$1
ARG02=$2
# Program version, shown in the banner and by -v/--version.
VERSION='1.7'
# Run __Ctrl_c__ when the script is interrupted with [Ctrl]+[c] (SIGINT).
trap __Ctrl_c__ INT
# ==============================================================================
# SIGINT handler, run when the user presses Ctrl+C.
# Calls __Clear__ to drop the temporary work directory, prints the "aborted"
# notice wrapped in RED_BLINK/END and ends the script with exit status 1.
# ==============================================================================
__Ctrl_c__() {
  __Clear__
  # %b expands the backslash escapes stored in the color constants.
  printf '\n%b\n\n' "${RED_BLINK}Ação abortada!${END}"
  exit 1
}
# ==============================================================================
# Program banner
# Prints the yellow title box (with the current VERSION) followed by a short
# usage summary that refers to the script by the name it was invoked with ($0).
# Globals: YELLOW, GREEN, END, VERSION (read)
# Outputs: banner and usage text on stdout
# ==============================================================================
__Banner__() {
  local -r rule="${YELLOW}################################################################################${END}"
  local -r edge="${YELLOW}# #${END}"
  # One array element per output line; an empty element is a blank line.
  local -a rows=(
    ""
    "$rule"
    "$edge"
    "${YELLOW}# PARSING HTML #${END}"
    "${YELLOW}# Desec Security #${END}"
    "${YELLOW}# Version $VERSION #${END}"
    "$edge"
    "$rule"
    ""
    "Usage : ${GREEN}$0${END} [OPTION] [URL]"
    "Example : ${GREEN}$0${END} www.site.com"
    ""
    "Try ${GREEN}$0 -h${END} for more options."
    ""
  )
  local row
  for row in "${rows[@]}"; do
    # %b expands backslash escapes in the argument the same way `echo -e` does.
    printf '%b\n' "$row"
  done
}
# ==============================================================================
# Help menu
# Prints a man-page style help text (NAME, SYNOPSIS, DESCRIPTION, OPTIONS) on
# stdout, referring to the script by the name it was invoked with ($0).
#
# The script name is passed to printf as a %s argument instead of being pasted
# into the format string, so a path containing '%' or '\' is printed verbatim
# rather than being interpreted as a conversion or an escape sequence.
# The single space that starts every line after the first is kept so the
# output stays identical to previous versions.
# ==============================================================================
__Help__() {
  local -r prog=$0
  printf '\nNAME\n'
  printf ' \t%s - Software para procura de links em páginas web.\n' "$prog"
  printf ' \nSYNOPSIS\n'
  printf ' \t%s [Options] [URL]\n' "$prog"
  printf ' \nDESCRIPTION\n'
  printf ' \tO %s é usado para procurar links em páginas web e verificar se existem \n' "$prog"
  printf ' \thosts vivos.\n'
  printf ' \nOPTIONS\n'
  printf ' \t-h, --help\n'
  printf ' \t\tMostra o menu de ajuda.\n\n'
  printf ' \t-v, --version\n'
  printf ' \t\tMostra a versão do programa.\n\n'
  printf ' \t-f, --file\n'
  printf ' \t\tProcura links no arquivo informado.\n\n'
  printf ' \t\tEx: %s -f file.txt\n\n' "$prog"
}
# ==============================================================================
# Basic checks
# Aborts with exit status 1 when a required external program (wget, host) is
# missing, or when the script was started without a first argument (in which
# case the banner/usage text is shown).
#
# Programs are looked up with `command -v`, i.e. anywhere on PATH, instead of
# only at /usr/bin/<name>; otherwise a tool installed under another prefix
# (for example /usr/local/bin or /opt/homebrew/bin) is reported as missing.
# Globals: ARG01, RED, END (read)
# ==============================================================================
__Verification__() {
  local tool
  # Dependency check, in the same order as before: wget first, then host.
  for tool in wget host; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      printf '\nFaltando programa %b para funcionar.\n' "${RED}${tool}${END}"
      exit 1
    fi
  done
  # No argument given: show the banner and stop.
  if [[ -z "$ARG01" ]]; then
    __Banner__
    exit 1
  fi
}
# ==============================================================================
# Temporary file cleanup
# Recursively removes the work directory /tmp/1, discarding anything rm prints.
# NOTE(review): /tmp/1 is a fixed, predictable path shared by every run of the
# script; a mktemp-based directory would be safer but needs a coordinated
# change in the functions that create and enter it.
# ==============================================================================
__Clear__() {
  rm -rf -- /tmp/1 >/dev/null 2>&1
}
# ==============================================================================
# Page download
# Recreates the work directory /tmp/1, changes into it and downloads the URL
# held in ARG01 to a file named FILE. Exits with status 1 when the directory
# cannot be prepared or when wget fails.
#
# A directory under /tmp is used so the user's current directory is not
# littered with intermediate files.
# Globals: ARG01, GREEN, RED, END (read)
# ==============================================================================
__Download__() {
  __Clear__
  # Stop if the work directory cannot be created or entered; carrying on would
  # write FILE (and every later intermediate file) into the caller's directory.
  mkdir /tmp/1 || exit 1
  cd /tmp/1 || exit 1
  printf '\n%b\n\n' "${GREEN}[+] Download do site...${END}"
  # The URL is quoted so spaces and glob characters such as '?' or '*' reach
  # wget unchanged as a single argument.
  if wget -q -c --show-progress "$ARG01" -O FILE; then
    printf '\n%b\n\n' "${GREEN}[+] Download completo!${END}"
  else
    printf '\n%b\n\n' "${RED}[+] Falha no download!${END}"
    exit 1
  fi
}
# ==============================================================================
# Copy the input file into the temporary directory
# Used by -f/--file: validates the path held in ARG02, recreates the work
# directory /tmp/1, copies the file there under the name FILE and changes
# into that directory.
# Exits with status 1 when no file was given, the file does not exist, or the
# work directory / copy cannot be set up.
# Globals: ARG02, RED, END (read)
# ==============================================================================
__OpenFile__() {
  if [[ -z "$ARG02" ]]; then
    printf '\n%b\n\n' "${RED}!!! File required !!!${END}"
    exit 1
  elif [[ ! -e "$ARG02" ]]; then
    printf '\n%b\n' "${RED}!!! File not found !!!${END}"
    exit 1
  fi
  __Clear__
  mkdir /tmp/1 || exit 1
  # Quoted, and preceded by --, so paths containing spaces or starting with
  # '-' are copied correctly. The copy happens before cd so that a relative
  # path is still resolved against the caller's directory.
  cp -- "$ARG02" /tmp/1/FILE || exit 1
  cd /tmp/1 || exit 1
}
# ==============================================================================
# Link extraction
# Reads FILE from the current directory and writes the sorted, de-duplicated
# list of link candidates to the file `links`.
#
# Only portable tools are used (tr/grep/sort). The former `sed -i "s/ /\n/g"`
# relied on GNU extensions (-i without a suffix, \n in the replacement) and
# fails with BSD/macOS sed. As a consequence FILE is now only read, it is no
# longer rewritten in place.
# Files: FILE (read), .tmp1 (scratch), links (written)
# ==============================================================================
__FindLinks__() {
  # Put every space-separated token on its own line and keep only the tokens
  # that carry an 'href=' or 'action=' attribute.
  tr ' ' '\n' < FILE | grep -E '(href=|action=)' > .tmp1
  # Collect what sits between double quotes, then between apostrophes (two
  # separate passes over the same tokens), strip the quote characters, keep
  # only entries containing a dot and drop duplicates.
  {
    grep -o '"[^"]*"' .tmp1
    grep -o "'[^']*'" .tmp1
  } | tr -d "\"'" | grep '\.' | sort -u > links
}
# ==============================================================================
# Host extraction
# Reads the file `links` from the current directory and writes the sorted,
# de-duplicated list of host name candidates to the file `hosts`.
#
# Only portable tools are used (tr/awk/grep/sort); the former `sed -i` calls
# with \n in the replacement are GNU-only and fail with BSD/macOS sed.
# Files: links (read), links2 (written, split URLs), hosts (written)
# ==============================================================================
__FindHosts__() {
  # Split each URL so that links embedded in another URL become visible:
  # break the line at every '?', and start a new line in front of the last
  # two slashes of every '///'.
  tr '?' '\n' < links | awk '{ gsub("///", "\n//"); print }' > links2
  # Candidate hosts: the text following '//' (with and without a trailing
  # slash), plus anything running from 'ww' up to a known TLD-like suffix.
  # The patterns are intentionally unchanged from the original heuristics.
  {
    grep -o '//[^/]*/' links2
    grep -o '//[^/]*' links2
    grep -o 'ww.*\.br' links2
    grep -o 'ww.*\.net' links2
    grep -o 'ww.*\.gov' links2
    grep -o 'ww.*\.org[^.]' links2
    grep -o 'ww.*\.com[^.]' links2
  } | tr -d '/' | grep '\.' | sort -u > hosts
  # ^ slashes are removed, entries without a dot are dropped, duplicates merged.
}
# ==============================================================================
# Check and show live hosts
# Prints a title box, then resolves every entry of the file `hosts` with the
# `host` command and prints "<address><TAB><TAB><name>" for each
# "has address" line of its output. Results appear as each lookup finishes.
#
# Entries are read with `read -r` and passed to `host` quoted, so a backslash,
# '*' or '?' inside an entry is neither dropped nor expanded against the files
# of the work directory, and an entry is never split into several arguments.
# Globals: YELLOW, END (read)
# Files:   hosts (read)
# ==============================================================================
__LiveHosts__() {
  local name
  printf '\n'
  printf '%b\n' "${YELLOW}################################################################################${END}"
  printf '%b\n' "${YELLOW}# Hosts ativos #${END}"
  printf '%b\n' "${YELLOW}################################################################################${END}"
  printf '\n'
  while read -r name; do
    # Nothing to resolve on a blank line.
    [[ -n "$name" ]] || continue
    # Lookup errors are expected for dead hosts, hence stderr is discarded.
    host "$name" 2>/dev/null | awk '/has address/ { print $4 "\t\t" $1 }'
  done < hosts
}
# ==============================================================================
# Show the links that were found
# Prints a title box followed by the content of the file `links`, one entry
# per line, exactly as stored.
#
# Lines are read with `IFS= read -r` and written with printf '%s\n', so
# backslashes survive, glob characters ('*', '?') are not expanded against the
# work directory, and an entry such as '-n' is not taken as an echo option.
# Globals: YELLOW, END (read)
# Files:   links (read)
# ==============================================================================
__ShowLinks__() {
  local entry
  printf '\n'
  printf '%b\n' "${YELLOW}################################################################################${END}"
  printf '%b\n' "${YELLOW}# Links encontrados. #${END}"
  printf '%b\n' "${YELLOW}################################################################################${END}"
  printf '\n'
  while IFS= read -r entry; do
    printf '%s\n' "$entry"
  done < links
}
# ==============================================================================
# Show the hosts that were found
# Prints a title box followed by the content of the file `hosts`, one entry
# per line, exactly as stored.
#
# Lines are read with `IFS= read -r` and written with printf '%s\n', so
# backslashes survive, glob characters ('*', '?') are not expanded against the
# work directory, and an entry such as '-n' is not taken as an echo option.
# Globals: YELLOW, END (read)
# Files:   hosts (read)
# ==============================================================================
__ShowHosts__() {
  local entry
  printf '\n'
  printf '%b\n' "${YELLOW}################################################################################${END}"
  printf '%b\n' "${YELLOW}# Hosts encontrados. #${END}"
  printf '%b\n' "${YELLOW}################################################################################${END}"
  printf '\n'
  while IFS= read -r entry; do
    printf '%s\n' "$entry"
  done < hosts
}
# ==============================================================================
# Summary: number of links and hosts found
# Prints, between two yellow separator lines, the `wc -l` output for the files
# `links` and `hosts` (line count followed by the file name).
# Globals: YELLOW, END (read)
# Files:   links, hosts (read)
# ==============================================================================
__ShowResume__() {
  local -r separator="${YELLOW}================================================================================${END}"
  printf '\n%b\n\n' "$separator"
  # The label and the tab are printed without a newline so that wc's own
  # output completes each line.
  printf 'Found :\t'
  wc -l links
  printf '\t'
  wc -l hosts
  printf '\n%b\n\n' "$separator"
}
# ==============================================================================
# Main function of the program
# Runs the basic checks, then dispatches on the first command-line argument:
#   -v | --version   print the version and exit 0
#   -h | --help      print the help text and exit 0
#   -f | --file      analyse a local file (__OpenFile__)
#   anything else    treat the argument as a URL and download it (__Download__)
# Both analysis modes then run the same sequence: find and show links, find
# and show hosts, check live hosts, print the summary, clean up.
# Globals: ARG01, VERSION (read)
# ==============================================================================
__Main__() {
  __Verification__

  # Informational options end the program immediately.
  case $ARG01 in
    "-v" | "--version")
      printf '\nVersion: %s\n' "$VERSION"
      exit 0
      ;;
    "-h" | "--help")
      __Help__
      exit 0
      ;;
  esac

  # Pick the input source: local file or download.
  if [[ "$ARG01" == "-f" || "$ARG01" == "--file" ]]; then
    __OpenFile__
  else
    __Download__
  fi

  # Shared analysis pipeline, executed in this exact order.
  local step
  for step in \
    __FindLinks__ \
    __ShowLinks__ \
    __FindHosts__ \
    __ShowHosts__ \
    __LiveHosts__ \
    __ShowResume__ \
    __Clear__; do
    "$step"
  done
}
# ==============================================================================
# Program start
# ==============================================================================
# Called without arguments: __Main__ dispatches on the ARG01 global rather
# than on its own positional parameters.
__Main__