-
Notifications
You must be signed in to change notification settings - Fork 0
/
KBBIModel.php
90 lines (71 loc) · 2.81 KB
/
KBBIModel.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
<?php
namespace App\Models;
use CodeIgniter\Model;
use DOMDocument;
use DOMXPath;
/**
 * Looks up words on the online KBBI dictionary by downloading the entry page
 * and extracting headwords, definitions and Tesaurus links from its HTML.
 */
class KBBIModel extends Model
{
    /** Base URL of an entry page; the URL-encoded word is appended to it. */
    private const ENTRY_URL = 'https://kbbi.kemdikbud.go.id/entri/';

    /** Seconds allowed for establishing the connection. */
    private const CONNECT_TIMEOUT = 10;

    /** Seconds allowed for the whole HTTP transfer. */
    private const REQUEST_TIMEOUT = 30;

    /** Upper bound on HTTP redirects followed for a single request. */
    private const MAX_REDIRECTS = 5;

    protected $table = 'kbbi_entries';

    /**
     * Whether cURL verifies the server's TLS certificate and host name.
     *
     * Verification is on by default so the scraped page cannot be tampered
     * with in transit. Set to false only for an environment that has no usable
     * CA bundle.
     *
     * @var bool
     */
    protected $verifySsl = true;

    /**
     * Downloads the raw HTML of the entry page for a word.
     *
     * @param string $word Word to look up; it is URL-encoded before use.
     *
     * @return string Response body as returned by cURL.
     *
     * @throws \RuntimeException When the cURL handle cannot be created or the transfer fails.
     */
    private function _fetchHtml($word)
    {
        $ch = curl_init(self::ENTRY_URL . rawurlencode((string) $word));
        if ($ch === false) {
            throw new \RuntimeException('Error fetching HTML: unable to initialise cURL');
        }

        try {
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
            curl_setopt($ch, CURLOPT_MAXREDIRS, self::MAX_REDIRECTS);
            // Without timeouts a stalled server would block the request indefinitely.
            curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, self::CONNECT_TIMEOUT);
            curl_setopt($ch, CURLOPT_TIMEOUT, self::REQUEST_TIMEOUT);
            // 2 is the value that makes cURL check the certificate's host name.
            curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, $this->verifySsl ? 2 : 0);
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (bool) $this->verifySsl);

            $response = curl_exec($ch);
            if ($response === false || curl_errno($ch) !== 0) {
                throw new \RuntimeException('Error fetching HTML: ' . curl_error($ch));
            }

            return $response;
        } finally {
            // Release the handle on the error path as well as on success.
            curl_close($ch);
        }
    }

    /**
     * Collapses every run of white space into a single space and trims the ends.
     *
     * @param string $text Raw text taken from the document.
     *
     * @return string Normalised text.
     */
    private function _cleanText($text)
    {
        $text = (string) $text;

        // The "u" modifier matches the subject as UTF-8, so multi-byte
        // characters are never split and Unicode white space is collapsed too.
        $collapsed = preg_replace('/\s+/u', ' ', $text);
        if ($collapsed === null) {
            // preg_replace() yields null for invalid UTF-8; fall back to byte-wise matching.
            $collapsed = preg_replace('/\s+/', ' ', $text);
        }

        return trim((string) $collapsed);
    }

    /**
     * Fetches the entry page for a word and extracts its dictionary entries.
     *
     * Every <h2> directly inside the "body-content" div is treated as one
     * entry. The <li> items of the <ul>/<ol> siblings that follow the heading,
     * up to the next <h2> or <hr>, become that entry's definitions.
     *
     * @param string $word Word to look up.
     *
     * @return array|false List of entries, each an array with the keys 'lema'
     *                     (heading text), 'arti' (list of ['deskripsi' => string])
     *                     and 'tesaurusLink' (href or ''); false when the word is
     *                     empty, the page cannot be parsed or no entry is found.
     *
     * @throws \RuntimeException When the page cannot be downloaded.
     */
    public function searchWord($word)
    {
        $word = trim((string) $word);
        if ($word === '') {
            return false;
        }

        $htmlData = $this->_fetchHtml($word);
        // DOMDocument::loadHTML() rejects an empty string, so stop before parsing.
        if (!is_string($htmlData) || trim($htmlData) === '') {
            return false;
        }

        $doc = new DOMDocument();
        // Real-world HTML is rarely valid; collect parser warnings instead of
        // emitting them, then restore the caller's libxml setting.
        $previousSetting = libxml_use_internal_errors(true);
        try {
            $loaded = $doc->loadHTML($htmlData);
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousSetting);
        }
        if ($loaded === false) {
            return false;
        }

        $xpath = new DOMXPath($doc);
        if (!$xpath->query("//div[contains(@class, 'body-content')]")->item(0)) {
            return false;
        }

        $dataResponse = [];
        $headings = $xpath->query("//div[contains(@class, 'body-content')]/h2");
        foreach ($headings as $heading) {
            // Get Tesaurus link if it exists.
            // NOTE(review): this looks at every following <p> sibling, so it is
            // not limited to the current entry's section - confirm that is intended.
            $tesaurusLink = '';
            $tesaurusAnchor = $xpath
                ->query("following-sibling::p/a[contains(text(), 'Tesaurus')]", $heading)
                ->item(0);
            if ($tesaurusAnchor) {
                $tesaurusLink = $tesaurusAnchor->getAttribute('href');
            }

            $arti = [];
            for (
                $node = $heading->nextSibling;
                $node !== null && !in_array($node->nodeName, ['h2', 'hr'], true);
                $node = $node->nextSibling
            ) {
                if ($node->nodeName !== 'ul' && $node->nodeName !== 'ol') {
                    continue;
                }

                foreach ($xpath->query('.//li', $node) as $listItem) {
                    // Serialise the item and strip its tags to get the text.
                    // NOTE(review): the serialised form presumably keeps &, < and >
                    // as entities - confirm callers expect that.
                    $markup = (string) $listItem->C14N();
                    $deskripsi = $this->_cleanText(preg_replace('/<(?:.|\n)*?>/', '', $markup));
                    // Append rather than index by position, so a second list in
                    // the same entry cannot overwrite definitions from the first.
                    $arti[] = ['deskripsi' => $deskripsi];
                }
            }

            $dataResponse[] = [
                'lema' => $this->_cleanText($heading->textContent),
                'arti' => $arti,
                'tesaurusLink' => $tesaurusLink,
            ];
        }

        return $dataResponse !== [] ? $dataResponse : false;
    }
}