added staff model and page

HackerSpace-PESU · Apr 21, 2024 · 1850018 · 1850018
1 parent 2417294
commit 1850018
Show file tree

Hide file tree

Showing 2 changed files with 153 additions and 0 deletions.
diff --git a/pesuacademy/models/staff.py b/pesuacademy/models/staff.py
@@ -0,0 +1,27 @@
+class Staff:
+    def __init__(
+        self,
+        name: str,
+        designation: str,  
+        education: list,
+        experience: list,
+        campus: str,
+        department: str,
+        domains: list,
+        Responsibilities: list, 
+        mail : str
+    ):
+        self.name = name
+        self.designation = designation
+        self.education = education
+        self.experience = experience
+        self.department = department
+        self.campus = campus
+        self.domains = domains
+        self.Responsibilities = Responsibilities
+        self.mail = mail
+
+    def __str__(self):
+        return f"{self.__dict__}"
+
+
diff --git a/pesuacademy/pages/staff.py b/pesuacademy/pages/staff.py
@@ -0,0 +1,126 @@
+import time
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+import requests
+from bs4 import BeautifulSoup
+from ..models.staff import Staff  
+
+
+class StaffPageHandler:
+    @staticmethod
+    def get_staff_details() -> Staff:
+        try:
+            base_url = "https://staff.pes.edu/atoz/"
+            options = Options()
+            # options.add_argument("--disable-infobars")  
+            options.add_argument("--headless")
+            driver = webdriver.Chrome()  
+            for page_num in range(1, 23):
+                staff_url = f"{base_url}?page={page_num}"
+                response = requests.get(staff_url) 
+                soup=BeautifulSoup(response.text,"html.parser")    
+                staff_divs = soup.find_all('div', class_='staff-profile')
+                for staff_div in staff_divs:
+                    anchor_tag = staff_div.find('a', class_='geodir-category-img_item')
+                    if anchor_tag:
+                        base_url_single_staff="https://staff.pes.edu/"
+                        staff_url = anchor_tag['href']
+                        request_path = base_url_single_staff + staff_url[1:]
+                        driver.get(request_path)
+                        # time.sleep(3) 
+                        html = driver.page_source
+                        soup = BeautifulSoup(html, 'html.parser')
+                        PESU_STAFF=StaffPageHandler.get_details_from_url(request_path, driver)
+                        print(PESU_STAFF)
+                        # return PESU_STAFF
+
+
+        except Exception as e:
+            print(f"Error occurred: {e}")
+            raise ConnectionError("Unable to fetch staff data.")
+        finally:
+            driver.quit()
+
+    @staticmethod
+    def get_details_from_url(url, driver):
+        driver.get(url)
+        time.sleep(3) 
+
+        html = driver.page_source
+        soup = BeautifulSoup(html, 'html.parser')
+        #name
+        name_tag = soup.find('h4')
+        name = name_tag.text.strip() if name_tag else None
+        #domain
+        teaching_items = soup.select('#tab-teaching .bookings-item-content ul.ul-item-left li')
+        domains = [item.text.strip() for item in teaching_items]
+        #designation
+        designation=soup.find('h5')
+        designation = ' '.join(designation.text.split())
+        #Education
+        professor_education = []
+        education_section = soup.find('h3', string='Education')
+        if education_section:
+            education_list = education_section.find_next('ul', class_='ul-item-left').find_all('li')
+            education_details = [item.find('p').text.strip() for item in education_list]
+            for detail in education_details:
+                professor_education.append(detail)
+            # print(professor_education)
+            # print()
+        #Experience
+        professor_experience=[]
+        experience_section = soup.find('h3', string='Experience')
+        if experience_section:
+            experience_list = experience_section.find_next('ul', class_='ul-item-left').find_all('li')
+            experience_details = [item.find('p').text.strip() for item in experience_list]
+            for detail in experience_details:
+                professor_experience.append(detail)
+            # print(professor_experience)
+            # print()
+
+
+        #email
+        all_a_tags = soup.find_all("a")
+        email = [
+            tag for tag in all_a_tags
+            if "pes.edu" in tag.get("href", "") and "pes.edu" in tag.get_text()
+        ]
+        email=email[0].get_text()
+
+        #department
+        department_element = soup.find('li', class_='contat-card')
+        department_paragraph = department_element.find('p')
+        department = department_paragraph.get_text(strip=True)
+
+        #campus
+        campus_element=soup.find_all('li', class_='contat-card')[1]
+        campus_paragraph = campus_element.find('p')
+        campus=campus_paragraph.get_text(strip=True)
+
+
+        #responsibilities
+        responsibilities=[]
+        responsibilities_div=soup.find_all('div',class_="bookings-item-content fl-wrap")[3]
+        responsibilities_ul = responsibilities_div.findChild()
+        if responsibilities_ul:
+            responsibilities_li_elements=responsibilities_ul.find_all('li')
+            for li in responsibilities_li_elements:
+                responsibilities_paragraph=li.find('p')
+                responsibilities.append(responsibilities_paragraph.get_text(strip=True))
+
+        Pesu_Staff=Staff(name,designation,professor_education,professor_experience,campus,department,domains,responsibilities,email)
+        # Pesu_Staff.name=name
+        # Pesu_Staff.designation=designation
+        # Pesu_Staff.domains=domains
+        # Pesu_Staff.education=professor_education
+        # Pesu_Staff.experience=professor_experience
+        # Pesu_Staff.department=department
+        # Pesu_Staff.email=email
+        # pesu_staff.campus=campus
+        # Pesu_Staff.responsibilities=responsibilities
+        return Pesu_Staff
+
+