Skip to content

Commit

Permalink
Modularized code, created "requestClient" to scrape content separately
Browse files Browse the repository at this point in the history
  • Loading branch information
Abhinav-ark committed Oct 9, 2024
1 parent ccd38cf commit a490285
Show file tree
Hide file tree
Showing 8 changed files with 251 additions and 178 deletions.
54 changes: 8 additions & 46 deletions cmd/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,20 @@ import (
"runtime"
)

// fetchHTML - fetches and parses HTML from the given URL
func fetchHTML(url string) string {
// Fetches and parses HTML from the given URL.
func fetchHTML(url string) (string, error) {
doc, err := soup.Get(url)

if err != nil {
fmt.Println("Error fetching the URL. Make sure you're connected to Amrita WiFi or VPN.")
return ""
return "", err
}
return doc

return doc, nil
}


// openBrowser - opens a URL in the default web browser
// Opens a URL in the default web browser.
func openBrowser(url string) {
var err error
switch runtime.GOOS {
Expand All @@ -33,44 +35,4 @@ func openBrowser(url string) {
if err != nil {
log.Fatal(err)
}
}

// Stack is a last-in, first-out collection of strings backed by a slice.
// The zero value is an empty stack ready for use.
type Stack struct {
	items []string
}

// NewStack returns a pointer to a new, empty Stack.
func NewStack() *Stack {
	return new(Stack)
}

// IsEmpty reports whether the stack currently holds no items.
func (s *Stack) IsEmpty() bool {
	return len(s.items) == 0
}

// Push places item on top of the stack.
func (s *Stack) Push(item string) {
	s.items = append(s.items, item)
}

// Peek returns the top item without removing it.
// It returns the empty string when the stack is empty.
func (s *Stack) Peek() string {
	if n := len(s.items); n > 0 {
		return s.items[n-1]
	}
	return ""
}

// Pop removes the top item from the stack and returns it.
// It returns the empty string when the stack is empty.
func (s *Stack) Pop() string {
	last := len(s.items) - 1
	if last < 0 {
		return ""
	}
	top := s.items[last]
	s.items = s.items[:last]
	return top
}

// stack is the package-level Stack instance shared by this package.
var stack = NewStack()
}
7 changes: 7 additions & 0 deletions cmd/model.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package cmd

// resource pairs the display name of a scraped entry with the href it
// links to.
type resource struct {
	// name is the text shown for the entry; path is the link target taken
	// from the entry's anchor.
	name, path string
}

151 changes: 151 additions & 0 deletions cmd/requestClient.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
package cmd

import (
"errors"
"github.com/anaskhan96/soup"
)

// htmlFetchErr is the error the request helpers in this file return when
// fetchHTML reports a failure for the requested page.
var htmlFetchErr error = errors.New("failed to fetch the HTML content")

// getCoursesReq downloads the course listing page at url and returns one
// resource per course entry found on it.
//
// Course entries are the div elements with class "artifact-title" inside the
// community-view container. For each entry, the text of its span becomes the
// resource name and the href of its anchor becomes the resource path.
//
// It returns htmlFetchErr when the page cannot be fetched and an error when
// the community-view container is not present. Entries that lack a span or
// an anchor are skipped.
func getCoursesReq(url string) ([]resource, error) {
	res, err := fetchHTML(url)
	if err != nil {
		return nil, htmlFetchErr
	}

	doc := soup.HTMLParse(res)
	div := doc.Find("div", "id", "aspect_artifactbrowser_CommunityViewer_div_community-view")
	// An unmatched Find reports the miss through Error; searching inside
	// such a result is not safe, so stop before touching it.
	if div.Error != nil {
		return nil, errors.New("No courses found on the page.")
	}

	subs := div.FindAll("div", "class", "artifact-title")
	subjects := make([]resource, 0, len(subs))

	for _, item := range subs {
		sub := item.Find("span")
		a := item.Find("a")
		// Skip malformed entries rather than reading from a missing node.
		if sub.Error != nil || a.Error != nil {
			continue
		}
		subjects = append(subjects, resource{name: sub.Text(), path: a.Attrs()["href"]})
	}

	return subjects, nil
}


// semChooseReq downloads the page at url and returns one resource per
// assessment link listed in its community-view container.
//
// When the container holds more than one <ul>, the second list is used;
// otherwise the only list is used. For each <li>, the text of the span
// inside its anchor becomes the resource name and the anchor's href becomes
// the resource path.
//
// It returns htmlFetchErr when the page cannot be fetched and an error when
// the container or its list is missing. List items that lack an anchor or a
// span are skipped.
func semChooseReq(url string) ([]resource, error) {
	res, err := fetchHTML(url)
	if err != nil {
		return nil, htmlFetchErr
	}

	doc := soup.HTMLParse(res)
	div := doc.Find("div", "id", "aspect_artifactbrowser_CommunityViewer_div_community-view")
	if div.Error != nil {
		return nil, errors.New("No assesments found on the page.")
	}

	ul := div.FindAll("ul")
	// Indexing ul without this check panics on a container with no lists.
	if len(ul) == 0 {
		return nil, errors.New("No assesments found on the page.")
	}

	list := ul[0]
	if len(ul) > 1 {
		list = ul[1]
	}
	li := list.FindAll("li")

	assesments := make([]resource, 0, len(li))

	for _, link := range li {
		a := link.Find("a")
		if a.Error != nil {
			continue
		}
		span := a.Find("span")
		if span.Error != nil {
			continue
		}
		assesments = append(assesments, resource{name: span.Text(), path: a.Attrs()["href"]})
	}

	return assesments, nil
}

// semTableReq downloads the page at url and returns one resource per
// semester link in the first <ul> of its community-view container.
//
// For each <li>, the text of the span inside its anchor becomes the resource
// name and the anchor's href becomes the resource path.
//
// It returns htmlFetchErr when the page cannot be fetched and an error when
// the container, its list, or any list items are missing. List items that
// lack an anchor or a span are skipped.
func semTableReq(url string) ([]resource, error) {
	res, err := fetchHTML(url)
	if err != nil {
		return nil, htmlFetchErr
	}

	doc := soup.HTMLParse(res)
	div := doc.Find("div", "id", "aspect_artifactbrowser_CommunityViewer_div_community-view")
	if div.Error != nil {
		return nil, errors.New("No semesters found on the page.")
	}

	ul := div.Find("ul")
	// An unmatched Find must not be searched further; report it the same
	// way as an empty list.
	if ul.Error != nil {
		return nil, errors.New("No semesters found on the page.")
	}

	li := ul.FindAll("li")
	if len(li) == 0 {
		return nil, errors.New("No semesters found on the page.")
	}

	semesters := make([]resource, 0, len(li))

	for _, link := range li {
		a := link.Find("a")
		if a.Error != nil {
			continue
		}
		span := a.Find("span")
		if span.Error != nil {
			continue
		}
		semesters = append(semesters, resource{name: span.Text(), path: a.Attrs()["href"]})
	}

	return semesters, nil
}

// yearReq resolves the file listing reachable from the page at url and
// returns one resource per file entry.
//
// It works in two steps. First it fetches url and follows the href of the
// first anchor in the first <li> of the first <ul> inside the div whose
// xmlns attribute is "http://di.tamu.edu/DRI/1.0/", prefixed with BASE_URL.
// Then, on that second page, it reads every div with class "file-wrapper"
// inside the div with class "file-list": the title attribute of the second
// span in the wrapper's nested div becomes the resource name, and the href
// of the anchor in the wrapper's first div becomes the resource path.
//
// It returns htmlFetchErr when either page cannot be fetched and an error
// when the expected navigation elements are missing. File wrappers that do
// not have the expected shape are skipped.
func yearReq(url string) ([]resource, error) {
	errNoFiles := errors.New("No files found on the page.")

	res, err := fetchHTML(url)
	if err != nil {
		return nil, htmlFetchErr
	}

	doc := soup.HTMLParse(res)
	div := doc.Find("div", "xmlns", "http://di.tamu.edu/DRI/1.0/")
	if div.Error != nil {
		return nil, errNoFiles
	}

	// Each lookup is checked before use: an unmatched Find must not be
	// searched or read from.
	ul := div.Find("ul")
	if ul.Error != nil {
		return nil, errNoFiles
	}
	li := ul.Find("li")
	if li.Error != nil {
		return nil, errNoFiles
	}
	anchor := li.Find("a")
	if anchor.Error != nil {
		return nil, errNoFiles
	}
	hyper := anchor.Attrs()["href"]

	url = BASE_URL + hyper
	page, err := fetchHTML(url)
	if err != nil {
		return nil, htmlFetchErr
	}

	doc = soup.HTMLParse(page)
	div = doc.Find("div", "class", "file-list")
	if div.Error != nil {
		return nil, errNoFiles
	}

	subdiv := div.FindAll("div", "class", "file-wrapper")
	files := make([]resource, 0, len(subdiv))

	for _, item := range subdiv {
		title := item.FindAll("div")
		// title[0] and title[1] are both needed below.
		if len(title) < 2 {
			continue
		}
		indiv := title[1].Find("div")
		link := title[0].Find("a")
		if indiv.Error != nil || link.Error != nil {
			continue
		}
		span := indiv.FindAll("span")
		// The file name is carried by the second span.
		if len(span) < 2 {
			continue
		}
		files = append(files, resource{name: span[1].Attrs()["title"], path: link.Attrs()["href"]})
	}

	return files, nil
}
35 changes: 13 additions & 22 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package cmd
import (
"fmt"
"os"
"github.com/anaskhan96/soup"

"github.com/spf13/cobra"
)

Expand All @@ -28,42 +28,33 @@ func Execute() {
// start function - equivalent to start() in Python
func start() {
fmt.Println("Fetching Courses...")
res := fetchHTML(COURSE_LIST_URL)
fmt.Println("Available Courses:")

// Check if the response is empty
if res == "" {
fmt.Println("Failed to fetch the HTML content. Exiting.")
subjects, err := getCoursesReq(COURSE_LIST_URL)

if err != nil {
fmt.Errorf(err.Error())
return
}

// Parse the HTML content using soup
doc := soup.HTMLParse(res)
div := doc.Find("div", "id", "aspect_artifactbrowser_CommunityViewer_div_community-view")

subs := div.FindAll("div","class","artifact-title")

for i, item := range subs {
sub := item.Find("span")
if sub.Error == nil {
fmt.Printf("%d.\t%s\n", i+1, sub.Text())
}
fmt.Println("Available Courses:")

for i, subject := range subjects {
fmt.Printf("%d.\t%s\n", i+1, subject.name)
}

// Option to quit.
fmt.Printf("%d.\tQuit\n", len(subs)+1)
fmt.Printf("%d.\tQuit\n", len(subjects)+1)

for {
var ch int
fmt.Printf("\nEnter your choice: ")
fmt.Scanln(&ch)

if ch > 0 && ch <= len(subs) {
a := subs[ch-1].Find("a")
path := a.Attrs()["href"]
if ch > 0 && ch <= len(subjects) {
path := subjects[ch-1].path
url := BASE_URL + path
semTable(url)
} else if ch == len(subs)+1 {
} else if ch == len(subjects)+1 {
fmt.Println("Goodbye!")
os.Exit(0)
} else {
Expand Down
44 changes: 9 additions & 35 deletions cmd/semChoose.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,63 +2,37 @@ package cmd

import (
"fmt"
"github.com/anaskhan96/soup"
)

func semChoose(url string) {
fmt.Println("Fetching assesments...")
params_url := url
res := fetchHTML(url) // Fetch the HTML content

// Check if the response is empty
if res == "" {
fmt.Println("Failed to fetch the HTML content. Exiting.")
return
}

// Parse the HTML content using soup
doc := soup.HTMLParse(res)
div := doc.Find("div", "id", "aspect_artifactbrowser_CommunityViewer_div_community-view")
assesments, err := semChooseReq(url)

if div.Error != nil {
fmt.Println("No assesments found on the page.")
if err != nil {
fmt.Errorf(err.Error())
return
}

ul := div.FindAll("ul")
li := ul[0].FindAll("li")

if len(ul)>1 {
li = ul[1].FindAll("li")
} else {
li = ul[0].FindAll("li")
}

// Display the found items
fmt.Printf("No\tSemesters\n")
for i, link := range li {
a := link.Find("a")
if a.Error == nil {
span := a.Find("span")
if span.Error == nil {
fmt.Printf("%d\t%s\n", i+1, span.Text()) // Extract the text from the span element
}
}
for i, assesment := range assesments {
fmt.Printf("%d\t%s\n", i+1, assesment.name) // Extract the text from the span element
}

// Option to add "Back"
fmt.Printf("%d\tBack\n", len(li)+1)

fmt.Printf("%d\tBack\n", len(assesments)+1)

for {
var ch int
fmt.Print("\nEnter your assesment: ")
fmt.Scanln(&ch)

if ch > 0 && ch <= len(li) {
url = BASE_URL + li[ch-1].Find("a").Attrs()["href"]
if ch > 0 && ch <= len(assesments) {
url = BASE_URL + assesments[ch-1].path
break
} else if ch == len(li)+1 {
} else if ch == len(assesments)+1 {
semTable(stack.Pop())
} else {
fmt.Println("Please enter a valid input!")
Expand Down
Loading

0 comments on commit a490285

Please sign in to comment.