Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Web Scrapping Folder added #170

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 40 additions & 40 deletions C++/bubble_sort.cpp
Original file line number Diff line number Diff line change
@@ -1,41 +1,41 @@
#include<iostream>
using namespace std;
// Exchanges the values stored in the two referenced integers.
void swapping(int &a, int &b) {
    const int saved = b; // remember b before it is overwritten
    b = a;
    a = saved;
}
// Prints the first `size` elements of `array` to standard output, each
// followed by a single space, then terminates the line with endl.
void display(int *array, int size) {
    int idx = 0;
    while (idx < size) {
        std::cout << array[idx] << " ";
        ++idx;
    }
    std::cout << std::endl;
}
// Sorts the first `size` integers of `array` into ascending order, in place,
// using bubble sort. Each pass compares adjacent pairs and swaps those that
// are out of order; the routine stops as soon as a pass performs no swap.
void bubbleSort(int *array, int size) {
    for (int pass = 0; pass < size; ++pass) {
        bool swappedThisPass = false;
        // After `pass` passes, the last `pass` slots already hold their final values.
        const int unsortedEnd = size - pass - 1;
        for (int k = 0; k < unsortedEnd; ++k) {
            if (array[k + 1] < array[k]) {
                swapping(array[k], array[k + 1]);
                swappedThisPass = true;
            }
        }
        if (!swappedThisPass) {
            return; // a pass without swaps means the array is sorted
        }
    }
}
// Program entry point: prompts for a count and that many integers on stdin,
// prints them, sorts them with bubbleSort, and prints them again.
int main() {
int n;
cout << "Enter the number of elements: ";
cin >> n; // n is used below without checking that the read succeeded or that n is non-negative
int arr[n]; // array sized by the runtime value n; NOTE(review): variable-length arrays are a compiler extension, not standard C++
cout << "Enter elements:" << endl;
for(int i = 0; i<n; i++) {
cin >> arr[i];
}
cout << "Array before Sorting: ";
display(arr, n);
bubbleSort(arr, n); // sorts arr in place
cout << "Array after Sorting: ";
display(arr, n);
#include<iostream>
#include<vector>
using namespace std;
// Exchanges the values stored in the two referenced integers.
void swapping(int &a, int &b) {
    const int saved = b; // remember b before it is overwritten
    b = a;
    a = saved;
}
// Prints the first `size` elements of `array` to standard output, each
// followed by a single space, then terminates the line with endl.
void display(int *array, int size) {
    int idx = 0;
    while (idx < size) {
        std::cout << array[idx] << " ";
        ++idx;
    }
    std::cout << std::endl;
}
// Sorts the first `size` integers of `array` into ascending order, in place,
// using bubble sort. Each pass compares adjacent pairs and swaps those that
// are out of order; the routine stops as soon as a pass performs no swap.
void bubbleSort(int *array, int size) {
    for (int pass = 0; pass < size; ++pass) {
        bool swappedThisPass = false;
        // After `pass` passes, the last `pass` slots already hold their final values.
        const int unsortedEnd = size - pass - 1;
        for (int k = 0; k < unsortedEnd; ++k) {
            if (array[k + 1] < array[k]) {
                swapping(array[k], array[k + 1]);
                swappedThisPass = true;
            }
        }
        if (!swappedThisPass) {
            return; // a pass without swaps means the array is sorted
        }
    }
}
// Program entry point: prompts for a count and that many integers on stdin,
// prints them, sorts them in place with bubbleSort, and prints them again.
// Returns 1 without sorting when the count cannot be read or is negative.
int main() {
    int n;
    std::cout << "Enter the number of elements: ";
    // Reject unreadable or negative counts before using n as a size.
    if (!(std::cin >> n) || n < 0) {
        std::cerr << "Invalid number of elements." << std::endl;
        return 1;
    }

    // std::vector replaces the runtime-sized stack array, which is not
    // standard C++; its elements start at 0, so a failed element read never
    // leaves an uninitialized value behind.
    std::vector<int> arr(n);
    std::cout << "Enter elements:" << std::endl;
    for (int i = 0; i < n; i++) {
        std::cin >> arr[i];
    }

    std::cout << "Array before Sorting: ";
    display(arr.data(), n);
    bubbleSort(arr.data(), n);
    std::cout << "Array after Sorting: ";
    display(arr.data(), n);
}
104 changes: 104 additions & 0 deletions Python/Web Scrapping/BeautifulSoup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# HTML Web Scraping using bs4

# STEP0: SETTING UP THE ENVIRONMENT
# Import requests and BeautifulSoup (bs4)

import requests
from bs4 import BeautifulSoup
url = "https://codewithharry.com/"

# STEP1: GET THE HTML

# requests applies no timeout by default, so an unresponsive server would hang
# the script forever; raise_for_status() stops early on an HTTP error response
# instead of going on to parse an error page.
r = requests.get(url, timeout=10)
r.raise_for_status()
htmlcontent = r.content  # raw response body as bytes
print(htmlcontent)

# STEP2: PARSE THE HTML

# Build the parse tree with Python's built-in HTML parser.
soup = BeautifulSoup(htmlcontent, 'html.parser')
# prettify is a method: it has to be called to get the indented markup.
# Printing the attribute alone only shows the bound-method repr.
print(soup.prettify())

# STEP3: HTML TREE TRAVERSAL
# The parse tree is made of four kinds of objects:
# Tag
# NavigableString
# BeautifulSoup
# Comment

# The page's <title> tag (None when the document has no title).
title = soup.title
print(title)

# Every <p> tag in the document.
paras = soup.find_all('p')
print(paras)

# Every <a> tag in the document.
anchors = soup.find_all('a')
print(anchors)

# Only the first <p> tag; find() returns None when there is no match.
first_para = soup.find('p')
print(first_para)

# Get classes of any element in HTML Page
# .get('class') yields None for a tag without a class attribute, where
# first_para['class'] would raise KeyError; the None check covers a page
# that has no <p> at all.
if first_para is not None:
    print(first_para.get('class'))

# Get all the elements of a specific class(eg. lead)
print(soup.find_all("p", class_="lead"))

# To get text from tags/soup
if first_para is not None:
    print(first_para.get_text())
print(soup.get_text())

# To get all links of a page
anchors = soup.find_all('a')
all_links = set()  # unique link URLs found on the page

for link in anchors:
    href = link.get('href')
    # Skip anchors that have no href at all and bare "#" placeholders.
    if href is None or href == "#":
        continue
    # Absolute URLs are kept as they are; only site-relative paths get the
    # site prefix, so external links are not given a second scheme and host.
    if href.startswith(("http://", "https://")):
        linkText = href
    else:
        linkText = "https://codewithharry.com" + href
    # Store the URL string, not the Tag object, so the set holds links.
    all_links.add(linkText)
    print(linkText)


# For comment in HTML Tree Traversal

markup = "<p><!-- this is a comment --></p>"
# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed and emits a warning about the guess.
soup2 = BeautifulSoup(markup, 'html.parser')
# The only child of <p> is the comment, so .string returns the comment text.
print(soup2.p.string)


# To find elements of a particular id

navbarSupportedContent = soup.find(id='navbarSupportedContent')

# find() returns None when nothing on the page has this id, so all navigation
# below is guarded instead of failing with AttributeError.
if navbarSupportedContent is not None:
    # Direct children only. .contents holds the same nodes as a list, while
    # .children iterates over them.
    for element in navbarSupportedContent.children:
        print(element)

    # Every text node beneath the element, whitespace included.
    for item in navbarSupportedContent.strings:
        print(item)

    # The same text with surrounding whitespace stripped and blank strings skipped.
    for item in navbarSupportedContent.stripped_strings:
        print(item)

    print(navbarSupportedContent.parent)

    # Tag names of every ancestor, from the direct parent outwards.
    for item in navbarSupportedContent.parents:
        print(item.name)

    # Siblings
    # A sibling is None at the edge of the tree, so the second hop is only
    # taken when the first one exists.
    next_node = navbarSupportedContent.next_sibling
    print(next_node)
    if next_node is not None:
        print(next_node.next_sibling)

    previous_node = navbarSupportedContent.previous_sibling
    print(previous_node)
    if previous_node is not None:
        print(previous_node.previous_sibling)

# CSS Selecting
# select() takes a CSS selector and returns a list of every matching tag.

# '#' prefixes an id selector, '.' prefixes a class selector.
id_element = soup.select('#loginModal')
class_element = soup.select('.loginModal')

# Show the id matches first, then the class matches.
print(id_element)
print(class_element)