Skip to content

Commit

Permalink
Init Commit
Browse files Browse the repository at this point in the history
These are all projects for scraping and automation. The commit includes four projects built with Python, Scrapy, Selenium, plotly.js and symfony-goutte.
  • Loading branch information
foxtails225 committed May 3, 2020
1 parent cb89450 commit 0c7dbc3
Show file tree
Hide file tree
Showing 48 changed files with 2,654 additions and 0 deletions.
105 changes: 105 additions & 0 deletions Disney_Scraping/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import requests
import json


# Endpoints used by the scraper: the token service plus the two persisted
# GraphQL queries (collection lookup by slug, set lookup by set id).
urls = dict(
    token_url='https://global.edge.bamgrid.com/token',
    collection_url='https://search-api-disney.svcs.dssott.com/svc/search/v2/graphql/persisted/query/core/CollectionBySlug',
    section_url='https://search-api-disney.svcs.dssott.com/svc/search/v2/graphql/persisted/query/core/SetBySetId',
)


def get_auth_token():
    """Exchange the embedded refresh token for a fresh token payload.

    Sends a form-encoded ``refresh_token`` grant to ``urls['token_url']``.

    Returns:
        The decoded JSON body of the token response. Callers in this file
        read its ``'access_token'`` entry.

    Raises:
        requests.HTTPError: If the token service answers with a 4xx/5xx
            status (previously the error body was returned as if it were a
            token and only failed later with a ``KeyError``).
        requests.Timeout: If the service does not answer within 30 seconds.
    """
    # NOTE(review): the refresh token and the API key below are credentials
    # committed to source control. They should be moved to configuration.
    payload = {
        'grant_type': 'refresh_token',
        'latitude': 0,
        'longitude': 0,
        'platform': 'browser',
        'refresh_token': 'eyJraWQiOiJlNzRlOTlhNy04NDNlLTQ2NmEtOTVhMS02YjA0MjYwNThlNmYiLCJhbGciOiJFZERTQSJ9.eyJhdWQiOiJ1cm46YmFtdGVjaDpzZXJ2aWNlOnRva2VuIiwic3ViamVjdF90b2tlbl90eXBlIjoidXJuOmJhbXRlY2g6cGFyYW1zOm9hdXRoOnRva2VuLXR5cGU6ZGV2aWNlIiwibmJmIjoxNTg0MzQxMDc3LCJncmFudF90eXBlIjoidXJuOmlldGY6cGFyYW1zOm9hdXRoOmdyYW50LXR5cGU6dG9rZW4tZXhjaGFuZ2UiLCJpc3MiOiJ1cm46YmFtdGVjaDpzZXJ2aWNlOnRva2VuIiwiY29udGV4dCI6ImV5SmhiR2NpT2lKdWIyNWxJbjAuZXlKemRXSWlPaUptT0RObE1qQXdZUzA0WmpFMExUUXpZbVF0T0RObE1pMHlaV1EzTmpJeE56VXdZemdpTENKaGRXUWlPaUoxY200NlltRnRkR1ZqYURwelpYSjJhV05sT25SdmEyVnVJaXdpYm1KbUlqb3hOVGcwTXpJNU1EVXpMQ0pwYzNNaU9pSjFjbTQ2WW1GdGRHVmphRHB6WlhKMmFXTmxPbVJsZG1salpTSXNJbVY0Y0NJNk1qUTBPRE15T1RBMU15d2lhV0YwSWpveE5UZzBNekk1TURVekxDSnFkR2tpT2lJMU1HRTBNMlU1WkMweU1tRmtMVFJoTldZdE9HUmpNaTB4TUdZMU5HVXhPR00xT0RjaWZRLiIsImV4cCI6MTU5OTg5MzA3NywiaWF0IjoxNTg0MzQxMDc3LCJqdGkiOiI3NzI0NDY1OS04NzBiLTQ4ZTEtYmY4Yi00NmQ2NmE1MTk2MDcifQ.EWn4KXvf3xgYb9gDSNbcD3xN4qcQVwiEUd45q2sfG9_Zcy06OnTzfIokWAAuNUQzc9Fm6bpEh7__D7M8KZ_IBw'
    }

    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'authorization': 'Bearer ZGlzbmV5JmJyb3dzZXImMS4wLjA.Cu56AgSfBTDag5NiRA81oLHkDZfu5L3CKadnefEAY84'
    }

    # Without a timeout a stalled connection would block the script forever.
    response = requests.post(
        urls['token_url'], data=payload, headers=headers, timeout=30)
    # Fail here, with the real HTTP status, rather than on a missing key later.
    response.raise_for_status()
    return response.json()


def get_section_info():
    """Fetch the "home" collection and list the sets it is made of.

    Requests ``urls['collection_url']`` with a fresh access token and reads
    ``data.CollectionBySlug.containers`` from the response.

    Returns:
        A dict keyed by consecutive integers starting at 0. Each value is
        ``{'setId': ..., 'setType': ...}``, taken from the container's
        ``refId``/``refType`` when present and from ``setId``/``type``
        otherwise. Containers exposing neither id are skipped, because an
        empty entry would make ``get_section_data`` fail with ``KeyError``.

    Raises:
        requests.HTTPError: On a 4xx/5xx answer from the token or search API.
        requests.Timeout: If the search API does not answer within 30 seconds.
    """
    auth_token = get_auth_token()['access_token']

    params = {
        'variables': '{"preferredLanguage":["en"],"contentClass":"home","slug":"home","contentTransactionId":"2c278173-12a7-4bb6-85c0-13c23cd93370"}'
    }

    headers = {
        'authorization': "Bearer " + auth_token,
    }

    response = requests.get(
        urls['collection_url'], params=params, headers=headers, timeout=30)
    response.raise_for_status()
    containers = response.json()['data']['CollectionBySlug']['containers']

    section_info_dict = {}
    # The first two containers are deliberately left out, as before; the
    # reason is not visible in this file.
    for container in containers[2:]:
        section_set = container['set']
        if 'refId' in section_set:
            info = {
                'setId': section_set['refId'],
                'setType': section_set['refType'],
            }
        elif 'setId' in section_set:
            info = {
                'setId': section_set['setId'],
                'setType': section_set['type'],
            }
        else:
            # Nothing to query for this container.
            continue

        section_info_dict[len(section_info_dict)] = info

    return section_info_dict


def get_section_data(section_info):
    """Fetch one set and reduce it to its title plus item names and images.

    Args:
        section_info: Mapping with the ``'setId'`` and ``'setType'`` of the
            set to load, as produced by ``get_section_info``.

    Returns:
        ``{'Name': <set title>, 'Items': [{'Name': ..., 'Image': ...}, ...]}``.
        ``'Image'`` is the URL at image position 4 for the set titled
        "Collections" and at position 9 for every other set, or ``''`` when
        the item does not have that many images.

    Raises:
        requests.HTTPError: On a 4xx/5xx answer from the token or search API.
        requests.Timeout: If the search API does not answer within 30 seconds.
    """
    auth_token = get_auth_token()['access_token']

    params = {
        # Serialised with json.dumps so the ids are always escaped correctly
        # instead of being spliced into a hand-written JSON string.
        'variables': json.dumps({
            'preferredLanguage': ['en'],
            'setId': section_info['setId'],
            'setType': section_info['setType'],
            'contentTransactionId': 'a099f643-6021-4b28-a687-512bbe546e0d',
        })
    }

    headers = {
        'authorization': "Bearer " + auth_token,
        'Content-Type': 'application/x-www-form-urlencoded'
    }

    response = requests.get(
        urls['section_url'], params=params, headers=headers, timeout=30)
    response.raise_for_status()
    section = response.json()['data']['SetBySetId']

    section_name = section['texts'][0]['content']
    # Which image position to read depends on the set being shown.
    image_index = 4 if section_name == "Collections" else 9

    items = []
    for item in section['items']:
        images = item['images']
        # The old guard (len > 8) still let a 9-image item hit images[9],
        # and the "Collections" branch had no guard at all.
        image_url = images[image_index]['url'] if len(images) > image_index else ''
        items.append({'Name': item['texts'][0]['content'], 'Image': image_url})

    result_data = {'Name': section_name, 'Items': items}
    print(result_data)
    return result_data

# Scrape every section first, then write the whole result once as a single
# JSON array. Appending one json.dumps() per section with no separator left
# data.json as "{...}{...}", which no JSON parser accepts.
scraped_sections = [
    get_section_data(section_info)
    for section_info in get_section_info().values()
]

with open('data.json', 'w', encoding='utf-8') as f:
    json.dump(scraped_sections, f)
121 changes: 121 additions & 0 deletions Gmail_Scraping/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
# Environment Build Files
env.py
config_local.py

# Remove main config
config_live.py

# Exclude Gulp Files
node_modules/

# Exclude temp uploads
static/images/temp/
static/images/d1/




# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
104 changes: 104 additions & 0 deletions Gmail_Scraping/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
################################
#
# Build App
#
################################

import os
import logging

from datetime import datetime
from logging.config import dictConfig

from flask import Flask, g, current_app, request, make_response
from flask_restful import abort
from flask_cors import CORS

from app.config import LocalConfig
from app.common.extensions import CustomApi
from app.common.database import Database
from app.common.helpers import default_schema, milli_time

from app.views.home import *


def create_app(production=False):
    """Build the Flask application and wire up its hooks and routes.

    ``production`` is accepted but not consulted here: ``LocalConfig`` is
    always the configuration that gets loaded.

    Returns the configured ``Flask`` instance.
    """
    flask_app = Flask(__name__)

    # Settings: load the config object, then derive the app-level values.
    flask_app.config.from_object(LocalConfig)
    flask_app.config['TEMPLATES_AUTO_RELOAD'] = True
    flask_app.secret_key = flask_app.config['APP_SECRET_KEY']
    flask_app.url_map.strict_slashes = False

    # Templates: start from an empty cache and reload on change.
    jinja_env = flask_app.jinja_env
    jinja_env.cache = {}
    jinja_env.auto_reload = True

    # Cross-origin support and the API wrapper mounted under /api/v1.
    CORS(flask_app)
    CustomApi(flask_app, prefix='/api/v1')

    # Logging is driven entirely by the LOGGING entry of the config.
    dictConfig(flask_app.config['LOGGING'])

    # Only hooks and routes are registered; extension and API endpoint
    # registration are not enabled in this factory.
    register_hooks(flask_app)
    register_routes(flask_app)

    return flask_app

def register_hooks(app):
    """Attach the per-request hooks to ``app``.

    Registers a ``before_request`` hook that answers favicon requests with
    an empty 204, records ``g.start_time`` and opens a database connection
    on ``g.db``, and a ``teardown_request`` hook that closes that
    connection.

    Args:
        app: The Flask application to register the hooks on.
    """
    logger = logging.getLogger(__name__)

    def db_has_connection():
        """Return True when a connection is already stored on ``g``."""
        return hasattr(g, 'db')

    def get_db_connection():
        """Return the connection stored on ``g``, opening it on first use.

        Aborts the request with a 500 response if the connection cannot be
        established.
        """
        if not db_has_connection():
            try:
                g.db = Database(
                    host=current_app.config['MYSQL']['HOST'],
                    db=current_app.config['MYSQL']['DB'],
                    user=current_app.config['MYSQL']['USER'],
                    passwd=current_app.config['MYSQL']['PASS'],
                )
            except Exception:
                # Keep the root cause (with traceback) in the log; the client
                # only ever sees the generic 500 payload below.
                logger.exception('Failed to connect to CORE Database.')
                abort(500,
                      status=0,
                      message='Failed to connect to CORE Database.',
                      errors=dict(
                          application='There was a problem connecting to MySQL.',
                          validation=None
                      ),
                      http_status=500
                      )
        return g.db

    @app.before_request
    def before_request():
        # Favicon lookups get an empty answer and never touch the database.
        if request.path.startswith('/favicon.ico'):
            response = make_response('', 204)
            response.headers['Content-Length'] = 0
            return response

        g.start_time = milli_time()

        get_db_connection()

    @app.teardown_request
    def close_db_connection(ex):
        # Close only what was actually opened; never go back through the
        # connect path while the request is being torn down.
        if db_has_connection():
            g.db.close()


def register_routes(app):
    """Register the URL rules served by ``app``.

    Currently a single rule: ``/`` is handled by the ``Home`` view under
    the endpoint name ``home``.
    """
    # Dashboard home
    home_view = Home.as_view('home')
    app.add_url_rule('/', view_func=home_view)


Loading

0 comments on commit 0c7dbc3

Please sign in to comment.