Skip to content

Commit

Permalink
feat: add crawler
Browse files Browse the repository at this point in the history
  • Loading branch information
modyhoon committed Dec 19, 2021
1 parent 97d1985 commit ca98048
Show file tree
Hide file tree
Showing 6 changed files with 523 additions and 0 deletions.
60 changes: 60 additions & 0 deletions crawler/dumpworks/insert_all_attendance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from datetime import datetime
import firebase_admin
import re
from firebase_admin import credentials
from firebase_admin import firestore

def configurate():
    """Prepare module state: connect to Firestore, then cache the Date documents.

    The order matters: configurateDateDict() reads the ``dateCollection``
    global that configurateDB() publishes.
    """
    configurateDB()
    configurateDateDict()

def configurateDB():
    """Initialise the Firebase app and expose the Firestore collections as globals.

    Publishes ``detailCollection`` (AttendanceDetail), ``attendanceCollection``
    (Attendance) and ``dateCollection`` (Date) for the other functions of
    this script.
    """
    global detailCollection, attendanceCollection, dateCollection

    # The service-account key path is relative to the working directory.
    certificate = credentials.Certificate("../boostattendance-firebase.json")
    firebase_admin.initialize_app(certificate)

    database = firestore.client()
    detailCollection = database.collection(u'AttendanceDetail')
    attendanceCollection = database.collection(u'Attendance')
    dateCollection = database.collection(u'Date')

def configurateDateDict():
    """Cache every document of the Date collection in the global ``dateDict``.

    Keys are the Firestore document ids, values are the documents' field
    dicts as returned by ``to_dict()``.
    """
    global dateDict
    dateDict = {}
    dateDict.update(
        (snapshot.id, snapshot.to_dict()) for snapshot in dateCollection.get()
    )

def getAllAttendanceDetails():
    """Fetch every AttendanceDetail document into the global ``allAttendanceDetails``."""
    global allAttendanceDetails
    every_detail = detailCollection.get()
    allAttendanceDetails = every_detail

def calculateAndInsertAttendance():
    """Mark qualifying AttendanceDetail records as attended and bump camper counts.

    A record qualifies when it is not yet marked attended, has a check-in
    time, and either also has a check-out time or belongs to a date whose
    ``CheckInOnly`` flag is set in ``dateDict``.  Every qualifying record gets
    ``Attendance: True`` and adds one to its camper's tally; afterwards each
    tally is added to the ``Count`` field of the camper's Attendance document.

    The tallies stay available in the global ``userCount``
    (CamperId -> number of newly attended records).
    """
    global userCount
    userCount = {}

    for detail in allAttendanceDetails:
        record = detail.to_dict()

        # Records already marked attended are skipped on every path; without
        # this guard a re-run counted check-in-only days a second time.
        if record["Attendance"] is not False or record["CheckInTime"] is None:
            continue

        # A detail whose date has no Date document is treated as a normal
        # (check-in and check-out required) day instead of raising KeyError.
        date_info = dateDict.get(record["Date"]) or {}
        check_in_only = date_info.get("CheckInOnly") is True
        if record["CheckOutTime"] is None and not check_in_only:
            continue

        detailCollection.document(detail.id).update({u'Attendance': True})
        camper_id = record["CamperId"]
        userCount[camper_id] = userCount.get(camper_id, 0) + 1

    for camper_id, newly_attended in userCount.items():
        doc = attendanceCollection.document(camper_id)
        # to_dict() is None for a document that does not exist yet.
        current = doc.get().to_dict() or {}
        previous_count = int(current.get("Count") or 0)
        # merge=True only touches Count and creates the document if needed, so
        # a camper without a counter no longer aborts the run after the detail
        # records were already marked.
        doc.set({u'Count': previous_count + newly_attended}, merge=True)

# Script entry: connect to Firestore, load every detail record, then settle
# attendance flags and per-camper counts.  Each step relies on globals set by
# the previous one, so the order must not change.
configurate()
getAllAttendanceDetails()
calculateAndInsertAttendance()

46 changes: 46 additions & 0 deletions crawler/dumpworks/insert_all_camper_id_to_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from slack_sdk import WebClient
import firebase_admin
import re
from firebase_admin import credentials
from firebase_admin import firestore

def parseDisplayName(name):
    """Split a camper display name such as ``"J123_홍길동"`` into its parts.

    Args:
        name: Display name made of one letter, digits, a separator
            (underscore or a whitespace character) and the camper's name.

    Returns:
        ``[alpha_name, camper_number, hangul_name, student_type]`` where
        ``alpha_name`` is the letter+digits id, ``camper_number`` its numeric
        part as an int, ``hangul_name`` the text between the separator and
        the next underscore, and ``student_type`` the leading letter.

    Raises:
        ValueError: If ``name`` does not start with a letter, digits and a
            separator.
    """
    match = re.match(r"([a-zA-Z])([0-9]+)[_\s]", name)
    if match is None:
        raise ValueError("not a camper display name: %r" % (name,))
    student_type, digits = match.groups()
    # Only the first underscore-delimited piece after the id is the name;
    # any further suffix is ignored.
    hangul_name = name[match.end():].split('_')[0]
    return [student_type + digits, int(digits), hangul_name, student_type]

def getDisplayName(user):
    """Return the camper-style name of a Slack user, or ``None`` if it has none.

    The profile's ``display_name`` is preferred and ``real_name`` is the
    fallback.  A name counts as camper-style when it starts with one letter
    followed by one to three digits.

    Args:
        user: Slack user object (a dict) with an optional ``profile`` dict.

    Returns:
        The first matching name, or ``None`` when neither field matches or
        the profile or field is missing or empty.
    """
    profile = user.get("profile") or {}
    for field in ("display_name", "real_name"):
        # Missing or null fields are treated as an empty, non-matching name.
        candidate = profile.get(field) or ""
        if re.search("^[a-zA-Z][0-9]{1,3}", candidate):
            return candidate
    return None


# Script body: read the Slack token, list the workspace members and mirror
# every camper into the CamperId collection (document id = letter+digits id).

# ``with`` closes the token file even if reading fails.
with open("slack-token.txt", 'r') as reader:
    token = reader.read().strip()
client = WebClient(token)
api_response = client.users_list(limit=200)
cred = credentials.Certificate("../boostattendance-firebase.json")
firebase_admin.initialize_app(cred)
db = firestore.client()

# Iterating the response follows Slack's ``next_cursor``, so members beyond
# the first page are imported too.
for page in api_response:
    for user in page["members"]:
        displayName = getDisplayName(user)
        if displayName is None:
            continue
        try:
            parsed = parseDisplayName(displayName)
        except (IndexError, ValueError):
            # One malformed name must not abort the import of everyone else.
            print("skipped unparsable display name:", displayName)
            continue
        doc = db.collection(u'CamperId').document(parsed[0])
        doc.set({
            u'CamperId': parsed[1],
            u'Name': parsed[2],
            u'Type': parsed[3]
        })

163 changes: 163 additions & 0 deletions crawler/dumpworks/insert_all_history_to_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
from slack_sdk import WebClient
from datetime import datetime
import firebase_admin
import re
from firebase_admin import credentials
from firebase_admin import firestore

def configurate():
    """Create the Slack client, then load the user table and the Firestore handle.

    Reads the API token from ``slack-token.txt`` (surrounding whitespace is
    stripped) and publishes the resulting ``WebClient`` as the global
    ``client`` before calling configurateUserList() and configurateDB().
    """
    global client
    # ``with`` closes the token file even if reading fails.
    with open("slack-token.txt", 'r') as reader:
        token = reader.read().strip()
    client = WebClient(token)
    configurateUserList()
    configurateDB()

def configurateDB():
    """Initialise the Firebase app and publish the AttendanceDetail collection.

    The collection reference is stored in the global ``collection``.
    """
    global collection

    # The service-account key path is relative to the working directory.
    certificate = credentials.Certificate("../boostattendance-firebase.json")
    firebase_admin.initialize_app(certificate)
    collection = firestore.client().collection(u'AttendanceDetail')

def configurateUserList():
    """Build the global ``user_dict`` mapping Slack user id -> name.

    The profile's ``display_name`` is used when it starts with one letter
    followed by one to three digits; otherwise ``real_name`` is stored,
    whatever it looks like.
    """
    global user_dict
    user_dict = {}

    # Iterating the response follows Slack's ``next_cursor``, so members
    # beyond the first page are included as well.
    for page in client.users_list(limit=200):
        for user in page["members"]:
            profile = user["profile"]
            username = profile["display_name"]
            if re.search("^[a-zA-Z][0-9]{1,3}", username) is None:
                username = profile["real_name"]
            user_dict[user["id"]] = username


def getChannelID():
    """Store the id of the ``check_in_check_out`` channel in the global ``channel_id``.

    Raises:
        LookupError: If no channel with that name is listed.  Previously the
            global was silently left unset and a later call failed with
            NameError.
    """
    global channel_id
    # Iterating the response follows Slack's ``next_cursor``, so the channel
    # is found even when it is not on the first page.
    for page in client.conversations_list(limit=200):
        for channel in page["channels"]:
            if channel["name"] == "check_in_check_out":
                channel_id = channel["id"]
                return
    raise LookupError("Slack channel 'check_in_check_out' was not found")

def getConversations():
    """Load the full message history of ``channel_id`` into the global ``messages``.

    Messages are kept in the order Slack returns them, page after page.
    """
    global messages
    messages = []
    # A single conversations.history call returns one page only; iterating
    # the response follows ``next_cursor`` until the history is exhausted.
    for page in client.conversations_history(channel=channel_id, limit=200):
        messages.extend(page["messages"])

def getThreads():
    """Fetch the thread rooted at ``ts_response`` into the global ``threads``.

    Both ``channel_id`` and ``ts_response`` are read from module globals.
    """
    global threads
    replies = client.conversations_replies(channel=channel_id, ts=ts_response)
    threads = replies["messages"]


def getYYMMDD(date):
    """Format a date as a six-digit ``YYMMDD`` string, e.g. 2021-09-17 -> "210917".

    Args:
        date: Object with integer ``year``, ``month`` and ``day`` attributes,
            such as ``datetime.date`` or ``datetime.datetime``.

    Returns:
        Two-digit year, month and day, each zero-padded.
    """
    # ``year % 100`` always yields two digits, unlike slicing str(year),
    # which truncates for years below 1000.
    return "%02d%02d%02d" % (date.year % 100, date.month, date.day)

def getAlphaName(username):
    """Extract the camper id (one letter plus one to three digits) from a name.

    Args:
        username: Slack display or real name, e.g. ``"J123_홍길동"``.

    Returns:
        The leading id when the name starts with one; otherwise the first
        four characters, which is what a fixed slice always produced.
    """
    # A fixed four-character slice turned short ids like "J12_..." into
    # "J12_"; matching the id pattern keeps only the id itself.
    match = re.match("[a-zA-Z][0-9]{1,3}", username)
    if match is None:
        return username[0:4]
    return match.group(0)


def getAttendenceDetails2():
    """Debug probe: print the first AttendanceDetail document dated "210917".

    NOTE(review): the function returns right after the probe, so the
    per-reply import logic that follows is unreachable.  It is preserved
    as-is in behavior; confirm whether it can be deleted.
    """
    probe_date = "210917"
    probe_hits = collection.where(u'Date', u'==', probe_date).get()
    print(probe_hits[0].to_dict())
    return

    # ---- unreachable from here on ----
    for idx, thread in enumerate(threads):
        if idx == 0:
            # The first entry is the thread's parent message, not a reply.
            continue
        timedate = datetime.fromtimestamp(float(thread["ts"]))
        yymmdd = getYYMMDD(timedate)
        alpha_name = getAlphaName(user_dict[thread["user"]])
        same_day = collection.where(u'Date', u'==', yymmdd)
        document = same_day.where(u'CamperId', u'==', alpha_name).get()
        text = thread["text"]

        if len(document) > 0:
            doc = collection.document(document[0].id)
            if "체크인" in text and document[0].to_dict()["CheckInTime"] == None:
                doc.update({u'CheckInTime': timedate})
            elif "체크아웃" in text and document[0].to_dict()["CheckOutTime"] == None:
                doc.update({u'CheckOutTime': timedate})
        else:
            doc = collection.document()
            if "체크인" in text:
                doc.set({
                    u'Attendance': False,
                    u'CamperId': alpha_name,
                    u'CheckInTime': timedate,
                    u'CheckOutTime': None,
                    u'Date': yymmdd
                })
            elif "체크아웃" in text:
                doc.set({
                    u'Attendance': False,
                    u'CamperId': alpha_name,
                    u'CheckInTime': None,
                    u'CheckOutTime': timedate,
                    u'Date': yymmdd
                })

def getAttendenceDetails():
    """Record the check-ins and check-outs found in the current thread.

    Reads the globals ``threads`` (parent message first, then replies),
    ``user_dict`` and ``collection``.  The thread's date key comes from the
    parent message; each stored time comes from the reply itself.

    For every reply containing "체크인" or "체크아웃":
      * if the camper already has an AttendanceDetail document for that date,
        the matching time field is filled in when it is still empty;
      * otherwise a new document is created with ``Attendance: False``.
    Only the first reply per camper that leads to a write is used; later
    replies by the same camper in this thread are ignored.
    """
    if not threads:
        return

    thread_start = datetime.fromtimestamp(float(threads[0]["ts"]))
    yymmdd = getYYMMDD(thread_start)

    # Existing records of that date, keyed by camper id, with the Firestore
    # document id added under "id".
    existing = {}
    for snapshot in collection.where(u'Date', u'==', yymmdd).get():
        record = snapshot.to_dict()
        record["id"] = snapshot.id
        existing[record["CamperId"]] = record

    markers = (("체크인", u'CheckInTime'), ("체크아웃", u'CheckOutTime'))
    handled = set()

    for reply in threads[1:]:
        username = user_dict.get(reply.get("user"))
        if username is None:
            # Replies without a known user (e.g. bot messages) are skipped
            # instead of raising KeyError.
            continue
        alpha_name = getAlphaName(username)
        if alpha_name in handled:
            continue

        text = reply.get("text", "")
        record = existing.get(alpha_name)
        # First marker that is present in the text and, for an existing
        # record, whose time field is still empty.
        field = None
        for keyword, candidate in markers:
            if keyword in text and (record is None or record[candidate] is None):
                field = candidate
                break
        if field is None:
            continue

        # Stamp the reply's own time; the parent message's time was
        # previously written for every camper.
        reply_time = datetime.fromtimestamp(float(reply["ts"]))
        handled.add(alpha_name)

        if record is not None:
            collection.document(record["id"]).update({field: reply_time})
            continue

        fields = {
            u'Attendance': False,
            u'CamperId': alpha_name,
            u'CheckInTime': None,
            u'CheckOutTime': None,
            u'Date': yymmdd
        }
        fields[field] = reply_time
        collection.document().set(fields)

# Script entry: set up the clients, locate the channel, load its messages and
# import the thread below every message.
configurate()
getChannelID()
getConversations()
for message in messages:
    # getThreads() reads the parent timestamp from this module-level name.
    ts_response = message["ts"]
    getThreads()
    getAttendenceDetails()
57 changes: 57 additions & 0 deletions crawler/everydayworks/insert_attendance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from datetime import datetime
import firebase_admin
import re
from firebase_admin import credentials
from firebase_admin import firestore

def configurate():
    """Prepare module state; currently this only connects to Firestore."""
    configurateDB()

def configurateDB():
    """Initialise the Firebase app and expose two Firestore collections as globals.

    Publishes ``detailCollection`` (AttendanceDetail) and
    ``attendanceCollection`` (Attendance).
    """
    global detailCollection, attendanceCollection

    # The service-account key path is relative to the working directory.
    certificate = credentials.Certificate("../boostattendance-firebase.json")
    firebase_admin.initialize_app(certificate)

    database = firestore.client()
    detailCollection = database.collection(u'AttendanceDetail')
    attendanceCollection = database.collection(u'Attendance')

def getYYMMDD(date):
    """Format a date as a six-digit ``YYMMDD`` string, e.g. 2021-09-17 -> "210917".

    Args:
        date: Object with integer ``year``, ``month`` and ``day`` attributes,
            such as ``datetime.date`` or ``datetime.datetime``.

    Returns:
        Two-digit year, month and day, each zero-padded.
    """
    # ``year % 100`` always yields two digits, unlike slicing str(year),
    # which truncates for years below 1000.
    return "%02d%02d%02d" % (date.year % 100, date.month, date.day)

def getTodayAttendanceDetails():
    """Load today's AttendanceDetail documents into the global ``todayAttendanceDetails``.

    "Today" is ``datetime.now()`` formatted by getYYMMDD() and matched
    against each document's ``Date`` field.
    """
    global todayAttendanceDetails
    today_key = getYYMMDD(datetime.now())
    todays_query = detailCollection.where(u'Date', u'==', today_key)
    todayAttendanceDetails = todays_query.get()

def calculateAndInsertAttendance():
    """Mark today's completed records as attended and bump per-camper counts.

    A record in ``todayAttendanceDetails`` qualifies when it has both a
    check-in and a check-out time and is not yet marked attended.  Every
    qualifying record gets ``Attendance: True`` and adds one to its camper's
    tally; afterwards each tally is added to the ``Count`` field of the
    camper's Attendance document.

    The tallies stay available in the global ``userCount``
    (CamperId -> number of newly attended records).
    """
    global userCount
    userCount = {}

    for detail in todayAttendanceDetails:
        record = detail.to_dict()
        if record["CheckInTime"] is None or record["CheckOutTime"] is None:
            continue
        if record["Attendance"] is not False:
            # Already counted on an earlier run.
            continue

        detailCollection.document(detail.id).update({u'Attendance': True})
        camper_id = record["CamperId"]
        userCount[camper_id] = userCount.get(camper_id, 0) + 1

    for camper_id, newly_attended in userCount.items():
        doc = attendanceCollection.document(camper_id)
        # to_dict() is None for a document that does not exist yet.
        current = doc.get().to_dict() or {}
        previous_count = int(current.get("Count") or 0)
        # merge=True only touches Count and creates the document if needed, so
        # a camper without a counter no longer aborts the run after the detail
        # records were already marked.
        doc.set({u'Count': previous_count + newly_attended}, merge=True)

# Script entry: connect to Firestore, load today's detail records, then settle
# attendance flags and per-camper counts.  Each step relies on globals set by
# the previous one, so the order must not change.
configurate()
getTodayAttendanceDetails()
calculateAndInsertAttendance()

32 changes: 32 additions & 0 deletions crawler/everydayworks/insert_date.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
from datetime import datetime
from datetime import date

def getYYMMDD(date):
    """Format a date as a six-digit ``YYMMDD`` string, e.g. 2021-09-17 -> "210917".

    Args:
        date: Object with integer ``year``, ``month`` and ``day`` attributes,
            such as ``datetime.date`` or ``datetime.datetime``.

    Returns:
        Two-digit year, month and day, each zero-padded.
    """
    # ``year % 100`` always yields two digits, unlike slicing str(year),
    # which truncates for years below 1000.
    return "%02d%02d%02d" % (date.year % 100, date.month, date.day)

# Script body: register today's document in the Date collection, then store
# the collection's document total in AttendanceCount/1.

# The service-account key path is relative to the working directory.
cred = credentials.Certificate("../boostattendance-firebase.json")
firebase_admin.initialize_app(cred)
db = firestore.client()
today = datetime.now()
collection = db.collection(u'Date')
# The document id is the YYMMDD key of today; set() overwrites an existing
# document of the same day.
doc = collection.document(getYYMMDD(today))
doc.set({
    u'CheckInOnly': False,
    u'Date': today,
    u'IsActive': True
})

attendanceCountCollection = db.collection(u'AttendanceCount')
doc = attendanceCountCollection.document("1")
# Count is recomputed by fetching the whole Date collection, which already
# includes the document written above.
doc.update({
    "Count": len(collection.get())
})

# User-facing confirmation (Korean): "Date registration completed successfully."
print(today, "에 성공적으로 Date 등록 완료하였습니다.")
Loading

0 comments on commit ca98048

Please sign in to comment.