def configurateDateDict():
    """Load the whole Date collection into the global ``dateDict``.

    Each entry maps a Date document id to that document's field dictionary.
    Relies on the global ``dateCollection`` having been set beforehand.
    """
    global dateDict
    dateDict = {snapshot.id: snapshot.to_dict() for snapshot in dateCollection.get()}
def parseDisplayName(name):
    """Split a Slack display name into its camper components.

    The expected shape is ``<letter><digits>_<name>[_...]``.  Names that carry
    the camper id but no underscore (for example ``"J045 Name"``) are also
    accepted instead of raising ``IndexError``.

    Args:
        name: Display name that starts with a camper id such as ``J001``.

    Returns:
        ``[alpha_name, camper_number, hangul_name, student_type]`` where
        ``alpha_name`` is the camper id, ``camper_number`` its numeric part as
        an int, ``hangul_name`` the text following the id (``""`` if there is
        none) and ``student_type`` the leading letter of the id.

    Raises:
        ValueError: if the name does not start with a camper id, or the id's
            numeric part cannot be parsed.
    """
    alpha_name, separator, remainder = name.partition('_')
    if not separator:
        # No underscore at all: recover the id with the same pattern the
        # caller uses to accept names, and treat the rest as the name.
        matched = re.match(r"[a-zA-Z][0-9]{1,3}", name)
        if matched is None:
            raise ValueError("display name has no camper id: %r" % (name,))
        alpha_name = matched.group()
        remainder = name[matched.end():].strip()

    # Only the segment right after the id is the name; later segments are ignored.
    hangul_name = remainder.split('_')[0]
    student_type = alpha_name[0]
    camper_number = int(alpha_name[1:])
    return [alpha_name, camper_number, hangul_name, student_type]
def getChannelID():
    """Store the id of the "check_in_check_out" channel in the global ``channel_id``.

    Uses the global Slack ``client`` to list conversations and stops at the
    first channel whose name matches.

    Raises:
        LookupError: if the channel is not present in the response.  Without
            this, ``channel_id`` would stay unbound and the failure would
            surface later as an unrelated ``NameError``.
    """
    global channel_id
    api_response = client.conversations_list()
    for channel in api_response["channels"]:
        if channel["name"] == "check_in_check_out":
            channel_id = channel["id"]
            return
    # NOTE(review): only the channels contained in this single response are
    # inspected - confirm whether pagination is needed for this workspace.
    raise LookupError('Slack channel "check_in_check_out" was not found')
def getAlphaName(username):
    """Return the camper id at the start of a Slack user name.

    The id is one letter followed by one to three digits, the same pattern
    this file uses to recognise camper names.  A fixed four-character slice
    would include the separator for shorter ids (``"J12_name"`` -> ``"J12_"``).

    Args:
        username: Slack display or real name.

    Returns:
        The leading camper id, or the first four characters of ``username``
        when it does not start with a camper id (the previous behaviour).
    """
    matched = re.match(r"[a-zA-Z][0-9]{1,3}", username)
    if matched is None:
        return username[0:4]
    return matched.group()
def getYYMMDD(date):
    """Format a date-like object as a ``YYMMDD`` string.

    Args:
        date: Object exposing integer ``year``, ``month`` and ``day``.

    Returns:
        The year with its first two characters removed, followed by the
        zero-padded month and zero-padded day.
    """
    short_year = str(date.year)[2:]
    return "{}{:02d}{:02d}".format(short_year, date.month, date.day)
def calculateAndInsertAttendance():
    """Flag today's completed attendance details and add them to each camper's count.

    A detail is completed when it has both a check-in and a check-out time and
    is not yet flagged.  Each such detail gets ``Attendance`` set to True and
    contributes one to its camper's entry in the global ``userCount``; the
    per-camper totals are then added to ``Count`` in the Attendance collection.

    Reads the globals ``todayAttendanceDetails``, ``detailCollection`` and
    ``attendanceCollection``.
    """
    global userCount
    userCount = {}

    for detail in todayAttendanceDetails:
        dic = detail.to_dict()
        completed = dic["CheckInTime"] is not None and dic["CheckOutTime"] is not None
        if completed and dic["Attendance"] == False:
            detailCollection.document(detail.id).update({
                u'Attendance': True
            })
            camper = dic["CamperId"]
            userCount[camper] = userCount.get(camper, 0) + 1

    for key, attended in userCount.items():
        doc = attendanceCollection.document(key)
        # to_dict() is None when the camper has no Attendance document yet;
        # start from zero instead of failing after details were already flagged.
        snapshot = doc.get().to_dict() or {}
        # merge=True writes Count without touching other fields and also works
        # when the document does not exist.
        # NOTE(review): confirm that creating a missing Attendance document
        # here is acceptable for the consumers of this collection.
        doc.set({
            u'Count': int(snapshot.get("Count", 0)) + attended
        }, merge=True)
def configurate():
    """Create the global Slack ``client`` and run the remaining setup steps.

    Reads the API token from ``slack-token.txt`` in the current working
    directory, then calls ``configurateUserList()`` and ``configurateDB()``.
    """
    global client
    # The context manager closes the token file even if reading fails.
    with open("slack-token.txt", 'r') as reader:
        token = reader.read().strip()
    client = WebClient(token)
    configurateUserList()
    configurateDB()
def _attendanceFields(text):
    """Return the detail fields a reply may fill, check-in first, based on its keywords."""
    fields = []
    if "체크인" in text:
        fields.append(u'CheckInTime')
    if "체크아웃" in text:
        fields.append(u'CheckOutTime')
    return fields


def getAttendenceDetails():
    """Store the check-in / check-out replies of the current thread in Firestore.

    Reads the globals ``threads`` (first element is treated as the parent
    post, the rest as replies), ``user_dict`` (Slack user id -> name) and
    ``collection``.  The parent post's timestamp selects the ``Date`` (YYMMDD)
    for the whole thread, while each reply is recorded with its own timestamp.
    For every camper only the first check-in and the first check-out are kept;
    fields that already hold a value are never overwritten.
    """
    if not threads:
        return

    posted_at = datetime.fromtimestamp(float(threads[0]["ts"]))
    yymmdd = getYYMMDD(posted_at)

    # One query for the whole day. Writes made below are mirrored into this
    # cache so a later reply from the same camper sees them, which lets a
    # check-in and a check-out be stored in the same run without duplicates.
    records = {}
    for snapshot in collection.where(u'Date', u'==', yymmdd).get():
        record = snapshot.to_dict()
        record["id"] = snapshot.id
        records[record["CamperId"]] = record

    for reply in threads[1:]:
        # Replies without a known author (e.g. no "user" key) are skipped
        # instead of raising KeyError.
        username = user_dict.get(reply.get("user"))
        if username is None:
            continue
        fields = _attendanceFields(reply.get("text", ""))
        if not fields:
            continue

        alpha_name = getAlphaName(username)
        # Use the reply's own time, not the parent post's.
        replied_at = datetime.fromtimestamp(float(reply["ts"]))
        record = records.get(alpha_name)

        if record is None:
            new_detail = {
                u'Attendance': False,
                u'CamperId': alpha_name,
                u'CheckInTime': None,
                u'CheckOutTime': None,
                u'Date': yymmdd
            }
            new_detail[fields[0]] = replied_at
            doc = collection.document()
            doc.set(new_detail)
            records[alpha_name] = dict(new_detail, id=doc.id)
            continue

        for field in fields:
            if record.get(field) is None:
                collection.document(record["id"]).update({field: replied_at})
                record[field] = replied_at
                break