From e6774618c978535ba6b0db809644fdc1d82139c9 Mon Sep 17 00:00:00 2001 From: Philip Cai Date: Tue, 15 Oct 2024 19:35:31 +1100 Subject: [PATCH 1/2] fix issue#162 Fix HTTP Error 403 by defining a user agent that mimics a browser request. Fix relative path to journal_abbreviations_mathematics.csv --- journals/journal_abbreviations_mathematics.csv | 5 +++++ scripts/update_mathscinet.py | 17 +++++++++++++---- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/journals/journal_abbreviations_mathematics.csv b/journals/journal_abbreviations_mathematics.csv index fd036fc..47f9bf8 100644 --- a/journals/journal_abbreviations_mathematics.csv +++ b/journals/journal_abbreviations_mathematics.csv @@ -542,6 +542,7 @@ "Bulletin of the Institute of Combinatorics and its Applications","Bull. Inst. Combin. Appl." "Bulletin of the Institute of Mathematics. Academia Sinica. New Series","Bull. Inst. Math. Acad. Sin. (N.S.)" "Bulletin of the Iranian Mathematical Society","Bull. Iranian Math. Soc." +"Bulletin of the Karaganda University. Mathematics Series","Bull. Karaganda Univ. Math. Ser." "Bulletin of the Korean Mathematical Society","Bull. Korean Math. Soc." "Bulletin of the Kyushu Institute of Technology. Pure and Applied Mathematics","Bull. Kyushu Inst. Technol. Pure Appl. Math." "Bulletin of the London Mathematical Society","Bull. Lond. Math. Soc." @@ -744,6 +745,7 @@ "Communications in Statistics. Theory and Methods","Comm. Statist. Theory Methods" "Communications in Theoretical Physics","Commun. Theor. Phys. (Beijing)" "Communications of the American Mathematical Society","Comm. Amer. Math. Soc." +"Communications of the American Mathematical Society","Commun. Am. Math. Soc." "Communications on Applied Mathematics and Computation","Commun. Appl. Math. Comput." "Communications on Applied Nonlinear Analysis","Comm. Appl. Nonlinear Anal." "Communications on Pure and Applied Analysis","Commun. Pure Appl. Anal." @@ -812,6 +814,7 @@ "Computers and People Series","Comput. 
People Ser." "Computing and Informatics","Comput. Inform." "Computing and Visualization in Science","Comput. Vis. Sci." +"Computing in Geometry and Topology (CGT)","Comput. Geom. Topol." "Computing. Archives for Scientific Computing","Computing" "Concrete Operators","Concr. Oper." "Conference Proceedings and Lecture Notes in Applied Physics","Conf. Proc. Lecture Notes Appl. Phys." @@ -2348,6 +2351,7 @@ "Multiscale Modeling & Simulation. A SIAM Interdisciplinary Journal","Multiscale Model. Simul." "Munster Journal of Mathematics","Munster J. Math." "Munster Lectures in Mathematics","Munst. Lect. Math." +"MusMat. Brazilian Journal of Music and Mathematics","MusMat Braz. J. Music Math." "MusMat. Brazilian Journal of Music and Mathematics","MusMat Brazil. J. Music Math." "NASA Monographs in Systems and Software Engineering","NASA Monogr. Syst. Softw. Eng." "NATO Advanced Science Institutes Series A: Life Sciences","NATO Adv. Sci. Inst. Ser. A Life Sci." @@ -3056,6 +3060,7 @@ "Spectrum Slovakia","Spectr. Slovak." "Spisy Pedagogicke Fakulty v Ostrave","Spisy Ped. Fak. Ostrave" "Springer Actuarial","Springer Actuar." +"Springer Actuarial Textbooks","Springer Actuar. Textb." "Springer Aerospace Technology","Springer Aerosp. Technol." "Springer Biographies","Springer Biogr." "Springer Briefs in Business","Springer Briefs Bus." 
diff --git a/scripts/update_mathscinet.py b/scripts/update_mathscinet.py index 4647077..c29d156 100755 --- a/scripts/update_mathscinet.py +++ b/scripts/update_mathscinet.py @@ -2,13 +2,22 @@ import pandas as pd import csv +import requests +from io import StringIO +import os +print(os.getcwd()) file_in = "https://mathscinet.ams.org/msnhtml/annser.csv" -file_out = "journals/journal_abbreviations_mathematics.csv" +file_out = "../journals/journal_abbreviations_mathematics.csv" -# Get the first two fields of the last version of MathSciNet data file, without empty values -df_new = pd.read_csv(file_in, usecols=[0, 1]).dropna()[ - ["Full Title", "Abbrev"]] +# set user agent to mimic browser request +headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'} +response = requests.get(file_in, headers=headers) + +if response.status_code == 200: + df_new = pd.read_csv(StringIO(response.text), usecols=[0, 1]).dropna()[["Full Title", "Abbrev"]] +else: + raise Exception(f"Failed to fetch the file. Status code: {response.status_code}") # Get our last mathematics data file df_old = pd.read_csv(file_out, sep=",", escapechar="\\", From a79d4636210bed5e9e1e30ef90f2df51ee64972f Mon Sep 17 00:00:00 2001 From: Philip Cai Date: Wed, 16 Oct 2024 04:21:32 +1100 Subject: [PATCH 2/2] remove debug print and consolidate header Remove the print(os.getcwd()) statement for path debugging. 
Consolidate the request headers by mimicking Google Chrome --- scripts/update_mathscinet.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/scripts/update_mathscinet.py b/scripts/update_mathscinet.py index c29d156..66c45b6 100755 --- a/scripts/update_mathscinet.py +++ b/scripts/update_mathscinet.py @@ -4,14 +4,18 @@ import csv import requests from io import StringIO -import os -print(os.getcwd()) file_in = "https://mathscinet.ams.org/msnhtml/annser.csv" -file_out = "../journals/journal_abbreviations_mathematics.csv" - -# set user agent to mimic browser request -headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'} +file_out = "../journals/journal_abbreviations_mathematics.csv" # given that /journals and /scripts are on same level + +# set headers to mimic browser request +headers = { + 'sec-ch-ua': '"Google Chrome";v="129", "Not=A?Brand";v="8", "Chromium";v="129"', + 'sec-ch-ua-mobile': '?0', + 'sec-ch-ua-platform': '"Windows"', + 'upgrade-insecure-requests': '1', + 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36', +} response = requests.get(file_in, headers=headers) if response.status_code == 200: