From 901723011038756bea4ae6b6abe72f2c81045f21 Mon Sep 17 00:00:00 2001 From: Sammy Fung Date: Thu, 31 Aug 2023 23:49:13 +0800 Subject: [PATCH] - Move to sub-folder, and remove django models support. - Add 2 new weather stations. - Update github workflow and README. - Remove Travis CI config file. --- .github/workflows/hk0weather-tests.yml | 30 ------- .github/workflows/hk0weather.yml | 55 ++++++++++++ .travis.yml | 10 --- README.md | 74 +++------------- hk0weather/{ => hk0weather}/__init__.py | 0 hk0weather/{ => hk0weather}/hko.py | 4 + hk0weather/hk0weather/items.py | 72 +++++++++++++++ hk0weather/{ => hk0weather}/pipelines.py | 0 hk0weather/{ => hk0weather}/settings.py | 13 --- .../{ => hk0weather}/spiders/__init__.py | 0 .../spiders/hko9dayforecast.py | 0 .../{ => hk0weather}/spiders/hkoforecast.py | 0 .../{ => hk0weather}/spiders/rainfall.py | 0 .../{ => hk0weather}/spiders/regional.py | 19 ++-- hk0weather/items.py | 88 ------------------- scrapy.cfg => hk0weather/scrapy.cfg | 0 requirements.txt | 6 +- requirements_django.txt | 2 - 18 files changed, 153 insertions(+), 220 deletions(-) delete mode 100644 .github/workflows/hk0weather-tests.yml create mode 100644 .github/workflows/hk0weather.yml delete mode 100644 .travis.yml rename hk0weather/{ => hk0weather}/__init__.py (100%) rename hk0weather/{ => hk0weather}/hko.py (96%) create mode 100644 hk0weather/hk0weather/items.py rename hk0weather/{ => hk0weather}/pipelines.py (100%) rename hk0weather/{ => hk0weather}/settings.py (89%) rename hk0weather/{ => hk0weather}/spiders/__init__.py (100%) rename hk0weather/{ => hk0weather}/spiders/hko9dayforecast.py (100%) rename hk0weather/{ => hk0weather}/spiders/hkoforecast.py (100%) rename hk0weather/{ => hk0weather}/spiders/rainfall.py (100%) rename hk0weather/{ => hk0weather}/spiders/regional.py (86%) delete mode 100644 hk0weather/items.py rename scrapy.cfg => hk0weather/scrapy.cfg (100%) delete mode 100644 requirements_django.txt diff --git a/.github/workflows/hk0weather-tests.yml b/.github/workflows/hk0weather-tests.yml deleted file mode 100644 index d90a167..0000000 --- a/.github/workflows/hk0weather-tests.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: hk0weather-tests -on: [push] -jobs: - do-hk0weather-tests: - runs-on: ubuntu-latest - env: - PYTHON: '3.9' - steps: - - uses: actions/checkout@master - - name: Setup python - uses: actions/setup-python@master - with: - python-version: 3.9 - - name: Install required python packages - run: | - pip install -r requirements.txt - - name: List available scrapers - run: | - coverage run -m scrapy list - - name: Test a scraper of regional weather - run: | - coverage run -m scrapy crawl regional -o regional.csv:csv - - name: Test a scraper of daily weather forecast - run: | - coverage run -m scrapy crawl hkoforecast -o hkoforecast.csv:csv - - name: Test a scraper of 9-day weather forecast - run: | - coverage run -m scrapy crawl hko9dayforecast -o hko9dayforecast.csv:csv - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 diff --git a/.github/workflows/hk0weather.yml b/.github/workflows/hk0weather.yml new file mode 100644 index 0000000..a6ed446 --- /dev/null +++ b/.github/workflows/hk0weather.yml @@ -0,0 +1,55 @@ +name: hk0weather +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + schedule: + - cron: '5 19 1 * *' +jobs: + hk0weather-tests: + runs-on: ubuntu-latest + env: + PYTHON: '3.9' + steps: + - uses: actions/checkout@master + - name: Setup python + uses: actions/setup-python@master + with: + python-version: 
3.10 + - name: Install required python packages + run: | + python -m pip install --upgrade pip + pip install flake8 coverage + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: List available scrapers + run: | + coverage run -m scrapy list + - name: Test a scraper of regional weather + run: | + coverage run -m scrapy crawl regional -o regional.csv + working-directory: hk0weather + - name: Test a scraper of daily weather forecast + run: | + coverage run -m scrapy crawl hkoforecast -o hkoforecast.csv + working-directory: hk0weather + - name: Test a scraper of 9-day weather forecast + run: | + coverage run -m scrapy crawl hko9dayforecast -o hko9dayforecast.csv + working-directory: hk0weather + - name: Generate coverage json report + run: | + coverage json + working-directory: hk0weather + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v3 + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + with: + directory: hk0weather diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 0229266..0000000 --- a/.travis.yml +++ /dev/null @@ -1,10 +0,0 @@ -language: python -dist: xenial -os: linux -script: - - coverage run -m scrapy list - - coverage run -m scrapy crawl regional -t csv -o regional.csv - - coverage run -m scrapy crawl hkoforecast -t csv -o hkoforecast.csv - - coverage run -m scrapy crawl hko9dayforecast -t csv -o hko9dayforecast.csv -after_success: - - bash <(curl -s https://codecov.io/bash) diff --git a/README.md b/README.md index 65f8058..42b7fb8 100644 --- a/README.md +++ b/README.md @@ -8,56 +8,25 @@ hk0weather is an open source web scraper project using Scrapy to collect the use Scrapy can output collected weather data into the machine-readable formats (eg. CSV, JSON, XML). -Optionally, this project supports a Django app 'openweather' to store the collected weather data to Django web framework, and the data can be shown on web through the Django admin UI. - -Available Spiders +Available Web Crawlers --- 1. **regional**: Hong Kong Regional Weather Data in 10-minutes update from HKO. 1. **rainfall**: Hong Kong Rainfall Data in hourly update from HKO. 1. **hkoforecast**: Hong Kong Next 24 hour Weather Forecast Report from HKO Open Data. 1. **hko9dayforecast**: Hong Kong 9-day Weather Report from HKO Open Data. -Installation Example +Installation --- -1) Cloning and setup hk0weather in a Py3 virtual environment +Cloning and setup hk0weather in a Py3 virtual environment ``` - git clone https://github.com/sammyfung/hk0weather.git - virtualenv hk0weatherenv - source hk0weatherenv/bin/activate - cd hk0weather - pip install -r requirements.txt + $ git clone https://github.com/sammyfung/hk0weather.git + $ cd hk0weather + $ python3 -m venv venv + $ source venv/bin/activate + $ pip install -r requirements.txt ``` - -2) Optional: Setup hk0weather to use openweather - - ``` - pip install -r requirements-django.txt - cd .. - django-admin startproject yourweatherproject - cd yourweatherproject - git clone https://github.com/sammyfung/openweather.git - ``` - - Please add 'openweather' to INSTALLED_APPS in Django yourweatherproject/settings.py. 
- - ``` - ./manage.py makemigrations - ./manage.py migrate - ./manage.py createsuperuser - ./manage.py runserver & - cd ../hk0weather - ``` - - Django daemon is now running in the background, its web admin UI can be access at [http://localhost:8000/admin](http://localhost:8000/admin). - - ``` - export PYTHONPATH=/your-full-path-to/yourweatherproject - export DJANGO_SETTINGS_MODULE=yourweatherproject.settings - ``` - - Please export PYTHONPATH and DJANGO_SETTINGS_MODULE again after every activation of the Py3 virtual environment. Run a Scrapy spider --- @@ -65,39 +34,22 @@ Run a Scrapy spider Activate the Py3 virtual environment once before the first running of web spiders. ``` -source hk0weatherenv/bin/activate +$ source venv/bin/activate +$ cd hk0weather ``` -Optionally, if Django is in use, export PYTHONPATH and DJANGO_SETTINGS_MODULE. - -``` -export PYTHONPATH=/your-full-path-to/yourweatherproject -export DJANGO_SETTINGS_MODULE=yourweatherproject.settings -``` Optionally, list all available spiders. ``` -scrapy list +$ scrapy list ``` -Run a specific spider (eg. regional) in Scrapy +Run a regional weather data web crawler and export data to a JSON file. ``` -scrapy crawl regional +$ scrapy crawl regional -o regional.json ``` -and optionally use -t (file format) and -o (filename) to output the data in a json file. - -``` -scrapy crawl regional -t json -o test.json -``` - -## Sponsors - -Calvin Tsang. - -Thanks for my sponsors, please consider to [sponsor](https://github.com/sponsors/sammyfung) my works. - References -- diff --git a/hk0weather/__init__.py b/hk0weather/hk0weather/__init__.py similarity index 100% rename from hk0weather/__init__.py rename to hk0weather/hk0weather/__init__.py diff --git a/hk0weather/hko.py b/hk0weather/hk0weather/hko.py similarity index 96% rename from hk0weather/hko.py rename to hk0weather/hk0weather/hko.py index 233cfe2..b70dd8e 100644 --- a/hk0weather/hko.py +++ b/hk0weather/hk0weather/hko.py @@ -49,6 +49,8 @@ class hko: (u'se','Kai Tak'), (u'cp1','Central'), (u'swh','Sai Wan Ho'), + (u'cwb', 'Clear Water Bay'), + (u'tls', 'Tai Lung'), ] cnameid = [ @@ -98,6 +100,8 @@ class hko: (u'啟德','se'), (u'中環','cp1'), (u'西灣河','swh'), + (u'清水灣', 'cwb'), + (u'大隴', 'tls'), ] def getename(self, id): diff --git a/hk0weather/hk0weather/items.py b/hk0weather/hk0weather/items.py new file mode 100644 index 0000000..a5ba5d9 --- /dev/null +++ b/hk0weather/hk0weather/items.py @@ -0,0 +1,72 @@ +# -*- coding: utf-8 -*- +import scrapy + +class RegionalItem(scrapy.Item): + scraptime = scrapy.Field() + reptime = scrapy.Field() + station = scrapy.Field() + ename = scrapy.Field() + cname = scrapy.Field() + temperture = scrapy.Field() + humidity = scrapy.Field() + temperturemax = scrapy.Field() + temperturemin = scrapy.Field() + winddirection = scrapy.Field() + windspeed = scrapy.Field() + maxgust = scrapy.Field() + pressure = scrapy.Field() + +class RainfallItem(scrapy.Item): + scraptime = scrapy.Field() + reptime = scrapy.Field() + ename = scrapy.Field() + cname = scrapy.Field() + rainfallmin = scrapy.Field() + rainfallmax = scrapy.Field() + +class Hk0WeatherItem(scrapy.Item): + time = scrapy.Field() + station = scrapy.Field() + ename = scrapy.Field() + cname = scrapy.Field() + temperture = scrapy.Field() + humidity = scrapy.Field() + +class Hk0TropicalItem(scrapy.Item): + time = scrapy.Field() + postime = scrapy.Field() + x = scrapy.Field() + y = scrapy.Field() + category = scrapy.Field() + windspeed = scrapy.Field() + tctype = scrapy.Field() + + +class ForecastItem(scrapy.Item): + 
update_time = scrapy.Field() + date = scrapy.Field() + general_en = scrapy.Field() + general_hk = scrapy.Field() + description_en = scrapy.Field() + description_hk = scrapy.Field() + wind_en = scrapy.Field() + wind_hk = scrapy.Field() + max_temp = scrapy.Field() + min_temp = scrapy.Field() + max_rh = scrapy.Field() + min_rh = scrapy.Field() + icon = scrapy.Field() + + +class ShortForecastItem(scrapy.Item): + scrape_time = scrapy.Field() + update_time = scrapy.Field() + general_en = scrapy.Field() + general_hk = scrapy.Field() + period_en = scrapy.Field() + period_hk = scrapy.Field() + forecast_en = scrapy.Field() + forecast_hk = scrapy.Field() + outlook_en = scrapy.Field() + outlook_hk = scrapy.Field() + diff --git a/hk0weather/pipelines.py b/hk0weather/hk0weather/pipelines.py similarity index 100% rename from hk0weather/pipelines.py rename to hk0weather/hk0weather/pipelines.py diff --git a/hk0weather/settings.py b/hk0weather/hk0weather/settings.py similarity index 89% rename from hk0weather/settings.py rename to hk0weather/hk0weather/settings.py index d4fc1c0..e25c1df 100755 --- a/hk0weather/settings.py +++ b/hk0weather/hk0weather/settings.py @@ -87,16 +87,3 @@ #HTTPCACHE_DIR = 'httpcache' #HTTPCACHE_IGNORE_HTTP_CODES = [] #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' - -# Initialize Django web framework for data store -# Use environment variable PYTHONPATH for abspath to Django project -# and DJANGO_SETTINGS_MODULE for Settings filename of Django project -try: - import django - try: - django.setup() - except django.core.exceptions.ImproperlyConfigured: - pass -except ImportError: - # Allow to work without Django - pass diff --git a/hk0weather/spiders/__init__.py b/hk0weather/hk0weather/spiders/__init__.py similarity index 100% rename from hk0weather/spiders/__init__.py rename to hk0weather/hk0weather/spiders/__init__.py diff --git a/hk0weather/spiders/hko9dayforecast.py b/hk0weather/hk0weather/spiders/hko9dayforecast.py similarity index 100% rename from hk0weather/spiders/hko9dayforecast.py rename to hk0weather/hk0weather/spiders/hko9dayforecast.py diff --git a/hk0weather/spiders/hkoforecast.py b/hk0weather/hk0weather/spiders/hkoforecast.py similarity index 100% rename from hk0weather/spiders/hkoforecast.py rename to hk0weather/hk0weather/spiders/hkoforecast.py diff --git a/hk0weather/spiders/rainfall.py b/hk0weather/hk0weather/spiders/rainfall.py similarity index 100% rename from hk0weather/spiders/rainfall.py rename to hk0weather/hk0weather/spiders/rainfall.py diff --git a/hk0weather/spiders/regional.py b/hk0weather/hk0weather/spiders/regional.py similarity index 86% rename from hk0weather/spiders/regional.py rename to hk0weather/hk0weather/spiders/regional.py index c69fbe5..2201f55 100755 --- a/hk0weather/spiders/regional.py +++ b/hk0weather/hk0weather/spiders/regional.py @@ -49,7 +49,10 @@ def parse(self, response): if len(data) > 5: for j in range(0,len(data)): if data[j].isdigit(): - stations[laststation]['humidity'] = int(data[j]) + #try: + stations[laststation]['humidity'] = int(data[j]) + #except: + # print(i) elif laststation != '': try: if j == 1: @@ -91,17 +94,9 @@ def parse(self, response): pass for key in stations: - # __module__ and __name__ - # Scrapy Item: scrapy.item.ItemMeta - # Scrapy DjangoItem: scrapy_djangoitem.DjangoItemMeta - if RegionalItem.__class__.__module__ == 'scrapy_djangoitem': - stationitem = RegionalItem() - for key2 in stations[key]: - stationitem[key2] = stations[key][key2] - elif RegionalItem.__class__.__module__ == 
'scrapy.item': - stationitem = RegionalItem() - for key2 in stations[key]: - stationitem[key2] = stations[key][key2] + stationitem = RegionalItem() + for key2 in stations[key]: + stationitem[key2] = stations[key][key2] stationitems.append(stationitem) return stationitems diff --git a/hk0weather/items.py b/hk0weather/items.py deleted file mode 100644 index 713792a..0000000 --- a/hk0weather/items.py +++ /dev/null @@ -1,88 +0,0 @@ -# -*- coding: utf-8 -*- - -# Define here the models for your scraped items -# -# See documentation in: -# https://docs.scrapy.org/en/latest/topics/items.html - -try: - from scrapy_djangoitem import DjangoItem - from openweather.models import WeatherData, RainfallData, ReportData - - class RegionalItem(DjangoItem): - django_model = WeatherData - - class RainfallItem(DjangoItem): - django_model = RainfallData -except ImportError: - import scrapy - - class RegionalItem(scrapy.Item): - scraptime = scrapy.Field() - reptime = scrapy.Field() - station = scrapy.Field() - ename = scrapy.Field() - cname = scrapy.Field() - temperture = scrapy.Field() - humidity = scrapy.Field() - temperturemax = scrapy.Field() - temperturemin = scrapy.Field() - winddirection = scrapy.Field() - windspeed = scrapy.Field() - maxgust = scrapy.Field() - pressure = scrapy.Field() - - class RainfallItem(scrapy.Item): - scraptime = scrapy.Field() - reptime = scrapy.Field() - ename = scrapy.Field() - cname = scrapy.Field() - rainfallmin = scrapy.Field() - rainfallmax = scrapy.Field() - - class Hk0WeatherItem(scrapy.Item): - time = scrapy.Field() - station = scrapy.Field() - ename = scrapy.Field() - cname = scrapy.Field() - temperture = scrapy.Field() - humidity = scrapy.Field() - - class Hk0TropicalItem(scrapy.Item): - time = scrapy.Field() - postime = scrapy.Field() - x = scrapy.Field() - y = scrapy.Field() - category = scrapy.Field() - windspeed = scrapy.Field() - tctype = scrapy.Field() - - -class ForecastItem(scrapy.Item): - update_time = scrapy.Field() - date = scrapy.Field() - general_en = scrapy.Field() - general_hk = scrapy.Field() - description_en = scrapy.Field() - description_hk = scrapy.Field() - wind_en = scrapy.Field() - wind_hk = scrapy.Field() - max_temp = scrapy.Field() - min_temp = scrapy.Field() - max_rh = scrapy.Field() - min_rh = scrapy.Field() - icon = scrapy.Field() - - -class ShortForecastItem(scrapy.Item): - scrape_time = scrapy.Field() - update_time = scrapy.Field() - general_en = scrapy.Field() - general_hk = scrapy.Field() - period_en = scrapy.Field() - period_hk = scrapy.Field() - forecast_en = scrapy.Field() - forecast_hk = scrapy.Field() - outlook_en = scrapy.Field() - outlook_hk = scrapy.Field() - diff --git a/scrapy.cfg b/hk0weather/scrapy.cfg similarity index 100% rename from scrapy.cfg rename to hk0weather/scrapy.cfg diff --git a/requirements.txt b/requirements.txt index 0411951..6425689 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,2 @@ -Scrapy==2.8.0 -pytz==2019.3 -attrs>=19.2.0 -coverage==5.3 +Scrapy>=2.10.1 +pytz>=2023.3 diff --git a/requirements_django.txt b/requirements_django.txt deleted file mode 100644 index 68f83a7..0000000 --- a/requirements_django.txt +++ /dev/null @@ -1,2 +0,0 @@ -Django>=2.2.13 -scrapy-djangoitem==1.1.1
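
With the Django 'openweather' integration removed by this patch, the spiders persist data only through Scrapy's feed exports (e.g. `scrapy crawl regional -o regional.json`). If a database sink is still wanted, a plain Scrapy item pipeline can stand in for the old DjangoItem path. The sketch below is illustrative only and is not part of this patch: the class name `SqliteRegionalPipeline`, the database filename, and the chosen column subset are assumptions, and the class would live alongside the existing `hk0weather/hk0weather/pipelines.py` code.

```python
import sqlite3

from hk0weather.items import RegionalItem


def _text(value):
    # Timestamps may arrive as datetime objects; store them as text.
    return None if value is None else str(value)


class SqliteRegionalPipeline:
    """Hypothetical pipeline: persist RegionalItem rows to a local SQLite file."""

    def open_spider(self, spider):
        # Database filename is arbitrary; adjust as needed.
        self.conn = sqlite3.connect("regional_weather.sqlite3")
        # 'temperture' matches the field spelling used in items.py.
        self.conn.execute(
            """CREATE TABLE IF NOT EXISTS regional (
                   scraptime TEXT, reptime TEXT, station TEXT,
                   ename TEXT, cname TEXT,
                   temperture REAL, humidity INTEGER, pressure REAL
               )"""
        )

    def close_spider(self, spider):
        self.conn.commit()
        self.conn.close()

    def process_item(self, item, spider):
        # Only handle items produced by the regional weather spider.
        if isinstance(item, RegionalItem):
            self.conn.execute(
                "INSERT INTO regional VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
                (
                    _text(item.get("scraptime")),
                    _text(item.get("reptime")),
                    item.get("station"),
                    item.get("ename"),
                    item.get("cname"),
                    item.get("temperture"),
                    item.get("humidity"),
                    item.get("pressure"),
                ),
            )
        return item
```

To enable such a pipeline it would be registered in `hk0weather/hk0weather/settings.py`, for example with `ITEM_PIPELINES = {"hk0weather.pipelines.SqliteRegionalPipeline": 300}`; the priority value 300 is arbitrary.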