forked from mpi2/impc-etl
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Makefile
118 lines (90 loc) · 6.48 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# Optional suffix appended to the dist directory name (dist$(env)).
# Left empty by default. Quotes are literal characters in Make, so the
# previous value '' only worked because the shell collapsed the empty
# quoted string when the path was used in a recipe.
env :=

# `make` and `make all` both run the default pipeline:
# clean, install dev dependencies, build the dist package.
.PHONY: all default
all: default
default: clean devDeps build
# Run the Luigi workflow with luigi-prod.cfg after a clean rebuild of dist.
# Parameter: task - the Luigi task passed to `luigi --module impc_etl.workflow.main`.
# An earlier prerequisite list was `clean devDeps build`; devDeps is no longer
# run here, so the venv has to exist already.
# `.` is used instead of `source` because recipes run under /bin/sh by default.
.PHONY: submit-h
submit-h: clean build ##@submit Run a Luigi task using luigi-prod.cfg. Parameter: task
	. venv/bin/activate && LUIGI_CONFIG_PATH='luigi-prod.cfg' PYTHONPATH='.' PYSPARK_PYTHON=python36 luigi --module impc_etl.workflow.main $(task) --workers 3
# Run the Luigi workflow with luigi-daily.cfg. The config file is regenerated
# first by createDailyLuigiCfg, which substitutes dr-tag into luigi.cfg.template.
# Parameters: task - the Luigi task to run; dr-tag - consumed by createDailyLuigiCfg.
# `.` is used instead of `source` because recipes run under /bin/sh by default.
.PHONY: submit-h-daily
submit-h-daily: clean build createDailyLuigiCfg ##@submit Run a Luigi task using a freshly generated luigi-daily.cfg. Parameters: task, dr-tag
	. venv/bin/activate && LUIGI_CONFIG_PATH='luigi-daily.cfg' PYTHONPATH='.' PYSPARK_PYTHON=python36 luigi --module impc_etl.workflow.main $(task) --workers 3
# Run the Luigi workflow with a caller-supplied Luigi configuration file.
# No clean/build is performed: the existing venv and dist are used as they are.
# Parameters: confFile - path exported as LUIGI_CONFIG_PATH; task - the Luigi task to run.
# `.` is used instead of `source` because recipes run under /bin/sh by default.
.PHONY: submit-custom
submit-custom: ##@submit Run a Luigi task using a custom config. Parameters: confFile, task
	. venv/bin/activate && LUIGI_CONFIG_PATH='$(confFile)' PYTHONPATH='.' PYSPARK_PYTHON=python36 YARN_CONF_DIR=/homes/mi_hadoop/hadoop-yarn-conf/ luigi --module impc_etl.workflow.main $(task) --workers 2
# Run the Luigi workflow with luigi-lsf.cfg after reinstalling the dev
# dependencies and rebuilding dist.
# Parameter: task - the Luigi task to run.
# `.` is used instead of `source` because recipes run under /bin/sh by default.
.PHONY: submit-lsf
submit-lsf: clean devDeps build ##@submit Run a Luigi task using luigi-lsf.cfg. Parameter: task
	. venv/bin/activate && LUIGI_CONFIG_PATH='luigi-lsf.cfg' PYTHONPATH='.' luigi --module impc_etl.workflow.main $(task) --workers 3
# Run the Luigi workflow with luigi-dev.cfg using whatever `luigi` is on PATH
# (no venv is activated and nothing is rebuilt). YARN_CONF_DIR is set to empty.
# Parameter: task - the Luigi task to run.
.PHONY: submit-dev
submit-dev: ##@submit Run a Luigi task using luigi-dev.cfg. Parameter: task
	LUIGI_CONFIG_PATH='luigi-dev.cfg' PYTHONPATH='.' YARN_CONF_DIR='' luigi --module impc_etl.workflow.main $(task) --workers 2
# Interpreter used to create the virtual environment. It can be overridden from
# the command line or the environment (make PYTHON=python3.6 ...). Without a
# default the recipe below would expand to " -m venv ..." and fail.
PYTHON ?= python3

# Create the virtual environment in ./venv when it does not exist yet and
# upgrade pip inside it. The target name is .venv but the directory is venv,
# so the rule never produces its own target; it is declared phony so that a
# stray .venv file or directory cannot mask it.
# `.` is used instead of `source` because recipes run under /bin/sh by default.
.PHONY: .venv
.venv: ##@environment Create a venv
	if [ ! -e "venv/bin/activate" ] ; then $(PYTHON) -m venv --clear venv ; fi
	. venv/bin/activate && pip install --upgrade pip
# Run pylint (without the full report, -r n) over the listed sources.
# The environment lives in ./venv (see the .venv target and every other rule in
# this file), so that is the one activated here rather than ./.venv.
# `.` is used instead of `source` because recipes run under /bin/sh by default.
.PHONY: lint
lint: ##@best_practices Run pylint against the main script and the shared, jobs and test folders
	. venv/bin/activate && pylint -r n impc_etl/main.py impc_etl/shared/ impc_etl/jobs/ tests/
# Build the deployable artifacts under ./dist$(env):
#   impc_etl.zip - the impc_etl package (zipped with the `-x main.py` exclusion)
#   libs.zip     - requirements/common.txt and requirements/prod.txt installed
#                  into a temporary libs directory, zipped, then removed
# `clean` runs first, so the plain `mkdir` always starts from a missing directory.
# The venv is only activated here, not created; it must already exist.
# `.` is used instead of `source` because recipes run under /bin/sh by default.
.PHONY: build
build: clean ##@deploy Build to the dist package
	mkdir ./dist$(env)
	zip -x main.py -r ./dist$(env)/impc_etl.zip impc_etl
	cd ./dist$(env) && mkdir libs
	. venv/bin/activate && pip install --upgrade pip
	. venv/bin/activate && pip install -U -r requirements/common.txt -t ./dist$(env)/libs
	. venv/bin/activate && pip install -U -r requirements/prod.txt -t ./dist$(env)/libs
	cd ./dist$(env)/libs && zip -r ../libs.zip .
	cd ./dist$(env) && rm -rf libs
# Aggregate target: runs every clean-* target. Declared phony so a file named
# `clean` can never make it look up to date.
.PHONY: clean
clean: clean-build clean-pyc clean-test ##@clean Clean all
# Remove the build output directory dist$(env). The path always carries the
# literal "dist" prefix, so an empty env cannot widen what gets deleted.
.PHONY: clean-build
clean-build: ##@clean Clean the dist folder
	rm -fr dist$(env)/
# Remove Python byte-code (*.pyc, *.pyo), editor backup files (*~) and
# __pycache__ directories anywhere below the current directory.
.PHONY: clean-pyc
clean-pyc: ##@clean Clean all the python auto generated files
	find . -name '*.pyc' -exec rm -f {} +
	find . -name '*.pyo' -exec rm -f {} +
	find . -name '*~' -exec rm -f {} +
	find . -name '__pycache__' -exec rm -fr {} +
# Remove the pytest cache directory.
.PHONY: clean-test
clean-test: ##@clean Clean the pytest cache
	rm -fr .pytest_cache
# Install (or upgrade, -U) the common and development requirements into the
# venv, which is created first by the .venv prerequisite.
# `.` is used instead of `source` because recipes run under /bin/sh by default.
.PHONY: devDeps
devDeps: .venv ##@deps Create a venv and install common and dev dependencies
	. venv/bin/activate && pip install -U -r requirements/common.txt
	. venv/bin/activate && pip install -U -r requirements/dev.txt
# Install (or upgrade, -U) the common and production requirements into the
# venv, which is created first by the .venv prerequisite.
# `.` is used instead of `source` because recipes run under /bin/sh by default.
.PHONY: prodDeps
prodDeps: .venv ##@deps Create a venv and install common and prod dependencies
	. venv/bin/activate && pip install -U -r requirements/common.txt
	. venv/bin/activate && pip install -U -r requirements/prod.txt
# Set up a development environment: create the venv and install the dev
# dependencies. The rule has no recipe of its own.
# The pre-commit installation below is disabled. NOTE(review): it activates
# .venv/bin/activate while the venv is created in ./venv - adjust the path
# before re-enabling.
## source .venv/bin/activate && pip install -U pre-commit
## source .venv/bin/activate && pre-commit install --install-hooks
.PHONY: devEnv
devEnv: .venv devDeps
# Stage the input data for one data release and copy it to the ETL host.
#
# Required parameters (the recipe aborts up front when one is missing):
#   staging-path     local directory in which the release directory is created
#   dr-tag           name of the release directory, e.g. dr15.0
#   input-data-path  root of the *-data-archive directories that are copied from
#   etl-host         scp destination host
#   etl-dir          scp destination directory on etl-host
#
# Steps: create the directory layout, copy the archived inputs, flatten the
# IMPC XML tree by one level, download the MGI reports and the embryo data,
# then scp the whole release directory to $(etl-host):$(etl-dir)/.
#
# `mkdir` is used without -p, so the recipe stops if the release directory
# already exists.
# curl runs with --fail so an HTTP error aborts the recipe instead of saving the
# error page as data, and with --location so redirects are followed.
.PHONY: data
data: ##@data Download and structure input data for the ETL. Parameters: staging-path (e.g. /nobackup/staging), dr-tag (e.g. dr15.0), input-data-path (e.g. /impc/), etl-host (e.g. hadoop-host), etl-dir (e.g. /user/impc/)
	@$(foreach p,staging-path dr-tag input-data-path etl-host etl-dir,$(if $(strip $($(p))),,$(error Missing required parameter '$(p)' for target 'data')))
	cd $(staging-path) && mkdir $(dr-tag)
	cd $(staging-path)/$(dr-tag) && mkdir tracking mgi ontologies xml parquet solr misc
	cd $(staging-path)/$(dr-tag)/xml && mkdir impc 3i europhenome pwg
	cp $(input-data-path)/gentar-data-archive/phenotyping_colonies.tsv $(staging-path)/$(dr-tag)/tracking/
	cp $(input-data-path)/gentar-data-archive/gene_interest.tsv $(staging-path)/$(dr-tag)/tracking/
	cp -r $(input-data-path)/ontologies-data-archive/* $(staging-path)/$(dr-tag)/ontologies/
	cp -r $(input-data-path)/3i-data-archive/*.xml $(staging-path)/$(dr-tag)/xml/3i/
	cp -r $(input-data-path)/pwg-data-archive/*.xml $(staging-path)/$(dr-tag)/xml/pwg/
	cp $(input-data-path)/3i-data-archive/flow_results_EBIexport_180119.csv $(staging-path)/$(dr-tag)/misc/
	cp -r $(input-data-path)/europhenome-data-archive/*.xml $(staging-path)/$(dr-tag)/xml/europhenome/
	cp -r $(input-data-path)/dcc-data-archive/latest/* $(staging-path)/$(dr-tag)/xml/impc/
	# Move every XML file up one level, prefixing its name with its parent directory name:
	# <dir>/<file>.xml becomes <dir>_<file>.xml. The path is handed to bash as $$1 rather
	# than spliced into the script text, so unusual file names cannot break the quoting.
	cd $(staging-path)/$(dr-tag)/xml/impc/ && find "$$PWD" -type f -name "*.xml" -exec bash -c 'src="$$1"; mv "$$src" "$$(dirname "$$src")_$$(basename "$$src")"' _ {} \;
	cd $(staging-path)/$(dr-tag)/xml/impc/normal && find "$$PWD" -type d -empty -delete
	cd $(staging-path)/$(dr-tag)/xml/impc/finalising && find "$$PWD" -type d -empty -delete
	curl --fail --location http://www.informatics.jax.org/downloads/reports/MGI_Strain.rpt --output $(staging-path)/$(dr-tag)/mgi/MGI_Strain.rpt
	curl --fail --location http://www.informatics.jax.org/downloads/reports/MGI_PhenotypicAllele.rpt --output $(staging-path)/$(dr-tag)/mgi/MGI_PhenotypicAllele.rpt
	curl --fail --location http://www.informatics.jax.org/downloads/reports/MRK_List1.rpt --output $(staging-path)/$(dr-tag)/mgi/MRK_List1.rpt
	curl --fail --location http://www.informatics.jax.org/downloads/reports/HGNC_AllianceHomology.rpt --output $(staging-path)/$(dr-tag)/mgi/HGNC_AllianceHomology.rpt
	curl --fail --location http://www.informatics.jax.org/downloads/reports/MGI_GenePheno.rpt --output $(staging-path)/$(dr-tag)/mgi/MGI_GenePheno.rpt
	curl --fail --location https://www.mousephenotype.org/embryoviewer/rest/ready --output $(staging-path)/$(dr-tag)/misc/embryo_data_og.json
	# Strip all newlines from the downloaded JSON, then drop the original download.
	cd $(staging-path)/$(dr-tag)/misc/ && tr -d '\n' < embryo_data_og.json > embryo_data.json
	cd $(staging-path)/$(dr-tag)/misc/ && rm embryo_data_og.json
	scp -r $(staging-path)/$(dr-tag) $(etl-host):$(etl-dir)/
# Render luigi-prod.cfg from luigi.cfg.template by replacing every %DR_TAG%
# placeholder with $(dr-tag). The template file must exist beforehand.
# The recipe aborts when dr-tag is empty rather than writing a config with
# blank placeholders; the `g` flag covers lines holding several placeholders.
.PHONY: createProdLuigiCfg
createProdLuigiCfg: ##@build Generate luigi-prod.cfg from luigi.cfg.template using the given dr-tag; create the luigi.cfg.template file first. Parameter: dr-tag (e.g. dr15.0)
	@$(if $(strip $(dr-tag)),,$(error Missing required parameter 'dr-tag' for target 'createProdLuigiCfg'))
	sed 's/%DR_TAG%/$(dr-tag)/g' luigi.cfg.template > luigi-prod.cfg
# Render luigi-daily.cfg from luigi.cfg.template by replacing every %DR_TAG%
# placeholder with $(dr-tag). The template file must exist beforehand.
# The recipe aborts when dr-tag is empty rather than writing a config with
# blank placeholders; the `g` flag covers lines holding several placeholders.
.PHONY: createDailyLuigiCfg
createDailyLuigiCfg: ##@build Generate luigi-daily.cfg from luigi.cfg.template using the given dr-tag; create the luigi.cfg.template file first. Parameter: dr-tag (e.g. dr15.0)
	@$(if $(strip $(dr-tag)),,$(error Missing required parameter 'dr-tag' for target 'createDailyLuigiCfg'))
	sed 's/%DR_TAG%/$(dr-tag)/g' luigi.cfg.template > luigi-daily.cfg
# Run pytest inside the venv. Declared phony so a file or directory named
# `test` cannot make the target look up to date.
# `.` is used instead of `source` because recipes run under /bin/sh by default.
.PHONY: test
test: ##@best_practices Run pytest against the test folder
	. venv/bin/activate && pytest
# Perl script behind `make help`. It scans the makefiles for rule lines of the
# form  `target: prerequisites ##@category description`  and prints the targets
# grouped by category ("options" when no @category is given).
#  - Target names may contain word characters, dots and hyphens, so targets
#    such as clean-build and .venv are listed too.
#  - The padding is computed per target, which keeps the descriptions aligned.
#  - Only double-quoted Perl strings are used, because the whole script is
#    passed to perl inside a single-quoted shell argument.
HELP_FUN = \
	%help; \
	while (<>) { \
		push @{$$help{$$2 // "options"}}, [$$1, $$3] \
			if /^([\w.-]+)\s*:.*\#\#(?:@(\w+))?\s(.*)$$/; \
	} \
	print "usage: make [target]\n\n"; \
	for my $$section (sort keys %help) { \
		print "$$section:\n"; \
		for my $$entry (@{$$help{$$section}}) { \
			my $$sep = " " x (20 - length $$entry->[0]); \
			print "  $$entry->[0]$$sep$$entry->[1]\n"; \
		} \
		print "\n"; \
	}

# Print a short banner followed by the generated target list.
# printf is used because `echo` does not interpret "\n" the same way in every shell.
.PHONY: help
help:
	@printf '%s\n\n' "Run 'make' without a target to clean all, install dev dependencies and build the package"
	@perl -e '$(HELP_FUN)' $(MAKEFILE_LIST)