-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbuild.sh
More file actions
executable file
·91 lines (72 loc) · 2.11 KB
/
build.sh
File metadata and controls
executable file
·91 lines (72 loc) · 2.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/bin/bash
set -e
DATA="${PWD}/.datasets"
DATABASE=data/immobilier.db
CURRENT_YEAR=$(date +"%Y")
START_YEAR=$((${CURRENT_YEAR}-5))
END_YEAR=$((${CURRENT_YEAR}-1))
YEARS=($(seq ${START_YEAR} ${END_YEAR}))
setup() {
echo "setup database"
rm -f ${DATABASE}
sqlite3 ${DATABASE} < schema.sql
rm -rf ${DATA}
mkdir -p ${DATA}
}
load_dvf() {
echo "load dvf dataset"
DVF_BASE_URL=https://files.data.gouv.fr/geo-dvf/latest/csv
for YEAR in ${YEARS[@]}
do
url="${DVF_BASE_URL}/${YEAR}/departements/38.csv.gz"
gz_file="${DATA}/dvf_${YEAR}.csv.gz"
csv_file="${DATA}/dvf_${YEAR}.csv"
curl -L -o "${gz_file}" "${url}"
gzip -d -f "${gz_file}"
sqlite3 ${DATABASE} ".import --csv --skip 1 ${csv_file} dvf"
done
}
load_communes_geojson() {
echo "load communes geojson dataset"
COMMUNES_URL=https://raw.githubusercontent.com/gregoiredavid/france-geojson/master/departements/38-isere/communes-38-isere.geojson
COMMUNES_JSON="${DATA}/communes.geojson"
curl -L -o "${COMMUNES_JSON}" "${COMMUNES_URL}"
sqlite3 ${DATABASE} <<EOF
CREATE TEMP TABLE IF NOT EXISTS communes_geojson(geojson TEXT);
.import ${COMMUNES_JSON} communes_geojson
INSERT INTO communes(code, nom, geojson)
SELECT
json_extract(value, '$.properties.code'),
json_extract(value, '$.properties.nom'),
value
FROM
(
SELECT json_extract(geojson, '$.features') AS array
FROM communes_geojson
) features,
json_each(features.array)
;
EOF
}
assert_loaded_lines() {
echo "assert loaded lines"
TOTAL_CSV_LINES=$(cat ${DATA}/dvf_*.csv | wc -l)
EXPECTED_LINES=$((${TOTAL_CSV_LINES} - ${#YEARS[@]}))
DATABASE_LINES=$(sqlite3 ${DATABASE} "select count(*) from dvf;")
ASSERT_LINES="${DATABASE_LINES} -eq ${EXPECTED_LINES}"
if [ ! ${ASSERT_LINES} ]
then
echo "number of loaded lines in database different from csv data"
echo "csv lines: ${EXPECTED_LINES}"
echo "database lines: ${DATABASE_LINES}"
exit 1
fi
}
cleanup() {
rm -rf ${DATA}
}
setup
load_dvf
load_communes_geojson
assert_loaded_lines
cleanup