parent
e72766e0af
commit
af7a3a2d4c
@ -0,0 +1,251 @@ |
||||
#!/bin/bash |
||||
# Download, reproject and transform
# the addresses from the Madrid City
# Council to an OSM XML file.
||||
# |
||||
# Santiago Crespo 2022 |
||||
# https://creativecommons.org/publicdomain/zero/1.0/ |
||||
|
||||
# apt install -y gdal-bin p7zip-full |
||||
|
||||
# TODO: RAMDISK |
||||
# TODO: diff osc output |
||||
|
||||
# Name of the generated OSM XML file (relative to the working directory).
OUT_FILE="direccionesmadrid.osm"

# Running (negative) node counter, decremented once per address.
COUNTER=0

# Force a UTF-8 locale so bash case conversion and the text tools used
# later treat accented characters consistently.
export LANG="en_US.UTF-8"
export LANGUAGE="en_US:en"
export LC_ALL=C.UTF-8

# Take a single clock reading so the date and time parts cannot straddle
# a second/midnight boundary between two separate `date` calls.
run_started=$(date '+%Y-%m-%d %H-%M-%S')
DIA=${run_started% *}
HORA=${run_started#* }

# Every run works inside its own freshly created directory. Abort if it
# cannot be created or entered: the rest of the script moves and deletes
# files in the current directory and must never do so anywhere else.
DIR="$DIA-$HORA"
mkdir -- "$DIR" || {
  echo "ERROR: cannot create working directory '$DIR'" >&2
  exit 1
}
cd -- "$DIR" || {
  echo "ERROR: cannot enter working directory '$DIR'" >&2
  exit 1
}
||||
|
||||
echo "# STEP 1/7 Downloading csv and rdf files from datos.madrid.es"
# Fetch the catalogue page that carries the download links.
wget -nv "http://datos.madrid.es/portal/site/egob/menuitem.c05c1f754a33a9fbe4b2e4b284f1a5a0/?vgnextoid=b3c41f3cf6a6c410VgnVCM2000000c205a0aRCRD&" -O callejero.html

# Link to the csv "Relación de direcciones vigentes, con coordenadas":
# break the page at every '>' and keep the "3-callejero" csv link; the
# 6th double-quote-delimited field is used as the path.
URL_CALLEJERO=$(
  grep "3-callejero" callejero.html \
    | perl -pe 's/>/\n/g' \
    | grep csv \
    | grep "3-callejero" \
    | awk -F '"' '{print "http://datos.madrid.es/"$6}'
)

# Link to the rdf description of the dataset.
URL_RDF=$(
  grep rdf callejero.html \
    | grep href \
    | awk -F '"' '{print "http://datos.madrid.es/"$6}'
)

# The rdf supplies the value later written as source:date: the date part
# (text before the 'T') of the first dct:modified element.
wget -nv "$URL_RDF" -O callejero.rdf
FECHA=$(
  grep "dct:modified" callejero.rdf \
    | awk -F '>' '{print $2}' \
    | head -n1 \
    | awk -F 'T' '{print $1}'
)

## District, suburb and addr information
# 2021-09-19: the format changed, but a csv with the districts and another
# one with the suburbs are now published directly.
wget "https://geoportal.madrid.es/fsdescargas/IDEAM_WBGEOPORTAL/LIMITES_ADMINISTRATIVOS/Distritos/Distritos.csv" -O distritos-bruto.csv
wget "https://datos.madrid.es/egob/catalogo/300496-11326700-barrios-madrid.csv" -O barrios-bruto.csv
# Previous way of locating those two files (kept for reference):
#URL_VIALES_DISTRITO_BARRIO=`grep "\-1\-" callejero.rdf | awk -F '>' '{print $2}' | awk -F '<' '{print $1}'`
#URL_VIALES_DISTRITO=`grep "\-2\-" callejero.rdf | awk -F '>' '{print $2}' | awk -F '<' '{print $1}'`
#wget -nv "$URL_VIALES_DISTRITO_BARRIO" -O barrios-bruto.csv
#wget -nv "$URL_VIALES_DISTRITO" -O distritos-bruto.csv

# The address list itself.
wget -nv "$URL_CALLEJERO" -O callejero-oficial-madrid.csv

# Extra data (cadastral reference): the dcat catalogue entry points at a
# csv, found on or right after the line mentioning "Direcciones".
wget -nv "https://datos.madrid.es/egob/catalogo/200075-0-callejero.dcat" -O 200075-0-callejero.dcat
URL_EXTRA=$(
  grep Direcciones -A1 200075-0-callejero.dcat \
    | grep csv \
    | awk -F '>' '{print $2}' \
    | awk -F '<' '{print $1}'
)
wget -nv "$URL_EXTRA" -O 200075-1-callejero.csv
iconv -f ISO-8859-15 -t UTF-8 200075-1-callejero.csv > callejero-extra.csv

# Keep only the 2nd and 48th double-quote-delimited fields, joined by a
# double quote; step 5 looks rows up by the first of them.
awk -F '"' '{print $2"\""$48}' callejero-extra.csv > refparcelas.csv
||||
|
||||
echo "# STEP 2/7 Converting to UTF-8 and modifying some fields"
# Re-encode the address list as UTF-8, then rename the coordinate columns
# to x / y (UTM ETRS89 = EPSG:25830) so the VRT in step 4 can refer to them.
iconv -f ISO-8859-15 -t UTF-8 "callejero-oficial-madrid.csv" > callejero-utf8.csv
perl -pe 's/UTMX_ETRS/x/g; s/UTMY_ETRS/y/g' callejero-utf8.csv > callejero-25830.csv

# Districts: keep columns 4 and 7, strip spaces, drop the header row.
# NOTE(review): as written the first substitution removes every space, so
# the second one can never match - confirm the intended patterns.
iconv -f ISO-8859-15 -t UTF-8 "distritos-bruto.csv" \
  | awk -F ';' '{print $4";"$7}' \
  | perl -pe 's/ //g; s/ "/"/g' \
  | tail -n +2 > distritos.csv

# Suburbs: keep columns 11, 6 and 3 (in that order), drop the header row.
iconv -f ISO-8859-15 -t UTF-8 "barrios-bruto.csv" \
  | awk -F ';' '{print $11";"$6";"$3}' \
  | tail -n +2 > barrios.csv
||||
|
||||
|
||||
echo "# STEP 3/7 Composing full street name and fixing some capitalization"
# Input columns:
#COD_VIA;VIA_CLASE;VIA_PAR;VIA_NOMBRE;VIA_NOMBRE_ACENTOS;CLASE_APP;NUMERO;CALIFICADOR;TIPO_NDP;COD_NDP;DISTRITO;BARRIO;COD_POSTAL;UTMX_ED;UTMY_ED;UTMX_ETRS;UTMY_ETRS;LATITUD;LONGITUD;ANGULO_ROTULACION

#######################################
# Rewrite the semicolon-separated address list so that columns 2, 3 and 5
# (street class, particle, accented name) become one composed street name,
# and only the columns needed later are kept (0, name, 5-12, 15, 16).
# The header row is transformed like any other row; step 4 relies on the
# resulting "Via_clase via_par Via_nombre_acentos" column name.
# Arguments: $1 - input csv, $2 - output csv (truncated, then written)
# Returns:   non-zero if the input cannot be read or the output not written
#######################################
compose_street_names() (
  # The body runs in a subshell so that `set -f` stays local. Pathname
  # expansion is disabled because the fields are expanded unquoted on
  # purpose (see below) and a '*' or '?' in the data must stay literal.
  set -f
  while IFS=';' read -r -a arry; do
    via_clase_lower=${arry[1],,}
    via_par_lower=${arry[2],,}
    via_nombre_lower=${arry[4],,}
    # Upper-case the first letter of every word (GNU sed \u).
    via_nombre_titled=$(sed -e 's/\b\(.\)/\u\1/g' <<<"$via_nombre_lower")
    nombre="${via_clase_lower^} ${via_par_lower} ${via_nombre_titled^}"
    # Unquoted on purpose: word splitting squeezes runs of blanks (e.g. the
    # double space left by an empty particle) into a single space.
    # shellcheck disable=SC2086,SC2027
    echo ''${arry[0]}';'$nombre';'${arry[5]}';'${arry[6]}';'${arry[7]}';'${arry[8]}';'${arry[9]}';'${arry[10]}';'${arry[11]}';'${arry[12]}';'${arry[15]}';'${arry[16]}''
  done < "$1" > "$2"
)

# Replace the input only when the rewrite succeeded.
compose_street_names callejero-25830.csv c && mv c callejero-25830.csv
||||
|
||||
echo "# STEP 4/7 Reprojecting fom EPSG:25830 to EPSG:4326"
# Describe the csv as an OGR virtual layer: point geometry built from the
# x / y columns, source SRS EPSG:25830, and the attribute columns to keep.
cat > callejero.vrt <<'VRT'
<OGRVRTDataSource>
<OGRVRTLayer name="callejero-25830">
<SrcDataSource>callejero-25830.csv</SrcDataSource>
<GeometryType>wkbPoint</GeometryType>
<LayerSRS>+init=epsg:25830 +wktext</LayerSRS>
<GeometryField encoding="PointFromColumns" x="x" y="y"/>
<Field name="name" src="Via_clase via_par Via_nombre_acentos" />
<Field name="COD_VIA" src="COD_VIA" />
<Field name="CLASE_APP" src="CLASE_APP" />
<Field name="NUMERO" src="NUMERO" />
<Field name="CALIFICADOR" src="CALIFICADOR" />
<Field name="TIPO_NDP" src="TIPO_NDP" />
<Field name="COD_NDP" src="COD_NDP" />
<Field name="DISTRITO" src="DISTRITO" />
<Field name="BARRIO" src="BARRIO" />
<Field name="COD_POSTAL" src="COD_POSTAL" />
</OGRVRTLayer>
</OGRVRTDataSource>
VRT

# Reproject to EPSG:4326, writing the point coordinates as X,Y columns.
ogr2ogr -lco GEOMETRY=AS_XY -overwrite -f CSV -t_srs EPSG:4326 callejero.csv callejero.vrt

# Drop the header row and strip every double quote in a single pass.
tail -n +2 callejero.csv | perl -pe 's/"//g' > c
mv c callejero.csv
||||
|
||||
echo "# STEP 5/7 Creating OSM file"

#######################################
# Prepare a value for use inside a double-quoted XML attribute.
# Runs of blanks/tabs/newlines become a single space (what the unquoted
# `echo` used here before did implicitly), then & < > " are escaped so the
# generated file stays well-formed whatever the source data contains.
# Arguments: $1 - raw value
# Outputs:   result is stored in the global _xml_value (no subshell needed)
#######################################
xml_attr() {
  local v=$1
  # Replacements are passed through quoted variables so that '&' is always
  # taken literally, also where bash treats an unquoted '&' specially.
  local amp='&amp;' lt='&lt;' gt='&gt;' quot='&quot;'
  v=${v//[$'\t\n']/' '}
  while [[ $v == *'  '* ]]; do
    v=${v//'  '/' '}
  done
  v=${v//'&'/"$amp"}
  v=${v//'<'/"$lt"}
  v=${v//'>'/"$gt"}
  v=${v//'"'/"$quot"}
  _xml_value=$v
}

#######################################
# Print one <tag> element.
# Arguments: $1 - key (literal), $2 - value (normalised and escaped),
#            $3 - optional literal text placed before the value
#######################################
osm_tag() {
  xml_attr "$2"
  printf ' <tag k="%s" v="%s%s"/>\n' "$1" "${3-}" "$_xml_value"
}

#######################################
# Append one <node> per row of the reprojected address csv to $OUT_FILE.
# Row layout:
#   X,Y,name,COD_VIA,CLASE_APP,NUMERO,CALIFICADOR,TIPO_NDP,COD_NDP,DISTRITO,BARRIO,COD_POSTAL
# Globals:   OUT_FILE (read), FECHA (read), COUNTER (decremented per row)
# Reads:     distritos.csv, barrios.csv, refparcelas.csv (current directory)
# Arguments: $1 - csv file to convert
# Outputs:   nodes are appended to $OUT_FILE; rows with an unknown
#            CLASE_APP are reported on stdout
#######################################
write_osm_nodes() {
  local csv=$1
  local -a arry
  local -A district_cache=() suburb_cache=() cadastral_refs=()
  local line rest key id numero_completo
  local cod_distrito cod_barrio distrito barrio refcatastral

  # Index the cadastral references once (each line is KEY"REF) instead of
  # grepping the whole file for every address. Keys are matched literally.
  # Several lines with the same key are kept newline-separated, the way
  # the former grep|awk pipeline returned them.
  if [[ -r refparcelas.csv ]]; then
    while IFS= read -r line; do
      [[ $line == *'"'* ]] || continue
      key="c:${line%%\"*}"
      rest=${line#*\"}
      cadastral_refs[$key]+="${rest%%\"*}"$'\n'
    done < refparcelas.csv
  else
    echo "WARNING: refparcelas.csv not readable, cadastral references left empty" >&2
  fi

  # fd 3 keeps the caller's stdout for diagnostics while the loop's own
  # stdout is appended to the OSM file (opened once, not once per line).
  while IFS=',' read -r -a arry; do
    COUNTER=$((COUNTER - 1))
    id=$((60000000000 + ${COUNTER#-}))

    xml_attr "${arry[1]}"
    printf ' <node id="%s" changeset="1" lat="%s"' "$id" "$_xml_value"
    xml_attr "${arry[0]}"
    printf ' lon="%s">\n' "$_xml_value"
    osm_tag 'addr:street' "${arry[2]}"

    numero_completo="${arry[5]}"
    # Qualifier after the number: a letter, or on kilometre points
    # EN (entrada, inbound) / SA (salida, outbound).
    if [ -n "${arry[6]}" ]; then
      if [ "${arry[6]}" = " EN" ]; then
        numero_completo="$numero_completo sentido entrada"
      elif [ "${arry[6]}" = " SA" ]; then
        numero_completo="$numero_completo sentido salida"
      else
        numero_completo="$numero_completo-${arry[6]}"
      fi
    fi

    # CLASE_APP: N=number; K=kilometre; C=shack ("CHABOLA"; a Dec 2019 note
    # in the original asks whether any are still published).
    case "${arry[4]}" in
      'KILÓMETRO') osm_tag 'addr:housenumber' "$numero_completo" 'km ' ;;
      'NUMERO') osm_tag 'addr:housenumber' "$numero_completo" ;;
      *) echo "ERROR: $numero_completo ${arry[4]}" >&3 ;;
    esac

    # TIPO_NDP values: FRENTE FACHADA, GARAJE, JARDÍN/PARQUE, PARCELA,
    # PLANEAMIENTO, PORTAL. FRENTE FACHADA adds no tag.
    case "${arry[7]}" in
      'PORTAL')
        printf '%s\n' ' <tag k="entrance" v="main"/>'
        # building=yes so that the geocoder finds the addresses.
        printf '%s\n' ' <tag k="building" v="yes"/>'
        ;;
      'GARAJE')
        printf '%s\n' ' <tag k="amenity" v="parking_entrance"/>'
        printf '%s\n' ' <tag k="entrance" v="yes"/>'
        ;;
      'JARDÍN/PARQUE')
        printf '%s\n' ' <tag k="leisure" v="garden"/>'
        ;;
      'PARCELA')
        # PENDING (from the original): this is not really brownfield.
        printf '%s\n' ' <tag k="landuse" v="brownfield"/>'
        ;;
      'PLANEAMIENTO')
        # PENDING (from the original): this is not really construction.
        printf '%s\n' ' <tag k="landuse" v="construction"/>'
        ;;
    esac

    # District name: first line of distritos.csv containing "<code>;".
    # The lookup is unchanged but memoised per code.
    # NOTE(review): the match is not anchored; confirm it cannot pick a
    # line of another district (e.g. "1;" inside "11;").
    cod_distrito="${arry[9]}"
    key="d:$cod_distrito"
    if [[ -z ${district_cache[$key]+set} ]]; then
      district_cache[$key]=$(grep "$cod_distrito;" distritos.csv | head -n1 | awk -F ';' '{print $2}')
    fi
    distrito=${district_cache[$key]}
    osm_tag 'addr:district' "$distrito"

    # Suburb name: first line of barrios.csv containing "<suburb>;<district>".
    cod_barrio="${arry[10]}"
    key="b:$cod_barrio;$cod_distrito"
    if [[ -z ${suburb_cache[$key]+set} ]]; then
      suburb_cache[$key]=$(grep "$cod_barrio;$cod_distrito" barrios.csv | head -n1 | awk -F ';' '{print $3}')
    fi
    barrio=${suburb_cache[$key]}
    osm_tag 'addr:suburb' "$barrio"

    osm_tag 'addr:postcode' "${arry[11]}"
    osm_tag 'madridcity:street_id' "${arry[3]}"
    osm_tag 'madridcity:addr_id' "${arry[8]}"
    printf '%s\n' ' <tag k="source" v="Ayuntamiento de Madrid"/>'
    osm_tag 'source:date' "$FECHA"

    # Cadastre facade image and cadastral reference.
    key="c:${arry[8]}"
    refcatastral=${cadastral_refs[$key]-}
    while [[ $refcatastral == *$'\n' ]]; do
      refcatastral=${refcatastral%$'\n'}
    done
    osm_tag 'image' "$refcatastral" 'http://ovc.catastro.meh.es/OVCServWeb/OVCWcfLibres/OVCFotoFachada.svc/RecuperarFotoFachadaGet?ReferenciaCatastral='
    printf '%s\n' ' <tag k="source:image" v="Dirección General del Catastro"/>'
    osm_tag 'ref:catastro' "$refcatastral"

    printf '%s\n' ' </node>'
  done < "$csv" 3>&1 >> "$OUT_FILE"
}

# HEADERS
echo '<?xml version="1.0" encoding="UTF-8"?>' > "$OUT_FILE"
echo '<osm version="0.6" generator="madridaddr2osm.sh 1.2">' >> "$OUT_FILE"

write_osm_nodes callejero.csv

echo '</osm>' >> "$OUT_FILE"
||||
|
||||
|
||||
echo "# STEP 6/7 Fixing more capitalization and other errors"
# Lower-case capitalised prepositions/articles, restore upper-case Roman
# numerals, and fix a few spellings. The substitutions run in this exact
# order on every line. " La " is applied twice on purpose: the matches
# share their surrounding spaces, so in " La La " the second one is only
# reachable on a second pass.
perl -pe '
  s/ De / de /g;
  s/ Del / del /g;
  s/ La / la /g;
  s/ La / la /g;
  s/ Las / las /g;
  s/ Los / los /g;
  s/ Y / y /g;
  s/ A / a /g;
  s/ En / en /g;
  s/ Ii"/ II"/g;
  s/ Iii"/ III"/g;
  s/ Iv"/ IV"/g;
  s/ Vi"/ VI"/g;
  s/ Vii"/ VII"/g;
  s/ Viii/ VIII/g;
  s/ Ix"/ IX"/g;
  s/ Xi"/ XI"/g;
  s/ Xii"/ XII"/g;
  s/ Xiii"/ XIII"/g;
  s/ Xxiii"/ XXIII"/g;
  s/ Don / don /g;
  s/ Doña / doña /g;
  s/Callejon/Callejón/g;
  s/- /-/g;
' "$OUT_FILE" > o
mv o "$OUT_FILE"

# Accents missing in the source data.
perl -pe '
  s/Aguilas/Águilas/g;
  s/Apostol/Apóstol/g;
  s/Angel/Ángel/g;
  s/Rios/Ríos/g;
  s/Cristobal/Cristóbal/g;
' "$OUT_FILE" > x
mv x "$OUT_FILE"
||||
|
||||
echo "# STEP 7/7 Adding timestamps, compressing source and intermediate data" |
||||
# Añadir timestamp, version |
||||
HORAINICIO=$HORA |
||||
DIA=`date +%Y-%m-%d` |
||||
HORA=`date +%H:%M:%S` |
||||
perl -pe 's/id="/timestamp="'$DIA'T'$HORA'Z" version="1" id="/g' $OUT_FILE | perl -pe 's/ref="/timestamp="'$DIA'T'$HORA'Z" version="1" ref="/g' | perl -pe 's/version="1"\///g' > m ; mv m $OUT_FILE |
||||
|
||||
mkdir source_intermediate_data |
||||
mv * source_intermediate_data |
||||
mv source_intermediate_data/direccionesmadrid.osm . |
||||
7z a source_intermediate_data source_intermediate_data |
||||
rm -rf source_intermediate_data |
||||
|
||||
mkdir -p ../current |
||||
cp $OUT_FILE ../current/ |
||||
ARCHIVE_FILE=$DIA-`date +%H%M`-$OUT_FILE |
||||
mv $OUT_FILE $ARCHIVE_FILE |
||||
7z a $ARCHIVE_FILE.7z $ARCHIVE_FILE && rm $ARCHIVE_FILE |
||||
|
||||
|
||||
echo "$OUT_FILE created :)" |
||||
HORAFIN=`date +%H:%M:%S` |
||||
echo "Hora inicio: $HORAINICIO" |
||||
echo "Hora fin: $HORAFIN" |
||||
|
Loading…
Reference in new issue