# Geokoordinaten_Scraper.py
# -*- coding: utf-8 -*-
"""
Created on Mon Jan  4 21:33:24 2021

@author: larsw
"""

import requests
from bs4 import BeautifulSoup
import pandas as pd

def replace(s):
    """Return *s* with every non-alphanumeric character removed.

    Characters are kept when ``str.isalnum`` is true for them, so letters
    (including non-ASCII ones) and digits survive while whitespace,
    punctuation and symbols are dropped.
    """
    alphanumeric_chars = filter(str.isalnum, s)
    return ''.join(alphanumeric_chars)

# Column layout of the exported spreadsheet.
COLUMNS = ['Staedte', 'Breitengrad', 'Laengengrad', 'Breitengrad_DEC', 'Laengengrad_DEC']

BASE_URL = "https://de.wikipedia.org"
LIST_URL = BASE_URL + "/wiki/Liste_der_St%C3%A4dte_in_Deutschland"

# Without a timeout a single stalled connection would block the run forever.
REQUEST_TIMEOUT = 30  # seconds


def _to_decimal_degrees(text):
    """Convert a degrees/minutes coordinate string to decimal degrees.

    Only the first two whitespace-separated tokens of *text* are used
    (taken as degrees and minutes); each is reduced to its alphanumeric
    characters with ``replace`` before being parsed as an integer. Anything
    after the second token (presumably seconds and/or a hemisphere letter
    -- TODO confirm against the page markup) is ignored, so the result is
    never negated.

    Returns ``None`` when *text* has fewer than two tokens or a token is
    not an integer after cleaning.
    """
    try:
        degrees, minutes = (int(replace(token)) for token in text.split()[:2])
    except ValueError:
        # Raised both by int() on a non-numeric token and by the unpacking
        # when fewer than two tokens are present.
        return None
    return degrees + minutes / 60


main_page = requests.get(LIST_URL, timeout=REQUEST_TIMEOUT)
# Fail loudly instead of silently exporting an empty sheet from an error page.
main_page.raise_for_status()
list_soup = BeautifulSoup(main_page.content, 'lxml')

# The first table on the page is skipped (presumably not a city listing --
# verify against the live page); links are gathered from all remaining tables.
links = []
for table in list_soup.find_all('table')[1:]:
    links.extend(table.find_all('a', href=True))

# Rows are collected in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 and copied the whole frame
# on every call.
rows = []

for link in links:
    title = link.attrs.get('title', '')
    text = link.getText()

    # A link is followed when its text and title contain one another. Both
    # must be non-empty, because '' is a substring of every string and would
    # make the containment test match vacuously.
    if not title or not text:
        continue
    if text not in title and title not in text:
        continue

    href = link.attrs.get('href', '')
    # Only site-relative hrefs can be appended to BASE_URL.
    if not href.startswith('/'):
        continue

    response = requests.get(BASE_URL + href, timeout=REQUEST_TIMEOUT)
    if not response.ok:
        # Error pages carry no coordinates; skip them.
        continue

    page_soup = BeautifulSoup(response.content, 'lxml')
    latitude_span = page_soup.find('span', {'title': 'Breitengrad'})
    longitude_span = page_soup.find('span', {'title': 'Längengrad'})
    if latitude_span is None or longitude_span is None:
        # find() returns None when the page has no coordinate span; such
        # pages are skipped instead of aborting the run with AttributeError.
        continue

    breitengrad = latitude_span.get_text()
    laengengrad = longitude_span.get_text()

    # Conversion to decimal degrees; None when the text cannot be parsed,
    # so the raw strings are still exported.
    rows.append({
        'Staedte': title,
        'Breitengrad': breitengrad,
        'Laengengrad': laengengrad,
        'Breitengrad_DEC': _to_decimal_degrees(breitengrad),
        'Laengengrad_DEC': _to_decimal_degrees(laengengrad),
    })

df = pd.DataFrame(rows, columns=COLUMNS)
df.to_excel('Koordinaten2.xlsx')