# MIT License
#
# Copyright (c) 2025 Jesper Zedlitz <jesper@zedlitz.de>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# Lädt mit Hilfe einer SPARQL-Abfrage an Wikidata die Startseite jeder
# Webseite eines Amtes in Schleswig-Holstein herunter und speichert sie in einer
# lokalen HTML-Datei mit dem Regionalschlüssel als Dateinamen.

import requests
import os

# SPARQL query used to fetch the Ämter and their websites.
# Endpoint the query is sent to (Wikidata's SPARQL service).
SPARQL_ENDPOINT = "https://query.wikidata.org/sparql"
# Selects ?amt, ?amtLabel, ?ars and ?www for every item that is an instance
# of wd:Q478847 (Amt) and located (P131) in a ?kreis that is an instance of
# wd:Q61856889 (Kreis in Schleswig-Holstein). Items carrying a P576 or P1366
# statement are excluded (presumably dissolved / replaced entities -- verify
# against the Wikidata property definitions). Both ?ars (P1388, Amtlicher
# Regionalschlüssel) and ?www (P856, website) are OPTIONAL, so a result row
# may lack either binding. Labels are requested in German and rows are
# ordered by ?ars.
SPARQL_QUERY = """
SELECT distinct ?amt ?amtLabel ?ars ?www
WHERE {

    # Finde alle Ämter in Schleswig-Holstein
    ?amt wdt:P31 ?typ;   
         wdt:P131 ?kreis .   

    # Typen von Interesse: Amt
    VALUES ?typ { wd:Q478847}

    ?amt wdt:P131 ?kreis.
    ?kreis wdt:P31 wd:Q61856889;      # Instanz von: Kreis in Schleswig-Holstein
  
    MINUS { ?amt wdt:P576 [] }
    MINUS { ?amt wdt:P1366 [] }
           
    # Amtlicher Regionalschlüssel
    OPTIONAL { ?amt wdt:P1388 ?ars. }

    # Webseite
    OPTIONAL { ?amt wdt:P856 ?www. }
  
    SERVICE wikibase:label { bd:serviceParam wikibase:language "de". }
} ORDER BY ?ars
"""

# HTTP headers sent by save_webpage() when downloading an Amt's website.
# The User-Agent imitates a desktop Firefox browser.
# NOTE(review): presumably because some sites reject or alter responses for
# the default python-requests User-Agent -- confirm.
HEADERS = {
   "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0"
}

def get_sparql_data(query, endpoint=SPARQL_ENDPOINT, timeout=60):
    """Run a SPARQL query and return the decoded JSON result.

    Args:
        query: SPARQL query text.
        endpoint: URL of the SPARQL endpoint; defaults to ``SPARQL_ENDPOINT``.
        timeout: Seconds to wait for the endpoint before giving up.

    Returns:
        The parsed JSON response, or ``None`` if the request could not be
        completed, the endpoint answered with a status other than 200, or
        the response body was not valid JSON. Every failure is reported on
        stdout.
    """
    try:
        # Without an explicit timeout, requests waits indefinitely on a
        # stalled connection.
        response = requests.get(
            endpoint,
            params={"query": query, "format": "json"},
            timeout=timeout,
        )
    except requests.RequestException as e:
        # Connection errors, DNS failures and timeouts take the same
        # "report and return None" path as a bad status code.
        print(f"Fehler bei der Abfrage: {e}")
        return None

    if response.status_code != 200:
        print(f"Fehler bei der Abfrage: {response.status_code}")
        return None

    try:
        return response.json()
    except ValueError as e:
        # Raised when the body is not JSON (e.g. an HTML error page served
        # with status 200).
        print(f"Fehler bei der Abfrage: ungültige JSON-Antwort ({e})")
        return None

def save_webpage(url, ars, timeout=30):
    """Download a web page and store it as ``<ars>.html``.

    Nothing is downloaded when ``url`` is empty or the target file already
    exists, so repeated runs only fetch pages that are still missing.

    Args:
        url: Address of the page to download.
        ars: Regionalschlüssel used as the file name (without extension).
        timeout: Seconds to wait for the web server before giving up.

    Returns:
        None. Success and failure are reported on stdout; download and
        write errors are caught so that one failing site does not abort a
        batch run.
    """
    if not url:
        return
    if not ars:
        # Without a key the page would end up in a file named "None.html".
        print(f"Kein Regionalschlüssel für {url} vorhanden, übersprungen.")
        return

    file_name = f"{ars}.html"
    if os.path.exists(file_name):
        return

    try:
        page = requests.get(url, headers=HEADERS, timeout=timeout)
        page.raise_for_status()

        # When the server sends a text/* Content-Type without a charset,
        # requests falls back to ISO-8859-1, which garbles UTF-8 pages
        # (umlauts). Use the encoding detected from the body instead.
        content_type = page.headers.get("Content-Type", "")
        if "charset" not in content_type.lower():
            page.encoding = page.apparent_encoding

        # Decode before creating the file so that a decoding failure does
        # not leave an empty file that later runs would skip.
        html = page.text

        with open(file_name, 'w', encoding='utf-8') as file:
            file.write(html)
        print(f"Gespeichert: {file_name}")
    except Exception as e:
        # Deliberately broad: this is a best-effort batch download.
        print(f"Fehler beim Abrufen der Webseite {url}: {e}")

def main():
    """Query Wikidata for the Ämter and download each one's website.

    Runs ``SPARQL_QUERY`` and, for every result row that has both a
    Regionalschlüssel and a website, passes them to ``save_webpage``.
    Rows missing either value are reported on stdout and skipped. Does
    nothing when the query itself failed.
    """
    data = get_sparql_data(SPARQL_QUERY)
    if not data:
        return

    for item in data['results']['bindings']:
        # ?ars and ?www are OPTIONAL in the query, so either binding may be
        # absent from a row; look them up without assuming they exist.
        amt_label = item.get('amtLabel', {}).get('value', '')
        ars = item.get('ars', {}).get('value')
        www = item.get('www', {}).get('value')

        if not ars:
            print(f"Kein Regionalschlüssel für {amt_label} gefunden.")
            continue
        if not www:
            print(f"Keine Webseite für {amt_label} gefunden.")
            continue

        save_webpage(www, ars)

# Run the download only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
