# MIT License
#
# Copyright (c) 2025 Jesper Zedlitz <jesper@zedlitz.de>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# Lädt mit Hilfe einer SPARQL-Abfrage an Wikidata die Startseite jeder
# Webseite eines Amtes in Schleswig-Holstein herunter und speichert sie in einer
# lokalen HTML-Datei mit dem Regionalschlüssel als Dateinamen.

import requests
import os

# SPARQL endpoint and query used to fetch the administrative units and their
# websites.
#
# SPARQL_ENDPOINT is the default endpoint of get_sparql_data().
#
# SPARQL_QUERY selects ?item, its label, ?ars and ?www. ?ars is bound in two
# alternative ways that are combined with UNION:
#   * via wdt:P1388, restricted to the first VALUES list (9-digit keys), and
#   * via wdt:P439, restricted to the second VALUES list (8-digit keys).
# NOTE(review): P1388 / P439 are presumably the German regional key and the
# German municipality key on Wikidata -- confirm against the property pages.
# The website (?www, wdt:P856) is OPTIONAL, so a result row may lack a 'www'
# binding. Labels are requested in German and rows are ordered by ?ars.
SPARQL_ENDPOINT = "https://query.wikidata.org/sparql"
SPARQL_QUERY = """
SELECT distinct ?item ?itemLabel ?ars ?www
WHERE {

    {
        ?item wdt:P1388 ?ars . 
        VALUES ?ars { "010515163" "010515166" "010515169" "010515172" "010515175" "010515178" "010535308" "010535313" "010535318" "010535323" "010535343" "010535358" "010535373" "010535391" "010545417" "010545439" "010545453" "010545459" "010545488" "010545489" "010545492" "010545494" "010555543" "010555546" "010555591" "010555739" "010565616" "010565636" "010565660" "010565687" "010565690" "010575727" "010575739" "010575747" "010575755" "010575775" "010575782" "010575785" "010585803" "010585822" "010585824" "010585833" "010585847" "010585853" "010585864" "010585888" "010585889" "010585890" "010585893" "010585895" "010585896" "010595912" "010595915" "010595919" "010595920" "010595937" "010595940" "010595949" "010595952" "010595974" "010595987" "010595990" "010595993" "010595996" "010605005" "010605024" "010605034" "010605043" "010605048" "010605053" "010605063" "010605086" "010615104" "010615134" "010615138" "010615153" "010615168" "010615179" "010615189" "010625034" "010625207" "010625218" "010625244" "010625262" "010625270"   }
    } UNION {
        ?item wdt:P439 ?ars .
        VALUES ?ars { "01001000" "01002000" "01003000" "01004000" "01051011" "01051044" "01053032" "01053083" "01053090" "01053100" "01053116" "01053129" "01054033" "01054056" "01054108" "01054138" "01054168" "01055001" "01055004" "01055010" "01055012" "01055016" "01055018" "01055021" "01055025" "01055028" "01055032" "01055033" "01055035" "01055040" "01055041" "01055042" "01055044" "01055046" "01055998" "01056002" "01056005" "01056015" "01056018" "01056021" "01056025" "01056039" "01056041" "01056043" "01056044" "01056048" "01056049" "01056050" "01057001" "01057008" "01057009" "01057057" "01057062" "01057091" "01057998" "01058005" "01058034" "01058043" "01058092" "01058135" "01058169" "01059045" "01059075" "01059113" "01059120" "01059183" "01060004" "01060005" "01060019" "01060039" "01060044" "01060063" "01060092" "01061029" "01061046" "01061113" "01062001" "01062004" "01062006" "01062009" "01062018" "01062023" "01062053" "01062060" "01062061" "01062090"  }    
    }
  
    # Webseite
    OPTIONAL { ?item wdt:P856 ?www. }
  
    SERVICE wikibase:label { bd:serviceParam wikibase:language "de". }
} ORDER BY ?ars
"""

def get_sparql_data(query, endpoint=SPARQL_ENDPOINT, timeout=60):
    """Run a SPARQL query and return the decoded JSON result.

    Args:
        query: The SPARQL query text.
        endpoint: URL of the SPARQL endpoint to send the query to.
        timeout: Seconds to wait for the endpoint before giving up. Without
            a timeout ``requests`` would wait indefinitely on a stalled
            connection.

    Returns:
        The parsed JSON document, or ``None`` when the request could not be
        performed, the endpoint answered with a status other than 200, or
        the response body was not valid JSON. A message is printed in each
        failure case.
    """
    headers = {
        "Accept": "application/sparql-results+json",
        # The Wikimedia User-Agent policy asks clients to identify
        # themselves; generic library agents may be throttled or blocked.
        # TODO: add contact information (URL or e-mail) for this script.
        "User-Agent": "sh-amt-webseiten-downloader/1.0 (python-requests)",
    }

    try:
        response = requests.get(
            endpoint,
            params={"query": query, "format": "json"},
            headers=headers,
            timeout=timeout,
        )
    except requests.RequestException as e:
        # Connection errors and timeouts are reported the same way as HTTP
        # errors so that callers only have to deal with ``None``.
        print(f"Fehler bei der Abfrage: {e}")
        return None

    if response.status_code != 200:
        print(f"Fehler bei der Abfrage: {response.status_code}")
        return None

    try:
        return response.json()
    except ValueError as e:
        # Every JSON decoding error raised by requests derives from
        # ValueError, independent of the installed requests version.
        print(f"Fehler bei der Abfrage: ungültige JSON-Antwort ({e})")
        return None

def save_webpage(url, ars, timeout=30):
    """Download a web page and store it as ``<ars>.html``.

    The file is written to the current working directory. Nothing happens
    when ``url`` is empty or the target file already exists, so a rerun only
    fetches pages that are still missing. Any error while fetching or saving
    is printed and swallowed so that one broken site does not abort the
    whole run.

    Args:
        url: Address of the page to download.
        ars: Key that is used as the file name (without extension).
        timeout: Seconds to wait for the server before giving up.
    """
    file_name = f"{ars}.html"
    if not url or os.path.exists(file_name):
        return

    # Write to a temporary name first: a half-written "<ars>.html" would
    # otherwise be mistaken for a finished download on the next run.
    tmp_name = f"{file_name}.part"
    try:
        page = requests.get(url, timeout=timeout)
        page.raise_for_status()

        # For text responses without a declared charset, requests falls back
        # to ISO-8859-1, which mangles umlauts on UTF-8 pages. Detect the
        # encoding from the body in that case.
        content_type = page.headers.get("Content-Type", "")
        if "charset" not in content_type.lower():
            page.encoding = page.apparent_encoding

        with open(tmp_name, 'w', encoding='utf-8') as file:
            file.write(page.text)
        os.replace(tmp_name, file_name)
        print(f"Gespeichert: {file_name}")
    except Exception as e:
        # Deliberately broad: this is the per-site boundary of a batch job.
        print(f"Fehler beim Abrufen der Webseite {url}: {e}")
        try:
            os.remove(tmp_name)
        except OSError:
            # No temporary file was left behind (or it cannot be removed).
            pass

def main():
    """Query the SPARQL endpoint and save the website of every result row.

    Rows without a website binding are reported on stdout instead of being
    downloaded. If the query yields no data, nothing is done.
    """
    result = get_sparql_data(SPARQL_QUERY)
    if not result:
        return

    for binding in result['results']['bindings']:
        item_label = binding['itemLabel']['value']
        regional_key = binding['ars']['value']
        # 'www' comes from an OPTIONAL clause and may be absent from a row.
        homepage = binding.get('www', {}).get('value')

        if not homepage:
            print(f"Keine Webseite für {item_label} gefunden.")
            continue

        save_webpage(homepage, regional_key)

# Start the download only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
