Wikipedie:GPT úprava referencí na harvardské

Z Wikipedie, otevřené encyklopedie

Následující program v Pythonu 3 je založený na umělé inteligenci. Projde zadaný krátký článek české Wikipedie (jeho jméno zapište přímo do programu, jde o "holý" program bez uživatelského rozhraní) a vypíše jeho zdrojový kód s referencemi upravenými na harvardský styl. Nezapomeňte po něm vše zkontrolovat. Na dlouhých článcích program nefunguje dobře, protože v něm není řešeno "rozsekání" článku na části.

Protože program přes API volá model řady GPT, je potřeba mít na stránkách firmy OpenAI zakoupeno právo model takto používat (https://openai.com/api/).

"""Upraví reference v (kratším) článku české Wikipedie na harvardský citační styl pomocí šablony sfn"""
import requests
from openai import OpenAI
# Replace my_api_key with your own OpenAI API key (it can be purchased on the OpenAI website).
# NOTE(review): my_api_key is a placeholder; it is not defined anywhere in the visible code.
client = OpenAI(api_key=my_api_key)

# Input:
article_name = "Kostel svatého Simeona Stylity"  # title of the article to convert, in quotes


def get_wikipedia_source(article_title, language="cs", timeout=30):
    """Return the wikitext source of a Wikipedia article.

    Args:
        article_title: Title of the article to fetch.
        language: Language subdomain of Wikipedia to query (default "cs").
        timeout: Seconds to wait for the API before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The content of the main slot of the first revision returned by the
        API, or an empty string if the response carries no content there.

    Raises:
        Exception: If the API answers with a non-200 status code, returns no
            pages, or returns a page without revisions (presumably the case
            for a title that does not exist).
        requests.exceptions.RequestException: On a network failure or when
            the timeout expires.
    """
    endpoint = f"https://{language}.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        "format": "json",
        "prop": "revisions",
        "titles": article_title,
        "rvprop": "content",
        "rvslots": "main",
    }

    # Always bound the request so a stalled connection cannot hang the script.
    response = requests.get(endpoint, params=params, timeout=timeout)

    if response.status_code != 200:
        raise Exception(f"Error fetching data from Wikipedia API: {response.status_code}")

    data = response.json()

    # The "pages" mapping is keyed by page ID, which is not known in advance.
    pages = data.get("query", {}).get("pages", {})
    if not pages:
        raise Exception("No pages found or an error occurred.")

    # A single title was requested, so the first entry is the page we want.
    page = next(iter(pages.values()))

    revisions = page.get("revisions", [])
    if not revisions:
        raise Exception("No revisions found for this page.")

    # In this response format the slot's text is stored under the "*" key.
    return revisions[0].get("slots", {}).get("main", {}).get("*", "")

 
def sfn(article_name):
    """Rewrite an article's references into the Harvard style using a GPT model.

    Fetches the wikitext of the Czech Wikipedia article ``article_name``,
    builds a prompt that contains the conversion rules together with the
    wikitext of the article "Buzici" as a worked example, sends the prompt to
    the chat model and prints the model's answer to standard output.

    Args:
        article_name: Title of the Czech Wikipedia article to convert.

    Returns:
        The text returned by the model (the proposed new wikitext). If the
        model stopped because it hit the ``max_tokens`` limit, the text is
        incomplete; a warning is then written to standard error.

    Raises:
        Exception: Propagated from ``get_wikipedia_source`` when either
            article cannot be fetched.
    """
    import sys  # local import: only needed for the truncation warning below

    # Wikitext of an article that already uses the target style; it is
    # embedded in the prompt as an example for the model.
    example_source = get_wikipedia_source("Buzici")

    prompt = f"The goal is to change references in the markup of Czech Wikipedia article about {article_name}"
    prompt += """ to the Harvard Citation Style so that   \
             * All references use standard Czech citation templates like Citace monografie. 
             * All citation templates are in the section titled "=== Literatura ===" directly after the 
             section "=== Reference ===" as subsections of the section "== Odkazy ==" behind the body of the article.
             * None of the citation templates is in the body of the text
             * All citations use the template sfn. It has the form {{sfn|ref to the literature|s=page}}, 
             e.g. {{sfn|Král (1902)|s=152}}. The page may be missing, e.g. {{sfn|Král (1902)|}}, if there is 
             no known page in the citation.
             * The refs to the Literature items are in the form "Author name (year of publication)"
             * They are mentioned in the citation template, e.g. " | ref = Král (1902)"
             * Each cited work has its item in the section "Literatura" 
             * There are no <ref> tags used in the body, only sfn and <references/> in the section "Reference". 
             * Do not add new references which are not in the original article, even if they were useful.
             * Nor drop or change the cited literature.
             Print out only the source code of the corrected article. Do not change its text, only the references 
             and the section "Odkazy" and its subsections. You may create these sections if they are missing.
             Do not change the text at the end of the article (e.g. categories and templates other than citations). 
             An example of the Harvard Citation Style is the article about Buzici, whose code looks like this:\n"""
    prompt += example_source
    prompt += f"\nAnd here is the code of the article about {article_name}, which should be improved:\n"
    prompt += get_wikipedia_source(article_name)

    response = client.chat.completions.create(
        model="gpt-4o",
        max_tokens=4096,
        messages=[
            {
                "role": "user",
                "content": prompt,
            },
        ],
    )
    choice = response.choices[0]
    msgtext = choice.message.content

    # finish_reason "length" means the answer was cut off at max_tokens, so
    # the returned wikitext is incomplete. Warn on stderr so that stdout still
    # contains only the model output.
    if choice.finish_reason == "length":
        print(
            "Warning: the model output was truncated at the max_tokens limit; "
            "the converted article is incomplete.",
            file=sys.stderr,
        )

    print(msgtext)
    return msgtext
 

# Run the conversion for the article named in article_name; sfn() prints the resulting wikitext.
sfn(article_name)