<?xml version="1.0" encoding="UTF-8"?>
<?xml-model href="../../common/schema/DHQauthor-TEI.rng"    type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0" ?>
<?xml-model href="../../common/schema/DHQauthor-TEI.isosch" type="application/xml" schematypens="http://purl.oclc.org/dsdl/schematron"?>
<?xml-model href="../../common/schema/dhqTEI-ready.sch"     type="application/xml" schematypens="http://purl.oclc.org/dsdl/schematron"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0" xmlns:cc="http://web.resource.org/cc/"
    xmlns:dhq="http://www.digitalhumanities.org/ns/dhq" xmlns:html="http://www.w3.org/1999/xhtml"
    xmlns:mml="http://www.w3.org/1998/Math/MathML"
    xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <teiHeader>
        <fileDesc>
            <titleStmt>
                <!-- Author should supply the title and personal information-->
                <title type="article" xml:lang="en">Responsible AI and the Middle Ages: Detecting
                    Historical Toxicity in Medieval Datasets</title>
                <!-- Add a <title> with appropriate @xml:lang for articles in languages other than English -->
                <dhq:authorInfo>
                    <!-- Include a separate <dhq:authorInfo> element for each author -->
                    <dhq:author_name>Delphine <dhq:family>Demelas</dhq:family></dhq:author_name>
                    <idno type="ORCID">https://orcid.org/0000-0002-2414-4581</idno>
                    <dhq:affiliation>University of Southampton</dhq:affiliation>
                    <email>D.Demelas@soton.ac.uk</email>
                    <dhq:bio>
                        <p>PLACEHOLDER</p>
                    </dhq:bio>
                </dhq:authorInfo>
            </titleStmt>
            <publicationStmt>
                <publisher>Alliance of Digital Humanities Organizations</publisher>
                <publisher>Association for Computers and the Humanities</publisher>

                <!-- This information should be added when the file is created -->
                <idno type="DHQarticle-id">000874</idno>
                <idno type="DOI">pending</idno>


                <!-- This information will be completed at publication -->
                <idno type="volume">020</idno>
                <idno type="issue">3</idno>
                <date when="2026-07-30">30 July 2026</date>
                <dhq:articleType>article</dhq:articleType>
                <availability status="CC-BY-ND">
                    <!-- If using a different license from the default, choose one of the following:
                  CC-BY-ND (DHQ default): <cc:License rdf:about="http://creativecommons.org/licenses/by-nd/2.5/"/>     
                  CC-BY:  <cc:License rdf:about="https://creativecommons.org/licenses/by/2.5/"/>
                  CC0: <cc:License rdf:about="https://creativecommons.org/publicdomain/zero/1.0/"/>
-->
                    <cc:License rdf:about="http://creativecommons.org/licenses/by-nd/2.5/"/>
                </availability>
            </publicationStmt>

            <sourceDesc>
                <p>This is the source</p>
            </sourceDesc>
        </fileDesc>
        <encodingDesc>
            <classDecl>
                <taxonomy xml:id="dhq_keywords">
                    <bibl>DHQ classification scheme; full list available at <ref
                            target="https://dhq.digitalhumanities.org/common/xml/taxonomy.xml"
                            >https://dhq.digitalhumanities.org/common/xml/taxonomy.xml</ref></bibl>
                </taxonomy>
                <taxonomy xml:id="authorial_keywords">
                    <bibl>Keywords supplied by author; no controlled vocabulary</bibl>
                </taxonomy>
                <taxonomy xml:id="project_keywords">
                    <bibl>DHQ Project Registry (in development)
                        <!--DHQ project registry; full list available at  https://dhq.digitalhumanities.org/projects.xml-->
                    </bibl>
                </taxonomy>
            </classDecl>
        </encodingDesc>
        <profileDesc>
            <langUsage>
                <language ident="en" extent="default"/>
                <!-- add <language> with appropriate @ident for any additional languages -->
            </langUsage>
            <textClass>
                <keywords scheme="#dhq_keywords">
                    <!-- Authors may suggest one or more keywords from the DHQ keyword list, visible at https://github.com/Digital-Humanities-Quarterly/dhq-journal/wiki/DHQ-Topic-Keywords; these may be supplemented or modified by DHQ editors -->

                    <!-- Enter keywords below preceded by a "#". Create a new <term> element for each -->
                    <term corresp="#data_curation"/>
                    <term corresp="#ethics"/>
                    <term corresp="#machine_learning"/>
                    <term corresp="#literary_studies"/>
                    <term corresp="#race"/>
                </keywords>
                <keywords scheme="#authorial_keywords">
                    <!-- Authors may include one or more keywords of their choice -->


                    <term>toxicity detection</term>
                    <term>Medieval literature</term>
                    <term>responsible AI</term>
                    <term>historical bias</term>
                    <term>Open data</term>
                </keywords>
                <keywords scheme="#project_keywords">
                    <list type="simple">
                        <item/>
                    </list>
                </keywords>
            </textClass>
        </profileDesc>
        <revisionDesc>
            <!-- Replace "NNNNNN" in the @target of ref below with the appropriate DHQarticle-id value. -->
            <change>The version history for this file can be found on <ref type="gitHist"
                    target="https://github.com/Digital-Humanities-Quarterly/dhq-journal/commits/main/articles/000874/000874.xml"
                    >GitHub</ref>.</change>
            <!-- Each change should include @who and @when as well as a brief note on what was done. -->

        </revisionDesc>
    </teiHeader>
    <!-- If a translation is added to the original article, add an enclosing <text> and <group> element -->
    <text xml:lang="en" type="default">
        <front>
            <dhq:abstract>
                <!-- Include a brief abstract of the article -->
                <p>The increasing reliance on open-source datasets for training large language
                    models has revealed a critical oversight in artificial intelligence development:
                    the presence of historical toxicity embedded within canonical literary texts.
                    This article examines the application of contemporary toxicity detection models
                    to the <title rend="italic">Chanson de Roland</title>, one of the foundational works of medieval French literature,
                    exposing significant challenges in identifying hate speech, violence advocacy,
                    and discriminatory content within historical documents. Using the multilingual
                    Detoxify model on Joseph Bédier's modern French translation, I analyze 2,605
                    sentences to assess how toxicity detection models trained primarily on
                    contemporary social media content perform when evaluating medieval literature.
                    My findings reveal a troubling pattern: While the model successfully flags some
                    explicit threats and insults, it systematically fails to detect the text's most
                    problematic content, including religious misrepresentation, forced conversion
                    narratives, and anti-Black racism. These <soCalled>false negatives</soCalled> represent a
                    fundamental problem for AI models being trained on historical open data, as they
                    risk perpetuating and amplifying centuries-old discriminatory frameworks while
                    appearing objective. This research contributes to urgent debates about
                    responsible AI development, arguing that without human-annotated ground-truth
                    datasets specifically designed for toxicity in historical texts, current LLMs risk
                    distorting historical understanding and diffusing undetected violent and racist
                    discrimination at unprecedented scale. I advocate for interdisciplinary
                    collaboration between computer scientists and humanities scholars to develop
                    ethical frameworks for curating historical datasets that acknowledge their toxic
                    content without either sanitizing history or amplifying historical harm.</p>
            </dhq:abstract>
            <dhq:teaser>
                <!-- Include a brief teaser, no more than a phrase or a single sentence -->
                <p>PLACEHOLDER</p>
            </dhq:teaser>
        </front>
        <body>
            <head/>
            <div>
                <head/>
                <p>Content goes here!</p>
            </div>
        </body>
        <back>
            <listBibl>
                <bibl xml:id="PLACEHOLDER">PLACEHOLDER</bibl>
            </listBibl>

        </back>
    </text>
</TEI>
