NT Coreference and Anaphora Chain Analysis

Analysis of the MACULA Greek NT referent column, which links 14,471 pronoun and relative-clause tokens back to their antecedents. This makes it possible to track who or what a pronoun refers to throughout a passage, chapter, or book.

Key data:

referent column: MACULA xml_id pointing to the antecedent token
14,471 tokens with referent links (~10.5% of all NT tokens)
84% of all pronoun tokens have a referent link
Primarily αὐτός, relative pronoun ὅς, and demonstratives

Questions this notebook answers:

How many times is Jesus referenced by pronoun in each Gospel?
Where does Paul refer back to himself in his letters?
How dense is pronominal reference to the Spirit/Paraclete in John 14–16?
Which participants receive the most pronominal references per book?
What does ἐκεῖνος refer to in John (theologically significant)?

Data source: MACULA Greek Nestle1904 (macula-greek/ submodule), coreference annotations from Clear Bible.

In [ ]:

Copied!





# @title Colab setup (runs only on Google Colab)
import sys

# The google.colab module is only present in sys.modules inside a Colab
# runtime, so everything below is skipped in local / Binder sessions.
IN_COLAB = "google.colab" in sys.modules
if IN_COLAB:
    import os
    import subprocess

    repo_dir = "/content/berean-bible-bots"
    src_dir = repo_dir + "/src"

    # Clone the repo so all source and data paths work
    if not os.path.isdir(repo_dir):
        subprocess.run(
            ["git", "clone", "--depth", "1",
             "https://github.com/dnovick/berean-bible-bots.git",
             repo_dir],
            check=True,
        )
    # The relative paths below (binder/...) are resolved from the repo root.
    os.chdir(repo_dir)
    # Avoid stacking duplicate entries when the cell is re-run.
    if src_dir not in sys.path:
        sys.path.insert(0, src_dir)
    # Install Python dependencies
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-q", "-r",
         "binder/requirements.txt"],
        check=True,
    )
    # Download processed data files (~295 MB, one-time)
    subprocess.run(["bash", "binder/postBuild"], check=True)
    print("Colab environment ready.")

In [ ]:

Copied!





import sys

# Make the project's src/ directory importable relative to this notebook.
sys.path.insert(0, '../../../src')

from bible_grammar import (
    nt_referent_data, nt_referent_frequency, nt_entity_chain,
    nt_pronoun_referents, nt_book_entity_density, nt_entity_chapter_distribution,
    print_nt_referent_overview, print_nt_referent_frequency,
    print_nt_entity_chain, print_nt_pronoun_referents, print_nt_book_entity_density,
    nt_referent_book_chart, nt_entity_density_chart,
    KNOWN_ENTITIES,
)
import pandas as pd

1. Overview — Coverage

In [ ]:

Copied!

# Overview report for the referent annotations
print_nt_referent_overview()

In [ ]:

Copied!

# Distribution of referent-annotated tokens by NT book
nt_referent_book_chart(top_n=20)

In [ ]:

Copied!





# Sample referent data
df = nt_referent_data(book='Jhn')
print(f"John referent-annotated tokens: {len(df)}")
# Last expression of the cell: the notebook renders the first 12 rows,
# pairing each token with its antecedent and location.
df[['text', 'lemma', 'gloss', 'antecedent_lemma', 'antecedent_gloss',
    'antecedent_ref', 'chapter', 'verse']].head(12)

2. Most Referenced Entities per Book

In [ ]:

Copied!

# John — Jesus, the disciples, the Jews
print_nt_book_entity_density('Jhn', top_n=12)
nt_entity_density_chart('Jhn', top_n=12)

In [ ]:

Copied!

# Romans — Paul, God, Christ
print_nt_book_entity_density('Rom', top_n=12)

In [ ]:

Copied!

# Mark — narrative-dense Christological focus
print_nt_book_entity_density('Mrk', top_n=12)

3. Most Referenced Antecedents Across the NT

In [ ]:

Copied!

# Top antecedents across the whole NT
print_nt_referent_frequency(top_n=20)

4. Entity Chain Analysis — Tracking a Participant

In [ ]:

Copied!

# Jesus in John — how often is he referenced by pronoun, and in which chapters?
jesus_jhn_id = 'n43014023002'  # Jesus @ Jhn 14:23 (most-referenced anchor)
print_nt_entity_chain(jesus_jhn_id, book='Jhn', entity_label='Jesus (Jhn 14:23)')

In [ ]:

Copied!

# Disciples in John (Farewell Discourse)
disciples_jhn_id = 'n43013023006'  # disciples @ Jhn 13:23
print_nt_entity_chain(disciples_jhn_id, book='Jhn', entity_label='Disciples (Jhn 13:23)')

In [ ]:

Copied!

# Paul in Romans
paul_rom_id = 'n45001001001'  # Paul @ Rom 1:1
print_nt_entity_chain(paul_rom_id, book='Rom', entity_label='Paul (Rom 1:1)')

5. Pronoun Referents — What Does αὐτός Point To?

In [ ]:

Copied!

# What does αὐτός refer to in the Gospels?
print_nt_pronoun_referents('αὐτός', book=['Mat', 'Mrk', 'Luk', 'Jhn'], top_n=15)

In [ ]:

Copied!

# What does αὐτός refer to in Romans?
print_nt_pronoun_referents('αὐτός', book='Rom', top_n=15)

6. John's Use of ἐκεῖνος for the Paraclete

John 14–16 uses ἐκεῖνος ("that one", masculine) to refer to the Holy Spirit (πνεῦμα, neuter). This is a theologically significant pronoun choice: instead of following grammatical concord with the neuter noun, it implies the personal identity of the Spirit.

In [ ]:

Copied!

# What does ἐκεῖνος refer to in John?
print_nt_pronoun_referents('ἐκεῖνος', book='Jhn', top_n=20)

In [ ]:

Copied!





# The Farewell Discourse (Jhn 14–16): ἐκεῖνος tokens
df = nt_referent_data(book='Jhn')
# Keep only ἐκεῖνος tokens whose chapter is 14, 15, or 16.
ekeinos_farewell = df[
    (df['lemma'] == 'ἐκεῖνος') &
    (df['chapter'].isin([14, 15, 16]))
]
print(f"ἐκεῖνος in John 14–16: {len(ekeinos_farewell)} tokens")
# Last expression of the cell: the notebook renders each token with its antecedent.
ekeinos_farewell[['text', 'antecedent_lemma', 'antecedent_gloss',
                  'antecedent_ref', 'chapter', 'verse']]

7. Ad-hoc Queries

In [ ]:

Copied!

# What does ὅς (relative pronoun) refer to in Hebrews?
print_nt_pronoun_referents('ὅς', book='Heb', top_n=15)

In [ ]:

Copied!





# Compare Jesus entity density across Gospels
# NOTE(review): only the top 30 entities per book are fetched, so any Ἰησοῦς
# rows ranked below that cutoff would be left out of the total — confirm that
# top_n=30 is high enough for every Gospel.
for book, label in [('Mat', 'Matthew'), ('Mrk', 'Mark'), ('Luk', 'Luke'), ('Jhn', 'John')]:
    df_b = nt_book_entity_density(book, top_n=30)
    # Several rows may share the lemma Ἰησοῦς; add up their reference counts.
    jesus_rows = df_b[df_b['antecedent_lemma'] == 'Ἰησοῦς']
    total = jesus_rows['ref_count'].sum()
    print(f"{label}: {total} pronominal references to Ἰησοῦς")

In [ ]:

Copied!





# Show all available pre-identified entities
print("Known entity anchors:")
# KNOWN_ENTITIES is iterated as a mapping of label -> xml_id.
for label, xml_id in KNOWN_ENTITIES.items():
    print(f"  {label}: {xml_id}")