# Peshitta NT — Syriac Morphology
Word-level morphological analysis of the Syriac Peshitta New Testament.
Data source: ETCBC/syrnt (Text-Fabric) — 109,640 words across all 27 NT books.
Features: part of speech, gender, number, person, state, verbal stem (vs), tense/aspect (vt), Sedra transliterated root and lemma.
| Feature | Values |
|---|---|
| `sp` | noun, verb, particle, pronoun, adjective, numeral, interjection |
| `gn` | m, f, NA |
| `nu` | s (singular), p (plural), NA |
| `ps` | p1, p2, p3, NA |
| `st` | emphatic, construct, absolute, NA |
| `vs` | peal, pael, aphel, ethpeel, ethpaal, ettaphal, NA |
| `vt` | perf, impf, imptv, ptca (active participle), ptcp (passive participle), inf, NA |
In [ ]:
Copied!
# Setup cell: make the project sources importable, select the plotting
# backend, and load the Peshitta word table used by every later cell.
import sys

# The path is relative to the notebook's working directory. Guard the insert
# so that re-running this cell does not stack duplicate sys.path entries.
SRC_DIR = '../../../src'
if SRC_DIR not in sys.path:
    sys.path.insert(0, SRC_DIR)

import pandas as pd
import matplotlib

# The backend is selected before pyplot is imported. Agg is non-interactive:
# charts are rendered to files via savefig rather than shown in a window.
matplotlib.use('Agg')
import matplotlib.pyplot as plt

from bible_grammar.peshitta_query import query_peshitta

# NOTE(review): presumably a pandas DataFrame with one row per word; later
# cells index it by column name ('sp', 'vs', 'vt', 'book', ...).
df = query_peshitta()
print(f'Loaded {len(df):,} words')

# Trailing expression: in a notebook this renders a preview of the first rows.
df.head()
## 1. Part-of-Speech Distribution
In [ ]:
Copied!
# Count tokens per part-of-speech tag (`sp`) and chart the distribution.
# Relies on `df` and `plt` from the setup cell.
from pathlib import Path

pos = df['sp'].value_counts()
print('Part-of-speech counts:')
print(pos.to_string())

fig, ax = plt.subplots(figsize=(9, 4))
pos.plot(kind='bar', ax=ax, color='#4292c6', edgecolor='white')
ax.set_title('Peshitta NT — Part-of-Speech Distribution', fontsize=12, fontweight='bold')
ax.set_xlabel('')
ax.set_ylabel('Token count')
ax.tick_params(axis='x', rotation=30)
ax.yaxis.grid(True, linestyle='--', alpha=0.4)
ax.set_axisbelow(True)  # keep the grid lines behind the bars
plt.tight_layout()

# savefig does not create missing directories, so make sure the chart
# folder exists before writing (matters on a fresh checkout).
chart_path = Path('../../../output/charts/nt/peshitta_pos.png')
chart_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(chart_path, dpi=150, bbox_inches='tight')
plt.show()

# Figures stay registered with pyplot until closed; close explicitly so
# re-running cells does not accumulate open figures.
plt.close(fig)
## 2. Verbal Stem Distribution (Peshitta)
In [ ]:
Copied!
# Restrict the word table to verbs and chart how often each verbal stem
# (`vs`) occurs. `verbs` is kept as a copy because the tense/aspect cell
# further down reuses it. Relies on `df` and `plt` from the setup cell.
from pathlib import Path

verbs = df[df['sp'] == 'verb'].copy()
print(f'Total verb tokens: {len(verbs):,}')

stem_counts = verbs['vs'].value_counts()
print('\nVerbal stems:')
print(stem_counts.to_string())

fig, ax = plt.subplots(figsize=(8, 4))
stem_counts.plot(kind='bar', ax=ax, color='#d62728', edgecolor='white')
ax.set_title('Peshitta NT — Verbal Stem Distribution', fontsize=12, fontweight='bold')
ax.set_ylabel('Token count')
ax.tick_params(axis='x', rotation=30)
ax.yaxis.grid(True, linestyle='--', alpha=0.4)
ax.set_axisbelow(True)  # keep the grid lines behind the bars
plt.tight_layout()

# savefig does not create missing directories, so make sure the chart
# folder exists before writing.
chart_path = Path('../../../output/charts/nt/peshitta_verbal_stems.png')
chart_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(chart_path, dpi=150, bbox_inches='tight')
plt.show()

# Close explicitly so repeated runs do not accumulate open figures.
plt.close(fig)
## 3. Verbal Tense/Aspect by Stem
In [ ]:
Copied!
# Cross-tabulate tense/aspect (`vt`, columns) against verbal stem (`vs`,
# rows). Verb rows whose stem is the literal string 'NA' are excluded, and
# stem/tense combinations that never occur are filled with 0.
# Relies on `verbs` built in the verbal-stem cell.
stem_is_known = verbs['vs'] != 'NA'
vt_by_stem = (
    verbs[stem_is_known]
    .groupby(['vs', 'vt'])
    .size()
    .unstack(fill_value=0)
)
print(vt_by_stem)
## 4. Lexeme Search — Look Up Any Root
In [ ]:
Copied!
# Search by Sedra transliteration of the root
# e.g. CTB = write, ;MR = say, QDM = before/come before
# Change ROOT and re-run the cell to look up a different root.
ROOT = 'CTB'  # write

# Exact match on the `root_sedra` column; copy so adding the `ref` column
# below does not write into a view of `df`.
hits = df[df['root_sedra'] == ROOT].copy()

# Human-readable reference of the form "<book> <chapter>:<verse>".
hits['ref'] = (
    hits['book'] + ' '
    + hits['chapter'].astype(str) + ':'
    + hits['verse'].astype(str)
)

print(f'Root {ROOT}: {len(hits)} tokens')
print(hits[['ref', 'word', 'sp', 'vs', 'vt', 'st']].to_string())
## 5. Word Distribution by Book
In [ ]:
Copied!
# Chart the number of word tokens per book, in the order listed below.
# Relies on `df` and `plt` from the setup cell.
from pathlib import Path

book_order = ['Mat', 'Mrk', 'Luk', 'Jhn', 'Act', 'Rom', '1Co', '2Co', 'Gal', 'Eph',
              'Php', 'Col', '1Th', '2Th', '1Ti', '2Ti', 'Tit', 'Phm', 'Heb', 'Jas',
              '1Pe', '2Pe', '1Jn', '2Jn', '3Jn', 'Jud', 'Rev']

# reindex imposes the order above; any abbreviation absent from df['book']
# becomes NaN and is dropped, so only books present in the data are plotted.
book_counts = df.groupby('book').size().reindex(book_order).dropna()

fig, ax = plt.subplots(figsize=(13, 4))
positions = range(len(book_counts))
ax.bar(positions, book_counts.values, color='#41ab5d', edgecolor='white')
ax.set_xticks(positions)
ax.set_xticklabels(book_counts.index, rotation=45, ha='right', fontsize=8)
ax.set_ylabel('Word tokens')
ax.set_title('Peshitta NT — Word Tokens per Book', fontsize=12, fontweight='bold')
ax.yaxis.grid(True, linestyle='--', alpha=0.4)
ax.set_axisbelow(True)  # keep the grid lines behind the bars
plt.tight_layout()

# savefig does not create missing directories, so make sure the chart
# folder exists before writing.
chart_path = Path('../../../output/charts/nt/peshitta_words_per_book.png')
chart_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(chart_path, dpi=150, bbox_inches='tight')
plt.show()

# Close explicitly so repeated runs do not accumulate open figures.
plt.close(fig)
## 6. Peshitta Verse Lookup (via translations module)
In [ ]:
Copied!
# Print the running Peshitta text of a sample passage from the translations
# table (a separate source from the word-level morphology table).
from bible_grammar.translations import load_translations

# NOTE(review): presumably a DataFrame with one row per verse per
# translation; the columns used here are 'translation', 'book_id',
# 'chapter', 'verse' and 'text'.
trans = load_translations()
peshitta = trans[trans['translation'] == 'Peshitta']

# Sample John 1:1-5
in_sample = (
    (peshitta['book_id'] == 'Jhn')
    & (peshitta['chapter'] == 1)
    & (peshitta['verse'] <= 5)
)
sample = peshitta[in_sample]

for _, row in sample.iterrows():
    print(f"John {row['chapter']}:{row['verse']}: {row['text']}")