Skip to content

Commit

Permalink
entity nodes and ent nodes
Browse files Browse the repository at this point in the history
  • Loading branch information
dirkroorda committed Mar 27, 2024
1 parent 61b0cb1 commit c86c5b3
Show file tree
Hide file tree
Showing 58 changed files with 29,076 additions and 5,467 deletions.
3 changes: 3 additions & 0 deletions .jupyter/desktop-settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"uiMode": "multi-document"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"data":{"layout-restorer:data":{"main":{"dock":{"type":"tab-area","currentIndex":0,"widgets":["notebook:programs/entities.ipynb","notebook:tutorial/entities.ipynb"]},"current":"notebook:programs/entities.ipynb"},"down":{"size":0,"widgets":[]},"left":{"collapsed":false,"visible":true,"current":"filebrowser","widgets":["filebrowser","running-sessions","@jupyterlab/toc:plugin","extensionmanager.main-view"],"widgetStates":{"jp-running-sessions":{"sizes":[0.25,0.25,0.25,0.25],"expansionStates":[false,false,false,false]},"extensionmanager.main-view":{"sizes":[0.3333333333333333,0.3333333333333333,0.3333333333333333],"expansionStates":[false,false,false]}}},"right":{"collapsed":true,"visible":true,"widgets":["jp-property-inspector","debugger-sidebar"],"widgetStates":{"jp-debugger-sidebar":{"sizes":[0.2,0.2,0.2,0.2,0.2],"expansionStates":[false,false,false,false,false]}}},"relativeSizes":[0.2115921787709497,0.7884078212290503,0],"top":{"simpleVisibility":true}},"file-browser-filebrowser:cwd":{"path":"tutorial"},"notebook:programs/entities.ipynb":{"data":{"path":"programs/entities.ipynb","factory":"Notebook"}},"notebook:tutorial/entities.ipynb":{"data":{"path":"tutorial/entities.ipynb","factory":"Notebook"}}},"metadata":{"id":"default"}}
33 changes: 33 additions & 0 deletions Untitled.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "1b4e1830-3934-49b5-b08a-88d40e9c4c03",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
2 changes: 1 addition & 1 deletion app/__checkout__.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
v1.1
g8666face5331ed80d6b88c942fa450d9f82d4959
g61b0cb1b6bb6e9c4549a53aa5db557ffe37c1946
2 changes: 1 addition & 1 deletion ner/__checkout__.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
v1.1
g8666face5331ed80d6b88c942fa450d9f82d4959
g61b0cb1b6bb6e9c4549a53aa5db557ffe37c1946
55 changes: 45 additions & 10 deletions programs/addEntities.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from itertools import chain
from tf.app import use
from tf.core.helpers import console
from tf.dataset import modify
Expand Down Expand Up @@ -90,19 +91,36 @@ def prepareData(self):
slotLink = {}
kindFeature = {}
eidFeature = {}
occEdge = {}
entities = {}

for i, streak in enumerate(streaks):
n = i + 1
n = 0

for streak in streaks:
n += 1
refS = streak[0]
slotLink[n] = streak
kindFeature[n] = kindv(refS)
ekind = kindv(refS)
kindFeature[n] = ekind
eid = toId(T.text(streak))
eidFeature[n] = eid
entities.setdefault((eid, ekind), []).append(n)

nStreaks = len(streaks)

for ((eid, ekind), ms) in entities.items():
n += 1
occEdge[n] = set(ms)
slotLink[n] = tuple(chain.from_iterable(slotLink[m] for m in ms))
kindFeature[n] = ekind
eidFeature[n] = eid

console(f"{len(streaks):>5} entity nodes")
console(f"{len(set(eidFeature.values())):>5} distinct eids")
console(f"{len(entities):>5} distinct entities")

features = dict(kind=kindFeature, eid=eidFeature)
nodeFeatures = dict(kind=kindFeature, eid=eidFeature)
edgeFeatures = dict(eoccs=occEdge)

featureMeta = dict(
eid=dict(
Expand All @@ -113,18 +131,31 @@ def prepareData(self):
valueType="str",
description="entity kind",
),
eoccs=dict(
valueType="str",
description="entity occurrences",
)
)

self.addTypes = dict(
ent=dict(
nodeFrom=1,
nodeTo=len(streaks),
nodeTo=nStreaks,
nodeSlots=slotLink,
nodeFeatures=features,
nodeFeatures=nodeFeatures,
),
entity=dict(
nodeFrom=nStreaks + 1,
nodeTo=nStreaks + len(entities),
nodeSlots=slotLink,
nodeFeatures=nodeFeatures,
edgeFeatures=edgeFeatures,
),
)
self.featureMeta = featureMeta

return True

def modify(self):
A = self.A
TF = A.TF
Expand All @@ -143,7 +174,7 @@ def modify(self):
f for f in TF.features if f.startswith("omap@")
]

modify(
return modify(
origTf,
newTf,
targetVersion=newVersion,
Expand All @@ -157,7 +188,7 @@ def tweakApp(self):

config = f"{A.repoLocation}/app/config.yaml"
oldVersion = A.version
newVersion = f"{oldVersion}e"
newVersion = f"{oldVersion}ent"

with open(config) as fh:
text = fh.read()
Expand All @@ -180,7 +211,11 @@ def run(self):
if not self.checkStreaks():
return

self.prepareData()
self.modify()
if not self.prepareData():
return

if not self.modify():
return

self.tweakApp()
self.loadNew()
Loading

0 comments on commit c86c5b3

Please sign in to comment.