Permalink
Browse files

misc

  • Loading branch information...
Gabor Recski
Gabor Recski committed Nov 25, 2015
1 parent d575a85 commit 81c6435807ca3a4f841c099100a10fe2d6ae3023
Showing with 43 additions and 0 deletions.
  1. +12 −0 conf/hu_test.cfg
  2. +2 −0 dep_to_4lang_hu.txt
  3. +29 −0 scripts/dep_to_dot.py
@@ -0,0 +1,12 @@

[machine]
definitions_binary = test/machines/eksz_test.pickle
graph_dir = test/graphs/hu_test

[text]
input_sens = test/input/hu_test.sens
expand = False

[deps]
lang = hu
dep_map = dep_to_4lang_hu.txt
@@ -15,6 +15,8 @@ mode 0,-
subj 1,0
obj 2,-

pred 0,-

obl,.*,N...2.* -,- IN
obl,.*,N...x.* -,- IN
obl,.*,N...i.* -,- INSTRUMENT
@@ -0,0 +1,29 @@
import io
import json
import sys

HEADER = u"digraph finite_state_machine {\n\tdpi=100;\n\trankdir=LR;\n"
# Dependency types that are left out of the graph. The trailing comma is
# required: without it this is a plain string and ``in`` does substring
# matching, which would also drop types such as "p" or "un".
EXCLUDE = ("punct",)


def _dot_quote(text):
    """Return *text* as a double-quoted DOT string with quotes escaped."""
    text = u"{0}".format(text)
    escaped = text.replace(u'\\', u'\\\\').replace(u'"', u'\\"')
    return u'"{0}"'.format(escaped)


def dep_to_dot(deps, fn):
    """Write the dependencies in *deps* to *fn* as a Graphviz DOT digraph.

    Args:
        deps: iterable of dicts, each with a ``'type'`` key and ``'dep'`` /
            ``'gov'`` keys whose values are dicts holding a ``'lemma'``.
        fn: path of the output file; it is overwritten and encoded as UTF-8.

    Dependencies whose type is listed in ``EXCLUDE`` are skipped. Every lemma
    that takes part in a remaining dependency becomes a rectangular node, and
    every dependency becomes an edge from the dependent to the governor,
    labelled with the dependency type.
    """
    edges = [
        (d['dep']['lemma'], d['type'], d['gov']['lemma']) for d in deps
        if d['type'] not in EXCLUDE]
    # Sorted so that the same input always yields the same file.
    words = sorted({word for dep, _, gov in edges for word in (dep, gov)})
    # Node IDs are quoted: bare lemmas such as "e-mail" or the DOT keywords
    # "node" / "graph" would otherwise make the output unparsable.
    lines = [
        u'\t{0} [shape=rectangle, label={0}];'.format(_dot_quote(word))
        for word in words]
    lines.extend(
        u'\t{0} -> {1} [label={2}];'.format(
            _dot_quote(dep), _dot_quote(gov), _dot_quote(dtype))
        for dep, dtype, gov in edges)
    # io.open with an explicit encoding accepts unicode text on both
    # Python 2 and Python 3, so no manual .encode() calls are needed.
    with io.open(fn, 'w', encoding='utf-8') as f:
        f.write(HEADER)
        f.write(u"".join(line + u"\n" for line in lines))
        f.write(u"}\n")

def main():
    """Convert one sentence of a dependency JSON file to a DOT file.

    Command line: ``DEPS_JSON OUTPUT_DOT [SENTENCE_INDEX]``. The JSON file
    must have a top-level ``'deps'`` list; the element at SENTENCE_INDEX
    (default 0) is passed to ``dep_to_dot`` together with OUTPUT_DOT.

    Exits with a usage message when fewer than two arguments are given.
    """
    if len(sys.argv) < 3:
        sys.exit(
            "usage: {0} DEPS_JSON OUTPUT_DOT [SENTENCE_INDEX]".format(
                sys.argv[0]))
    # Read inside a context manager so the handle is closed promptly, and
    # decode as UTF-8 regardless of the locale.
    with io.open(sys.argv[1], encoding='utf-8') as f:
        data = json.load(f)
    i = int(sys.argv[3]) if len(sys.argv) > 3 else 0
    sen = data['deps'][i]
    dep_to_dot(sen, sys.argv[2])


if __name__ == "__main__":
    main()

0 comments on commit 81c6435

Please sign in to comment.