Permalink
Browse files

fixes:

- dependency processor now only runs in dep_to_4lang
- text_to_4lang.process also converts old-style deps to new ones
  • Loading branch information...
Gabor Recski
Gabor Recski committed Oct 7, 2015
1 parent cd6c569 commit 222530bf760b70d31b300ef147a5e1d1a72f60d4
Showing 5 changed files with 35 additions and 6 deletions.
  1. +1 −0 4lang.primitive
  2. +15 −0 conf/eksz_firsts.cfg
  3. +15 −0 conf/nszt_firsts.cfg
  4. +0 −6 src/fourlang/dict_to_4lang.py
  5. +4 −0 src/fourlang/text_to_4lang.py
@@ -0,0 +1 @@
have
@@ -0,0 +1,15 @@
[dict]
input_type = eksz
input_file = input/eksz.xml
output_file = data/dict/eksz_firsts.json

[machine]
ext_definitions = data/machines/eksz_firsts.pickle
graph_dir = data/graphs/eksz_firsts

[deps]
lang = hu
dep_map = dep_to_4lang_hu.txt

[magyarlanc]
path = magyarlanc/magyarlanc-2.0.jar
@@ -0,0 +1,15 @@
[dict]
input_type = nszt
input_file = input/nsztb.xml
output_file = data/dict/nszt_firsts.json

[machine]
ext_definitions = data/machines/nszt_firsts.pickle
graph_dir = data/graphs/nszt_firsts

[deps]
lang = hu
dep_map = dep_to_4lang_hu.txt

[magyarlanc]
path = magyarlanc/magyarlanc-2.0.jar
@@ -9,7 +9,6 @@
import traceback

from dep_to_4lang import DepTo4lang
from dependency_processor import DependencyProcessor
from entry_preprocessor import EntryPreprocessor
from lexicon import Lexicon
from longman_parser import LongmanParser
@@ -93,8 +92,6 @@ def process_entries(self, words):
else:
print 'incorrect lang'

dependency_processor = DependencyProcessor(self.cfg)

for entry in entries:
if entry['to_filter']:
continue
@@ -103,9 +100,6 @@ def process_entries(self, words):
definition = sense['definition']
if definition is None:
continue
# print 'printing deps' + str(definition['deps'])
definition['deps'] = dependency_processor.process_dependencies(
definition['deps'])

if word in self.dictionary:
logging.warning(
@@ -18,6 +18,7 @@ class TextTo4lang():

def __init__(self, cfg):
self.cfg = cfg
self.lang = self.cfg.get("deps", "lang")
self.deps_dir = self.cfg.get('data', 'deps_dir')
ensure_dir(self.deps_dir)
self.corenlp_wrapper = CoreNLPWrapper(self.cfg)
@@ -58,6 +59,9 @@ def process(self, text, dep_dir=None, fn=None):
logging.getLogger().setLevel(__MACHINE_LOGLEVEL__)

# logging.info("processing sentences...")
if self.lang == 'en':
parsed_sens = map(
self.dep_to_4lang.convert_old_deps, parsed_sens)
words_to_machines = self.dep_to_4lang.get_machines_from_deps_and_corefs( # nopep8
parsed_sens, corefs)

0 comments on commit 222530b

Please sign in to comment.