Permalink
Browse files

small fixes

  • Loading branch information...
Gabor Recski
Gabor Recski committed May 30, 2017
1 parent 9aeed0c commit f4a6aeaaecc64f44e1f727bc24d95b13a08d02a1
Showing with 30 additions and 6 deletions.
  1. +11 −3 scripts/get_defs.py
  2. +2 −1 scripts/get_graph.py
  3. +5 −1 src/fourlang/corenlp_wrapper.py
  4. +12 −1 src/fourlang/wiktionary_parser.py
@@ -1,8 +1,16 @@
import json
import sys
import traceback

data = json.load(sys.stdin)
for e in data.itervalues():
if e['senses'] and e['senses'][0]['definition']:
print u"{0}\t{1}".format(
e['hw'], e['senses'][0]['definition']['sen']).encode('utf-8')
if not e['senses']:
continue
defs = [sense.get('definition') for sense in e['senses']]
for definition in defs:
if not definition:
sys.stderr.write(e['hw'].encode('utf-8')+'\n')
continue
if isinstance(definition, unicode):
continue
print u"{0}\t{1}".format(e['hw'], definition['sen']).encode('utf-8')
@@ -7,7 +7,8 @@
def main():
lex_fn, word = sys.argv[1:3]
lex = Lexicon.load_from_binary(lex_fn)
machines = lex.lexicon.get(word, lex.ext_lexicon.get(word))
# machines = lex.lexicon.get(word, lex.ext_lexicon.get(word))
machines = [lex.get_machine(word)]
if machines is None:
print '404 :('
else:
@@ -102,9 +102,13 @@ def parse_entries(self, entries):
for entry in entries:
for sense in entry['senses']:
sentence = sense['definition']
if sentence is None:
if not sentence:
continue
deps, corefs, parse_trees = self.parse_text(sentence)
if not deps:
logging.warning(
'no deps: {0}'.format(sentence.encode('utf-8')))
continue
sense['definition'] = {
"sen": sentence,
"deps": deps[0],
@@ -110,5 +110,16 @@ def test():
for entry in WiktParser.parse_xml(xml):
print entry

def print_defs():
xml = sys.stdin.read()
for entry in WiktParser.parse_xml(xml):
hw, senses = entry['hw'], entry['senses']
if not senses:
continue
for sense in senses:
d = sense['definition']
if d:
print "{0}\t{1}".format(hw, d)

if __name__ == "__main__":
test()
print_defs()

0 comments on commit f4a6aea

Please sign in to comment.