#4lang default configuration file, read via Python configparser
#(%(name)s values are interpolated from defaults supplied at load time)
#When loading some cfg file in a 4lang module, unspecified parameters are assigned default values from this file
#Wherever possible, these values correspond to the most typical settings and test datasets distributed with 4lang
#Stanford Parser settings
[stanford]
#may in the future support using remote servers for parsing, leave it False for now
remote = False
#full path of Stanford Parser directory, to be set in env variable STANFORDPATH
dir = %(stanfordpath)s
#name of parser JAR file
parser = stanford-parser.jar
#name of model to load
model = englishRNN.ser.gz
#full path of jython executable, to be set in env variable JYTHONPATH
jython = %(jythonpath)s
#magyarlanc (Hungarian) parser settings
[magyarlanc]
#full path of magyarlanc directory (presumably set via an env variable, like the paths in [stanford] — verify)
dir = %(magyarlancpath)s
#full path of the magyarlanc JAR file
jar = %(magyarlancpath)s/magyarlanc-2.0.jar
#miscellaneous data
[data]
#directory to save output of dependency parsing
deps_dir = %(fourlangpath)s/test/deps
#directory for temporary files
tmp_dir = %(fourlangpath)s/test/tmp
#dictionary data (dict_to_4lang input/output)
[dict]
#input format
#possible values are: longman, collins, wiktionary, eksz, nszt
input_type = longman
#path to input file
input_file = %(fourlangpath)s/test/input/longman_test.xml
#path to JSON file containing parsed dictionary entries
output_file = %(fourlangpath)s/test/dict/longman_test.json
#text_to_4lang options
[text]
#path to input data
input_sens = %(fourlangpath)s/test/input/mrhug_story.sens
#set to True to perform expansion on graphs built from text
expand = False
#set True to print dot files for each sentence
print_graphs = True
#path to save dot files
graph_dir = %(fourlangpath)s/test/graphs/text
#if True, only dependency parsing will run and its output saved, but 4lang
#graphs won't be built. Useful when working with large datasets.
parse_only = False
#path to save output of parsers
deps_dir = %(fourlangpath)s/test/deps/text
#options to control which definitions are included by dict_to_4lang
[filter]
#include multiword expressions
keep_multiword = False
#include words with apostrophes
keep_apostrophes = False
#discard all but the first definition of each headword
first_only = True
#lemmatizer (hunmorph) settings
[lemmatizer]
#full path of hunmorph binaries and models, to be set in env variable HUNTOOLSBINPATH
hunmorph_path = %(huntoolsbinpath)s
#path of cache (loaded but not updated by default, see docs)
cache_file = %(fourlangpath)s/data/hunmorph_cache.txt
#options related to 4lang graphs
[machine]
#file containing 4lang dictionary
definitions = 4lang
#extra data for 4lang, currently not in use
plurals = 4lang.plural
primitives = 4lang.primitive
#pickle file to load 4lang graphs from
definitions_binary = %(fourlangpath)s/data/machines/4lang.pickle
#pickle file to save 4lang graphs
definitions_binary_out = %(fourlangpath)s/test/machines/wikt_test.pickle
#pickle file to save expanded 4lang graphs
expanded_definitions = %(fourlangpath)s/test/machines/wikt_test_expanded.pickle
#path of directory for printing dot graphs
graph_dir = %(fourlangpath)s/test/graphs/wikt_test
#dependency-to-4lang mapping settings
[deps]
#path to the map from dependencies to 4lang edges
dep_map = %(fourlangpath)s/dep_to_4lang.txt
#language of the mapping (en or hu)
lang = en
#options for testing the word similarity module
[word_sim]
#pickle file to load 4lang graphs from (cf. [machine])
definitions_binary = %(fourlangpath)s/data/machines/longman_firsts.pickle
#path to the map from dependencies to 4lang edges (cf. [deps])
dep_map = %(fourlangpath)s/dep_to_4lang.txt
#path of directory for printing dot graphs
graph_dir = %(fourlangpath)s/data/graphs/sts
#similarity method(s) to use -- TODO confirm accepted values besides fullgraph
sim_types = fullgraph
#directory for output files
out_dir = %(fourlangpath)s/test/output/
#output file for shortest-path results (filename spelling kept as-is)
shortest_path_res = %(fourlangpath)s/test/output/dijstra_res.txt
calc_shortest_path = True
expand_path = False
batch = True
expand = False
#which similarity components to enable
[similarity]
word = True
compositional = False
#options for experimental sentence similarity system
[sim]
similarity_type = word_test
#alternative gold standard dataset, kept for reference:
#word_test_data = %(fourlangpath)s/ws_data/wordsim_similarity_goldstandard.txt
word_test_data = %(fourlangpath)s/test/input/sim_data/SimLex-999.txt
#path of directory for printing dot graphs
graph_dir = %(fourlangpath)s/test/graphs/sts_test
#directory to save output of dependency parsing
deps_dir = %(fourlangpath)s/test/deps/sts_test
#options for experimental question answering system
[qa]
#path to input file
input_file = %(fourlangpath)s/test/input/clef_qa_sample.xml
#path to output file of answers
output_file = %(fourlangpath)s/test/qa/clef_qa_sample.answers
#path of directory for printing dot graphs
graph_dir = %(fourlangpath)s/test/graphs/qa_test
#directory to save output of dependency parsing
deps_dir = %(fourlangpath)s/test/deps/qa_test
#demo service settings
[demo]
#root directory for the demo's temporary files
tmp_root = %(fourlangpath)s/data/tmp/demo
#context module settings
[context]
#path to pre-parsed Stanford output used as test input
stanford_output = %(fourlangpath)s/test/input/stanford_output_test.txt
#path for raw output -- TODO confirm exact role of this file
raw_output = %(fourlangpath)s/test/context/test_small
#path for the generated context file
context_file = %(fourlangpath)s/test/context/context_small
#options for the fullgraph similarity method (cf. sim_types in [word_sim])
[fullgraph]
#if True, exclude uppercase nodes -- TODO confirm exact semantics
upper_exclude = True
#tab-separated word frequency file
freq_file = %(fourlangpath)s/test/input/freq/longman_tab_sep_freq.txt
#minimum required freq
freq_val = 0
#minimum number to exclude from the top
freq_count = 50
#NOTE: there is an AND relationship between freq_val and freq_count
#nodename_option:
# 0: all nodes are unique
# 1: all nodes are printnames
# 2: only: uppercase + 'lack', 'before', 'not', 'have' are unique
nodename_option = 1
weighted = False
color_based = False
#path to word embedding vectors (text format, UTF-8)
embedding_path = %(fourlangpath)s/test/input/embedding/paragram_vectors_utf8.txt