#When a cfg file is loaded by a 4lang module, unspecified parameters are assigned the default values from this file
#Wherever possible, these values correspond to the most typical settings and test datasets distributed with 4lang
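#values written as %(name)s are filled in by ConfigParser interpolation when the file is loaded;
#in 4lang these typically come from environment variables of the same name, e.g. (illustrative
#paths only, adjust to your local install):
#  export FOURLANGPATH=~/projects/4lang
#  export STANFORDPATH=~/tools/stanford-parser
#  export JYTHONPATH=/usr/local/bin/jython
#  export HUNTOOLSBINPATH=~/tools/huntools/bin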
#Stanford Parser
[stanford]
#remote parsing servers may be supported in the future; leave this set to False for now
remote = False
#full path of Stanford Parser directory, to be set in env variable STANFORDPATH
dir = %(stanfordpath)s
#name of parser JAR file
parser = stanford-parser.jar
#name of model to load
model = englishRNN.ser.gz
#full path of jython executable, to be set in env variable JYTHONPATH
jython = %(jythonpath)s
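#magyarlanc toolkit, used for parsing Hungarian input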
[magyarlanc]
dir = %(magyarlancpath)s
jar = %(magyarlancpath)s/magyarlanc-2.0.jar
#miscellaneous data
[data]
#directory to save output of dependency parsing
deps_dir = %(fourlangpath)s/test/deps
#directory for temporary files
tmp_dir = %(fourlangpath)s/test/tmp
#dictionary data
[dict]
#input format
#possible values are: longman, collins, wiktionary, eksz, nszt
input_type = longman
#path to input file
input_file = %(fourlangpath)s/test/input/longman_test.xml
#path to JSON file containing parsed dictionary entries
output_file = %(fourlangpath)s/test/dict/longman_test.json
#text_to_4lang options
[text]
#path to input data
input_sens = %(fourlangpath)s/test/input/mrhug_story.sens
#set to True to perform expansion on graphs built from text
expand = False
#set to True to print dot files for each sentence
print_graphs = True
#path to save dot files
graph_dir = %(fourlangpath)s/test/graphs/text
#if True, only dependency parsing will be run and its output saved, but 4lang
#graphs won't be built. Useful when working with large datasets.
parse_only = False
#path to save output of parsers
deps_dir = %(fourlangpath)s/test/deps/text
#options to control which definitions are included by dict_to_4lang
[filter]
#include multiword expressions
keep_multiword = False
#include words with apostrophes
keep_apostrophes = False
#discard all but the first definition of each headword
first_only = True
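#options for the hunmorph-based lemmatizer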
[lemmatizer]
#full path of hunmorph binaries and models, to be set in env variable HUNTOOLSBINPATH
hunmorph_path = %(huntoolsbinpath)s
#path of cache (loaded but not updated by default, see docs)
cache_file = %(fourlangpath)s/data/hunmorph_cache.txt
#options related to 4lang graphs
[machine]
#file containing 4lang dictionary
definitions = 4lang
#extra data for 4lang, currently not in use
plurals = 4lang.plural
primitives = 4lang.primitive
#pickle file to load 4lang graphs from
definitions_binary = %(fourlangpath)s/data/machines/4lang.pickle
#pickle file to save 4lang graphs
definitions_binary_out = %(fourlangpath)s/test/machines/wikt_test.pickle
#pickle file to save expanded 4lang graphs
expanded_definitions = %(fourlangpath)s/test/machines/wikt_test_expanded.pickle
#path of directory for printing dot graphs
graph_dir = %(fourlangpath)s/test/graphs/wikt_test
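#options for mapping dependency relations to 4lang edges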
[deps]
#path to the map from dependencies to 4lang edges
dep_map = %(fourlangpath)s/dep_to_4lang.txt
#language of the mapping (en or hu)
lang = en
#options for testing the word similarity module
[word_sim]
definitions_binary = %(fourlangpath)s/data/machines/longman_firsts.pickle
dep_map = %(fourlangpath)s/dep_to_4lang.txt
graph_dir = %(fourlangpath)s/data/graphs/sts
sim_types = fullgraph
out_dir = %(fourlangpath)s/test/output/
shortest_path_res = %(fourlangpath)s/test/output/dijstra_res.txt
calc_shortest_path = True
expand_path = False
batch = True
expand = False
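#toggles for word-level and compositional similarity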
[similarity]
word = True
compositional = False
#options for experimental sentence similarity system
[sim]
similarity_type = word_test
#word_test_data = %(fourlangpath)s/ws_data/wordsim_similarity_goldstandard.txt
word_test_data = %(fourlangpath)s/test/input/sim_data/SimLex-999.txt
graph_dir = %(fourlangpath)s/test/graphs/sts_test
deps_dir = %(fourlangpath)s/test/deps/sts_test
#options for experimental question answering system
[qa]
input_file = %(fourlangpath)s/test/input/clef_qa_sample.xml
output_file = %(fourlangpath)s/test/qa/clef_qa_sample.answers
graph_dir = %(fourlangpath)s/test/graphs/qa_test
deps_dir = %(fourlangpath)s/test/deps/qa_test
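#options for the 4lang demo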
[demo]
tmp_root = %(fourlangpath)s/data/tmp/demo
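#input and output paths for the context module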
[context]
stanford_output = %(fourlangpath)s/test/input/stanford_output_test.txt
raw_output = %(fourlangpath)s/test/context/test_small
context_file = %(fourlangpath)s/test/context/context_small
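#options for the fullgraph similarity type (frequency-based filtering, node naming, embeddings)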
[fullgraph]
upper_exclude = True
freq_file = %(fourlangpath)s/test/input/freq/longman_tab_sep_freq.txt
#minimum required frequency
freq_val = 0
#number of most frequent entries to exclude
freq_count = 50
#NOTE: there is an AND relationship between freq_val and freq_count
#nodename_option:
#  0: all nodes are unique
#  1: all nodes are printnames
#  2: only uppercase nodes and 'lack', 'before', 'not', 'have' are unique
nodename_option = 1
weighted = False
color_based = False
embedding_path = %(fourlangpath)s/test/input/embedding/paragram_vectors_utf8.txt