#4lang default configuration file, read via Python configparser
#(%(name)s values are interpolated from defaults supplied at load time)
#When loading some cfg file in a 4lang module, unspecified parameters are assigned default values from this file
#Wherever possible, these values correspond to the most typical settings and test datasets distributed with 4lang
#Stanford Parser settings
[stanford]
#may in the future support using remote servers for parsing, leave it False for now
remote = False
#full path of Stanford Parser directory, to be set in env variable STANFORDPATH
dir = %(stanfordpath)s
#name of parser JAR file
parser = stanford-parser.jar
#name of model to load
model = englishRNN.ser.gz
#full path of jython executable, to be set in env variable JYTHONPATH
jython = %(jythonpath)s
#magyarlanc (Hungarian) parser settings
[magyarlanc]
#full path of magyarlanc directory (presumably set via an env variable, like the paths in [stanford] — verify)
dir = %(magyarlancpath)s
#full path of the magyarlanc JAR file
jar = %(magyarlancpath)s/magyarlanc-2.0.jar
#miscellaneous data
[data]
#directory to save output of dependency parsing
deps_dir = %(fourlangpath)s/test/deps
#directory for temporary files
tmp_dir = %(fourlangpath)s/test/tmp
#dictionary data (dict_to_4lang input/output)
[dict]
#input format
#possible values are: longman, collins, wiktionary, eksz, nszt
input_type = longman
#path to input file
input_file = %(fourlangpath)s/test/input/longman_test.xml
#path to JSON file containing parsed dictionary entries
output_file = %(fourlangpath)s/test/dict/longman_test.json
#text_to_4lang options
[text]
#path to input data
input_sens = %(fourlangpath)s/test/input/mrhug_story.sens
#set to True to perform expansion on graphs built from text
expand = False
#set True to print dot files for each sentence
print_graphs = True
#path to save dot files
graph_dir = %(fourlangpath)s/test/graphs/text
#if True, only dependency parsing will run and its output saved, but 4lang
#graphs won't be built. Useful when working with large datasets.
parse_only = False
#path to save output of parsers
deps_dir = %(fourlangpath)s/test/deps/text
#options to control which definitions are included by dict_to_4lang
[filter]
#include multiword expressions
keep_multiword = False
#include words with apostrophes
keep_apostrophes = False
#discard all but the first definition of each headword
first_only = True
#lemmatizer (hunmorph) settings
[lemmatizer]
#full path of hunmorph binaries and models, to be set in env variable HUNTOOLSBINPATH
hunmorph_path = %(huntoolsbinpath)s
#path of cache (loaded but not updated by default, see docs)
cache_file = %(fourlangpath)s/data/hunmorph_cache.txt
#options related to 4lang graphs
[machine]
#file containing 4lang dictionary
definitions = 4lang
#extra data for 4lang, currently not in use
plurals = 4lang.plural
primitives = 4lang.primitive
#pickle file to load 4lang graphs from
definitions_binary = %(fourlangpath)s/data/machines/4lang.pickle
#pickle file to save 4lang graphs
definitions_binary_out = %(fourlangpath)s/test/machines/wikt_test.pickle
#pickle file to save expanded 4lang graphs
expanded_definitions = %(fourlangpath)s/test/machines/wikt_test_expanded.pickle
#path of directory for printing dot graphs
graph_dir = %(fourlangpath)s/test/graphs/wikt_test
#dependency-to-4lang mapping settings
[deps]
#path to the map from dependencies to 4lang edges
dep_map = %(fourlangpath)s/dep_to_4lang.txt
#language of the mapping (en or hu)
lang = en
#options for testing the word similarity module
[word_sim]
#pickle file to load 4lang graphs from (cf. [machine])
definitions_binary = %(fourlangpath)s/data/machines/longman_firsts.pickle
#path to the map from dependencies to 4lang edges (cf. [deps])
dep_map = %(fourlangpath)s/dep_to_4lang.txt
#path of directory for printing dot graphs
graph_dir = %(fourlangpath)s/data/graphs/sts
#similarity method(s) to use -- TODO confirm accepted values besides fullgraph
sim_types = fullgraph
#directory for output files
out_dir = %(fourlangpath)s/test/output/
#output file for shortest-path results (filename spelling kept as-is)
shortest_path_res = %(fourlangpath)s/test/output/dijstra_res.txt
calc_shortest_path = True
expand_path = False
batch = True
expand = False
#which similarity components to enable
[similarity]
word = True
compositional = False
#options for experimental sentence similarity system
[sim]
similarity_type = word_test
#alternative gold standard dataset, kept for reference:
#word_test_data = %(fourlangpath)s/ws_data/wordsim_similarity_goldstandard.txt
word_test_data = %(fourlangpath)s/test/input/sim_data/SimLex-999.txt
#path of directory for printing dot graphs
graph_dir = %(fourlangpath)s/test/graphs/sts_test
#directory to save output of dependency parsing
deps_dir = %(fourlangpath)s/test/deps/sts_test
#options for experimental question answering system
[qa]
#path to input file
input_file = %(fourlangpath)s/test/input/clef_qa_sample.xml
#path to output file of answers
output_file = %(fourlangpath)s/test/qa/clef_qa_sample.answers
#path of directory for printing dot graphs
graph_dir = %(fourlangpath)s/test/graphs/qa_test
#directory to save output of dependency parsing
deps_dir = %(fourlangpath)s/test/deps/qa_test
#demo service settings
[demo]
#root directory for the demo's temporary files
tmp_root = %(fourlangpath)s/data/tmp/demo
#context module settings
[context]
#path to pre-parsed Stanford output used as test input
stanford_output = %(fourlangpath)s/test/input/stanford_output_test.txt
#path for raw output -- TODO confirm exact role of this file
raw_output = %(fourlangpath)s/test/context/test_small
#path for the generated context file
context_file = %(fourlangpath)s/test/context/context_small
#options for the fullgraph similarity method (cf. sim_types in [word_sim])
[fullgraph]
#if True, exclude uppercase nodes -- TODO confirm exact semantics
upper_exclude = True
#tab-separated word frequency file
freq_file = %(fourlangpath)s/test/input/freq/longman_tab_sep_freq.txt
#minimum required freq
freq_val = 0
#minimum number to exclude from the top
freq_count = 50
#NOTE: there is an AND relationship between freq_val and freq_count
#nodename_option:
# 0: all nodes are unique
# 1: all nodes are printnames
# 2: only: uppercase + 'lack', 'before', 'not', 'have' are unique
nodename_option = 1
weighted = False
color_based = False
#path to word embedding vectors (text format, UTF-8)
embedding_path = %(fourlangpath)s/test/input/embedding/paragram_vectors_utf8.txt