# lawsoc-scraper/test/lawsoc_example_curl-elastic.../create_settings.sh
#
# (file-viewer metadata at time of capture: 113 lines, 2.7 KiB, Bash)

INDEX_ROOT="lawsoc"
INDEX_DATE=$(date +%Y-%m-%d)
INDEX_NAME="${INDEX_ROOT}_${INDEX_DATE}"
curl -XPOST "http://localhost:9200/${INDEX_NAME}/_close"
curl -XPUT "http://localhost:9200/${INDEX_NAME}/_settings" -d '{
"index":{
"analysis":{
"filter":{
"code":{
"type":"pattern_capture",
"preserve_original":1,
"patterns":[
"(\\p{Ll}+|\\p{Lu}\\p{Ll}+|\\p{Lu}+)",
"(\\d+)"
]
},
"email":{
"type": "pattern_capture",
"preserve_original" : 1,
"patterns" : [
"([^@]+)",
"(\\p{L}+)",
"(\\d+)",
"@(.+)"
]
},
"phonetic_metaphone" : {
"encoder" : "metaphone",
"replace" : false,
"type" : "phonetic"
},
"default_delimiter" : {
"type": "word_delimiter",
"generate_word_parts": true,
"generate_number_parts": true,
"catenate_words": false,
"catenate_numbers": false,
"catenate_all": false,
"split_on_case_change": true,
"preserve_original": false,
"split_on_numerics": true,
"stem_english_possessive": true
},
"my_delimiter" : {
"type": "word_delimiter",
"generate_word_parts": true,
"generate_number_parts": true,
"catenate_words": false,
"catenate_numbers": false,
"catenate_all": false,
"split_on_case_change": false,
"preserve_original": true,
"split_on_numerics": false,
"stem_english_possessive": false
}
},
"analyzer" : {
"index_analyzer" : {
"tokenizer" : "standard",
"filter" : [
"standard",
"lowercase",
"stop",
"asciifolding",
"phonetic_metaphone",
"my_delimiter"
]
},
"search_analyzer" : {
"tokenizer" : "standard",
"filter" : [
"standard",
"lowercase",
"stop",
"asciifolding",
"phonetic_metaphone"
]
},
"name_analyzer" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : [
"asciifolding",
"lowercase"
]
},
"email_analyzer" : {
"type" : "custom",
"tokenizer" : "uax_url_email",
"filter" : [
"email",
"lowercase",
"unique"
]
},
"url_analyzer" : {
"type" : "custom",
"tokenizer" : "uax_url_email"
},
"metaphone_analyzer" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : [
"phonetic_metaphone"
]
}
}
}
}
}'
curl -XPOST "http://localhost:9200/${INDEX_NAME}/_open"