lawsoc-scraper/test/lawsoc_example_curl-elastic.../all_in_one.sh

234 lines
5.4 KiB
Bash

# Index naming and pacing configuration shared by the steps of this script.
# The index name is the fixed root followed by today's date (YYYY-MM-DD).
INDEX_ROOT="lawsoc"
INDEX_DATE=$(date +%Y-%m-%d)
INDEX_NAME="${INDEX_ROOT}_${INDEX_DATE}"
# WORKDIR="/var/tmp/lawsoc_new/lawsoc_example"
# Number of seconds handed to `sleep` between the steps of this script.
SLEEP_TIME=5

# Announce the index this run works on. The name comes from INDEX_NAME
# rather than a hard-coded date, so the banner always matches the index
# that the curl requests in this script address.
printf '%s\n' \
  "--------------------------------------------------------" \
  "create an index with an analyzer '${INDEX_NAME}'" \
  "--------------------------------------------------------"
# Pause, then query the _cluster/state endpoint with filter_indices set to
# ${INDEX_NAME} (node and routing-table sections filtered out) and print
# the pretty-formatted response, then pause again.
# NOTE(review): this query is issued before the PUT on /${INDEX_NAME}
# further down in this script — confirm that ordering is intended.
sleep "${SLEEP_TIME}"
printf '%s\n' \
  "--------------------------------------------------------" \
  "verify the settings for ${INDEX_NAME}" \
  "--------------------------------------------------------"
curl --request GET "http://localhost:9200/_cluster/state?pretty&filter_nodes=true&filter_routing_table=true&filter_indices=${INDEX_NAME}"
sleep "${SLEEP_TIME}"
printf '%s\n' \
  "--------------------------------------------------------" \
  "try to add a new mapping" \
  "--------------------------------------------------------"
# JSON body sent with the PUT on /${INDEX_NAME} below. It carries:
#   - "mappings": field definitions for the office_detail and person_detail
#     types (string fields, a geo_point "location", a date "admitted_date").
#   - "settings": 0 replicas, 1 shard, and an "analysis" section declaring
#     custom token filters (code, email, phonetic_metaphone, default_delimiter,
#     my_delimiter) and analyzers built on them.
# NOTE(review): the banner says "add a new mapping", but the request targets
# the index URL itself with settings included, so it presumably creates the
# index — confirm against the Elasticsearch version in use.
# Single quotes keep the regex backslashes in the body literal.
index_definition='{
"mappings": {
"office_detail":
{"properties":
{"address":{
"type":"string"
},
"dx_address":{
"type":"string"
},
"email":{
"type":"string"
},
"facilities":{
"type":"string"
},
"location":{
"type":"geo_point"
},
"name":{
"type":"string"
},
"solicitor_id":{
"type":"string"
},
"sra_id":{
"type":"string"
},
"tel":{
"type":"string"
},
"type":{
"type":"string"
}
,"web":{
"type":"string"}
}
},
"person_detail":{
"properties":{
"accreditations":{
"type":"string"
},
"admitted_date":{
"type":"date",
"format":"yyyy/MM/dd HH:mm:ss||yyyy/MM/dd"
},
"areas_of_practice":{
"type":"string"},
"dx_address":{
"type":"string"
},
"email":{
"type":"string"
},
"languages":{
"type":"string"
},
"name":{
"type":"string"
},
"person_id":{
"type":"string"
},
"roles":{
"type":"string"
},
"solicitor_id":{
"type":"string"
},
"sra_id":{
"type":"string"
},
"tel":{
"type":"string"
},
"type":{
"type":"string"
}
}
}
},
"settings":{
"index":{
"number_of_replicas":0,
"number_of_shards":1,
"analysis":{
"filter":{
"code":{
"type":"pattern_capture",
"preserve_original":1,
"patterns":[
"(\\p{Ll}+|\\p{Lu}\\p{Ll}+|\\p{Lu}+)",
"(\\d+)"
]
},
"email":{
"type": "pattern_capture",
"preserve_original" : 1,
"patterns" : [
"([^@]+)",
"(\\p{L}+)",
"(\\d+)",
"@(.+)"
]
},
"phonetic_metaphone" : {
"encoder" : "metaphone",
"replace" : false,
"type" : "phonetic"
},
"default_delimiter" : {
"type": "word_delimiter",
"generate_word_parts": true,
"generate_number_parts": true,
"catenate_words": false,
"catenate_numbers": false,
"catenate_all": false,
"split_on_case_change": true,
"preserve_original": false,
"split_on_numerics": true,
"stem_english_possessive": true
},
"my_delimiter" : {
"type": "word_delimiter",
"generate_word_parts": true,
"generate_number_parts": true,
"catenate_words": false,
"catenate_numbers": false,
"catenate_all": false,
"split_on_case_change": false,
"preserve_original": true,
"split_on_numerics": false,
"stem_english_possessive": false
}
},
"analyzer" : {
"index_analyzer" : {
"tokenizer" : "standard",
"filter" : [
"standard",
"lowercase",
"stop",
"asciifolding",
"phonetic_metaphone",
"my_delimiter"
]
},
"search_analyzer" : {
"tokenizer" : "standard",
"filter" : [
"standard",
"lowercase",
"stop",
"asciifolding",
"phonetic_metaphone"
]
},
"name_analyzer" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : [
"asciifolding",
"lowercase"
]
},
"email_analyzer" : {
"type" : "custom",
"tokenizer" : "uax_url_email",
"filter" : [
"email",
"lowercase",
"unique"
]
},
"url_analyzer" : {
"type" : "custom",
"tokenizer" : "uax_url_email"
},
"metaphone_analyzer" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : [
"phonetic_metaphone"
]
}
}
}
}
}
}'
curl --request PUT "http://localhost:9200/${INDEX_NAME}" --data "${index_definition}"
# Banner for the "open index" step. Every line is newline-terminated so the
# closing rule lands on its own line; the earlier printf "\r\r..." form
# emitted carriage returns and no newline, which glued the rule onto the
# message.
printf '%s\n' \
  "--------------------------------------------------------" \
  "open the index now" \
  "--------------------------------------------------------"
# POST to the index's _open endpoint, then pause before the next step.
curl -XPOST "http://localhost:9200/${INDEX_NAME}/_open"
sleep "${SLEEP_TIME}"
# Run the office import script from the current working directory.
printf '%s\n' \
  "--------------------------------------------------------" \
  "running insert_offices.sh now" \
  "--------------------------------------------------------"
sh ./insert_offices.sh
# The person import below is disabled; uncomment these lines to run it.
#echo "--------------------------------------------------------"
#echo "running insert_person now"
#echo "--------------------------------------------------------"
#sh ./insert_person.sh