lawsoc-scraper/elasticsearch/mappings.json

360 lines
7.9 KiB
JSON

{
"settings" : {
"analysis" : {
"filter" : {
"code" : {
"type" :"pattern_capture",
"preserve_original" :1,
"patterns" : [
"(\\p{Ll}+|\\p{Lu}\\p{Ll}+|\\p{Lu}+)",
"(\\d+)"
]
},
"email" : {
"type" : "pattern_capture",
"preserve_original" : 1,
"patterns" : [
"([^@]+)",
"(\\p{L}+)",
"(\\d+)",
"@(.+)"
]
},
"phonetic_metaphone" : {
"encoder" : "metaphone",
"replace" : false,
"type" : "phonetic"
},
"default_delimiter" : {
"type": "word_delimiter",
"generate_word_parts": true,
"generate_number_parts": true,
"catenate_words": false,
"catenate_numbers": false,
"catenate_all": false,
"split_on_case_change": true,
"preserve_original": false,
"split_on_numerics": true,
"stem_english_possessive": true
},
"my_delimiter" : {
"type": "word_delimiter",
"generate_word_parts": true,
"generate_number_parts": true,
"catenate_words": false,
"catenate_numbers": false,
"catenate_all": false,
"split_on_case_change": false,
"preserve_original": true,
"split_on_numerics": false,
"stem_english_possessive": false
}
},
"analyzer" : {
"index_analyzer" : {
"tokenizer" : "standard",
"filter" : [
"standard",
"lowercase",
"stop",
"asciifolding",
"phonetic_metaphone",
"my_delimiter"
]
},
"search_analyzer" : {
"tokenizer" : "standard",
"filter" : [
"standard",
"lowercase",
"stop",
"asciifolding",
"phonetic_metaphone"
]
},
"name_analyzer" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : [
"asciifolding",
"lowercase"
]
},
"email_analyzer" : {
"type" : "custom",
"tokenizer" : "uax_url_email",
"filter" : [
"email",
"lowercase",
"unique"
]
},
"url_analyzer" : {
"type" : "custom",
"tokenizer" : "uax_url_email"
},
"metaphone_analyzer" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : [
"phonetic_metaphone"
]
}
}
},
"mappings": {
"lawsoc": {
"properties" : {
"_all" : {
"enabled" : true,
"index_analyzer": "index_analyzer",
"search_analyzer": "search_analyzer"
},
"person": {
"admitted_date": {
"type": "date",
"format": "dd-MM-YY"
},
"person_id": {
"type": "integer",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"solicitor_id": {
"type": "integer",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"sra_id": {
"type": "integer",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"type": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"roles": {
"type": "string",
"boost": 4.0,
"index_name": "role",
"index": "analyzed",
"index_analyzer": "index_analyzer",
"search_analyzer": "search_analyzer",
"store": "yes",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"name":{
"type": "string",
"search_analyzer" : "name_analyzer",
"index_analyzer" : "name_analyzer",
"fields": {
"metaphone": {
"type": "string",
"analyzer": "metaphone_analyzer"
},
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"languages": {
"type": "string",
"boost": 4.0,
"index_name": "language",
"index": "analyzed",
"index_analyzer": "index_analyzer",
"search_analyzer": "search_analyzer",
"store": "yes",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"email": {
"type": "string",
"search_analyzer" : "email_analyzer",
"index_analyzer" : "email_analyzer",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"dx_address": {
"type": "string",
"index": "not_analyzed"
},
"areas_of_practice": {
"type": "string",
"boost": 4.0,
"index_name": "area_of_practice",
"index": "analyzed",
"index_analyzer": "index_analyzer",
"search_analyzer": "search_analyzer",
"store": "yes",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"accreditations": {
"type": "string",
"boost": 4.0,
"index_name": "tag",
"index": "accreditation",
"index_analyzer": "index_analyzer",
"search_analyzer": "search_analyzer",
"store": "yes",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"tel": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"office": {
"location": {
"type": "geo_point"
},
"solicitor_id": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"sra_id": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"dx_address": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"email": {
"type": "string",
"search_analyzer" : "email_analyzer",
"index_analyzer" : "email_analyzer",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"facilities": {
"type": "string",
"boost": 4.0,
"index_name": "facility",
"index": "analyzed",
"index_analyzer": "index_analyzer",
"search_analyzer": "search_analyzer",
"store": "yes",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"name": {
"type": "string",
"search_analyzer" : "name_analyzer",
"index_analyzer" : "name_analyzer",
"fields": {
"metaphone": {
"type": "string",
"analyzer": "metaphone_analyzer"
},
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"tel": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"web": {
"type": "string",
"search_analyzer" : "url_analyzer",
"index_analyzer" : "url_analyzer",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"address": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}