Post

Enhancing MySQL to Elasticsearch Migration for Efficient Querying

When migrating MySQL services to Elasticsearch for query operations, utilizing wildcards in Elasticsearch can significantly improve fuzzy query performance. The introduction of the ‘es7.9’ parameter in Elasticsearch brings support for wildcard types, allowing for better query performance. Additionally, the use of ‘ngram’ for data segmentation and storage optimization can further enhance overall system performance.

Build Index

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
{
  "warmsearch" : {
    "aliases" : { },
    "mappings" : {
      "properties" : {
        "aname" : {
          "type" : "wildcard"
        },
        "sn" : {
          "type" : "text"
        },
        "title" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword"
            }
          },
          "analyzer" : "my_analyzer"
        }
      }
    },
    "settings" : {
      "index" : {
        "max_ngram_diff" : "10",
        "routing" : {
          "allocation" : {
            "include" : {
              "_tier_preference" : "data_content"
            }
          }
        },
        "number_of_shards" : "1",
        "provided_name" : "warmsearch",
        "creation_date" : "1701145536507",
        "analysis" : {
          "analyzer" : {
            "my_analyzer" : {
              "tokenizer" : "my_tokenizer"
            }
          },
          "tokenizer" : {
            "my_tokenizer" : {
              "token_chars" : [
                "letter",
                "digit"
              ],
              "min_gram" : "1",
              "type" : "ngram",
              "max_gram" : "10"
            }
          }
        },
        "number_of_replicas" : "1",
        "queries" : {
          "cache" : {
            "enabled" : "false"
          }
        },
        "uuid" : "OG0QGTrxQiejej8Hj0T9eA",
        "version" : {
          "created" : "7100299"
        }
      }
    }
  }
}


Put Data Insert

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import random

import requests
import uuid


def add_document_to_es(index, document):
    """向Elasticsearch索引中添加一个文档"""
    response = requests.post(f'http://localhost:9200/{index}/_doc/', json=document)
    return response.json()


# 索引名称
index_name = 'warmsearch'

for i in range(1, 500000):
    # 生成随机数据
    random_title = uuid.uuid4()
    random_sn = uuid.uuid4()
    aname = uuid.uuid4()
    random_number = random.randint(0,1)
    # 根据随机数输出扩展名
    if random_number == 0:
        filetype = ".log"
    else:
        filetype = ".bag"
    # 构建文档
    document = {
        "title": str(random_title),
        "sn": str(random_sn),
        "aname": str(aname) + filetype
    }

    print(document)
    # 添加文档到Elasticsearch
    response = add_document_to_es(index_name, document)
    print(response)

Close ES Index

1
POST /warmsearch/_close

Modify ES disable search with cache

1
2
3
4
5
6
7
PUT warmsearch/_settings
{
  "index.queries.cache.enabled": false
}


Open Es Index

1
POST /warmsearch/_open

Search Bench Testing

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import requests
import time

# Elasticsearch服务器的URL
base_url = 'http://localhost:9200'

# 查询请求主体,分别为match、match_phrase和title.keyword查询

# keyName = "title"
# value = "asd"

keyName = "aname"
value = "a0"

queries = [
    {
        "query": {
            "match": {
                keyName: value
            }
        }
    },
    {
        "query": {
            "match": {
                keyName + ".keyword": value
            }
        }
    },
    {
        "query": {
            "match_phrase": {
                keyName: value
            }
        }
    },

    {
        "query": {
            "match_phrase": {
                keyName + ".keyword": value
            }
        }
    },
    {
        "query": {
            "wildcard": {
                keyName: "*" + value + "*"
            }
        }
    },
    {
        "query": {
            "wildcard": {
                keyName + ".keyword": "*" + value + "*"
            }
        }
    }
]

# 执行50次查询

for query in queries:
    # print(query)
    total_time = 0
    num_queries = 50
    for i in range(num_queries):
        start_time = time.time()
        # 发送查询请求
        response = requests.post(f'{base_url}/warmsearch/_search', json=query)

        end_time = time.time()
        elapsed_time = end_time - start_time

        total_time += elapsed_time

        if (i == (num_queries - 1)):
            response_json = response.json()
            hits = response_json.get('hits', {})
            total = hits.get("total", {})
            # print(total.get("value", {}))

        # 输出每次请求的访问耗时
        # print(f'Request {i + 1}: Elapsed Time = {elapsed_time:.4f} seconds')
        # 计算平均耗时
    average_time = total_time / num_queries
    print(f'Query  {query.get("query")}, | {total["value"]}| {average_time:.4f}')

Result

  1. 965658 Logs ,50 times avg ,close cache
QueryMethodKeyNameCountsAnalysisAvg Time (s)
{‘match’: {‘title’: ‘-ah’}},matchtitle10000Ngram0.0056
{‘match’: {‘title.keyword’: ‘-ah’}}matchtitle.keyword0Ngram0.0018
{‘match_phrase’: {‘title’: ‘-ah’}}match_phrasetitle45Ngram0.0036
{‘match_phrase’: {‘title.keyword’: ‘-ah’}}match_phrasetitle.keyword0Ngram0.0024
{‘wildcard’: {‘title’: ‘-ah’}}wildcardtitle0Ngram0.8781
{‘wildcard’: {‘title.keyword’: ‘-ah’}}wildcardtitle.keyword0Ngram0.0580
{‘match’: {‘sn’: ‘5c’}}matchsn0Default0.0105
{‘match’: {‘sn.keyword’: ‘5c’}}matchsn.keyword0Default0.0048
{‘match_phrase’: {‘sn’: ‘5c’}}match_phrasesn0Default0.0068
{‘match_phrase’: {‘sn.keyword’: ‘5c’}}match_phrasesn.keyword0Default0.0054
{‘wildcard’: {‘sn’: ‘5c’}}wildcardsn10000Default0.1659
{‘wildcard’: {‘sn.keyword’: ‘5c’}}wildcardsn.keyword0Default0.0062
{‘match’: {‘aname’: ‘a0’}}matchaname0wildcard0.0138
{‘match’: {‘aname.keyword’: ‘a0’}}matchaname.keyword0wildcard0.0065
{‘match_phrase’: {‘aname’: ‘a0’}}match_phraseaname0wildcard0.0061
{‘match_phrase’: {‘aname.keyword’: ‘a0’}}match_phraseaname.keyword0wildcard0.0048
{‘wildcard’: {‘aname’: ‘a0’}}wildcardaname1000wildcard0.0380
{‘wildcard’: {‘aname.keyword’: ‘a0’}}wildcardaname.keyword0wildcard0.0086
This post is licensed under CC BY 4.0 by the author.