Improve search and fix '*' in search box (#871)
* Improve ES search

  The new performance is very good. Some examples on my 1.5 GB VM:

    INFO[0153] Query 'shingeki' took 6 milliseconds.
    INFO[0125] Query 'アニメ' took 17 milliseconds.
    INFO[0102] Query 'shingeki -kyojin horrible ' took 12 milliseconds

  Looking at the criteria we wanted here: https://pad.riseup.net/p/i8DrilHDWRvf, it meets:

  1. Fast: sub-100ms for a typical query, sub-50ms is good and sub-20ms is optimal
  2. Prefix match: "horrible" finds "horriblesubs"
  3. Substring match? "アニメ" finds "TVアニメ"
  4. Position-independent terms ("shingeki kyojin" finds the same as "kyojin shingeki")
  5. Works with short term lengths correctly and fast ("no" in "kyoukai no kanata", "04" in "horrible shingeki 04", etc.)
  7. (nice to have) Search negation: "shingeki kyojin -horriblesubs"

* Use match_all query instead of '*', fix '*'
Cette révision appartient à :
Parent
00a885af28
révision
ba683c3bcb
4 fichiers modifiés avec 43 ajouts et 21 suppressions
|
@ -5,6 +5,7 @@ import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"github.com/gorilla/mux"
|
"github.com/gorilla/mux"
|
||||||
elastic "gopkg.in/olivere/elastic.v5"
|
elastic "gopkg.in/olivere/elastic.v5"
|
||||||
|
@ -38,10 +39,7 @@ type TorrentParam struct {
|
||||||
func (p *TorrentParam) FromRequest(r *http.Request) {
|
func (p *TorrentParam) FromRequest(r *http.Request) {
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
nameLike := r.URL.Query().Get("q")
|
nameLike := strings.TrimSpace(r.URL.Query().Get("q"))
|
||||||
if nameLike == "" {
|
|
||||||
nameLike = "*"
|
|
||||||
}
|
|
||||||
|
|
||||||
page := mux.Vars(r)["page"]
|
page := mux.Vars(r)["page"]
|
||||||
pagenum, err := strconv.ParseUint(page, 10, 32)
|
pagenum, err := strconv.ParseUint(page, 10, 32)
|
||||||
|
@ -143,10 +141,15 @@ func (p *TorrentParam) Find(client *elastic.Client) (int64, []model.Torrent, err
|
||||||
// TODO Why is it needed, what does it do ?
|
// TODO Why is it needed, what does it do ?
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
|
|
||||||
query := elastic.NewSimpleQueryStringQuery(p.NameLike).
|
var query elastic.Query
|
||||||
Field("name").
|
if p.NameLike == "" {
|
||||||
Analyzer(config.Conf.Search.ElasticsearchAnalyzer).
|
query = elastic.NewMatchAllQuery()
|
||||||
DefaultOperator("AND")
|
} else {
|
||||||
|
query = elastic.NewSimpleQueryStringQuery(p.NameLike).
|
||||||
|
Field("name").
|
||||||
|
Analyzer(config.Conf.Search.ElasticsearchAnalyzer).
|
||||||
|
DefaultOperator("AND")
|
||||||
|
}
|
||||||
|
|
||||||
fsc := elastic.NewFetchSourceContext(true).
|
fsc := elastic.NewFetchSourceContext(true).
|
||||||
Include("id")
|
Include("id")
|
||||||
|
|
|
@ -34,8 +34,8 @@ cache:
|
||||||
# Size by default for the cache
|
# Size by default for the cache
|
||||||
size: 1024
|
size: 1024
|
||||||
search:
|
search:
|
||||||
# default analyzer for ES
|
# default search analyzer for ES
|
||||||
es_analyze: nyaapantsu_analyzer
|
es_analyze: nyaapantsu_search_analyzer
|
||||||
# default search index for ES
|
# default search index for ES
|
||||||
es_index: nyaapantsu
|
es_index: nyaapantsu
|
||||||
# Name of the type in the es mapping
|
# Name of the type in the es mapping
|
||||||
|
@ -163,4 +163,4 @@ models:
|
||||||
# ReportsTableName = "sukebei_torrent_reports"
|
# ReportsTableName = "sukebei_torrent_reports"
|
||||||
# CommentsTableName = "sukebei_comments"
|
# CommentsTableName = "sukebei_comments"
|
||||||
# UploadsOldTableName = "sukebei_user_uploads_old"
|
# UploadsOldTableName = "sukebei_user_uploads_old"
|
||||||
# FilesTableName = "sukebei_files"
|
# FilesTableName = "sukebei_files"
|
||||||
|
|
|
@ -4,7 +4,6 @@ nyaapantsu_password: nyaapantsu
|
||||||
nyaapantsu_pgpool_port: 9998
|
nyaapantsu_pgpool_port: 9998
|
||||||
nyaapantsu_directory: /nyaapantsu/
|
nyaapantsu_directory: /nyaapantsu/
|
||||||
nyaapantsu_gpg_passphrase_file: "{{ nyaapantsu_directory }}/passphrase"
|
nyaapantsu_gpg_passphrase_file: "{{ nyaapantsu_directory }}/passphrase"
|
||||||
nyaapantsu_elasticsearch_index: nyaapantsu
|
|
||||||
nyaapantsu_build_directory: go_nyaa/
|
nyaapantsu_build_directory: go_nyaa/
|
||||||
nyaapantsu_elasticsearch_alias: nyaapantsu
|
nyaapantsu_elasticsearch_alias: nyaapantsu
|
||||||
# nyaapantsu_elasticsearch_alias: sukebei
|
# nyaapantsu_elasticsearch_alias: sukebei
|
||||||
|
|
|
@ -2,19 +2,37 @@
|
||||||
settings:
|
settings:
|
||||||
analysis:
|
analysis:
|
||||||
analyzer:
|
analyzer:
|
||||||
nyaapantsu_analyzer:
|
# Don't use ngram for search otherwise 'horribleexample' would match
|
||||||
tokenizer: nyaapantsu_tokenizer
|
# 'horriblesubs'
|
||||||
|
nyaapantsu_search_analyzer:
|
||||||
|
tokenizer: standard
|
||||||
filter:
|
filter:
|
||||||
- standard
|
- standard
|
||||||
- lowercase
|
- lowercase
|
||||||
tokenizer:
|
char_filter:
|
||||||
nyaapantsu_tokenizer:
|
- dash_to_underscore
|
||||||
|
|
||||||
|
nyaapantsu_index_analyzer:
|
||||||
|
tokenizer: standard
|
||||||
|
filter:
|
||||||
|
- standard
|
||||||
|
- lowercase
|
||||||
|
- e_ngram_filter
|
||||||
|
char_filter:
|
||||||
|
- dash_to_underscore
|
||||||
|
|
||||||
|
filter:
|
||||||
|
e_ngram_filter:
|
||||||
type: edge_ngram
|
type: edge_ngram
|
||||||
min_gram: 2
|
min_gram: 2
|
||||||
max_gram: 15
|
max_gram: 15
|
||||||
token_chars:
|
|
||||||
- letter
|
char_filter:
|
||||||
- digit
|
dash_to_underscore:
|
||||||
|
type: pattern_replace
|
||||||
|
pattern: "([^\\s]+)-(?=[^\\s]+)"
|
||||||
|
replacement: "$1_"
|
||||||
|
|
||||||
index:
|
index:
|
||||||
number_of_shards: 1
|
number_of_shards: 1
|
||||||
number_of_replicas: 0
|
number_of_replicas: 0
|
||||||
|
@ -28,8 +46,10 @@ mappings:
|
||||||
type: long
|
type: long
|
||||||
name:
|
name:
|
||||||
type: text
|
type: text
|
||||||
analyzer: nyaapantsu_analyzer
|
analyzer: nyaapantsu_index_analyzer
|
||||||
fielddata: true # Use to sort by id because it is currently a text field
|
fields:
|
||||||
|
raw:
|
||||||
|
type: keyword
|
||||||
category:
|
category:
|
||||||
type: text
|
type: text
|
||||||
sub_category:
|
sub_category:
|
||||||
|
|
Référencer dans un nouveau ticket