1
0
mirror of https://github.com/go-gitea/gitea.git synced 2025-03-22 14:50:13 +03:00

Make SearchMode have default value and add comments (#33863)

* Make `SearchMode` have default value if it is empty
* Add some comments for the "match" queries
* Fix a copy-paste mistake in `buildMatchQuery` (`db.go`)
* Add missing `q.Analyzer = repoIndexerAnalyzer`, it is in old code,
although I do not see real difference ....
This commit is contained in:
wxiaoguang 2025-03-15 02:06:31 +08:00 committed by GitHub
parent 45c4139134
commit a0e0a30d23
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 34 additions and 19 deletions

View File

@ -25,6 +25,7 @@ import (
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/typesniffer"
"code.gitea.io/gitea/modules/util"
"github.com/blevesearch/bleve/v2"
analyzer_custom "github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
@ -272,14 +273,18 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
pathQuery.FieldVal = "Filename"
pathQuery.SetBoost(10)
if opts.SearchMode == indexer.SearchModeExact {
searchMode := util.IfZero(opts.SearchMode, b.SupportedSearchModes()[0].ModeValue)
if searchMode == indexer.SearchModeExact {
// 1.21 used NewPrefixQuery, but it seems not working well, and later releases changed to NewMatchPhraseQuery
q := bleve.NewMatchPhraseQuery(opts.Keyword)
q.Analyzer = repoIndexerAnalyzer
q.FieldVal = "Content"
contentQuery = q
} else /* words */ {
q := bleve.NewMatchQuery(opts.Keyword)
q.FieldVal = "Content"
if opts.SearchMode == indexer.SearchModeFuzzy {
q.Analyzer = repoIndexerAnalyzer
if searchMode == indexer.SearchModeFuzzy {
// this logic doesn't seem right, it is only used to pass the test-case `Keyword: "dESCRIPTION"`, which doesn't seem to be a real-life use-case.
q.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword)
} else {

View File

@ -25,6 +25,7 @@ import (
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/typesniffer"
"code.gitea.io/gitea/modules/util"
"github.com/go-enry/go-enry/v2"
"github.com/olivere/elastic/v7"
@ -365,7 +366,9 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan
// Search searches for codes and language stats by given conditions.
func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
var contentQuery elastic.Query
if opts.SearchMode == indexer.SearchModeExact {
searchMode := util.IfZero(opts.SearchMode, b.SupportedSearchModes()[0].ModeValue)
if searchMode == indexer.SearchModeExact {
// 1.21 used NewMultiMatchQuery().Type(esMultiMatchTypePhrasePrefix), but later releases changed to NewMatchPhraseQuery
contentQuery = elastic.NewMatchPhraseQuery("content", opts.Keyword)
} else /* words */ {
contentQuery = elastic.NewMultiMatchQuery("content", opts.Keyword).Type(esMultiMatchTypeBestFields).Operator("and")

View File

@ -17,6 +17,7 @@ import (
"code.gitea.io/gitea/modules/indexer/code/internal"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/test"
"code.gitea.io/gitea/modules/util"
_ "code.gitea.io/gitea/models"
_ "code.gitea.io/gitea/models/actions"
@ -240,7 +241,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
total, res, langs, err := indexer.Search(t.Context(), &internal.SearchOptions{
RepoIDs: kw.RepoIDs,
Keyword: kw.Keyword,
SearchMode: kw.SearchMode,
SearchMode: util.IfZero(kw.SearchMode, indexer_module.SearchModeWords),
Paginator: &db.ListOptions{
Page: 1,
PageSize: 10,

View File

@ -10,6 +10,7 @@ import (
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve"
"code.gitea.io/gitea/modules/indexer/issues/internal"
"code.gitea.io/gitea/modules/util"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
@ -162,9 +163,10 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
var queries []query.Query
if options.Keyword != "" {
if options.SearchMode == indexer.SearchModeWords || options.SearchMode == indexer.SearchModeFuzzy {
searchMode := util.IfZero(options.SearchMode, b.SupportedSearchModes()[0].ModeValue)
if searchMode == indexer.SearchModeWords || searchMode == indexer.SearchModeFuzzy {
fuzziness := 0
if options.SearchMode == indexer.SearchModeFuzzy {
if searchMode == indexer.SearchModeFuzzy {
fuzziness = inner_bleve.GuessFuzzinessByKeyword(options.Keyword)
}
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{

View File

@ -13,6 +13,7 @@ import (
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
inner_db "code.gitea.io/gitea/modules/indexer/internal/db"
"code.gitea.io/gitea/modules/indexer/issues/internal"
"code.gitea.io/gitea/modules/util"
"xorm.io/builder"
)
@ -46,7 +47,7 @@ func (i *Indexer) Delete(_ context.Context, _ ...int64) error {
func buildMatchQuery(mode indexer.SearchModeType, colName, keyword string) builder.Cond {
if mode == indexer.SearchModeExact {
return db.BuildCaseInsensitiveLike("issue.name", keyword)
return db.BuildCaseInsensitiveLike(colName, keyword)
}
// match words
@ -84,16 +85,16 @@ func (i *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
repoCond = builder.Eq{"repo_id": options.RepoIDs[0]}
}
subQuery := builder.Select("id").From("issue").Where(repoCond)
searchMode := util.IfZero(options.SearchMode, i.SupportedSearchModes()[0].ModeValue)
cond = builder.Or(
buildMatchQuery(options.SearchMode, "issue.name", options.Keyword),
buildMatchQuery(options.SearchMode, "issue.content", options.Keyword),
buildMatchQuery(searchMode, "issue.name", options.Keyword),
buildMatchQuery(searchMode, "issue.content", options.Keyword),
builder.In("issue.id", builder.Select("issue_id").
From("comment").
Where(builder.And(
builder.Eq{"type": issue_model.CommentTypeComment},
builder.In("issue_id", subQuery),
buildMatchQuery(options.SearchMode, "content", options.Keyword),
buildMatchQuery(searchMode, "content", options.Keyword),
)),
),
)

View File

@ -14,6 +14,7 @@ import (
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch"
"code.gitea.io/gitea/modules/indexer/issues/internal"
"code.gitea.io/gitea/modules/util"
"github.com/olivere/elastic/v7"
)
@ -152,7 +153,8 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
query := elastic.NewBoolQuery()
if options.Keyword != "" {
if options.SearchMode == indexer.SearchModeExact {
searchMode := util.IfZero(options.SearchMode, b.SupportedSearchModes()[0].ModeValue)
if searchMode == indexer.SearchModeExact {
query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(esMultiMatchTypePhrasePrefix))
} else /* words */ {
query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(esMultiMatchTypeBestFields).Operator("and"))

View File

@ -282,7 +282,7 @@ const (
// SearchIssues search issues by options.
func SearchIssues(ctx context.Context, opts *SearchOptions) ([]int64, int64, error) {
indexer := *globalIndexer.Load()
ix := *globalIndexer.Load()
if opts.Keyword == "" || opts.IsKeywordNumeric() {
// This is a conservative shortcut.
@ -291,10 +291,9 @@ func SearchIssues(ctx context.Context, opts *SearchOptions) ([]int64, int64, err
// So if the user creates an issue and list issues immediately, the issue may not be listed because the indexer needs time to index the issue.
// Even worse, the external indexer like elastic search may not be available for a while,
// and the user may not be able to list issues completely until it is available again.
indexer = db.NewIndexer()
ix = db.NewIndexer()
}
result, err := indexer.Search(ctx, opts)
result, err := ix.Search(ctx, opts)
if err != nil {
return nil, 0, err
}

View File

@ -82,9 +82,11 @@ func searchIssueWithKeyword(t *testing.T) {
}
for _, test := range tests {
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
require.NoError(t, err)
assert.Equal(t, test.expectedIDs, issueIDs)
t.Run(test.opts.Keyword, func(t *testing.T) {
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
require.NoError(t, err)
assert.Equal(t, test.expectedIDs, issueIDs)
})
}
}