From a0e0a30d23e0890538bbe8e9edc3c07ae5242bb1 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Sat, 15 Mar 2025 02:06:31 +0800 Subject: [PATCH] Make SearchMode have default value and add comments (#33863) * Make `SearchMode` have default value if it is empty * Add some comments for the "match" queries * Fix a copy-paste mistake in `buildMatchQuery` (`db.go`) * Add missing `q.Analyzer = repoIndexerAnalyzer`, it is in old code, although I do not see real difference .... --- modules/indexer/code/bleve/bleve.go | 9 +++++++-- modules/indexer/code/elasticsearch/elasticsearch.go | 5 ++++- modules/indexer/code/indexer_test.go | 3 ++- modules/indexer/issues/bleve/bleve.go | 6 ++++-- modules/indexer/issues/db/db.go | 11 ++++++----- modules/indexer/issues/elasticsearch/elasticsearch.go | 4 +++- modules/indexer/issues/indexer.go | 7 +++---- modules/indexer/issues/indexer_test.go | 8 +++++--- 8 files changed, 34 insertions(+), 19 deletions(-) diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index 52a934d4ff..e1a381f992 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -25,6 +25,7 @@ import ( "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/typesniffer" + "code.gitea.io/gitea/modules/util" "github.com/blevesearch/bleve/v2" analyzer_custom "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" @@ -272,14 +273,18 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int pathQuery.FieldVal = "Filename" pathQuery.SetBoost(10) - if opts.SearchMode == indexer.SearchModeExact { + searchMode := util.IfZero(opts.SearchMode, b.SupportedSearchModes()[0].ModeValue) + if searchMode == indexer.SearchModeExact { + // 1.21 used NewPrefixQuery, but it seems not working well, and later releases changed to NewMatchPhraseQuery q := bleve.NewMatchPhraseQuery(opts.Keyword) + q.Analyzer = repoIndexerAnalyzer q.FieldVal = "Content" contentQuery = q } else /* words */ { q := bleve.NewMatchQuery(opts.Keyword) q.FieldVal = "Content" - if opts.SearchMode == indexer.SearchModeFuzzy { + q.Analyzer = repoIndexerAnalyzer + if searchMode == indexer.SearchModeFuzzy { // this logic doesn't seem right, it is only used to pass the test-case `Keyword: "dESCRIPTION"`, which doesn't seem to be a real-life use-case. q.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword) } else { diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index 354a8334fb..be8efad5fd 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -25,6 +25,7 @@ import ( "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/typesniffer" + "code.gitea.io/gitea/modules/util" "github.com/go-enry/go-enry/v2" "github.com/olivere/elastic/v7" @@ -365,7 +366,9 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan // Search searches for codes and language stats by given conditions. func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) { var contentQuery elastic.Query - if opts.SearchMode == indexer.SearchModeExact { + searchMode := util.IfZero(opts.SearchMode, b.SupportedSearchModes()[0].ModeValue) + if searchMode == indexer.SearchModeExact { + // 1.21 used NewMultiMatchQuery().Type(esMultiMatchTypePhrasePrefix), but later releases changed to NewMatchPhraseQuery contentQuery = elastic.NewMatchPhraseQuery("content", opts.Keyword) } else /* words */ { contentQuery = elastic.NewMultiMatchQuery("content", opts.Keyword).Type(esMultiMatchTypeBestFields).Operator("and") diff --git a/modules/indexer/code/indexer_test.go b/modules/indexer/code/indexer_test.go index 96516166a0..1d9bf99d40 100644 --- a/modules/indexer/code/indexer_test.go +++ b/modules/indexer/code/indexer_test.go @@ -17,6 +17,7 @@ import ( "code.gitea.io/gitea/modules/indexer/code/internal" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/test" + "code.gitea.io/gitea/modules/util" _ "code.gitea.io/gitea/models" _ "code.gitea.io/gitea/models/actions" @@ -240,7 +241,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { total, res, langs, err := indexer.Search(t.Context(), &internal.SearchOptions{ RepoIDs: kw.RepoIDs, Keyword: kw.Keyword, - SearchMode: kw.SearchMode, + SearchMode: util.IfZero(kw.SearchMode, indexer_module.SearchModeWords), Paginator: &db.ListOptions{ Page: 1, PageSize: 10, diff --git a/modules/indexer/issues/bleve/bleve.go b/modules/indexer/issues/bleve/bleve.go index e778df86c1..9534b0b750 100644 --- a/modules/indexer/issues/bleve/bleve.go +++ b/modules/indexer/issues/bleve/bleve.go @@ -10,6 +10,7 @@ import ( indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve" "code.gitea.io/gitea/modules/indexer/issues/internal" + "code.gitea.io/gitea/modules/util" "github.com/blevesearch/bleve/v2" "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" @@ -162,9 +163,10 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( var queries []query.Query if options.Keyword != "" { - if options.SearchMode == indexer.SearchModeWords || options.SearchMode == indexer.SearchModeFuzzy { + searchMode := util.IfZero(options.SearchMode, b.SupportedSearchModes()[0].ModeValue) + if searchMode == indexer.SearchModeWords || searchMode == indexer.SearchModeFuzzy { fuzziness := 0 - if options.SearchMode == indexer.SearchModeFuzzy { + if searchMode == indexer.SearchModeFuzzy { fuzziness = inner_bleve.GuessFuzzinessByKeyword(options.Keyword) } queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ diff --git a/modules/indexer/issues/db/db.go b/modules/indexer/issues/db/db.go index 493f6dd0b0..6124ad2515 100644 --- a/modules/indexer/issues/db/db.go +++ b/modules/indexer/issues/db/db.go @@ -13,6 +13,7 @@ import ( indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_db "code.gitea.io/gitea/modules/indexer/internal/db" "code.gitea.io/gitea/modules/indexer/issues/internal" + "code.gitea.io/gitea/modules/util" "xorm.io/builder" ) @@ -46,7 +47,7 @@ func (i *Indexer) Delete(_ context.Context, _ ...int64) error { func buildMatchQuery(mode indexer.SearchModeType, colName, keyword string) builder.Cond { if mode == indexer.SearchModeExact { - return db.BuildCaseInsensitiveLike("issue.name", keyword) + return db.BuildCaseInsensitiveLike(colName, keyword) } // match words @@ -84,16 +85,16 @@ func (i *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( repoCond = builder.Eq{"repo_id": options.RepoIDs[0]} } subQuery := builder.Select("id").From("issue").Where(repoCond) - + searchMode := util.IfZero(options.SearchMode, i.SupportedSearchModes()[0].ModeValue) cond = builder.Or( - buildMatchQuery(options.SearchMode, "issue.name", options.Keyword), - buildMatchQuery(options.SearchMode, "issue.content", options.Keyword), + buildMatchQuery(searchMode, "issue.name", options.Keyword), + buildMatchQuery(searchMode, "issue.content", options.Keyword), builder.In("issue.id", builder.Select("issue_id"). From("comment"). Where(builder.And( builder.Eq{"type": issue_model.CommentTypeComment}, builder.In("issue_id", subQuery), - buildMatchQuery(options.SearchMode, "content", options.Keyword), + buildMatchQuery(searchMode, "content", options.Keyword), )), ), ) diff --git a/modules/indexer/issues/elasticsearch/elasticsearch.go b/modules/indexer/issues/elasticsearch/elasticsearch.go index 2e4e172540..464c0028f2 100644 --- a/modules/indexer/issues/elasticsearch/elasticsearch.go +++ b/modules/indexer/issues/elasticsearch/elasticsearch.go @@ -14,6 +14,7 @@ import ( indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch" "code.gitea.io/gitea/modules/indexer/issues/internal" + "code.gitea.io/gitea/modules/util" "github.com/olivere/elastic/v7" ) @@ -152,7 +153,8 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( query := elastic.NewBoolQuery() if options.Keyword != "" { - if options.SearchMode == indexer.SearchModeExact { + searchMode := util.IfZero(options.SearchMode, b.SupportedSearchModes()[0].ModeValue) + if searchMode == indexer.SearchModeExact { query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(esMultiMatchTypePhrasePrefix)) } else /* words */ { query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(esMultiMatchTypeBestFields).Operator("and")) diff --git a/modules/indexer/issues/indexer.go b/modules/indexer/issues/indexer.go index a712efbc8b..4741235d47 100644 --- a/modules/indexer/issues/indexer.go +++ b/modules/indexer/issues/indexer.go @@ -282,7 +282,7 @@ const ( // SearchIssues search issues by options. func SearchIssues(ctx context.Context, opts *SearchOptions) ([]int64, int64, error) { - indexer := *globalIndexer.Load() + ix := *globalIndexer.Load() if opts.Keyword == "" || opts.IsKeywordNumeric() { // This is a conservative shortcut. @@ -291,10 +291,9 @@ func SearchIssues(ctx context.Context, opts *SearchOptions) ([]int64, int64, err // So if the user creates an issue and list issues immediately, the issue may not be listed because the indexer needs time to index the issue. // Even worse, the external indexer like elastic search may not be available for a while, // and the user may not be able to list issues completely until it is available again. - indexer = db.NewIndexer() + ix = db.NewIndexer() } - - result, err := indexer.Search(ctx, opts) + result, err := ix.Search(ctx, opts) if err != nil { return nil, 0, err } diff --git a/modules/indexer/issues/indexer_test.go b/modules/indexer/issues/indexer_test.go index 7def2a2c6e..14dd6ba101 100644 --- a/modules/indexer/issues/indexer_test.go +++ b/modules/indexer/issues/indexer_test.go @@ -82,9 +82,11 @@ func searchIssueWithKeyword(t *testing.T) { } for _, test := range tests { - issueIDs, _, err := SearchIssues(t.Context(), &test.opts) - require.NoError(t, err) - assert.Equal(t, test.expectedIDs, issueIDs) + t.Run(test.opts.Keyword, func(t *testing.T) { + issueIDs, _, err := SearchIssues(t.Context(), &test.opts) + require.NoError(t, err) + assert.Equal(t, test.expectedIDs, issueIDs) + }) } }