// Copyright 2017 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package indexer
import (
"strings"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"github.com/blevesearch/bleve"
"github.com/blevesearch/bleve/analysis/analyzer/custom"
"github.com/blevesearch/bleve/analysis/token/camelcase"
"github.com/blevesearch/bleve/analysis/token/lowercase"
2018-02-05 10:29:17 -08:00
"github.com/blevesearch/bleve/analysis/token/unique"
2017-10-26 23:10:54 -07:00
"github.com/blevesearch/bleve/analysis/tokenizer/unicode"
2018-03-16 22:04:33 +08:00
"github.com/blevesearch/bleve/search/query"
2018-02-05 10:29:17 -08:00
"github.com/ethantkoenig/rupture"
2017-10-26 23:10:54 -07:00
)
2018-02-05 10:29:17 -08:00
const (
	// repoIndexerAnalyzer is the name under which the custom analyzer is
	// registered in createRepoIndexer and referenced by search queries.
	repoIndexerAnalyzer = "repoIndexerAnalyzer"
	// repoIndexerDocType is the bleve document type returned by
	// RepoIndexerData.Type and mapped in createRepoIndexer.
	repoIndexerDocType = "repoIndexerDocType"
	// repoIndexerLatestVersion is the current on-disk index format
	// version, passed to openIndexer in InitRepoIndexer.
	repoIndexerLatestVersion = 1
)
2017-10-26 23:10:54 -07:00
// repoIndexer (thread-safe) index for repository contents.
// Assigned by InitRepoIndexer (via openIndexer) or createRepoIndexer.
var repoIndexer bleve.Index
// RepoIndexerOp type of operation to perform on repo indexer
type RepoIndexerOp int

const (
	// RepoIndexerOpUpdate add/update a file's contents
	// Note: explicitly typed as RepoIndexerOp so the constants cannot be
	// confused with plain ints (previously they were untyped).
	RepoIndexerOpUpdate RepoIndexerOp = iota

	// RepoIndexerOpDelete delete a file
	RepoIndexerOpDelete
)
// RepoIndexerData data stored in the repo indexer
type RepoIndexerData struct {
	RepoID  int64  // ID of the repository the file belongs to
	Content string // file content to index/search (matched by the "Content" field queries)
}
2018-02-05 10:29:17 -08:00
// Type returns the document type, for bleve's mapping.Classifier interface.
// It matches the document mapping registered in createRepoIndexer.
func (d *RepoIndexerData) Type() string {
	return repoIndexerDocType
}
2017-10-26 23:10:54 -07:00
// RepoIndexerUpdate an update to the repo indexer
type RepoIndexerUpdate struct {
	Filepath string           // path of the file within the repository
	Op       RepoIndexerOp    // whether to add/update or delete the file
	Data     *RepoIndexerData // payload; its RepoID also forms part of the document ID
}
2018-02-05 10:29:17 -08:00
// AddToFlushingBatch adds the update to the given flushing batch.
func ( update RepoIndexerUpdate ) AddToFlushingBatch ( batch rupture . FlushingBatch ) error {
2017-10-26 23:10:54 -07:00
id := filenameIndexerID ( update . Data . RepoID , update . Filepath )
switch update . Op {
case RepoIndexerOpUpdate :
return batch . Index ( id , update . Data )
case RepoIndexerOpDelete :
2018-02-05 10:29:17 -08:00
return batch . Delete ( id )
2017-10-26 23:10:54 -07:00
default :
log . Error ( 4 , "Unrecognized repo indexer op: %d" , update . Op )
}
return nil
}
// InitRepoIndexer initialize repo indexer
func InitRepoIndexer ( populateIndexer func ( ) error ) {
2018-02-05 10:29:17 -08:00
var err error
repoIndexer , err = openIndexer ( setting . Indexer . RepoPath , repoIndexerLatestVersion )
2017-10-26 23:10:54 -07:00
if err != nil {
2018-02-05 10:29:17 -08:00
log . Fatal ( 4 , "InitRepoIndexer: %v" , err )
}
if repoIndexer != nil {
return
}
if err = createRepoIndexer ( ) ; err != nil {
log . Fatal ( 4 , "CreateRepoIndexer: %v" , err )
}
if err = populateIndexer ( ) ; err != nil {
log . Fatal ( 4 , "PopulateRepoIndex: %v" , err )
2017-10-26 23:10:54 -07:00
}
}
// createRepoIndexer create a repo indexer if one does not already exist
func createRepoIndexer ( ) error {
2018-02-05 10:29:17 -08:00
var err error
2017-10-26 23:10:54 -07:00
docMapping := bleve . NewDocumentMapping ( )
2018-02-05 10:29:17 -08:00
numericFieldMapping := bleve . NewNumericFieldMapping ( )
numericFieldMapping . IncludeInAll = false
docMapping . AddFieldMappingsAt ( "RepoID" , numericFieldMapping )
2017-10-26 23:10:54 -07:00
textFieldMapping := bleve . NewTextFieldMapping ( )
2018-02-05 10:29:17 -08:00
textFieldMapping . IncludeInAll = false
2017-10-26 23:10:54 -07:00
docMapping . AddFieldMappingsAt ( "Content" , textFieldMapping )
mapping := bleve . NewIndexMapping ( )
2018-02-05 10:29:17 -08:00
if err = addUnicodeNormalizeTokenFilter ( mapping ) ; err != nil {
2017-10-26 23:10:54 -07:00
return err
2018-02-05 10:29:17 -08:00
} else if err = mapping . AddCustomAnalyzer ( repoIndexerAnalyzer , map [ string ] interface { } {
2017-10-26 23:10:54 -07:00
"type" : custom . Name ,
"char_filters" : [ ] string { } ,
"tokenizer" : unicode . Name ,
2018-02-05 10:29:17 -08:00
"token_filters" : [ ] string { unicodeNormalizeName , camelcase . Name , lowercase . Name , unique . Name } ,
2017-10-26 23:10:54 -07:00
} ) ; err != nil {
return err
}
mapping . DefaultAnalyzer = repoIndexerAnalyzer
2018-02-05 10:29:17 -08:00
mapping . AddDocumentMapping ( repoIndexerDocType , docMapping )
mapping . AddDocumentMapping ( "_all" , bleve . NewDocumentDisabledMapping ( ) )
2017-10-26 23:10:54 -07:00
repoIndexer , err = bleve . New ( setting . Indexer . RepoPath , mapping )
return err
}
// filenameIndexerID builds the bleve document ID for a file: the repository's
// indexer ID and the file path joined by an underscore.
func filenameIndexerID(repoID int64, filename string) string {
	return strings.Join([]string{indexerID(repoID), filename}, "_")
}
// filenameOfIndexerID extracts the file path from an indexer document ID,
// i.e. everything after the first '_' separator.
func filenameOfIndexerID(indexerID string) string {
	sep := strings.IndexByte(indexerID, '_')
	if sep < 0 {
		// Should never happen: IDs are always built by filenameIndexerID.
		log.Error(4, "Unexpected ID in repo indexer: %s", indexerID)
	}
	return indexerID[sep+1:]
}
// RepoIndexerBatch batch to add updates to.
// The batch is backed by repoIndexer and sized by maxBatchSize
// (presumably flushed automatically in chunks of that size by rupture —
// see rupture.NewFlushingBatch).
func RepoIndexerBatch() rupture.FlushingBatch {
	return rupture.NewFlushingBatch(repoIndexer, maxBatchSize)
}
// DeleteRepoFromIndexer delete all of a repo's files from indexer
func DeleteRepoFromIndexer ( repoID int64 ) error {
query := numericEqualityQuery ( repoID , "RepoID" )
searchRequest := bleve . NewSearchRequestOptions ( query , 2147483647 , 0 , false )
result , err := repoIndexer . Search ( searchRequest )
if err != nil {
return err
}
batch := RepoIndexerBatch ( )
for _ , hit := range result . Hits {
2018-02-05 10:29:17 -08:00
if err = batch . Delete ( hit . ID ) ; err != nil {
2017-10-26 23:10:54 -07:00
return err
}
}
return batch . Flush ( )
}
// RepoSearchResult result of performing a search in a repo
type RepoSearchResult struct {
	RepoID     int64  // repository the matching file belongs to
	StartIndex int    // offset of the earliest match location within Content (-1 if none)
	EndIndex   int    // offset just past the latest match location within Content (-1 if none)
	Filename   string // file path, recovered from the indexer document ID
	Content    string // stored content of the matching file
}
// SearchRepoByKeyword searches for files in the specified repo.
// Returns the matching file-paths
//
// repoIDs restricts the search to those repositories; an empty slice searches
// everything. Results are paginated (pageSize per page) and the total hit
// count is returned alongside the page of results.
func SearchRepoByKeyword(repoIDs []int64, keyword string, page, pageSize int) (int64, []*RepoSearchResult, error) {
	// Phrase-match the keyword against Content, using the same analyzer
	// the content was indexed with.
	phraseQuery := bleve.NewMatchPhraseQuery(keyword)
	phraseQuery.FieldVal = "Content"
	phraseQuery.Analyzer = repoIndexerAnalyzer
	// When repo IDs are given, combine them as
	// (RepoID == id1 OR RepoID == id2 OR ...) AND phrase.
	var indexerQuery query.Query
	if len(repoIDs) > 0 {
		var repoQueries = make([]query.Query, 0, len(repoIDs))
		for _, repoID := range repoIDs {
			repoQueries = append(repoQueries, numericEqualityQuery(repoID, "RepoID"))
		}
		indexerQuery = bleve.NewConjunctionQuery(
			bleve.NewDisjunctionQuery(repoQueries...),
			phraseQuery,
		)
	} else {
		indexerQuery = phraseQuery
	}
	from := (page - 1) * pageSize // assumes page is 1-based — TODO confirm with callers
	searchRequest := bleve.NewSearchRequestOptions(indexerQuery, pageSize, from, false)
	searchRequest.Fields = []string{"Content", "RepoID"}
	searchRequest.IncludeLocations = true // required to compute Start/EndIndex below
	result, err := repoIndexer.Search(searchRequest)
	if err != nil {
		return 0, nil, err
	}
	searchResults := make([]*RepoSearchResult, len(result.Hits))
	for i, hit := range result.Hits {
		// Compute the smallest span of Content that covers the first
		// location of every matched term; -1/-1 if there are none.
		var startIndex, endIndex int = -1, -1
		for _, locations := range hit.Locations["Content"] {
			location := locations[0] // only the first occurrence of each term is considered
			locationStart := int(location.Start)
			locationEnd := int(location.End)
			if startIndex < 0 || locationStart < startIndex {
				startIndex = locationStart
			}
			if endIndex < 0 || locationEnd > endIndex {
				endIndex = locationEnd
			}
		}
		searchResults[i] = &RepoSearchResult{
			// bleve returns stored numeric fields as float64.
			RepoID:     int64(hit.Fields["RepoID"].(float64)),
			StartIndex: startIndex,
			EndIndex:   endIndex,
			Filename:   filenameOfIndexerID(hit.ID),
			Content:    hit.Fields["Content"].(string),
		}
	}
	return int64(result.Total), searchResults, nil
}