// Copyright 2018 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package migrations
import (
"fmt"
"regexp"
"strings"
"code.gitea.io/gitea/modules/log"
"github.com/go-xorm/xorm"
)
2018-06-27 08:23:10 +03:00
// topicPattern matches a topic name made only of lowercase ASCII letters,
// digits and hyphens, where the first character is a letter or a digit.
// The pattern is anchored on both ends and must not contain any padding
// whitespace, otherwise it can never match.
var topicPattern = regexp.MustCompile(`^[a-z0-9][a-z0-9-]*$`)

// validateTopic reports whether topic is an acceptable topic name: at most
// 35 bytes long and matching topicPattern. The empty string is rejected
// because the pattern requires at least one character.
func validateTopic(topic string) bool {
	return len(topic) <= 35 && topicPattern.MatchString(topic)
}
2018-06-21 12:09:46 +03:00
func reformatAndRemoveIncorrectTopics ( x * xorm . Engine ) ( err error ) {
log . Info ( "This migration could take up to minutes, please be patient." )
2018-06-27 08:23:10 +03:00
2018-06-21 12:09:46 +03:00
type Topic struct {
2018-06-27 08:23:10 +03:00
ID int64
2018-12-08 20:27:30 +08:00
Name string ` xorm:"UNIQUE VARCHAR(25)" `
2018-06-27 08:23:10 +03:00
RepoCount int
CreatedUnix int64 ` xorm:"INDEX created" `
UpdatedUnix int64 ` xorm:"INDEX updated" `
}
type RepoTopic struct {
RepoID int64 ` xorm:"UNIQUE(s)" `
TopicID int64 ` xorm:"UNIQUE(s)" `
}
type Repository struct {
ID int64 ` xorm:"pk autoincr" `
Topics [ ] string ` xorm:"TEXT JSON" `
}
if err := x . Sync2 ( new ( Topic ) ) ; err != nil {
return fmt . Errorf ( "Sync2: %v" , err )
}
if err := x . Sync2 ( new ( RepoTopic ) ) ; err != nil {
return fmt . Errorf ( "Sync2: %v" , err )
2018-06-21 12:09:46 +03:00
}
sess := x . NewSession ( )
defer sess . Close ( )
const batchSize = 100
touchedRepo := make ( map [ int64 ] struct { } )
delTopicIDs := make ( [ ] int64 , 0 , batchSize )
2018-06-27 08:23:10 +03:00
log . Info ( "Validating existed topics..." )
2018-06-21 12:09:46 +03:00
if err := sess . Begin ( ) ; err != nil {
return err
}
for start := 0 ; ; start += batchSize {
2018-06-27 08:23:10 +03:00
topics := make ( [ ] * Topic , 0 , batchSize )
if err := x . Cols ( "id" , "name" ) . Asc ( "id" ) . Limit ( batchSize , start ) . Find ( & topics ) ; err != nil {
2018-06-21 12:09:46 +03:00
return err
}
if len ( topics ) == 0 {
break
}
for _ , topic := range topics {
2018-06-27 08:23:10 +03:00
if validateTopic ( topic . Name ) {
2018-06-21 12:09:46 +03:00
continue
}
2018-06-27 08:23:10 +03:00
log . Info ( "Incorrect topic: id = %v, name = %q" , topic . ID , topic . Name )
2018-06-21 12:09:46 +03:00
topic . Name = strings . Replace ( strings . TrimSpace ( strings . ToLower ( topic . Name ) ) , " " , "-" , - 1 )
2018-06-27 08:23:10 +03:00
ids := make ( [ ] int64 , 0 , 30 )
2018-06-21 12:09:46 +03:00
if err := sess . Table ( "repo_topic" ) . Cols ( "repo_id" ) .
Where ( "topic_id = ?" , topic . ID ) . Find ( & ids ) ; err != nil {
return err
}
2018-06-27 08:23:10 +03:00
log . Info ( "Touched repo ids: %v" , ids )
2018-06-21 12:09:46 +03:00
for _ , id := range ids {
touchedRepo [ id ] = struct { } { }
}
2018-06-27 08:23:10 +03:00
if validateTopic ( topic . Name ) {
unifiedTopic := Topic { Name : topic . Name }
exists , err := sess . Cols ( "id" , "name" ) . Get ( & unifiedTopic )
log . Info ( "Exists topic with the name %q? %v, id = %v" , topic . Name , exists , unifiedTopic . ID )
if err != nil {
2018-06-21 12:09:46 +03:00
return err
}
2018-06-27 08:23:10 +03:00
if exists {
log . Info ( "Updating repo_topic rows with topic_id = %v to topic_id = %v" , topic . ID , unifiedTopic . ID )
if _ , err := sess . Where ( "topic_id = ? AND repo_id NOT IN " +
"(SELECT rt1.repo_id FROM repo_topic rt1 INNER JOIN repo_topic rt2 " +
"ON rt1.repo_id = rt2.repo_id WHERE rt1.topic_id = ? AND rt2.topic_id = ?)" ,
topic . ID , topic . ID , unifiedTopic . ID ) . Update ( & RepoTopic { TopicID : unifiedTopic . ID } ) ; err != nil {
return err
}
log . Info ( "Updating topic `repo_count` field" )
if _ , err := sess . Exec (
"UPDATE topic SET repo_count = (SELECT COUNT(*) FROM repo_topic WHERE topic_id = ? GROUP BY topic_id) WHERE id = ?" ,
unifiedTopic . ID , unifiedTopic . ID ) ; err != nil {
return err
}
} else {
log . Info ( "Updating topic: id = %v, name = %q" , topic . ID , topic . Name )
if _ , err := sess . Table ( "topic" ) . ID ( topic . ID ) .
Update ( & Topic { Name : topic . Name } ) ; err != nil {
return err
}
continue
}
2018-06-21 12:09:46 +03:00
}
2018-06-27 08:23:10 +03:00
delTopicIDs = append ( delTopicIDs , topic . ID )
2018-06-21 12:09:46 +03:00
}
}
2018-06-27 08:23:10 +03:00
if err := sess . Commit ( ) ; err != nil {
return err
}
2018-06-21 12:09:46 +03:00
2018-06-27 08:23:10 +03:00
sess . Init ( )
2018-06-21 12:09:46 +03:00
2018-06-27 08:23:10 +03:00
log . Info ( "Deleting incorrect topics..." )
if err := sess . Begin ( ) ; err != nil {
return err
}
log . Info ( "Deleting 'repo_topic' rows for topics with ids = %v" , delTopicIDs )
if _ , err := sess . In ( "topic_id" , delTopicIDs ) . Delete ( & RepoTopic { } ) ; err != nil {
return err
}
log . Info ( "Deleting topics with id = %v" , delTopicIDs )
if _ , err := sess . In ( "id" , delTopicIDs ) . Delete ( & Topic { } ) ; err != nil {
return err
}
if err := sess . Commit ( ) ; err != nil {
return err
2018-06-21 12:09:46 +03:00
}
2018-06-27 08:23:10 +03:00
delRepoTopics := make ( [ ] * RepoTopic , 0 , batchSize )
2018-06-21 12:09:46 +03:00
log . Info ( "Checking the number of topics in the repositories..." )
for start := 0 ; ; start += batchSize {
2018-06-27 08:23:10 +03:00
repoTopics := make ( [ ] * RepoTopic , 0 , batchSize )
if err := x . Cols ( "repo_id" ) . Asc ( "repo_id" ) . Limit ( batchSize , start ) .
2018-06-21 12:09:46 +03:00
GroupBy ( "repo_id" ) . Having ( "COUNT(*) > 25" ) . Find ( & repoTopics ) ; err != nil {
return err
}
if len ( repoTopics ) == 0 {
break
}
log . Info ( "Number of repositories with more than 25 topics: %v" , len ( repoTopics ) )
for _ , repoTopic := range repoTopics {
touchedRepo [ repoTopic . RepoID ] = struct { } { }
2018-06-27 08:23:10 +03:00
tmpRepoTopics := make ( [ ] * RepoTopic , 0 , 30 )
if err := x . Where ( "repo_id = ?" , repoTopic . RepoID ) . Find ( & tmpRepoTopics ) ; err != nil {
2018-06-21 12:09:46 +03:00
return err
}
log . Info ( "Repository with id = %v has %v topics" , repoTopic . RepoID , len ( tmpRepoTopics ) )
for i := len ( tmpRepoTopics ) - 1 ; i > 24 ; i -- {
delRepoTopics = append ( delRepoTopics , tmpRepoTopics [ i ] )
}
}
}
2018-06-27 08:23:10 +03:00
sess . Init ( )
2018-06-21 12:09:46 +03:00
log . Info ( "Deleting superfluous topics for repositories (more than 25 topics)..." )
2018-06-27 08:23:10 +03:00
if err := sess . Begin ( ) ; err != nil {
return err
}
2018-06-21 12:09:46 +03:00
for _ , repoTopic := range delRepoTopics {
log . Info ( "Deleting 'repo_topic' rows for 'repository' with id = %v. Topic id = %v" ,
repoTopic . RepoID , repoTopic . TopicID )
if _ , err := sess . Where ( "repo_id = ? AND topic_id = ?" , repoTopic . RepoID ,
2018-06-27 08:23:10 +03:00
repoTopic . TopicID ) . Delete ( & RepoTopic { } ) ; err != nil {
2018-06-21 12:09:46 +03:00
return err
}
if _ , err := sess . Exec (
"UPDATE topic SET repo_count = (SELECT repo_count FROM topic WHERE id = ?) - 1 WHERE id = ?" ,
repoTopic . TopicID , repoTopic . TopicID ) ; err != nil {
return err
}
}
log . Info ( "Updating repositories 'topics' fields..." )
for repoID := range touchedRepo {
2018-06-27 08:23:10 +03:00
topicNames := make ( [ ] string , 0 , 30 )
2018-06-21 12:09:46 +03:00
if err := sess . Table ( "topic" ) . Cols ( "name" ) .
2018-06-27 08:23:10 +03:00
Join ( "INNER" , "repo_topic" , "repo_topic.topic_id = topic.id" ) .
Where ( "repo_topic.repo_id = ?" , repoID ) . Desc ( "topic.repo_count" ) . Find ( & topicNames ) ; err != nil {
2018-06-21 12:09:46 +03:00
return err
}
log . Info ( "Updating 'topics' field for repository with id = %v" , repoID )
if _ , err := sess . ID ( repoID ) . Cols ( "topics" ) .
2018-06-27 08:23:10 +03:00
Update ( & Repository { Topics : topicNames } ) ; err != nil {
2018-06-21 12:09:46 +03:00
return err
}
}
2018-10-18 12:51:07 +08:00
return sess . Commit ( )
2018-06-21 12:09:46 +03:00
}