Refactor search

This commit is contained in:
2025-09-08 18:15:43 +08:00
parent 62d10a989d
commit d1da0dc948
7 changed files with 504 additions and 183 deletions

View File

@@ -12,6 +12,10 @@ import (
// db is the package-level gorm handle; init assigns it from gorm.Open and
// GetDB hands it out to callers.
var db *gorm.DB
var (
// ready starts false and is set to true by init once gorm.Open succeeds;
// IsReady exposes it to other packages.
ready = false
)
func init() {
if os.Getenv("DB_PORT") != "" {
host := os.Getenv("DB_HOST")
@@ -22,10 +26,18 @@ func init() {
dsn := user + ":" + password + "@tcp(" + host + ":" + port + ")/" + dbName + "?charset=utf8mb4&parseTime=True&loc=Local"
var err error
// wait for mysql to be ready
time.Sleep(5 * time.Second)
db, err = gorm.Open(mysql.Open(dsn), &gorm.Config{})
if err != nil {
panic("failed to connect database")
retrys := 5
for {
db, err = gorm.Open(mysql.Open(dsn), &gorm.Config{})
if err == nil {
ready = true
break
}
retrys--
if retrys < 0 {
panic("failed to connect database: " + err.Error())
}
time.Sleep(1 * time.Second)
}
} else {
var err error
@@ -54,3 +66,7 @@ func init() {
// GetDB returns the package-level gorm handle stored in db.
// The value is whatever init assigned; no connection check is performed here.
func GetDB() *gorm.DB {
return db
}
// IsReady reports the current value of the package-level ready flag, which
// init sets to true after a successful gorm.Open.
// NOTE(review): the flag is a plain bool read without synchronization; confirm
// that every reader runs after this package's init has finished.
func IsReady() bool {
return ready
}

View File

@@ -4,7 +4,6 @@ import (
"errors"
"math/rand"
"nysoure/server/model"
"strings"
"sync"
"sync/atomic"
"time"
@@ -143,180 +142,6 @@ func DeleteResource(id uint) error {
})
}
// splitQuery breaks a search query into keywords.
//
// Spaces outside quotes separate keywords. A run enclosed in matching single
// or double quotes is kept as one keyword, spaces included. A quote preceded
// by a backslash is not treated as a delimiter (the backslash itself stays in
// the keyword). An unterminated quote extends to the end of the query.
//
// Every keyword is whitespace-trimmed, and keywords that are empty after
// trimming are never returned. Text that directly precedes an opening quote
// (as in `foo"bar baz"`) is emitted as its own keyword instead of being lost.
// The result is nil when the query contains no keywords.
func splitQuery(query string) []string {
	var keywords []string
	// emit records a token unless it is blank after trimming.
	emit := func(token string) {
		if token = strings.TrimSpace(token); token != "" {
			keywords = append(keywords, token)
		}
	}

	query = strings.TrimSpace(query)
	start := 0 // byte offset where the pending token begins
	inQuote := false
	quoteChar := byte(0)
	for i := 0; i < len(query); i++ {
		c := query[i]
		isQuote := (c == '"' || c == '\'') && (i == 0 || query[i-1] != '\\')
		switch {
		case isQuote && !inQuote:
			// Flush whatever was typed right before the opening quote.
			emit(query[start:i])
			inQuote, quoteChar = true, c
			start = i + 1
		case isQuote && c == quoteChar:
			emit(query[start:i])
			inQuote = false
			start = i + 1
		case !inQuote && c == ' ':
			emit(query[start:i])
			start = i + 1
		}
	}
	emit(query[start:])
	return keywords
}
// Search returns one page of resources matching every keyword in query,
// together with the total number of pages.
//
// The query is split with splitQuery. The first keyword is resolved through
// searchWithKeyword; each further keyword narrows that candidate list to
// resources whose title or any alternative title contains the keyword, or that
// carry a tag whose name equals it. The resources on the requested page are
// then reloaded one by one with their User, Images and Tags associations.
//
// page is 1-based; values below 1 are treated as 1 so they can no longer
// produce a negative slice index. A pageSize below 1 is rejected with a
// request error instead of triggering a division by zero. A query without
// keywords, or a page that starts beyond the result set, yields (nil, 0, nil).
func Search(query string, page, pageSize int) ([]model.Resource, int, error) {
	if pageSize < 1 {
		return nil, 0, model.NewRequestError("Invalid page size")
	}
	if page < 1 {
		page = 1
	}

	keywords := splitQuery(strings.TrimSpace(query))
	if len(keywords) == 0 {
		return nil, 0, nil
	}

	candidates, err := searchWithKeyword(keywords[0])
	if err != nil {
		return nil, 0, err
	}

	// matches reports whether a candidate satisfies one additional keyword.
	matches := func(res *model.Resource, keyword string) bool {
		if strings.Contains(res.Title, keyword) {
			return true
		}
		for _, alt := range res.AlternativeTitles {
			if strings.Contains(alt, keyword) {
				return true
			}
		}
		for _, tag := range res.Tags {
			if tag.Name == keyword {
				return true
			}
		}
		return false
	}
	for _, keyword := range keywords[1:] {
		kept := make([]model.Resource, 0, len(candidates))
		for i := range candidates {
			if matches(&candidates[i], keyword) {
				kept = append(kept, candidates[i])
			}
		}
		candidates = kept
	}

	startIndex := (page - 1) * pageSize
	if startIndex > len(candidates) {
		return nil, 0, nil
	}
	endIndex := startIndex + pageSize
	if endIndex > len(candidates) {
		endIndex = len(candidates)
	}
	totalPages := (len(candidates) + pageSize - 1) / pageSize

	// The candidate rows only carry id/title/alternative_titles, so each
	// resource on the page is loaded again in full, preserving candidate order.
	result := make([]model.Resource, 0, endIndex-startIndex)
	for i := startIndex; i < endIndex; i++ {
		var full model.Resource
		if err := db.Model(&full).Preload("User").Preload("Images").Preload("Tags").Where("id=?", candidates[i].ID).First(&full).Error; err != nil {
			return nil, 0, err
		}
		result = append(result, full)
	}
	return result, totalPages, nil
}
// searchWithKeyword collects the resources that match a single keyword, either
// through a tag of that name or through their title / alternative titles.
//
// An empty keyword yields (nil, nil); a keyword longer than 100 runes is
// rejected with a request error. Keywords shorter than 20 runes are first
// looked up as a tag name: an alias tag is resolved to its target, and every
// resource carrying the tag or one of its aliases is selected. A "not found"
// tag lookup is not an error. Title matches (LIKE %keyword%) are then appended,
// skipping IDs already present from the tag lookup. Only id, title and
// alternative_titles are selected, with Tags preloaded.
func searchWithKeyword(keyword string) ([]model.Resource, error) {
	if keyword == "" {
		return nil, nil
	}
	runeCount := len([]rune(keyword))
	if runeCount > 100 {
		return nil, model.NewRequestError("Keyword is too long")
	}

	var resources []model.Resource
	if runeCount < 20 {
		tag, err := GetTagByName(keyword)
		if err == nil {
			if tag.AliasOf != nil {
				if tag, err = GetTagByID(*tag.AliasOf); err != nil {
					return nil, err
				}
			}
			tagIDs := []uint{tag.ID}
			for _, alias := range tag.Aliases {
				tagIDs = append(tagIDs, alias.ID)
			}
			taggedIDs := db.Table("resource_tags").
				Select("resource_id").
				Where("tag_id IN ?", tagIDs).
				Group("resource_id")
			findErr := db.Where("id IN (?)", taggedIDs).
				Select("id", "title", "alternative_titles").
				Preload("Tags").
				Find(&resources).Error
			if findErr != nil {
				return nil, findErr
			}
		} else if !model.IsNotFoundError(err) {
			return nil, err
		}
	}

	pattern := "%" + keyword + "%"
	var byTitle []model.Resource
	titleErr := db.Where("title LIKE ?", pattern).
		Or("alternative_titles LIKE ?", pattern).
		Select("id", "title", "alternative_titles").
		Preload("Tags").
		Find(&byTitle).Error
	if titleErr != nil {
		return nil, titleErr
	}

	if len(byTitle) == 0 {
		return resources, nil
	}
	if len(resources) == 0 {
		return byTitle, nil
	}

	// Only IDs found through the tag lookup count as already present.
	fromTag := make(map[uint]struct{}, len(resources))
	for i := range resources {
		fromTag[resources[i].ID] = struct{}{}
	}
	for _, res := range byTitle {
		if _, dup := fromTag[res.ID]; !dup {
			resources = append(resources, res)
		}
	}
	return resources, nil
}
func GetResourceByTag(tagID uint, page int, pageSize int) ([]model.Resource, int, error) {
tag, err := GetTagByID(tagID)
if err != nil {
@@ -565,3 +390,58 @@ func RandomResource() (model.Resource, error) {
return resource, nil // Return the found resource
}
}
// GetResourcesIdWithTag maps the ID of every resource carrying the given tag
// to that resource's creation time.
//
// If the tag is an alias it is first resolved to the tag it points at; the
// lookup then covers that tag and all of its aliases. At most 10000 resources
// are returned, chosen newest first by created_at.
func GetResourcesIdWithTag(tagID uint) (map[uint]time.Time, error) {
	tag, err := GetTagByID(tagID)
	if err != nil {
		return nil, err
	}
	if tag.AliasOf != nil {
		if tag, err = GetTagByID(*tag.AliasOf); err != nil {
			return nil, err
		}
	}

	tagIDs := make([]uint, 0, len(tag.Aliases)+1)
	tagIDs = append(tagIDs, tag.ID)
	for _, alias := range tag.Aliases {
		tagIDs = append(tagIDs, alias.ID)
	}

	taggedIDs := db.Table("resource_tags").
		Select("resource_id").
		Where("tag_id IN ?", tagIDs).
		Group("resource_id")

	var rows []model.Resource
	lookup := db.Model(&model.Resource{}).
		Where("id IN (?)", taggedIDs).
		Order("created_at DESC").
		Limit(10000).
		Select("id", "created_at")
	if err := lookup.Find(&rows).Error; err != nil {
		return nil, err
	}

	createdByID := make(map[uint]time.Time, len(rows))
	for i := range rows {
		createdByID[rows[i].ID] = rows[i].CreatedAt
	}
	return createdByID, nil
}
// BatchGetResources loads the resources with the given IDs in a single query
// and returns them in the order the IDs were passed in.
//
// Duplicate IDs are collapsed to their first occurrence and IDs with no
// matching row are skipped. The User, Images and Tags associations are
// preloaded, matching what the per-row search lookup in this package loads
// before resources are turned into views. An empty ids slice returns an empty
// result without touching the database.
func BatchGetResources(ids []uint) ([]model.Resource, error) {
	if len(ids) == 0 {
		return []model.Resource{}, nil
	}

	seen := make(map[uint]struct{}, len(ids))
	uniqueIds := make([]uint, 0, len(ids))
	for _, id := range ids {
		if _, dup := seen[id]; !dup {
			seen[id] = struct{}{}
			uniqueIds = append(uniqueIds, id)
		}
	}

	var fetched []model.Resource
	if err := db.Where("id IN ?", uniqueIds).
		Preload("User").
		Preload("Images").
		Preload("Tags").
		Find(&fetched).Error; err != nil {
		return nil, err
	}

	// An IN query returns rows in database order; callers pass IDs that are
	// already sorted, so put the rows back into the requested order.
	position := make(map[uint]int, len(fetched))
	for i := range fetched {
		position[fetched[i].ID] = i
	}
	ordered := make([]model.Resource, 0, len(fetched))
	for _, id := range uniqueIds {
		if i, ok := position[id]; ok {
			ordered = append(ordered, fetched[i])
		}
	}
	return ordered, nil
}

View File

@@ -2,10 +2,11 @@ package dao
import (
"errors"
"github.com/gofiber/fiber/v3/log"
"nysoure/server/model"
"strings"
"github.com/gofiber/fiber/v3/log"
"gorm.io/gorm"
)
@@ -171,3 +172,11 @@ func ClearUnusedTags() error {
}
return nil
}
// ExistsTag reports whether at least one tag row has exactly the given name.
// A database error is returned as (false, err).
func ExistsTag(name string) (bool, error) {
	var matches int64
	err := db.Model(&model.Tag{}).Where("name = ?", name).Count(&matches).Error
	if err != nil {
		return false, err
	}
	return matches > 0, nil
}

106
server/search/resource.go Normal file
View File

@@ -0,0 +1,106 @@
package search
import (
"fmt"
"nysoure/server/dao"
"nysoure/server/model"
"nysoure/server/utils"
"regexp"
"strconv"
"time"
"github.com/blevesearch/bleve"
)
// ResourceParams is the document that createIndex stores in the bleve index
// for one resource, keyed by the resource's decimal ID.
type ResourceParams struct {
// Id is the resource's ID.
Id uint
// Title is the resource title after removeSpaces.
Title string
// Subtitles holds the resource's alternative titles, each after removeSpaces.
Subtitles []string
// Time is the resource's CreatedAt; SearchResource requests it back from
// the index as the "Time" field.
Time time.Time
}
// index is the package-wide bleve index; init opens or creates it, createIndex
// fills it and SearchResource queries it.
var index bleve.Index
// whitespaceRun matches one or more consecutive whitespace characters. It is
// compiled once at package scope so removeSpaces does not recompile it per call.
var whitespaceRun = regexp.MustCompile(`\s+`)

// removeSpaces collapses every run of whitespace in s into a single space.
// Despite its name it does not strip whitespace entirely: leading and trailing
// runs each become one space as well.
func removeSpaces(s string) string {
	return whitespaceRun.ReplaceAllString(s, " ")
}
// createIndex fills the bleve index with every resource known to the dao.
//
// It first polls dao.IsReady once per second until it reports true, then walks
// dao.GetResourceList in pages of 100 (oldest first) and indexes one
// ResourceParams document per resource under the resource's decimal ID. The
// first error from listing or indexing stops the run and is returned.
func createIndex() error {
	for !dao.IsReady() {
		time.Sleep(1 * time.Second)
	}

	// lastPage starts at 1 so the first page is always requested; it is
	// corrected from the page count reported by each listing call.
	for page, lastPage := 1, 1; page <= lastPage; page++ {
		batch, totalPages, err := dao.GetResourceList(page, 100, model.RSortTimeAsc)
		if err != nil {
			return err
		}
		lastPage = totalPages

		for _, res := range batch {
			subtitles := make([]string, 0, len(res.AlternativeTitles))
			for _, alt := range res.AlternativeTitles {
				subtitles = append(subtitles, removeSpaces(alt))
			}
			doc := ResourceParams{
				Id:        res.ID,
				Title:     removeSpaces(res.Title),
				Subtitles: subtitles,
				Time:      res.CreatedAt,
			}
			if err := index.Index(fmt.Sprintf("%d", res.ID), doc); err != nil {
				return err
			}
		}
	}
	return nil
}
// init opens the bleve index stored under the storage path. When no index
// exists at that path yet, a new one is created with the default mapping and
// createIndex is started in a background goroutine to populate it; the error
// createIndex returns is not observed. Any other failure to open or create
// the index panics.
func init() {
	path := utils.GetStoragePath() + "/search_index.bleve"

	var err error
	index, err = bleve.Open(path)
	if err == nil {
		return
	}
	if err != bleve.ErrorIndexPathDoesNotExist {
		panic("Failed to open search index: " + err.Error())
	}

	index, err = bleve.New(path, bleve.NewIndexMapping())
	if err != nil {
		panic("Failed to create search index: " + err.Error())
	}
	go createIndex()
}
func SearchResource(keyword string) (map[uint]time.Time, error) {
keyword = removeSpaces(keyword)
query := bleve.NewMatchQuery(keyword)
searchRequest := bleve.NewSearchRequest(query)
searchRequest.Size = 10000
searchRequest.Fields = []string{"Time"}
searchResults, err := index.Search(searchRequest)
if err != nil {
return nil, err
}
results := make(map[uint]time.Time)
for _, hit := range searchResults.Hits {
id, err := strconv.ParseUint(hit.ID, 10, 32)
if err != nil {
continue
}
t, err := time.Parse(time.RFC3339Nano, hit.Fields["Time"].(string))
if err != nil {
continue
}
results[uint(id)] = t
}
return results, nil
}

View File

@@ -5,8 +5,11 @@ import (
"nysoure/server/config"
"nysoure/server/dao"
"nysoure/server/model"
"nysoure/server/search"
"sort"
"strconv"
"strings"
"time"
"github.com/gofiber/fiber/v3"
"github.com/gofiber/fiber/v3/log"
@@ -154,13 +157,194 @@ func GetResourceList(page int, sort model.RSort) ([]model.ResourceView, int, err
return views, totalPages, nil
}
func SearchResource(keyword string, page int) ([]model.ResourceView, int, error) {
resources, totalPages, err := dao.Search(keyword, page, pageSize)
// splitQuery splits the input query string into keywords.
//
// Spaces outside quotes separate keywords. A run enclosed in matching single
// or double quotes is kept as one keyword, spaces included. A quote preceded
// by a backslash is not treated as a delimiter (the backslash itself stays in
// the keyword). An unterminated quote extends to the end of the query.
//
// Every keyword is whitespace-trimmed, and keywords that are empty after
// trimming are never returned. Text that directly precedes an opening quote
// (as in `foo"bar baz"`) is emitted as its own keyword instead of being lost.
// The result is nil when the query contains no keywords.
func splitQuery(query string) []string {
	var keywords []string
	// emit records a token unless it is blank after trimming.
	emit := func(token string) {
		if token = strings.TrimSpace(token); token != "" {
			keywords = append(keywords, token)
		}
	}

	query = strings.TrimSpace(query)
	start := 0 // byte offset where the pending token begins
	inQuote := false
	quoteChar := byte(0)
	for i := 0; i < len(query); i++ {
		c := query[i]
		isQuote := (c == '"' || c == '\'') && (i == 0 || query[i-1] != '\\')
		switch {
		case isQuote && !inQuote:
			// Flush whatever was typed right before the opening quote.
			emit(query[start:i])
			inQuote, quoteChar = true, c
			start = i + 1
		case isQuote && c == quoteChar:
			emit(query[start:i])
			inQuote = false
			start = i + 1
		case !inQuote && c == ' ':
			emit(query[start:i])
			start = i + 1
		}
	}
	emit(query[start:])
	return keywords
}
// searchWithKeyword gathers the resources matching a single keyword as a map
// from resource ID to creation time.
//
// If a tag named exactly like the keyword exists, every resource returned by
// dao.GetResourcesIdWithTag for that tag is included. The hits of the
// full-text index (search.SearchResource) are then merged in; for an ID found
// by both sources the index's timestamp is the one kept. Any lookup error
// aborts the search and is returned.
func searchWithKeyword(keyword string) (map[uint]time.Time, error) {
	matched := make(map[uint]time.Time)
	// absorb copies one source's results into the combined map.
	absorb := func(src map[uint]time.Time) {
		for id, createdAt := range src {
			matched[id] = createdAt
		}
	}

	tagExists, err := dao.ExistsTag(keyword)
	if err != nil {
		return nil, err
	}
	if tagExists {
		tag, err := dao.GetTagByName(keyword)
		if err != nil {
			return nil, err
		}
		tagged, err := dao.GetResourcesIdWithTag(tag.ID)
		if err != nil {
			return nil, err
		}
		absorb(tagged)
	}

	hits, err := search.SearchResource(keyword)
	if err != nil {
		return nil, err
	}
	absorb(hits)
	return matched, nil
}
func SearchResource(query string, page int) ([]model.ResourceView, int, error) {
start := (page - 1) * pageSize
end := start + pageSize
resources := make(map[uint]time.Time)
checkTag := func(tag string) error {
exists, err := dao.ExistsTag(tag)
if err != nil {
return err
}
if exists {
t, err := dao.GetTagByName(tag)
if err != nil {
return err
}
res, err := dao.GetResourcesIdWithTag(t.ID)
if err != nil {
return err
}
for id, createdAt := range res {
resources[id] = createdAt
}
}
return nil
}
// check tag
if err := checkTag(query); err != nil {
return nil, 0, err
}
// check tag after removing spaces
trimmed := strings.ReplaceAll(query, " ", "")
if trimmed != query {
if err := checkTag(trimmed); err != nil {
return nil, 0, err
}
}
// split query to search
keywords := splitQuery(query)
temp := make(map[uint]time.Time)
first := true
for _, keyword := range keywords {
res, err := searchWithKeyword(keyword)
if err != nil {
return nil, 0, err
}
if first {
for id, createdAt := range res {
temp[id] = createdAt
}
first = false
} else {
for id := range temp {
if _, ok := res[id]; !ok {
delete(temp, id)
}
}
}
}
for id, createdAt := range temp {
resources[id] = createdAt
}
if start >= len(resources) {
return []model.ResourceView{}, 0, nil
}
type IDWithTime struct {
ID uint
CreatedAt time.Time
}
var idsWithTime []IDWithTime
for id, createdAt := range resources {
idsWithTime = append(idsWithTime, IDWithTime{
ID: id,
CreatedAt: createdAt,
})
}
// sort by createdAt desc
sort.Slice(idsWithTime, func(i, j int) bool {
return idsWithTime[i].CreatedAt.After(idsWithTime[j].CreatedAt)
})
total := len(idsWithTime)
totalPages := (total + pageSize - 1) / pageSize
if start >= total {
return []model.ResourceView{}, totalPages, nil
}
if end > total {
end = total
}
idsPage := idsWithTime[start:end]
var ids []uint
for _, item := range idsPage {
ids = append(ids, item.ID)
}
resourcesPage, err := dao.BatchGetResources(ids)
if err != nil {
return nil, 0, err
}
var views []model.ResourceView
for _, r := range resources {
for _, r := range resourcesPage {
views = append(views, r.ToView())
}
return views, totalPages, nil