mirror of
https://github.com/foomo/typesense.git
synced 2025-10-16 12:45:37 +00:00
Merge pull request #5 from foomo/feature/document-converter
feat: add document converter and option to remove documents from indexing
This commit is contained in:
commit
f18489c623
@ -1,4 +1,4 @@
|
||||
hooks:
|
||||
hooks:
|
||||
pre-commit:
|
||||
- golangci-lint run --fast-only
|
||||
- husky lint-staged
|
||||
|
||||
@ -14,28 +14,30 @@ import (
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
const defaultSearchPresetName = "default"
|
||||
// DocumentConverter maps a document decoded from a search hit (indexDocument)
// to the value handed back to the caller of a search (returnType).
type DocumentConverter[indexDocument any, returnType any] func(indexDocument) returnType
|
||||
|
||||
type BaseAPI[indexDocument any, returnType any] struct {
|
||||
l *zap.Logger
|
||||
client *typesense.Client
|
||||
collections map[pkgx.IndexID]*api.CollectionSchema
|
||||
preset *api.PresetUpsertSchema
|
||||
|
||||
revisionID pkgx.RevisionID
|
||||
l *zap.Logger
|
||||
client *typesense.Client
|
||||
collections map[pkgx.IndexID]*api.CollectionSchema
|
||||
presets map[string]*api.PresetUpsertSchema
|
||||
revisionID pkgx.RevisionID
|
||||
documentConverter DocumentConverter[indexDocument, returnType]
|
||||
}
|
||||
|
||||
func NewBaseAPI[indexDocument any, returnType any](
|
||||
l *zap.Logger,
|
||||
client *typesense.Client,
|
||||
collections map[pkgx.IndexID]*api.CollectionSchema,
|
||||
preset *api.PresetUpsertSchema,
|
||||
presets map[string]*api.PresetUpsertSchema,
|
||||
documentConverter DocumentConverter[indexDocument, returnType],
|
||||
) *BaseAPI[indexDocument, returnType] {
|
||||
return &BaseAPI[indexDocument, returnType]{
|
||||
l: l,
|
||||
client: client,
|
||||
collections: collections,
|
||||
preset: preset,
|
||||
l: l,
|
||||
client: client,
|
||||
collections: collections,
|
||||
presets: presets,
|
||||
documentConverter: documentConverter,
|
||||
}
|
||||
}
|
||||
|
||||
@ -149,11 +151,11 @@ func (b *BaseAPI[indexDocument, returnType]) Initialize(ctx context.Context) (pk
|
||||
// Step 5: Set the latest revision ID and return
|
||||
b.revisionID = newRevisionID
|
||||
|
||||
// Step 6: Ensure search preset is present
|
||||
if b.preset != nil {
|
||||
_, err := b.client.Presets().Upsert(ctx, defaultSearchPresetName, b.preset)
|
||||
// Step 6: ensure search presets are present
|
||||
for name, preset := range b.presets {
|
||||
_, err := b.client.Presets().Upsert(ctx, name, preset)
|
||||
if err != nil {
|
||||
b.l.Error("failed to upsert search preset", zap.Error(err))
|
||||
b.l.Error("failed to upsert preset", zap.String("name", name), zap.Error(err))
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
@ -264,25 +266,18 @@ func (b *BaseAPI[indexDocument, returnType]) RevertRevision(ctx context.Context,
|
||||
return nil
|
||||
}
|
||||
|
||||
// SimpleSearch will perform a search operation on the given index
|
||||
// it will return the documents and the scores
|
||||
// SimpleSearch will perform a search operation on the given index using basic SearchParameters input
|
||||
func (b *BaseAPI[indexDocument, returnType]) SimpleSearch(
|
||||
ctx context.Context,
|
||||
index pkgx.IndexID,
|
||||
q string,
|
||||
filterBy map[string][]string,
|
||||
page, perPage int,
|
||||
sortBy string,
|
||||
parameters *pkgx.SearchParameters,
|
||||
) ([]returnType, pkgx.Scores, int, error) {
|
||||
// Call buildSearchParams but also set QueryBy explicitly
|
||||
parameters := buildSearchParams(q, filterBy, page, perPage, sortBy)
|
||||
parameters.QueryBy = pointer.String("title")
|
||||
|
||||
return b.ExpertSearch(ctx, index, parameters)
|
||||
searchParams := buildSearchParams(parameters)
|
||||
return b.ExpertSearch(ctx, index, searchParams)
|
||||
}
|
||||
|
||||
// ExpertSearch will perform a search operation on the given index
|
||||
// it will return the documents, scores, and totalResults
|
||||
// ExpertSearch performs a search operation on the given index
|
||||
// It returns the converted documents, scores, and totalResults
|
||||
func (b *BaseAPI[indexDocument, returnType]) ExpertSearch(
|
||||
ctx context.Context,
|
||||
indexID pkgx.IndexID,
|
||||
@ -299,6 +294,7 @@ func (b *BaseAPI[indexDocument, returnType]) ExpertSearch(
|
||||
b.l.Error("failed to perform search", zap.String("index", collectionName), zap.Error(err))
|
||||
return nil, nil, 0, err
|
||||
}
|
||||
|
||||
// Extract totalResults from the search response
|
||||
totalResults := *searchResponse.Found
|
||||
|
||||
@ -326,19 +322,24 @@ func (b *BaseAPI[indexDocument, returnType]) ExpertSearch(
|
||||
continue
|
||||
}
|
||||
|
||||
// Convert hit to JSON and then unmarshal into returnType
|
||||
// Convert raw document (map) to indexDocument struct
|
||||
hitJSON, err := json.Marshal(docMap)
|
||||
if err != nil {
|
||||
b.l.Warn("failed to marshal document to JSON", zap.String("index", collectionName), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
var doc returnType
|
||||
if err := json.Unmarshal(hitJSON, &doc); err != nil {
|
||||
b.l.Warn("failed to unmarshal JSON into returnType", zap.String("index", collectionName), zap.Error(err))
|
||||
|
||||
var rawDoc indexDocument
|
||||
if err := json.Unmarshal(hitJSON, &rawDoc); err != nil {
|
||||
b.l.Warn("failed to unmarshal JSON into indexDocument", zap.String("index", collectionName), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
|
||||
results[i] = doc
|
||||
// Convert the raw document using documentConverter
|
||||
convertedDoc := b.documentConverter(rawDoc)
|
||||
results[i] = convertedDoc
|
||||
|
||||
// Extract search score
|
||||
index := 0
|
||||
if hit.TextMatchInfo != nil && hit.TextMatchInfo.Score != nil {
|
||||
if score, err := strconv.Atoi(*hit.TextMatchInfo.Score); err == nil {
|
||||
|
||||
@ -13,50 +13,35 @@ import (
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// defaultSearchPresetName is the preset name buildSearchParams falls back to
// when the caller does not set SearchParameters.PresetName.
const defaultSearchPresetName = "default"
|
||||
|
||||
// buildSearchParams will return the search collection parameters
|
||||
// this is meant as a utility function to create the search collection parameters
|
||||
// for the typesense search API without any knowledge of the typesense API
|
||||
func buildSearchParams(
|
||||
q string,
|
||||
filterBy map[string][]string,
|
||||
page, perPage int,
|
||||
sortBy string,
|
||||
params *pkgx.SearchParameters,
|
||||
) *api.SearchCollectionParams {
|
||||
parameters := &api.SearchCollectionParams{}
|
||||
parameters.Q = pointer.String(q)
|
||||
if filterByString := formatFilterQuery(filterBy); filterByString != "" {
|
||||
parameters.FilterBy = pointer.String(filterByString)
|
||||
}
|
||||
parameters.Page = pointer.Int(page)
|
||||
parameters.PerPage = pointer.Int(perPage)
|
||||
if sortBy != "" {
|
||||
parameters.SortBy = pointer.String(sortBy)
|
||||
if params.Page < 1 {
|
||||
params.Page = 1
|
||||
}
|
||||
|
||||
return parameters
|
||||
}
|
||||
|
||||
func formatFilterQuery(filterBy map[string][]string) string {
|
||||
if filterBy == nil {
|
||||
return ""
|
||||
searchParams := &api.SearchCollectionParams{
|
||||
Page: pointer.Int(params.Page),
|
||||
}
|
||||
|
||||
var filterClauses []string
|
||||
for key, values := range filterBy {
|
||||
if len(values) == 1 {
|
||||
// Single value → Use `:=` operator
|
||||
filterClauses = append(filterClauses, fmt.Sprintf("%s:=\"%s\"", key, values[0]))
|
||||
} else {
|
||||
// Multiple values → Use `["val1","val2"]` array syntax
|
||||
formattedValues := []string{}
|
||||
for _, v := range values {
|
||||
formattedValues = append(formattedValues, fmt.Sprintf("\"%s\"", v))
|
||||
}
|
||||
filterClauses = append(filterClauses, fmt.Sprintf("%s:[%s]", key, strings.Join(formattedValues, ",")))
|
||||
}
|
||||
if params.PresetName != "" {
|
||||
searchParams.Preset = pointer.String(params.PresetName)
|
||||
} else {
|
||||
searchParams.Preset = pointer.String(defaultSearchPresetName)
|
||||
}
|
||||
|
||||
return strings.Join(filterClauses, " && ")
|
||||
if params.Query != "" {
|
||||
searchParams.Q = pointer.String(params.Query)
|
||||
}
|
||||
|
||||
if params.Modify != nil {
|
||||
params.Modify(searchParams)
|
||||
}
|
||||
|
||||
return searchParams
|
||||
}
|
||||
|
||||
func (b *BaseAPI[indexDocument, returnType]) generateRevisionID() pkgx.RevisionID {
|
||||
|
||||
@ -3,6 +3,7 @@ package typesenseindexing
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"slices"
|
||||
|
||||
contentserverclient "github.com/foomo/contentserver/client"
|
||||
@ -11,6 +12,8 @@ import (
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// ContentserverDataAttributeNoIndex is the key looked up in a repo node's Data.
// When the value stored under it is the boolean true, includeNode reports the
// node as not to be indexed.
const ContentserverDataAttributeNoIndex = "typesenseIndexing-noIndex"
|
||||
|
||||
type ContentServer[indexDocument any] struct {
|
||||
l *zap.Logger
|
||||
contentserverClient *contentserverclient.Client
|
||||
@ -32,6 +35,12 @@ func NewContentServer[indexDocument any](
|
||||
}
|
||||
}
|
||||
|
||||
// Provide retrieves documents for the given indexID from the content server.
|
||||
// It fetches the document IDs, retrieves the URLs for those IDs, and then uses the
|
||||
// document provider functions to create the documents.
|
||||
// The documents are returned as a slice of pointers to the indexDocument type.
|
||||
// If a document provider function is not available for a specific document type,
|
||||
// a warning is logged and that document is skipped.
|
||||
func (c ContentServer[indexDocument]) Provide(
|
||||
ctx context.Context,
|
||||
indexID pkgx.IndexID,
|
||||
@ -49,7 +58,10 @@ func (c ContentServer[indexDocument]) Provide(
|
||||
documents := make([]*indexDocument, len(documentInfos))
|
||||
for index, documentInfo := range documentInfos {
|
||||
if documentProvider, ok := c.documentProviderFuncs[documentInfo.DocumentType]; !ok {
|
||||
c.l.Warn("no document provider available for document type", zap.String("documentType", string(documentInfo.DocumentType)))
|
||||
c.l.Warn(
|
||||
"no document provider available for document type",
|
||||
zap.String("documentType", string(documentInfo.DocumentType)),
|
||||
)
|
||||
} else {
|
||||
document, err := documentProvider(ctx, indexID, documentInfo.DocumentID, urlsByIDs)
|
||||
if err != nil {
|
||||
@ -69,6 +81,7 @@ func (c ContentServer[indexDocument]) Provide(
|
||||
return documents, nil
|
||||
}
|
||||
|
||||
// ProvidePaged
|
||||
func (c ContentServer[indexDocument]) ProvidePaged(
|
||||
ctx context.Context,
|
||||
indexID pkgx.IndexID,
|
||||
@ -95,31 +108,26 @@ func (c ContentServer[indexDocument]) getDocumentIDsByIndexID(
|
||||
nodeMap := createFlatRepoNodeMap(rootRepoNode, map[string]*content.RepoNode{})
|
||||
documentInfos := make([]pkgx.DocumentInfo, 0, len(nodeMap))
|
||||
for _, repoNode := range nodeMap {
|
||||
if slices.Contains(c.supportedMimeTypes, repoNode.MimeType) {
|
||||
documentInfos = append(documentInfos, pkgx.DocumentInfo{
|
||||
DocumentType: pkgx.DocumentType(repoNode.MimeType),
|
||||
DocumentID: pkgx.DocumentID(repoNode.ID),
|
||||
})
|
||||
if !includeNode(c.supportedMimeTypes, repoNode) {
|
||||
c.l.Debug("skipping document indexing",
|
||||
zap.String("path", repoNode.URI),
|
||||
zap.String("mimeType", repoNode.MimeType),
|
||||
zap.Bool("hidden", repoNode.Hidden),
|
||||
)
|
||||
continue
|
||||
}
|
||||
|
||||
documentInfos = append(documentInfos, pkgx.DocumentInfo{
|
||||
DocumentType: pkgx.DocumentType(repoNode.MimeType),
|
||||
DocumentID: pkgx.DocumentID(repoNode.ID),
|
||||
})
|
||||
}
|
||||
|
||||
return documentInfos, nil
|
||||
}
|
||||
|
||||
// createFlatRepoNodeMap recursively retrieves all nodes from the tree and returns them in a flat map.
|
||||
func createFlatRepoNodeMap(node *content.RepoNode, nodeMap map[string]*content.RepoNode) map[string]*content.RepoNode {
|
||||
if node == nil {
|
||||
return nodeMap
|
||||
}
|
||||
// Add the current node to the list.
|
||||
nodeMap[node.ID] = node
|
||||
// Recursively process child nodes.
|
||||
for _, child := range node.Nodes {
|
||||
nodeMap = createFlatRepoNodeMap(child, nodeMap)
|
||||
}
|
||||
return nodeMap
|
||||
}
|
||||
|
||||
// fetchURLsByDocumentIDs fetches the URLs for the given document IDs from the content server.
|
||||
// It uses the contentserverClient to retrieve the URIs and maps them to DocumentID.
|
||||
func (c ContentServer[indexDocument]) fetchURLsByDocumentIDs(
|
||||
ctx context.Context,
|
||||
indexID pkgx.IndexID,
|
||||
@ -140,6 +148,8 @@ func (c ContentServer[indexDocument]) fetchURLsByDocumentIDs(
|
||||
return convertMapStringToDocumentID(uriMap), nil
|
||||
}
|
||||
|
||||
// convertMapStringToDocumentID converts a map with string keys to a map with DocumentID keys.
|
||||
// The keys in the input map are converted to DocumentID type, while the values remain strings.
|
||||
func convertMapStringToDocumentID(input map[string]string) map[pkgx.DocumentID]string {
|
||||
output := make(map[pkgx.DocumentID]string, len(input))
|
||||
for key, value := range input {
|
||||
@ -147,3 +157,33 @@ func convertMapStringToDocumentID(input map[string]string) map[pkgx.DocumentID]s
|
||||
}
|
||||
return output
|
||||
}
|
||||
|
||||
// includeNode checks if the node should be included in the indexing process.
|
||||
// It checks if the node is nil, if it has the noIndex attribute set to true,
|
||||
// and if its mime type is in the list of supported mime types.
|
||||
func includeNode(supportedMimeTypes []string, node *content.RepoNode) bool {
|
||||
if node == nil {
|
||||
return false
|
||||
}
|
||||
if noIndex, noIndexSet := node.Data[ContentserverDataAttributeNoIndex].(bool); noIndexSet && noIndex {
|
||||
return false
|
||||
}
|
||||
if !slices.Contains(supportedMimeTypes, node.MimeType) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// createFlatRepoNodeMap recursively retrieves all nodes from the tree and returns them in a flat map.
|
||||
func createFlatRepoNodeMap(node *content.RepoNode, nodeMap map[string]*content.RepoNode) map[string]*content.RepoNode {
|
||||
if node == nil {
|
||||
return nodeMap
|
||||
}
|
||||
// Add the current node to the list.
|
||||
nodeMap[node.ID] = node
|
||||
// Recursively process child nodes.
|
||||
for _, child := range node.Nodes {
|
||||
nodeMap = createFlatRepoNodeMap(child, nodeMap)
|
||||
}
|
||||
return nodeMap
|
||||
}
|
||||
|
||||
@ -17,14 +17,7 @@ type API[indexDocument any, returnType any] interface {
|
||||
Initialize(ctx context.Context) (RevisionID, error)
|
||||
|
||||
// perform a search operation on the given index
|
||||
SimpleSearch(
|
||||
ctx context.Context,
|
||||
index IndexID,
|
||||
q string,
|
||||
filterBy map[string][]string,
|
||||
page, perPage int,
|
||||
sortBy string,
|
||||
) ([]returnType, Scores, int, error)
|
||||
SimpleSearch(ctx context.Context, index IndexID, parameters *SearchParameters) ([]returnType, Scores, int, error)
|
||||
ExpertSearch(ctx context.Context, index IndexID, parameters *api.SearchCollectionParams) ([]returnType, Scores, int, error)
|
||||
Healthz(ctx context.Context) error
|
||||
Indices() ([]IndexID, error)
|
||||
|
||||
13
pkg/vo.go
13
pkg/vo.go
@ -1,6 +1,10 @@
|
||||
package typesense
|
||||
|
||||
import "context"
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/typesense/typesense-go/v3/typesense/api"
|
||||
)
|
||||
|
||||
type RevisionID string
|
||||
type Query string
|
||||
@ -26,3 +30,10 @@ type DocumentInfo struct {
|
||||
DocumentType DocumentType
|
||||
DocumentID DocumentID
|
||||
}
|
||||
|
||||
type SearchParameters struct {
|
||||
Query string
|
||||
Page int
|
||||
PresetName string
|
||||
Modify func(params *api.SearchCollectionParams)
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user