diff --git a/.husky.yaml b/.husky.yaml
index b82e55d..0284970 100644
--- a/.husky.yaml
+++ b/.husky.yaml
@@ -1,4 +1,4 @@
- hooks:
+hooks:
   pre-commit:
     - golangci-lint run --fast-only
     - husky lint-staged
diff --git a/pkg/api/api.go b/pkg/api/api.go
index d017b7e..2abe97f 100644
--- a/pkg/api/api.go
+++ b/pkg/api/api.go
@@ -14,28 +14,30 @@ import (
 	"go.uber.org/zap"
 )
 
-const defaultSearchPresetName = "default"
+type DocumentConverter[indexDocument any, returnType any] func(indexDocument) returnType
 
 type BaseAPI[indexDocument any, returnType any] struct {
-	l           *zap.Logger
-	client      *typesense.Client
-	collections map[pkgx.IndexID]*api.CollectionSchema
-	preset      *api.PresetUpsertSchema
-
-	revisionID pkgx.RevisionID
+	l                 *zap.Logger
+	client            *typesense.Client
+	collections       map[pkgx.IndexID]*api.CollectionSchema
+	presets           map[string]*api.PresetUpsertSchema
+	revisionID        pkgx.RevisionID
+	documentConverter DocumentConverter[indexDocument, returnType]
 }
 
 func NewBaseAPI[indexDocument any, returnType any](
 	l *zap.Logger,
 	client *typesense.Client,
 	collections map[pkgx.IndexID]*api.CollectionSchema,
-	preset *api.PresetUpsertSchema,
+	presets map[string]*api.PresetUpsertSchema,
+	documentConverter DocumentConverter[indexDocument, returnType],
 ) *BaseAPI[indexDocument, returnType] {
 	return &BaseAPI[indexDocument, returnType]{
-		l:           l,
-		client:      client,
-		collections: collections,
-		preset:      preset,
+		l:                 l,
+		client:            client,
+		collections:       collections,
+		presets:           presets,
+		documentConverter: documentConverter,
 	}
 }
 
@@ -149,11 +151,11 @@ func (b *BaseAPI[indexDocument, returnType]) Initialize(ctx context.Context) (pk
 	// Step 5: Set the latest revision ID and return
 	b.revisionID = newRevisionID
 
-	// Step 6: Ensure search preset is present
-	if b.preset != nil {
-		_, err := b.client.Presets().Upsert(ctx, defaultSearchPresetName, b.preset)
+	// Step 6: ensure search presets are present
+	for name, preset := range b.presets {
+		_, err := b.client.Presets().Upsert(ctx, name, preset)
 		if err != nil {
-			b.l.Error("failed to upsert search preset", zap.Error(err))
+			b.l.Error("failed to upsert preset", zap.String("name", name), zap.Error(err))
 			return "", err
 		}
 	}
@@ -264,25 +266,18 @@ func (b *BaseAPI[indexDocument, returnType]) RevertRevision(ctx context.Context,
 	return nil
 }
 
-// SimpleSearch will perform a search operation on the given index
-// it will return the documents and the scores
+// SimpleSearch will perform a search operation on the given index using basic SearchParameters input
 func (b *BaseAPI[indexDocument, returnType]) SimpleSearch(
 	ctx context.Context,
 	index pkgx.IndexID,
-	q string,
-	filterBy map[string][]string,
-	page, perPage int,
-	sortBy string,
+	parameters *pkgx.SearchParameters,
 ) ([]returnType, pkgx.Scores, int, error) {
-	// Call buildSearchParams but also set QueryBy explicitly
-	parameters := buildSearchParams(q, filterBy, page, perPage, sortBy)
-	parameters.QueryBy = pointer.String("title")
-
-	return b.ExpertSearch(ctx, index, parameters)
+	searchParams := buildSearchParams(parameters)
+	return b.ExpertSearch(ctx, index, searchParams)
 }
 
-// ExpertSearch will perform a search operation on the given index
-// it will return the documents, scores, and totalResults
+// ExpertSearch performs a search operation on the given index
+// It returns the converted documents, scores, and totalResults
 func (b *BaseAPI[indexDocument, returnType]) ExpertSearch(
 	ctx context.Context,
 	indexID pkgx.IndexID,
@@ -299,6 +294,7 @@ func (b *BaseAPI[indexDocument, returnType]) ExpertSearch(
 		b.l.Error("failed to perform search", zap.String("index", collectionName), zap.Error(err))
 		return nil, nil, 0, err
 	}
+
 	// Extract totalResults from the search response
 	totalResults := *searchResponse.Found
 
@@ -326,19 +322,24 @@ func (b *BaseAPI[indexDocument, returnType]) ExpertSearch(
 			continue
 		}
 
-		// Convert hit to JSON and then unmarshal into returnType
+		// Convert raw document (map) to indexDocument struct
 		hitJSON, err := json.Marshal(docMap)
 		if err != nil {
 			b.l.Warn("failed to marshal document to JSON", zap.String("index", collectionName), zap.Error(err))
 			continue
 		}
-		var doc returnType
-		if err := json.Unmarshal(hitJSON, &doc); err != nil {
-			b.l.Warn("failed to unmarshal JSON into returnType", zap.String("index", collectionName), zap.Error(err))
+
+		var rawDoc indexDocument
+		if err := json.Unmarshal(hitJSON, &rawDoc); err != nil {
+			b.l.Warn("failed to unmarshal JSON into indexDocument", zap.String("index", collectionName), zap.Error(err))
 			continue
 		}
-		results[i] = doc
+		// Convert the raw document using documentConverter
+		convertedDoc := b.documentConverter(rawDoc)
+		results[i] = convertedDoc
+
+		// Extract search score
 		index := 0
 		if hit.TextMatchInfo != nil && hit.TextMatchInfo.Score != nil {
 			if score, err := strconv.Atoi(*hit.TextMatchInfo.Score); err == nil {
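Note for reviewers: below is a minimal sketch of how the reworked constructor is meant to be wired up, with a `DocumentConverter` and the new named-presets map. The module import paths, the document/result types, and the preset contents are illustrative assumptions, not part of this change.

```go
package example

import (
	"go.uber.org/zap"

	"github.com/typesense/typesense-go/v3/typesense"
	"github.com/typesense/typesense-go/v3/typesense/api"

	// Hypothetical import paths for this module; adjust to the real module path.
	pkgx "example.com/foomo/typesense/pkg"
	typesenseapi "example.com/foomo/typesense/pkg/api"
)

// productDocument is an assumed shape of a document stored in the index.
type productDocument struct {
	ID    string `json:"id"`
	Title string `json:"title"`
}

// productResult is an assumed caller-facing type produced by the converter.
type productResult struct {
	ID    string
	Title string
}

// newProductAPI wires a BaseAPI with named presets and a DocumentConverter.
func newProductAPI(l *zap.Logger, client *typesense.Client) *typesenseapi.BaseAPI[productDocument, productResult] {
	collections := map[pkgx.IndexID]*api.CollectionSchema{
		// collection schemas omitted for brevity
	}
	// One entry per preset name; "default" is the fallback name used by buildSearchParams.
	presets := map[string]*api.PresetUpsertSchema{
		"default": {}, // preset value omitted for brevity
	}
	// The converter maps the raw index document onto the caller-facing type.
	converter := func(doc productDocument) productResult {
		return productResult{ID: doc.ID, Title: doc.Title}
	}
	return typesenseapi.NewBaseAPI[productDocument, productResult](l, client, collections, presets, converter)
}
```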
diff --git a/pkg/api/utils.go b/pkg/api/utils.go
index 898303b..2bb73a5 100644
--- a/pkg/api/utils.go
+++ b/pkg/api/utils.go
@@ -13,50 +13,35 @@ import (
 	"go.uber.org/zap"
 )
 
+const defaultSearchPresetName = "default"
+
 // buildSearchParams will return the search collection parameters
-// this is meant as a utility function to create the search collection parameters
-// for the typesense search API without any knowledge of the typesense API
 func buildSearchParams(
-	q string,
-	filterBy map[string][]string,
-	page, perPage int,
-	sortBy string,
+	params *pkgx.SearchParameters,
 ) *api.SearchCollectionParams {
-	parameters := &api.SearchCollectionParams{}
-	parameters.Q = pointer.String(q)
-	if filterByString := formatFilterQuery(filterBy); filterByString != "" {
-		parameters.FilterBy = pointer.String(filterByString)
-	}
-	parameters.Page = pointer.Int(page)
-	parameters.PerPage = pointer.Int(perPage)
-	if sortBy != "" {
-		parameters.SortBy = pointer.String(sortBy)
+	if params.Page < 1 {
+		params.Page = 1
 	}
-	return parameters
-}
-
-func formatFilterQuery(filterBy map[string][]string) string {
-	if filterBy == nil {
-		return ""
+	searchParams := &api.SearchCollectionParams{
+		Page: pointer.Int(params.Page),
 	}
-	var filterClauses []string
-	for key, values := range filterBy {
-		if len(values) == 1 {
-			// Single value → Use `:=` operator
-			filterClauses = append(filterClauses, fmt.Sprintf("%s:=\"%s\"", key, values[0]))
-		} else {
-			// Multiple values → Use `["val1","val2"]` array syntax
-			formattedValues := []string{}
-			for _, v := range values {
-				formattedValues = append(formattedValues, fmt.Sprintf("\"%s\"", v))
-			}
-			filterClauses = append(filterClauses, fmt.Sprintf("%s:[%s]", key, strings.Join(formattedValues, ",")))
-		}
+	if params.PresetName != "" {
+		searchParams.Preset = pointer.String(params.PresetName)
+	} else {
+		searchParams.Preset = pointer.String(defaultSearchPresetName)
 	}
-	return strings.Join(filterClauses, " && ")
+	if params.Query != "" {
+		searchParams.Q = pointer.String(params.Query)
+	}
+
+	if params.Modify != nil {
+		params.Modify(searchParams)
+	}
+
+	return searchParams
 }
 
 func (b *BaseAPI[indexDocument, returnType]) generateRevisionID() pkgx.RevisionID {
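Since `formatFilterQuery` and the explicit per-page/sort handling are removed, raw filter, sort, and query-by options now travel either in a preset or through the new `Modify` hook. A sketch under those assumptions; the module import path and the concrete field values are made up, the filter string follows the syntax the removed helper used to produce, and the `pointer` helpers are the ones `pkg/api` already imports.

```go
package example

import (
	"github.com/typesense/typesense-go/v3/typesense/api"
	"github.com/typesense/typesense-go/v3/typesense/api/pointer"

	// Hypothetical import path for this module's root package.
	pkgx "example.com/foomo/typesense/pkg"
)

// searchParamsWithRawFilter shows how the Modify hook can carry options that
// buildSearchParams no longer sets itself (query_by, filter_by, sort_by, per_page).
func searchParamsWithRawFilter() *pkgx.SearchParameters {
	return &pkgx.SearchParameters{
		Query: "running shoes",
		Page:  1,
		// PresetName left empty: buildSearchParams falls back to the "default" preset.
		Modify: func(p *api.SearchCollectionParams) {
			p.QueryBy = pointer.String("title")
			p.FilterBy = pointer.String(`brand:["acme","globex"]`)
			p.SortBy = pointer.String("price:asc")
			p.PerPage = pointer.Int(20)
		},
	}
}
```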
diff --git a/pkg/indexing/contentserver.go b/pkg/indexing/contentserver.go
index ce14a26..4c4a75b 100644
--- a/pkg/indexing/contentserver.go
+++ b/pkg/indexing/contentserver.go
@@ -3,6 +3,7 @@ package typesenseindexing
 import (
 	"context"
 	"fmt"
+	"slices"
 
 	contentserverclient "github.com/foomo/contentserver/client"
 
@@ -11,6 +12,8 @@ import (
 	"go.uber.org/zap"
 )
 
+const ContentserverDataAttributeNoIndex = "typesenseIndexing-noIndex"
+
 type ContentServer[indexDocument any] struct {
 	l                   *zap.Logger
 	contentserverClient *contentserverclient.Client
@@ -32,6 +35,12 @@ func NewContentServer[indexDocument any](
 	}
 }
 
+// Provide retrieves documents for the given indexID from the content server.
+// It fetches the document IDs, retrieves the URLs for those IDs, and then uses the
+// document provider functions to create the documents.
+// The documents are returned as a slice of pointers to the indexDocument type.
+// If a document provider function is not available for a specific document type,
+// a warning is logged and that document is skipped.
 func (c ContentServer[indexDocument]) Provide(
 	ctx context.Context,
 	indexID pkgx.IndexID,
@@ -49,7 +58,10 @@ func (c ContentServer[indexDocument]) Provide(
 	documents := make([]*indexDocument, len(documentInfos))
 	for index, documentInfo := range documentInfos {
 		if documentProvider, ok := c.documentProviderFuncs[documentInfo.DocumentType]; !ok {
-			c.l.Warn("no document provider available for document type", zap.String("documentType", string(documentInfo.DocumentType)))
+			c.l.Warn(
+				"no document provider available for document type",
+				zap.String("documentType", string(documentInfo.DocumentType)),
+			)
 		} else {
 			document, err := documentProvider(ctx, indexID, documentInfo.DocumentID, urlsByIDs)
 			if err != nil {
@@ -69,6 +81,7 @@ func (c ContentServer[indexDocument]) Provide(
 	return documents, nil
 }
 
+// ProvidePaged
 func (c ContentServer[indexDocument]) ProvidePaged(
 	ctx context.Context,
 	indexID pkgx.IndexID,
@@ -95,31 +108,26 @@ func (c ContentServer[indexDocument]) getDocumentIDsByIndexID(
 	nodeMap := createFlatRepoNodeMap(rootRepoNode, map[string]*content.RepoNode{})
 	documentInfos := make([]pkgx.DocumentInfo, 0, len(nodeMap))
 	for _, repoNode := range nodeMap {
-		if slices.Contains(c.supportedMimeTypes, repoNode.MimeType) {
-			documentInfos = append(documentInfos, pkgx.DocumentInfo{
-				DocumentType: pkgx.DocumentType(repoNode.MimeType),
-				DocumentID:   pkgx.DocumentID(repoNode.ID),
-			})
+		if !includeNode(c.supportedMimeTypes, repoNode) {
+			c.l.Debug("skipping document indexing",
+				zap.String("path", repoNode.URI),
+				zap.String("mimeType", repoNode.MimeType),
+				zap.Bool("hidden", repoNode.Hidden),
+			)
+			continue
 		}
+
+		documentInfos = append(documentInfos, pkgx.DocumentInfo{
+			DocumentType: pkgx.DocumentType(repoNode.MimeType),
+			DocumentID:   pkgx.DocumentID(repoNode.ID),
+		})
 	}
 	return documentInfos, nil
 }
 
-// createFlatRepoNodeMap recursively retrieves all nodes from the tree and returns them in a flat map.
-func createFlatRepoNodeMap(node *content.RepoNode, nodeMap map[string]*content.RepoNode) map[string]*content.RepoNode {
-	if node == nil {
-		return nodeMap
-	}
-	// Add the current node to the list.
-	nodeMap[node.ID] = node
-	// Recursively process child nodes.
-	for _, child := range node.Nodes {
-		nodeMap = createFlatRepoNodeMap(child, nodeMap)
-	}
-	return nodeMap
-}
-
+// fetchURLsByDocumentIDs fetches the URLs for the given document IDs from the content server.
+// It uses the contentserverClient to retrieve the URIs and maps them to DocumentID.
 func (c ContentServer[indexDocument]) fetchURLsByDocumentIDs(
 	ctx context.Context,
 	indexID pkgx.IndexID,
@@ -140,6 +148,8 @@ func (c ContentServer[indexDocument]) fetchURLsByDocumentIDs(
 	return convertMapStringToDocumentID(uriMap), nil
 }
 
+// convertMapStringToDocumentID converts a map with string keys to a map with DocumentID keys.
+// The keys in the input map are converted to DocumentID type, while the values remain strings.
 func convertMapStringToDocumentID(input map[string]string) map[pkgx.DocumentID]string {
 	output := make(map[pkgx.DocumentID]string, len(input))
 	for key, value := range input {
@@ -147,3 +157,33 @@ func convertMapStringToDocumentID(input map[string]string) map[pkgx.DocumentID]s
 	}
 	return output
 }
+
+// includeNode checks if the node should be included in the indexing process.
+// It checks if the node is nil, if it has the noIndex attribute set to true,
+// and if its mime type is in the list of supported mime types.
+func includeNode(supportedMimeTypes []string, node *content.RepoNode) bool {
+	if node == nil {
+		return false
+	}
+	if noIndex, noIndexSet := node.Data[ContentserverDataAttributeNoIndex].(bool); noIndexSet && noIndex {
+		return false
+	}
+	if !slices.Contains(supportedMimeTypes, node.MimeType) {
+		return false
+	}
+	return true
+}
+
+// createFlatRepoNodeMap recursively retrieves all nodes from the tree and returns them in a flat map.
+func createFlatRepoNodeMap(node *content.RepoNode, nodeMap map[string]*content.RepoNode) map[string]*content.RepoNode {
+	if node == nil {
+		return nodeMap
+	}
+	// Add the current node to the list.
+	nodeMap[node.ID] = node
+	// Recursively process child nodes.
+	for _, child := range node.Nodes {
+		nodeMap = createFlatRepoNodeMap(child, nodeMap)
+	}
+	return nodeMap
+}
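To make the new opt-out concrete: `includeNode` drops nil nodes, nodes whose data map carries `typesenseIndexing-noIndex: true`, and nodes with an unsupported mime type. A rough sketch of that behavior, assuming `RepoNode.Data` is a `map[string]interface{}` as the type assertion above implies; the node values are invented for illustration only.

```go
package typesenseindexing

import "github.com/foomo/contentserver/content"

// includeNodeExamples sketches the three exclusion rules enforced by includeNode.
func includeNodeExamples() []bool {
	supported := []string{"text/html"}

	indexed := &content.RepoNode{
		ID:       "page-1",
		MimeType: "text/html",
	}
	optedOut := &content.RepoNode{
		ID:       "page-2",
		MimeType: "text/html",
		Data: map[string]interface{}{
			ContentserverDataAttributeNoIndex: true, // editorially excluded from indexing
		},
	}
	wrongMime := &content.RepoNode{
		ID:       "asset-1",
		MimeType: "image/png",
	}

	return []bool{
		includeNode(supported, indexed),   // true
		includeNode(supported, optedOut),  // false: noIndex data attribute set
		includeNode(supported, wrongMime), // false: mime type not supported
		includeNode(supported, nil),       // false: nil node
	}
}
```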
diff --git a/pkg/interface.go b/pkg/interface.go
index b838883..572c3fb 100644
--- a/pkg/interface.go
+++ b/pkg/interface.go
@@ -17,14 +17,7 @@ type API[indexDocument any, returnType any] interface {
 	Initialize(ctx context.Context) (RevisionID, error)
 
 	// perform a search operation on the given index
-	SimpleSearch(
-		ctx context.Context,
-		index IndexID,
-		q string,
-		filterBy map[string][]string,
-		page, perPage int,
-		sortBy string,
-	) ([]returnType, Scores, int, error)
+	SimpleSearch(ctx context.Context, index IndexID, parameters *SearchParameters) ([]returnType, Scores, int, error)
 	ExpertSearch(ctx context.Context, index IndexID, parameters *api.SearchCollectionParams) ([]returnType, Scores, int, error)
 	Healthz(ctx context.Context) error
 	Indices() ([]IndexID, error)
diff --git a/pkg/vo.go b/pkg/vo.go
index b7acbb2..a90027e 100644
--- a/pkg/vo.go
+++ b/pkg/vo.go
@@ -1,6 +1,10 @@
 package typesense
 
-import "context"
+import (
+	"context"
+
+	"github.com/typesense/typesense-go/v3/typesense/api"
+)
 
 type RevisionID string
 type Query string
@@ -26,3 +30,10 @@ type DocumentInfo struct {
 	DocumentType DocumentType
 	DocumentID   DocumentID
 }
+
+type SearchParameters struct {
+	Query      string
+	Page       int
+	PresetName string
+	Modify     func(params *api.SearchCollectionParams)
+}
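With `SearchParameters` in place, a basic search call reduces to query, page, and preset selection; everything else goes through a named preset or the `Modify` hook. A usage sketch against the reworked `API` interface; the index ID and preset name are assumptions, the module import path is hypothetical, and `IndexID` is assumed to be a string-based type.

```go
package example

import (
	"context"
	"fmt"

	// Hypothetical import path for this module's root package (package typesense).
	pkgx "example.com/foomo/typesense/pkg"
)

// searchIndex sketches a basic call against the reworked SimpleSearch signature.
func searchIndex[D any, R any](ctx context.Context, a pkgx.API[D, R]) ([]R, error) {
	results, _, total, err := a.SimpleSearch(ctx, pkgx.IndexID("products"), &pkgx.SearchParameters{
		Query:      "running shoes",
		Page:       1,
		PresetName: "product-search", // empty would fall back to the "default" preset
	})
	if err != nil {
		return nil, err
	}
	fmt.Printf("got %d of %d matching documents\n", len(results), total)
	return results, nil
}
```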