From 625a19cb07e28b4b76427dc633f0997ce4ececa9 Mon Sep 17 00:00:00 2001 From: Miroslav Cvetic Date: Thu, 13 Mar 2025 14:30:03 +0100 Subject: [PATCH 01/11] feat: add document converter --- pkg/api/api.go | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/pkg/api/api.go b/pkg/api/api.go index d017b7e..fb0697b 100644 --- a/pkg/api/api.go +++ b/pkg/api/api.go @@ -16,13 +16,15 @@ import ( const defaultSearchPresetName = "default" -type BaseAPI[indexDocument any, returnType any] struct { - l *zap.Logger - client *typesense.Client - collections map[pkgx.IndexID]*api.CollectionSchema - preset *api.PresetUpsertSchema +type DocumentConverter[indexDocument any, returnType any] func(indexDocument) returnType - revisionID pkgx.RevisionID +type BaseAPI[indexDocument any, returnType any] struct { + l *zap.Logger + client *typesense.Client + collections map[pkgx.IndexID]*api.CollectionSchema + preset *api.PresetUpsertSchema + revisionID pkgx.RevisionID + documentConverter DocumentConverter[indexDocument, returnType] } func NewBaseAPI[indexDocument any, returnType any]( @@ -30,12 +32,14 @@ func NewBaseAPI[indexDocument any, returnType any]( client *typesense.Client, collections map[pkgx.IndexID]*api.CollectionSchema, preset *api.PresetUpsertSchema, + documentConverter DocumentConverter[indexDocument, returnType], ) *BaseAPI[indexDocument, returnType] { return &BaseAPI[indexDocument, returnType]{ - l: l, - client: client, - collections: collections, - preset: preset, + l: l, + client: client, + collections: collections, + preset: preset, + documentConverter: documentConverter, } } @@ -281,8 +285,8 @@ func (b *BaseAPI[indexDocument, returnType]) SimpleSearch( return b.ExpertSearch(ctx, index, parameters) } -// ExpertSearch will perform a search operation on the given index -// it will return the documents, scores, and totalResults +// ExpertSearch performs a search operation on the given index +// It returns the converted 
documents, scores, and totalResults func (b *BaseAPI[indexDocument, returnType]) ExpertSearch( ctx context.Context, indexID pkgx.IndexID, @@ -299,6 +303,7 @@ func (b *BaseAPI[indexDocument, returnType]) ExpertSearch( b.l.Error("failed to perform search", zap.String("index", collectionName), zap.Error(err)) return nil, nil, 0, err } + // Extract totalResults from the search response totalResults := *searchResponse.Found @@ -326,19 +331,24 @@ func (b *BaseAPI[indexDocument, returnType]) ExpertSearch( continue } - // Convert hit to JSON and then unmarshal into returnType + // Convert raw document (map) to indexDocument struct hitJSON, err := json.Marshal(docMap) if err != nil { b.l.Warn("failed to marshal document to JSON", zap.String("index", collectionName), zap.Error(err)) continue } - var doc returnType - if err := json.Unmarshal(hitJSON, &doc); err != nil { - b.l.Warn("failed to unmarshal JSON into returnType", zap.String("index", collectionName), zap.Error(err)) + + var rawDoc indexDocument + if err := json.Unmarshal(hitJSON, &rawDoc); err != nil { + b.l.Warn("failed to unmarshal JSON into indexDocument", zap.String("index", collectionName), zap.Error(err)) continue } - results[i] = doc + // Convert the raw document using documentConverter + convertedDoc := b.documentConverter(rawDoc) + results[i] = convertedDoc + + // Extract search score index := 0 if hit.TextMatchInfo != nil && hit.TextMatchInfo.Score != nil { if score, err := strconv.Atoi(*hit.TextMatchInfo.Score); err == nil { From 82e8704ace6c94b40abf509ec0e5a4f1858977c9 Mon Sep 17 00:00:00 2001 From: Miroslav Cvetic Date: Thu, 20 Mar 2025 16:42:31 +0100 Subject: [PATCH 02/11] feat: add option to restrict paths --- pkg/indexing/contentserver.go | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/pkg/indexing/contentserver.go b/pkg/indexing/contentserver.go index ce14a26..407a548 100644 --- a/pkg/indexing/contentserver.go +++ b/pkg/indexing/contentserver.go @@ 
-4,6 +4,7 @@ import ( "context" "fmt" "slices" + "strings" contentserverclient "github.com/foomo/contentserver/client" "github.com/foomo/contentserver/content" @@ -16,6 +17,7 @@ type ContentServer[indexDocument any] struct { contentserverClient *contentserverclient.Client documentProviderFuncs map[pkgx.DocumentType]pkgx.DocumentProviderFunc[indexDocument] supportedMimeTypes []string + restrictedPaths []string } func NewContentServer[indexDocument any]( @@ -23,12 +25,14 @@ func NewContentServer[indexDocument any]( client *contentserverclient.Client, documentProviderFuncs map[pkgx.DocumentType]pkgx.DocumentProviderFunc[indexDocument], supportedMimeTypes []string, + restrictedPaths []string, ) *ContentServer[indexDocument] { return &ContentServer[indexDocument]{ l: l, contentserverClient: client, documentProviderFuncs: documentProviderFuncs, supportedMimeTypes: supportedMimeTypes, + restrictedPaths: restrictedPaths, } } @@ -69,6 +73,15 @@ func (c ContentServer[indexDocument]) Provide( return documents, nil } +func (c ContentServer[indexDocument]) isRestrictedPath(path string) bool { + for _, restricted := range c.restrictedPaths { + if strings.HasPrefix(path, restricted) { + return true + } + } + return false +} + func (c ContentServer[indexDocument]) ProvidePaged( ctx context.Context, indexID pkgx.IndexID, @@ -137,13 +150,15 @@ func (c ContentServer[indexDocument]) fetchURLsByDocumentIDs( return nil, err } - return convertMapStringToDocumentID(uriMap), nil -} - -func convertMapStringToDocumentID(input map[string]string) map[pkgx.DocumentID]string { - output := make(map[pkgx.DocumentID]string, len(input)) - for key, value := range input { - output[pkgx.DocumentID(key)] = value + // Filter out restricted paths + filteredURIs := make(map[pkgx.DocumentID]string) + for docID, url := range uriMap { + if c.isRestrictedPath(url) { + c.l.Warn("skipping restricted path", zap.String("path", url)) + continue + } + filteredURIs[pkgx.DocumentID(docID)] = url } - return output + + 
return filteredURIs, nil } From 2d2f1163d007af8ca0834cb1abf39f3d592b7f42 Mon Sep 17 00:00:00 2001 From: Miroslav Cvetic Date: Thu, 20 Mar 2025 20:13:48 +0100 Subject: [PATCH 03/11] fix: filter by restricted paths --- pkg/indexing/contentserver.go | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/pkg/indexing/contentserver.go b/pkg/indexing/contentserver.go index 407a548..76a456b 100644 --- a/pkg/indexing/contentserver.go +++ b/pkg/indexing/contentserver.go @@ -108,6 +108,12 @@ func (c ContentServer[indexDocument]) getDocumentIDsByIndexID( nodeMap := createFlatRepoNodeMap(rootRepoNode, map[string]*content.RepoNode{}) documentInfos := make([]pkgx.DocumentInfo, 0, len(nodeMap)) for _, repoNode := range nodeMap { + // filter out restricted paths + if c.isRestrictedPath(repoNode.URI) { + c.l.Warn("Skipping document due to restricted path", zap.String("path", repoNode.URI)) + continue + } + if slices.Contains(c.supportedMimeTypes, repoNode.MimeType) { documentInfos = append(documentInfos, pkgx.DocumentInfo{ DocumentType: pkgx.DocumentType(repoNode.MimeType), @@ -150,15 +156,13 @@ func (c ContentServer[indexDocument]) fetchURLsByDocumentIDs( return nil, err } - // Filter out restricted paths - filteredURIs := make(map[pkgx.DocumentID]string) - for docID, url := range uriMap { - if c.isRestrictedPath(url) { - c.l.Warn("skipping restricted path", zap.String("path", url)) - continue - } - filteredURIs[pkgx.DocumentID(docID)] = url - } - - return filteredURIs, nil + return convertMapStringToDocumentID(uriMap), nil +} + +func convertMapStringToDocumentID(input map[string]string) map[pkgx.DocumentID]string { + output := make(map[pkgx.DocumentID]string, len(input)) + for key, value := range input { + output[pkgx.DocumentID(key)] = value + } + return output } From 7b963e742bc3d27383906791904bcfd5ccd468ae Mon Sep 17 00:00:00 2001 From: Miroslav Cvetic Date: Thu, 20 Mar 2025 20:48:45 +0100 Subject: [PATCH 04/11] fix: change logic for 
restricted paths --- pkg/indexing/contentserver.go | 46 ++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/pkg/indexing/contentserver.go b/pkg/indexing/contentserver.go index 76a456b..1e5a429 100644 --- a/pkg/indexing/contentserver.go +++ b/pkg/indexing/contentserver.go @@ -3,7 +3,6 @@ package typesenseindexing import ( "context" "fmt" - "slices" "strings" contentserverclient "github.com/foomo/contentserver/client" @@ -16,23 +15,20 @@ type ContentServer[indexDocument any] struct { l *zap.Logger contentserverClient *contentserverclient.Client documentProviderFuncs map[pkgx.DocumentType]pkgx.DocumentProviderFunc[indexDocument] - supportedMimeTypes []string - restrictedPaths []string + supportedMimeTypes map[string][]string // key: mime type, value: list of restricted paths } func NewContentServer[indexDocument any]( l *zap.Logger, client *contentserverclient.Client, documentProviderFuncs map[pkgx.DocumentType]pkgx.DocumentProviderFunc[indexDocument], - supportedMimeTypes []string, - restrictedPaths []string, + supportedMimeTypes map[string][]string, ) *ContentServer[indexDocument] { return &ContentServer[indexDocument]{ l: l, contentserverClient: client, documentProviderFuncs: documentProviderFuncs, supportedMimeTypes: supportedMimeTypes, - restrictedPaths: restrictedPaths, } } @@ -73,12 +69,27 @@ func (c ContentServer[indexDocument]) Provide( return documents, nil } -func (c ContentServer[indexDocument]) isRestrictedPath(path string) bool { - for _, restricted := range c.restrictedPaths { +func (c ContentServer[indexDocument]) shouldSkipPath(mimeType, path string) bool { + restrictedPaths, exists := c.supportedMimeTypes[mimeType] + + // MimeType is not supported + if !exists { + return true + } + + // MimeType is supported but no restrictions are defined + if len(restrictedPaths) == 0 { + return false + } + + // Check if path is restricted + for _, restricted := range restrictedPaths { if strings.HasPrefix(path, 
restricted) { return true } } + + // Path is not restricted return false } @@ -108,18 +119,19 @@ func (c ContentServer[indexDocument]) getDocumentIDsByIndexID( nodeMap := createFlatRepoNodeMap(rootRepoNode, map[string]*content.RepoNode{}) documentInfos := make([]pkgx.DocumentInfo, 0, len(nodeMap)) for _, repoNode := range nodeMap { - // filter out restricted paths - if c.isRestrictedPath(repoNode.URI) { - c.l.Warn("Skipping document due to restricted path", zap.String("path", repoNode.URI)) + // If the MIME type is unsupported OR path is restricted, skip it + if c.shouldSkipPath(repoNode.MimeType, repoNode.URI) { + c.l.Warn("Skipping document due to filter rule", + zap.String("path", repoNode.URI), + zap.String("mimeType", repoNode.MimeType), + ) continue } - if slices.Contains(c.supportedMimeTypes, repoNode.MimeType) { - documentInfos = append(documentInfos, pkgx.DocumentInfo{ - DocumentType: pkgx.DocumentType(repoNode.MimeType), - DocumentID: pkgx.DocumentID(repoNode.ID), - }) - } + documentInfos = append(documentInfos, pkgx.DocumentInfo{ + DocumentType: pkgx.DocumentType(repoNode.MimeType), + DocumentID: pkgx.DocumentID(repoNode.ID), + }) } return documentInfos, nil From 7f014afa0617d7fd4e37861034b5c03728de595f Mon Sep 17 00:00:00 2001 From: Miroslav Cvetic Date: Wed, 16 Apr 2025 10:41:07 +0200 Subject: [PATCH 05/11] feat: add queryBy param to SimpleSearch --- pkg/api/api.go | 6 ++---- pkg/api/utils.go | 7 +++++++ pkg/interface.go | 1 + 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/pkg/api/api.go b/pkg/api/api.go index fb0697b..ec44c45 100644 --- a/pkg/api/api.go +++ b/pkg/api/api.go @@ -277,11 +277,9 @@ func (b *BaseAPI[indexDocument, returnType]) SimpleSearch( filterBy map[string][]string, page, perPage int, sortBy string, + queryBy string, ) ([]returnType, pkgx.Scores, int, error) { - // Call buildSearchParams but also set QueryBy explicitly - parameters := buildSearchParams(q, filterBy, page, perPage, sortBy) - parameters.QueryBy = 
pointer.String("title") - + parameters := buildSearchParams(q, filterBy, page, perPage, sortBy, queryBy) return b.ExpertSearch(ctx, index, parameters) } diff --git a/pkg/api/utils.go b/pkg/api/utils.go index 898303b..2b27876 100644 --- a/pkg/api/utils.go +++ b/pkg/api/utils.go @@ -21,6 +21,7 @@ func buildSearchParams( filterBy map[string][]string, page, perPage int, sortBy string, + queryBy string, ) *api.SearchCollectionParams { parameters := &api.SearchCollectionParams{} parameters.Q = pointer.String(q) @@ -33,6 +34,12 @@ func buildSearchParams( parameters.SortBy = pointer.String(sortBy) } + if queryBy != "" { + parameters.QueryBy = pointer.String(queryBy) + } else { + parameters.QueryBy = pointer.String("title") + } + return parameters } diff --git a/pkg/interface.go b/pkg/interface.go index b838883..38b7a72 100644 --- a/pkg/interface.go +++ b/pkg/interface.go @@ -24,6 +24,7 @@ type API[indexDocument any, returnType any] interface { filterBy map[string][]string, page, perPage int, sortBy string, + queryBy string, ) ([]returnType, Scores, int, error) ExpertSearch(ctx context.Context, index IndexID, parameters *api.SearchCollectionParams) ([]returnType, Scores, int, error) Healthz(ctx context.Context) error From cc2074d954ebee9e29be21a4dae8dd04e6630304 Mon Sep 17 00:00:00 2001 From: Miroslav Cvetic Date: Thu, 17 Apr 2025 08:33:35 +0200 Subject: [PATCH 06/11] Revert "feat: add queryBy param to SimpleSearch" This reverts commit 7f014afa0617d7fd4e37861034b5c03728de595f. 
--- pkg/api/api.go | 6 ++++-- pkg/api/utils.go | 7 ------- pkg/interface.go | 1 - 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/pkg/api/api.go b/pkg/api/api.go index ec44c45..fb0697b 100644 --- a/pkg/api/api.go +++ b/pkg/api/api.go @@ -277,9 +277,11 @@ func (b *BaseAPI[indexDocument, returnType]) SimpleSearch( filterBy map[string][]string, page, perPage int, sortBy string, - queryBy string, ) ([]returnType, pkgx.Scores, int, error) { - parameters := buildSearchParams(q, filterBy, page, perPage, sortBy, queryBy) + // Call buildSearchParams but also set QueryBy explicitly + parameters := buildSearchParams(q, filterBy, page, perPage, sortBy) + parameters.QueryBy = pointer.String("title") + return b.ExpertSearch(ctx, index, parameters) } diff --git a/pkg/api/utils.go b/pkg/api/utils.go index 2b27876..898303b 100644 --- a/pkg/api/utils.go +++ b/pkg/api/utils.go @@ -21,7 +21,6 @@ func buildSearchParams( filterBy map[string][]string, page, perPage int, sortBy string, - queryBy string, ) *api.SearchCollectionParams { parameters := &api.SearchCollectionParams{} parameters.Q = pointer.String(q) @@ -34,12 +33,6 @@ func buildSearchParams( parameters.SortBy = pointer.String(sortBy) } - if queryBy != "" { - parameters.QueryBy = pointer.String(queryBy) - } else { - parameters.QueryBy = pointer.String("title") - } - return parameters } diff --git a/pkg/interface.go b/pkg/interface.go index 38b7a72..b838883 100644 --- a/pkg/interface.go +++ b/pkg/interface.go @@ -24,7 +24,6 @@ type API[indexDocument any, returnType any] interface { filterBy map[string][]string, page, perPage int, sortBy string, - queryBy string, ) ([]returnType, Scores, int, error) ExpertSearch(ctx context.Context, index IndexID, parameters *api.SearchCollectionParams) ([]returnType, Scores, int, error) Healthz(ctx context.Context) error From dba6561ba1f7f8bbd6e38098fddc1b9fd0f64016 Mon Sep 17 00:00:00 2001 From: Miroslav Cvetic Date: Wed, 14 May 2025 10:51:21 +0200 Subject: [PATCH 07/11] Reapply 
"feat: add queryBy param to SimpleSearch" This reverts commit cc2074d954ebee9e29be21a4dae8dd04e6630304. --- pkg/api/api.go | 6 ++---- pkg/api/utils.go | 4 ++++ pkg/interface.go | 1 + 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pkg/api/api.go b/pkg/api/api.go index fb0697b..ec44c45 100644 --- a/pkg/api/api.go +++ b/pkg/api/api.go @@ -277,11 +277,9 @@ func (b *BaseAPI[indexDocument, returnType]) SimpleSearch( filterBy map[string][]string, page, perPage int, sortBy string, + queryBy string, ) ([]returnType, pkgx.Scores, int, error) { - // Call buildSearchParams but also set QueryBy explicitly - parameters := buildSearchParams(q, filterBy, page, perPage, sortBy) - parameters.QueryBy = pointer.String("title") - + parameters := buildSearchParams(q, filterBy, page, perPage, sortBy, queryBy) return b.ExpertSearch(ctx, index, parameters) } diff --git a/pkg/api/utils.go b/pkg/api/utils.go index 898303b..28c7d33 100644 --- a/pkg/api/utils.go +++ b/pkg/api/utils.go @@ -21,6 +21,7 @@ func buildSearchParams( filterBy map[string][]string, page, perPage int, sortBy string, + queryBy string, ) *api.SearchCollectionParams { parameters := &api.SearchCollectionParams{} parameters.Q = pointer.String(q) @@ -32,6 +33,9 @@ func buildSearchParams( if sortBy != "" { parameters.SortBy = pointer.String(sortBy) } + if queryBy != "" { + parameters.QueryBy = pointer.String(queryBy) + } return parameters } diff --git a/pkg/interface.go b/pkg/interface.go index b838883..38b7a72 100644 --- a/pkg/interface.go +++ b/pkg/interface.go @@ -24,6 +24,7 @@ type API[indexDocument any, returnType any] interface { filterBy map[string][]string, page, perPage int, sortBy string, + queryBy string, ) ([]returnType, Scores, int, error) ExpertSearch(ctx context.Context, index IndexID, parameters *api.SearchCollectionParams) ([]returnType, Scores, int, error) Healthz(ctx context.Context) error From 12fc7c8448bbf3e2559fe96b2bc8f77b673bddf4 Mon Sep 17 00:00:00 2001 From: Miroslav Cvetic Date: Mon, 19 
May 2025 11:43:15 +0200 Subject: [PATCH 08/11] feat: allow multiple presets --- pkg/api/api.go | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/pkg/api/api.go b/pkg/api/api.go index ec44c45..7266028 100644 --- a/pkg/api/api.go +++ b/pkg/api/api.go @@ -14,15 +14,13 @@ import ( "go.uber.org/zap" ) -const defaultSearchPresetName = "default" - type DocumentConverter[indexDocument any, returnType any] func(indexDocument) returnType type BaseAPI[indexDocument any, returnType any] struct { l *zap.Logger client *typesense.Client collections map[pkgx.IndexID]*api.CollectionSchema - preset *api.PresetUpsertSchema + presets map[string]*api.PresetUpsertSchema revisionID pkgx.RevisionID documentConverter DocumentConverter[indexDocument, returnType] } @@ -31,14 +29,14 @@ func NewBaseAPI[indexDocument any, returnType any]( l *zap.Logger, client *typesense.Client, collections map[pkgx.IndexID]*api.CollectionSchema, - preset *api.PresetUpsertSchema, + presets map[string]*api.PresetUpsertSchema, documentConverter DocumentConverter[indexDocument, returnType], ) *BaseAPI[indexDocument, returnType] { return &BaseAPI[indexDocument, returnType]{ l: l, client: client, collections: collections, - preset: preset, + presets: presets, documentConverter: documentConverter, } } @@ -153,11 +151,11 @@ func (b *BaseAPI[indexDocument, returnType]) Initialize(ctx context.Context) (pk // Step 5: Set the latest revision ID and return b.revisionID = newRevisionID - // Step 6: Ensure search preset is present - if b.preset != nil { - _, err := b.client.Presets().Upsert(ctx, defaultSearchPresetName, b.preset) + // Step 6: ensure search presets are present + for name, preset := range b.presets { + _, err := b.client.Presets().Upsert(ctx, name, preset) if err != nil { - b.l.Error("failed to upsert search preset", zap.Error(err)) + b.l.Error("failed to upsert preset", zap.String("name", name), zap.Error(err)) return "", err } } From 5a3e72f0a0c27959bc39c06e17d189a8ed3fab77 
Mon Sep 17 00:00:00 2001 From: Miroslav Cvetic Date: Mon, 19 May 2025 16:09:24 +0200 Subject: [PATCH 09/11] feat: change simple search endpoint params --- pkg/api/api.go | 13 ++++------- pkg/api/utils.go | 59 ++++++++++++++++-------------------------------- pkg/interface.go | 10 +------- pkg/vo.go | 13 ++++++++++- 4 files changed, 37 insertions(+), 58 deletions(-) diff --git a/pkg/api/api.go b/pkg/api/api.go index 7266028..2abe97f 100644 --- a/pkg/api/api.go +++ b/pkg/api/api.go @@ -266,19 +266,14 @@ func (b *BaseAPI[indexDocument, returnType]) RevertRevision(ctx context.Context, return nil } -// SimpleSearch will perform a search operation on the given index -// it will return the documents and the scores +// SimpleSearch will perform a search operation on the given index using basic SearchParameters input func (b *BaseAPI[indexDocument, returnType]) SimpleSearch( ctx context.Context, index pkgx.IndexID, - q string, - filterBy map[string][]string, - page, perPage int, - sortBy string, - queryBy string, + parameters *pkgx.SearchParameters, ) ([]returnType, pkgx.Scores, int, error) { - parameters := buildSearchParams(q, filterBy, page, perPage, sortBy, queryBy) - return b.ExpertSearch(ctx, index, parameters) + searchParams := buildSearchParams(parameters) + return b.ExpertSearch(ctx, index, searchParams) } // ExpertSearch performs a search operation on the given index diff --git a/pkg/api/utils.go b/pkg/api/utils.go index 28c7d33..2bb73a5 100644 --- a/pkg/api/utils.go +++ b/pkg/api/utils.go @@ -13,54 +13,35 @@ import ( "go.uber.org/zap" ) +const defaultSearchPresetName = "default" + // buildSearchParams will return the search collection parameters -// this is meant as a utility function to create the search collection parameters -// for the typesense search API without any knowledge of the typesense API func buildSearchParams( - q string, - filterBy map[string][]string, - page, perPage int, - sortBy string, - queryBy string, + params *pkgx.SearchParameters, ) 
*api.SearchCollectionParams { - parameters := &api.SearchCollectionParams{} - parameters.Q = pointer.String(q) - if filterByString := formatFilterQuery(filterBy); filterByString != "" { - parameters.FilterBy = pointer.String(filterByString) - } - parameters.Page = pointer.Int(page) - parameters.PerPage = pointer.Int(perPage) - if sortBy != "" { - parameters.SortBy = pointer.String(sortBy) - } - if queryBy != "" { - parameters.QueryBy = pointer.String(queryBy) + if params.Page < 1 { + params.Page = 1 } - return parameters -} - -func formatFilterQuery(filterBy map[string][]string) string { - if filterBy == nil { - return "" + searchParams := &api.SearchCollectionParams{ + Page: pointer.Int(params.Page), } - var filterClauses []string - for key, values := range filterBy { - if len(values) == 1 { - // Single value → Use `:=` operator - filterClauses = append(filterClauses, fmt.Sprintf("%s:=\"%s\"", key, values[0])) - } else { - // Multiple values → Use `["val1","val2"]` array syntax - formattedValues := []string{} - for _, v := range values { - formattedValues = append(formattedValues, fmt.Sprintf("\"%s\"", v)) - } - filterClauses = append(filterClauses, fmt.Sprintf("%s:[%s]", key, strings.Join(formattedValues, ","))) - } + if params.PresetName != "" { + searchParams.Preset = pointer.String(params.PresetName) + } else { + searchParams.Preset = pointer.String(defaultSearchPresetName) } - return strings.Join(filterClauses, " && ") + if params.Query != "" { + searchParams.Q = pointer.String(params.Query) + } + + if params.Modify != nil { + params.Modify(searchParams) + } + + return searchParams } func (b *BaseAPI[indexDocument, returnType]) generateRevisionID() pkgx.RevisionID { diff --git a/pkg/interface.go b/pkg/interface.go index 38b7a72..572c3fb 100644 --- a/pkg/interface.go +++ b/pkg/interface.go @@ -17,15 +17,7 @@ type API[indexDocument any, returnType any] interface { Initialize(ctx context.Context) (RevisionID, error) // perform a search operation on the given 
index - SimpleSearch( - ctx context.Context, - index IndexID, - q string, - filterBy map[string][]string, - page, perPage int, - sortBy string, - queryBy string, - ) ([]returnType, Scores, int, error) + SimpleSearch(ctx context.Context, index IndexID, parameters *SearchParameters) ([]returnType, Scores, int, error) ExpertSearch(ctx context.Context, index IndexID, parameters *api.SearchCollectionParams) ([]returnType, Scores, int, error) Healthz(ctx context.Context) error Indices() ([]IndexID, error) diff --git a/pkg/vo.go b/pkg/vo.go index b7acbb2..a90027e 100644 --- a/pkg/vo.go +++ b/pkg/vo.go @@ -1,6 +1,10 @@ package typesense -import "context" +import ( + "context" + + "github.com/typesense/typesense-go/v3/typesense/api" +) type RevisionID string type Query string @@ -26,3 +30,10 @@ type DocumentInfo struct { DocumentType DocumentType DocumentID DocumentID } + +type SearchParameters struct { + Query string + Page int + PresetName string + Modify func(params *api.SearchCollectionParams) +} From 1f5d1f9849b346fe65e626889424bc91c02074cf Mon Sep 17 00:00:00 2001 From: Miroslav Cvetic Date: Tue, 20 May 2025 14:49:22 +0200 Subject: [PATCH 10/11] feat: remove restricted paths and use node.Hidden field + supported mime type list to filter out documents --- pkg/indexing/contentserver.go | 37 +++++++---------------------------- 1 file changed, 7 insertions(+), 30 deletions(-) diff --git a/pkg/indexing/contentserver.go b/pkg/indexing/contentserver.go index 1e5a429..3cb9280 100644 --- a/pkg/indexing/contentserver.go +++ b/pkg/indexing/contentserver.go @@ -3,7 +3,8 @@ package typesenseindexing import ( "context" "fmt" - "strings" + + "slices" contentserverclient "github.com/foomo/contentserver/client" "github.com/foomo/contentserver/content" @@ -15,14 +16,14 @@ type ContentServer[indexDocument any] struct { l *zap.Logger contentserverClient *contentserverclient.Client documentProviderFuncs map[pkgx.DocumentType]pkgx.DocumentProviderFunc[indexDocument] - supportedMimeTypes 
map[string][]string // key: mime type, value: list of restricted paths + supportedMimeTypes []string } func NewContentServer[indexDocument any]( l *zap.Logger, client *contentserverclient.Client, documentProviderFuncs map[pkgx.DocumentType]pkgx.DocumentProviderFunc[indexDocument], - supportedMimeTypes map[string][]string, + supportedMimeTypes []string, ) *ContentServer[indexDocument] { return &ContentServer[indexDocument]{ l: l, @@ -69,30 +70,6 @@ func (c ContentServer[indexDocument]) Provide( return documents, nil } -func (c ContentServer[indexDocument]) shouldSkipPath(mimeType, path string) bool { - restrictedPaths, exists := c.supportedMimeTypes[mimeType] - - // MimeType is not supported - if !exists { - return true - } - - // MimeType is supported but no restrictions are defined - if len(restrictedPaths) == 0 { - return false - } - - // Check if path is restricted - for _, restricted := range restrictedPaths { - if strings.HasPrefix(path, restricted) { - return true - } - } - - // Path is not restricted - return false -} - func (c ContentServer[indexDocument]) ProvidePaged( ctx context.Context, indexID pkgx.IndexID, @@ -119,11 +96,11 @@ func (c ContentServer[indexDocument]) getDocumentIDsByIndexID( nodeMap := createFlatRepoNodeMap(rootRepoNode, map[string]*content.RepoNode{}) documentInfos := make([]pkgx.DocumentInfo, 0, len(nodeMap)) for _, repoNode := range nodeMap { - // If the MIME type is unsupported OR path is restricted, skip it - if c.shouldSkipPath(repoNode.MimeType, repoNode.URI) { - c.l.Warn("Skipping document due to filter rule", + if repoNode.Hidden || !slices.Contains(c.supportedMimeTypes, repoNode.MimeType) { + c.l.Warn("Skipping document indexing", zap.String("path", repoNode.URI), zap.String("mimeType", repoNode.MimeType), + zap.Bool("hidden", repoNode.Hidden), ) continue } From 32f94353e6172b3aa947388c0f71826803b7f6e3 Mon Sep 17 00:00:00 2001 From: Daniel Thomas Date: Wed, 21 May 2025 14:54:24 +0200 Subject: [PATCH 11/11] feat: use noIndex 
flag to skip documents for indexing --- .husky.yaml | 2 +- pkg/indexing/contentserver.go | 66 ++++++++++++++++++++++++++--------- 2 files changed, 50 insertions(+), 18 deletions(-) diff --git a/.husky.yaml b/.husky.yaml index b82e55d..0284970 100644 --- a/.husky.yaml +++ b/.husky.yaml @@ -1,4 +1,4 @@ - hooks: +hooks: pre-commit: - golangci-lint run --fast-only - husky lint-staged diff --git a/pkg/indexing/contentserver.go b/pkg/indexing/contentserver.go index 3cb9280..4c4a75b 100644 --- a/pkg/indexing/contentserver.go +++ b/pkg/indexing/contentserver.go @@ -12,6 +12,8 @@ import ( "go.uber.org/zap" ) +const ContentserverDataAttributeNoIndex = "typesenseIndexing-noIndex" + type ContentServer[indexDocument any] struct { l *zap.Logger contentserverClient *contentserverclient.Client @@ -33,6 +35,12 @@ func NewContentServer[indexDocument any]( } } +// Provide retrieves documents for the given indexID from the content server. +// It fetches the document IDs, retrieves the URLs for those IDs, and then uses the +// document provider functions to create the documents. +// The documents are returned as a slice of pointers to the indexDocument type. +// If a document provider function is not available for a specific document type, +// a warning is logged and that document is skipped. 
func (c ContentServer[indexDocument]) Provide( ctx context.Context, indexID pkgx.IndexID, @@ -50,7 +58,10 @@ func (c ContentServer[indexDocument]) Provide( documents := make([]*indexDocument, len(documentInfos)) for index, documentInfo := range documentInfos { if documentProvider, ok := c.documentProviderFuncs[documentInfo.DocumentType]; !ok { - c.l.Warn("no document provider available for document type", zap.String("documentType", string(documentInfo.DocumentType))) + c.l.Warn( + "no document provider available for document type", + zap.String("documentType", string(documentInfo.DocumentType)), + ) } else { document, err := documentProvider(ctx, indexID, documentInfo.DocumentID, urlsByIDs) if err != nil { @@ -70,6 +81,7 @@ func (c ContentServer[indexDocument]) Provide( return documents, nil } +// ProvidePaged func (c ContentServer[indexDocument]) ProvidePaged( ctx context.Context, indexID pkgx.IndexID, @@ -96,8 +108,8 @@ func (c ContentServer[indexDocument]) getDocumentIDsByIndexID( nodeMap := createFlatRepoNodeMap(rootRepoNode, map[string]*content.RepoNode{}) documentInfos := make([]pkgx.DocumentInfo, 0, len(nodeMap)) for _, repoNode := range nodeMap { - if repoNode.Hidden || !slices.Contains(c.supportedMimeTypes, repoNode.MimeType) { - c.l.Warn("Skipping document indexing", + if !includeNode(c.supportedMimeTypes, repoNode) { + c.l.Debug("skipping document indexing", zap.String("path", repoNode.URI), zap.String("mimeType", repoNode.MimeType), zap.Bool("hidden", repoNode.Hidden), @@ -114,20 +126,8 @@ func (c ContentServer[indexDocument]) getDocumentIDsByIndexID( return documentInfos, nil } -// createFlatRepoNodeMap recursively retrieves all nodes from the tree and returns them in a flat map. -func createFlatRepoNodeMap(node *content.RepoNode, nodeMap map[string]*content.RepoNode) map[string]*content.RepoNode { - if node == nil { - return nodeMap - } - // Add the current node to the list. - nodeMap[node.ID] = node - // Recursively process child nodes. 
- for _, child := range node.Nodes { - nodeMap = createFlatRepoNodeMap(child, nodeMap) - } - return nodeMap -} - +// fetchURLsByDocumentIDs fetches the URLs for the given document IDs from the content server. +// It uses the contentserverClient to retrieve the URIs and maps them to DocumentID. func (c ContentServer[indexDocument]) fetchURLsByDocumentIDs( ctx context.Context, indexID pkgx.IndexID, @@ -148,6 +148,8 @@ func (c ContentServer[indexDocument]) fetchURLsByDocumentIDs( return convertMapStringToDocumentID(uriMap), nil } +// convertMapStringToDocumentID converts a map with string keys to a map with DocumentID keys. +// The keys in the input map are converted to DocumentID type, while the values remain strings. func convertMapStringToDocumentID(input map[string]string) map[pkgx.DocumentID]string { output := make(map[pkgx.DocumentID]string, len(input)) for key, value := range input { @@ -155,3 +157,33 @@ func convertMapStringToDocumentID(input map[string]string) map[pkgx.DocumentID]s } return output } + +// includeNode checks if the node should be included in the indexing process. +// It checks if the node is nil, if it has the noIndex attribute set to true, +// and if its mime type is in the list of supported mime types. +func includeNode(supportedMimeTypes []string, node *content.RepoNode) bool { + if node == nil { + return false + } + if noIndex, noIndexSet := node.Data[ContentserverDataAttributeNoIndex].(bool); noIndexSet && noIndex { + return false + } + if !slices.Contains(supportedMimeTypes, node.MimeType) { + return false + } + return true +} + +// createFlatRepoNodeMap recursively retrieves all nodes from the tree and returns them in a flat map. +func createFlatRepoNodeMap(node *content.RepoNode, nodeMap map[string]*content.RepoNode) map[string]*content.RepoNode { + if node == nil { + return nodeMap + } + // Add the current node to the list. + nodeMap[node.ID] = node + // Recursively process child nodes. 
+ for _, child := range node.Nodes { + nodeMap = createFlatRepoNodeMap(child, nodeMap) + } + return nodeMap +}