mirror of
https://github.com/foomo/typesense.git
synced 2025-10-16 12:45:37 +00:00
feat: use noIndex flag to skip documents for indexing
This commit is contained in:
parent
24460f6be7
commit
32f94353e6
@ -1,4 +1,4 @@
|
||||
hooks:
|
||||
hooks:
|
||||
pre-commit:
|
||||
- golangci-lint run --fast-only
|
||||
- husky lint-staged
|
||||
|
||||
@ -12,6 +12,8 @@ import (
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// ContentserverDataAttributeNoIndex is the key looked up in a repo node's Data map
// to decide whether the node is skipped for indexing. includeNode excludes a node
// only when the value stored under this key is the boolean true.
const ContentserverDataAttributeNoIndex = "typesenseIndexing-noIndex"
|
||||
|
||||
type ContentServer[indexDocument any] struct {
|
||||
l *zap.Logger
|
||||
contentserverClient *contentserverclient.Client
|
||||
@ -33,6 +35,12 @@ func NewContentServer[indexDocument any](
|
||||
}
|
||||
}
|
||||
|
||||
// Provide retrieves documents for the given indexID from the content server.
|
||||
// It fetches the document IDs, retrieves the URLs for those IDs, and then uses the
|
||||
// document provider functions to create the documents.
|
||||
// The documents are returned as a slice of pointers to the indexDocument type.
|
||||
// If a document provider function is not available for a specific document type,
|
||||
// a warning is logged and that document is skipped.
|
||||
func (c ContentServer[indexDocument]) Provide(
|
||||
ctx context.Context,
|
||||
indexID pkgx.IndexID,
|
||||
@ -50,7 +58,10 @@ func (c ContentServer[indexDocument]) Provide(
|
||||
documents := make([]*indexDocument, len(documentInfos))
|
||||
for index, documentInfo := range documentInfos {
|
||||
if documentProvider, ok := c.documentProviderFuncs[documentInfo.DocumentType]; !ok {
|
||||
c.l.Warn("no document provider available for document type", zap.String("documentType", string(documentInfo.DocumentType)))
|
||||
c.l.Warn(
|
||||
"no document provider available for document type",
|
||||
zap.String("documentType", string(documentInfo.DocumentType)),
|
||||
)
|
||||
} else {
|
||||
document, err := documentProvider(ctx, indexID, documentInfo.DocumentID, urlsByIDs)
|
||||
if err != nil {
|
||||
@ -70,6 +81,7 @@ func (c ContentServer[indexDocument]) Provide(
|
||||
return documents, nil
|
||||
}
|
||||
|
||||
// ProvidePaged
|
||||
func (c ContentServer[indexDocument]) ProvidePaged(
|
||||
ctx context.Context,
|
||||
indexID pkgx.IndexID,
|
||||
@ -96,8 +108,8 @@ func (c ContentServer[indexDocument]) getDocumentIDsByIndexID(
|
||||
nodeMap := createFlatRepoNodeMap(rootRepoNode, map[string]*content.RepoNode{})
|
||||
documentInfos := make([]pkgx.DocumentInfo, 0, len(nodeMap))
|
||||
for _, repoNode := range nodeMap {
|
||||
if repoNode.Hidden || !slices.Contains(c.supportedMimeTypes, repoNode.MimeType) {
|
||||
c.l.Warn("Skipping document indexing",
|
||||
if !includeNode(c.supportedMimeTypes, repoNode) {
|
||||
c.l.Debug("skipping document indexing",
|
||||
zap.String("path", repoNode.URI),
|
||||
zap.String("mimeType", repoNode.MimeType),
|
||||
zap.Bool("hidden", repoNode.Hidden),
|
||||
@ -114,20 +126,8 @@ func (c ContentServer[indexDocument]) getDocumentIDsByIndexID(
|
||||
return documentInfos, nil
|
||||
}
|
||||
|
||||
// createFlatRepoNodeMap recursively retrieves all nodes from the tree and returns them in a flat map.
|
||||
func createFlatRepoNodeMap(node *content.RepoNode, nodeMap map[string]*content.RepoNode) map[string]*content.RepoNode {
|
||||
if node == nil {
|
||||
return nodeMap
|
||||
}
|
||||
// Add the current node to the list.
|
||||
nodeMap[node.ID] = node
|
||||
// Recursively process child nodes.
|
||||
for _, child := range node.Nodes {
|
||||
nodeMap = createFlatRepoNodeMap(child, nodeMap)
|
||||
}
|
||||
return nodeMap
|
||||
}
|
||||
|
||||
// fetchURLsByDocumentIDs fetches the URLs for the given document IDs from the content server.
|
||||
// It uses the contentserverClient to retrieve the URIs and maps them to DocumentID.
|
||||
func (c ContentServer[indexDocument]) fetchURLsByDocumentIDs(
|
||||
ctx context.Context,
|
||||
indexID pkgx.IndexID,
|
||||
@ -148,6 +148,8 @@ func (c ContentServer[indexDocument]) fetchURLsByDocumentIDs(
|
||||
return convertMapStringToDocumentID(uriMap), nil
|
||||
}
|
||||
|
||||
// convertMapStringToDocumentID converts a map with string keys to a map with DocumentID keys.
|
||||
// The keys in the input map are converted to DocumentID type, while the values remain strings.
|
||||
func convertMapStringToDocumentID(input map[string]string) map[pkgx.DocumentID]string {
|
||||
output := make(map[pkgx.DocumentID]string, len(input))
|
||||
for key, value := range input {
|
||||
@ -155,3 +157,33 @@ func convertMapStringToDocumentID(input map[string]string) map[pkgx.DocumentID]s
|
||||
}
|
||||
return output
|
||||
}
|
||||
|
||||
// includeNode checks if the node should be included in the indexing process.
|
||||
// It checks if the node is nil, if it has the noIndex attribute set to true,
|
||||
// and if its mime type is in the list of supported mime types.
|
||||
func includeNode(supportedMimeTypes []string, node *content.RepoNode) bool {
|
||||
if node == nil {
|
||||
return false
|
||||
}
|
||||
if noIndex, noIndexSet := node.Data[ContentserverDataAttributeNoIndex].(bool); noIndexSet && noIndex {
|
||||
return false
|
||||
}
|
||||
if !slices.Contains(supportedMimeTypes, node.MimeType) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// createFlatRepoNodeMap recursively retrieves all nodes from the tree and returns them in a flat map.
|
||||
func createFlatRepoNodeMap(node *content.RepoNode, nodeMap map[string]*content.RepoNode) map[string]*content.RepoNode {
|
||||
if node == nil {
|
||||
return nodeMap
|
||||
}
|
||||
// Add the current node to the list.
|
||||
nodeMap[node.ID] = node
|
||||
// Recursively process child nodes.
|
||||
for _, child := range node.Nodes {
|
||||
nodeMap = createFlatRepoNodeMap(child, nodeMap)
|
||||
}
|
||||
return nodeMap
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user