mirror of
https://github.com/foomo/typesense.git
synced 2025-10-16 12:45:37 +00:00
190 lines
5.8 KiB
Go
190 lines
5.8 KiB
Go
package typesenseindexing
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
|
|
"slices"
|
|
|
|
contentserverclient "github.com/foomo/contentserver/client"
|
|
"github.com/foomo/contentserver/content"
|
|
pkgx "github.com/foomo/typesense/pkg"
|
|
"go.uber.org/zap"
|
|
)
|
|
|
|
// ContentserverDataAttributeNoIndex is the key looked up in a content server
// node's Data; when its value is the boolean true, includeNode excludes the
// node from indexing.
const ContentserverDataAttributeNoIndex = "typesenseIndexing-noIndex"
|
|
|
|
type ContentServer[indexDocument any] struct {
|
|
l *zap.Logger
|
|
contentserverClient *contentserverclient.Client
|
|
documentProviderFuncs map[pkgx.DocumentType]pkgx.DocumentProviderFunc[indexDocument]
|
|
supportedMimeTypes []string
|
|
}
|
|
|
|
func NewContentServer[indexDocument any](
|
|
l *zap.Logger,
|
|
client *contentserverclient.Client,
|
|
documentProviderFuncs map[pkgx.DocumentType]pkgx.DocumentProviderFunc[indexDocument],
|
|
supportedMimeTypes []string,
|
|
) *ContentServer[indexDocument] {
|
|
return &ContentServer[indexDocument]{
|
|
l: l,
|
|
contentserverClient: client,
|
|
documentProviderFuncs: documentProviderFuncs,
|
|
supportedMimeTypes: supportedMimeTypes,
|
|
}
|
|
}
|
|
|
|
// Provide retrieves documents for the given indexID from the content server.
|
|
// It fetches the document IDs, retrieves the URLs for those IDs, and then uses the
|
|
// document provider functions to create the documents.
|
|
// The documents are returned as a slice of pointers to the indexDocument type.
|
|
// If a document provider function is not available for a specific document type,
|
|
// a warning is logged and that document is skipped.
|
|
func (c ContentServer[indexDocument]) Provide(
|
|
ctx context.Context,
|
|
indexID pkgx.IndexID,
|
|
) ([]*indexDocument, error) {
|
|
documentInfos, err := c.getDocumentIDsByIndexID(ctx, indexID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
urlsByIDs, err := c.fetchURLsByDocumentIDs(ctx, indexID, documentInfos)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
documents := make([]*indexDocument, len(documentInfos))
|
|
for index, documentInfo := range documentInfos {
|
|
if documentProvider, ok := c.documentProviderFuncs[documentInfo.DocumentType]; !ok {
|
|
c.l.Warn(
|
|
"no document provider available for document type",
|
|
zap.String("documentType", string(documentInfo.DocumentType)),
|
|
)
|
|
} else {
|
|
document, err := documentProvider(ctx, indexID, documentInfo.DocumentID, urlsByIDs)
|
|
if err != nil {
|
|
c.l.Error(
|
|
"index document not created",
|
|
zap.Error(err),
|
|
zap.String("documentID", string(documentInfo.DocumentID)),
|
|
zap.String("documentType", string(documentInfo.DocumentType)),
|
|
)
|
|
continue
|
|
}
|
|
if document != nil {
|
|
documents[index] = document
|
|
}
|
|
}
|
|
}
|
|
return documents, nil
|
|
}
|
|
|
|
// ProvidePaged
|
|
func (c ContentServer[indexDocument]) ProvidePaged(
|
|
ctx context.Context,
|
|
indexID pkgx.IndexID,
|
|
offset int,
|
|
) ([]*indexDocument, int, error) {
|
|
panic("implement me")
|
|
}
|
|
|
|
func (c ContentServer[indexDocument]) getDocumentIDsByIndexID(
|
|
ctx context.Context,
|
|
indexID pkgx.IndexID,
|
|
) ([]pkgx.DocumentInfo, error) {
|
|
// get the contentserver dimension defined by indexID
|
|
// create the list of document infos
|
|
repo, err := c.contentserverClient.GetRepo(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
rootRepoNode, ok := repo[string(indexID)]
|
|
if !ok {
|
|
return nil, fmt.Errorf("contenserver dimension %s not found", indexID)
|
|
}
|
|
|
|
nodeMap := createFlatRepoNodeMap(rootRepoNode, map[string]*content.RepoNode{})
|
|
documentInfos := make([]pkgx.DocumentInfo, 0, len(nodeMap))
|
|
for _, repoNode := range nodeMap {
|
|
if !includeNode(c.supportedMimeTypes, repoNode) {
|
|
c.l.Debug("skipping document indexing",
|
|
zap.String("path", repoNode.URI),
|
|
zap.String("mimeType", repoNode.MimeType),
|
|
zap.Bool("hidden", repoNode.Hidden),
|
|
)
|
|
continue
|
|
}
|
|
|
|
documentInfos = append(documentInfos, pkgx.DocumentInfo{
|
|
DocumentType: pkgx.DocumentType(repoNode.MimeType),
|
|
DocumentID: pkgx.DocumentID(repoNode.ID),
|
|
})
|
|
}
|
|
|
|
return documentInfos, nil
|
|
}
|
|
|
|
// fetchURLsByDocumentIDs fetches the URLs for the given document IDs from the content server.
|
|
// It uses the contentserverClient to retrieve the URIs and maps them to DocumentID.
|
|
func (c ContentServer[indexDocument]) fetchURLsByDocumentIDs(
|
|
ctx context.Context,
|
|
indexID pkgx.IndexID,
|
|
documentInfos []pkgx.DocumentInfo,
|
|
) (map[pkgx.DocumentID]string, error) {
|
|
ids := make([]string, len(documentInfos))
|
|
|
|
for i, documentInfo := range documentInfos {
|
|
ids[i] = string(documentInfo.DocumentID)
|
|
}
|
|
|
|
uriMap, err := c.contentserverClient.GetURIs(ctx, string(indexID), ids)
|
|
if err != nil {
|
|
c.l.Error("failed to get URIs", zap.Error(err))
|
|
return nil, err
|
|
}
|
|
|
|
return convertMapStringToDocumentID(uriMap), nil
|
|
}
|
|
|
|
// convertMapStringToDocumentID converts a map with string keys to a map with DocumentID keys.
|
|
// The keys in the input map are converted to DocumentID type, while the values remain strings.
|
|
func convertMapStringToDocumentID(input map[string]string) map[pkgx.DocumentID]string {
|
|
output := make(map[pkgx.DocumentID]string, len(input))
|
|
for key, value := range input {
|
|
output[pkgx.DocumentID(key)] = value
|
|
}
|
|
return output
|
|
}
|
|
|
|
// includeNode checks if the node should be included in the indexing process.
|
|
// It checks if the node is nil, if it has the noIndex attribute set to true,
|
|
// and if its mime type is in the list of supported mime types.
|
|
func includeNode(supportedMimeTypes []string, node *content.RepoNode) bool {
|
|
if node == nil {
|
|
return false
|
|
}
|
|
if noIndex, noIndexSet := node.Data[ContentserverDataAttributeNoIndex].(bool); noIndexSet && noIndex {
|
|
return false
|
|
}
|
|
if !slices.Contains(supportedMimeTypes, node.MimeType) {
|
|
return false
|
|
}
|
|
return true
|
|
}
|
|
|
|
// createFlatRepoNodeMap recursively retrieves all nodes from the tree and returns them in a flat map.
|
|
func createFlatRepoNodeMap(node *content.RepoNode, nodeMap map[string]*content.RepoNode) map[string]*content.RepoNode {
|
|
if node == nil {
|
|
return nodeMap
|
|
}
|
|
// Add the current node to the list.
|
|
nodeMap[node.ID] = node
|
|
// Recursively process child nodes.
|
|
for _, child := range node.Nodes {
|
|
nodeMap = createFlatRepoNodeMap(child, nodeMap)
|
|
}
|
|
return nodeMap
|
|
}
|