feat: unicode text janitor

This commit is contained in:
Cristian Vidmar 2023-03-03 13:44:44 +01:00
parent 56c31f215a
commit 1783c724c8
13 changed files with 692 additions and 152 deletions

View File

@ -3,7 +3,6 @@ package erm
import (
"fmt"
"go/format"
"io/ioutil"
"os"
"path/filepath"
"strings"
@ -14,7 +13,7 @@ import (
)
func formatAndFixImports(filename string) error {
sourceBytes, errReadFile := ioutil.ReadFile(filename)
sourceBytes, errReadFile := os.ReadFile(filename)
if errReadFile != nil {
return errReadFile
}
@ -26,7 +25,7 @@ func formatAndFixImports(filename string) error {
if errProcess != nil {
return errProcess
}
return ioutil.WriteFile(filename, finalSource, 0644)
return os.WriteFile(filename, finalSource, 0o644)
}
func generate(filename string, tpl []byte, conf spaceConf) error {

View File

@ -9,11 +9,12 @@ import (
"fmt"
"html"
"io"
"io/ioutil"
"os"
"regexp"
"strings"
"sync"
"time"
"unicode"
"github.com/foomo/contentful"
"golang.org/x/sync/errgroup"
)
@ -62,12 +63,13 @@ type ContentfulClient struct {
args ...interface{},
)
logLevel int
optimisticPageSize uint16 // Start downloading entries at this page size
optimisticPageSize uint16
SpaceID string
offline bool
offlineTemp offlineTemp
sync bool
syncToken string
textJanitor bool
}
type offlineTemp struct {
@ -265,6 +267,14 @@ func (cc *ContentfulClient) DeleteAssetFromCache(key string) error {
return cc.deleteAssetFromCache(key)
}
// DisableTextJanitor switches the client's textJanitor flag off.
// The flag is a plain field assignment; no lock is taken here.
func (cc *ContentfulClient) DisableTextJanitor() {
cc.textJanitor = false
}
// EnableTextJanitor switches the client's textJanitor flag on.
// The flag is a plain field assignment; no lock is taken here.
func (cc *ContentfulClient) EnableTextJanitor() {
cc.textJanitor = true
}
func (cc *ContentfulClient) GetAllAssets() (map[string]*contentful.Asset, error) {
return cc.getAllAssets(true)
}
@ -481,7 +491,7 @@ func NewContentfulClient(spaceID string, clientMode ClientMode, clientKey string
return cc, nil
}
func NewOfflineContentfulClient(filename string, logFn func(fields map[string]interface{}, level int, args ...interface{}), logLevel int, cacheAssets bool) (*ContentfulClient, error) {
func NewOfflineContentfulClient(filename string, logFn func(fields map[string]interface{}, level int, args ...interface{}), logLevel int, cacheAssets bool, textJanitor bool) (*ContentfulClient, error) {
offlineTemp, err := getOfflineSpaceFromFile(filename)
if err != nil {
return nil, fmt.Errorf("NewOfflineContentfulClient could not parse space export file: %v", err)
@ -514,6 +524,7 @@ func NewOfflineContentfulClient(filename string, logFn func(fields map[string]in
SpaceID: "OFFLINE",
offline: true,
offlineTemp: *offlineTemp,
textJanitor: textJanitor,
}
if cc.logFn != nil && cc.logLevel <= LogInfo {
cc.logFn(map[string]interface{}{"entries": len(offlineTemp.Entries),"assets": len(offlineTemp.Assets)}, LogInfo, InfoLoadingFromFile)
@ -953,7 +964,7 @@ func (cc *ContentfulClient) getAllAssets(tryCacheFirst bool) (map[string]*conten
}
func getOfflineSpaceFromFile(filename string) (*offlineTemp, error) {
fileBytes, err := ioutil.ReadFile(filename)
fileBytes, err := os.ReadFile(filename)
if err != nil {
return nil, fmt.Errorf("getOfflineSpaceFromFile could not read space export file: %v", err)
}
@ -1519,3 +1530,90 @@ func commonGetParents(cc *ContentfulClient, id string, contentType []string) (pa
}
return parents, nil
}
// Unicode clean-up
// cleanUpStringField returns a new map holding, for every locale of field,
// the value passed through stripInvisibleUnicodeChars. The input map is left
// untouched; a nil or empty input yields an empty, non-nil map.
func cleanUpStringField(field map[string]string) map[string]string {
	sanitized := make(map[string]string, len(field))
	for loc := range field {
		sanitized[loc] = stripInvisibleUnicodeChars(field[loc])
	}
	return sanitized
}
// cleanUpStringSliceField returns a new map holding, for every locale of
// field, a fresh slice whose elements have been passed through
// stripInvisibleUnicodeChars. Neither the input map nor its slices are
// modified. Every locale maps to a non-nil slice, even when the source slice
// is nil or empty.
func cleanUpStringSliceField(field map[string][]string) map[string][]string {
	sanitized := make(map[string][]string, len(field))
	for loc, elems := range field {
		// make always hands back a non-nil slice, including for length zero.
		cleaned := make([]string, len(elems))
		for i := range elems {
			cleaned[i] = stripInvisibleUnicodeChars(elems[i])
		}
		sanitized[loc] = cleaned
	}
	return sanitized
}
// cleanUpRichTextField returns a new map holding, for every locale of field,
// the rich text document converted to a *RichTextGenericNode tree whose text
// values have been passed through stripInvisibleUnicodeChars.
//
// Locales whose value is nil are carried over as nil: converting nil through
// JSON would otherwise produce an empty, non-nil node and turn "no content"
// into a bogus document.
//
// If any locale's value cannot be converted, the original field is returned
// unchanged (all-or-nothing), so callers never see a half-cleaned field.
func cleanUpRichTextField(field map[string]interface{}) map[string]interface{} {
	cleanField := make(map[string]interface{}, len(field))
	for locale, value := range field {
		if value == nil {
			cleanField[locale] = nil
			continue
		}
		node, err := objectToRichTextGenericNode(value)
		if err != nil {
			// Best effort: fall back to the untouched input.
			return field
		}
		cleanField[locale] = cleanUpRichTextIterateNode(node)
	}
	return cleanField
}
// cleanUpRichTextIterateNode returns a cleaned copy of the rich text tree
// rooted at node: NodeType, Data and Marks are carried over as they are,
// Value is passed through stripInvisibleUnicodeChars and every child in
// Content is cleaned recursively. The input tree is not modified.
//
// A nil node yields nil, so a nil entry in a Content slice (for example a JSON
// null child) is preserved in place rather than causing a nil dereference.
func cleanUpRichTextIterateNode(node *RichTextGenericNode) *RichTextGenericNode {
	if node == nil {
		return nil
	}
	cleanNode := &RichTextGenericNode{
		NodeType: node.NodeType,
		Data:     node.Data,
		Value:    stripInvisibleUnicodeChars(node.Value),
		Marks:    node.Marks,
	}
	// Content stays nil when there are no children, as before.
	if len(node.Content) > 0 {
		cleanNode.Content = make([]*RichTextGenericNode, 0, len(node.Content))
	}
	for _, childNode := range node.Content {
		cleanNode.Content = append(cleanNode.Content, cleanUpRichTextIterateNode(childNode))
	}
	return cleanNode
}
// isFieldRichText reports whether field looks like a localized rich text
// field. Nil locale values are skipped. It returns true as soon as a value
// converts to a node of type "document" with non-nil Content, and false as
// soon as a value fails to convert. If no value qualifies, it returns false.
func isFieldRichText(field map[string]interface{}) bool {
	for _, localized := range field {
		if localized == nil {
			continue
		}
		candidate, err := objectToRichTextGenericNode(localized)
		switch {
		case err != nil:
			return false
		case candidate.NodeType == "document" && candidate.Content != nil:
			return true
		}
	}
	return false
}
// objectToRichTextGenericNode converts an arbitrary value into a
// RichTextGenericNode by marshalling it to JSON and unmarshalling the result
// into a fresh node. It returns a nil node together with the error if either
// step fails.
func objectToRichTextGenericNode(value interface{}) (*RichTextGenericNode, error) {
	encoded, err := json.Marshal(value)
	if err != nil {
		return nil, err
	}
	var decoded RichTextGenericNode
	if err := json.Unmarshal(encoded, &decoded); err != nil {
		return nil, err
	}
	return &decoded, nil
}
// stripInvisibleUnicodeChars returns dirty with every rune removed that is
// neither graphic (unicode.IsGraphic) nor a control character
// (unicode.IsControl). Control characters such as tab and newline are
// therefore kept, while runes like U+2028 LINE SEPARATOR or U+200B ZERO WIDTH
// SPACE are dropped.
func stripInvisibleUnicodeChars(dirty string) string {
	keepGraphicOrControl := func(r rune) rune {
		if !unicode.IsGraphic(r) && !unicode.IsControl(r) {
			// A negative result tells strings.Map to drop the rune.
			return -1
		}
		return r
	}
	return strings.Map(keepGraphicOrControl, dirty)
}

View File

@ -632,6 +632,7 @@ func (cc *ContentfulClient) cacheAll{{ firstCap $contentType.Sys.ID }}(ctx conte
Items: []interface{}{},
}
cc.cacheMutex.sharedDataGcLock.RLock()
defer cc.cacheMutex.sharedDataGcLock.RUnlock()
if cc.offline {
for _, entry := range cc.offlineTemp.Entries {
if entry.Sys.ContentType.Sys.ID == ContentType{{ firstCap $contentType.Sys.ID }} {
@ -644,7 +645,6 @@ func (cc *ContentfulClient) cacheAll{{ firstCap $contentType.Sys.ID }}(ctx conte
return nil, errors.New("optimisticPageSizeGetAll for {{ firstCap $contentType.Sys.ID }} failed: "+err.Error())
}
}
cc.cacheMutex.sharedDataGcLock.RUnlock()
all{{ firstCap $contentType.Sys.ID }}, err = colToCf{{ firstCap $contentType.Sys.ID }}(col,cc)
if err != nil {
return nil, errors.New("colToCf{{ firstCap $contentType.Sys.ID }} failed: "+err.Error())
@ -814,6 +814,16 @@ func colToCf{{ firstCap $contentType.Sys.ID }}(col *contentful.Collection, cc *C
if err != nil {
break
}
if cc.textJanitor {
{{ range $fieldIndex, $field := $contentType.Fields }}
{{ if or (fieldIsSymbol $field) (fieldIsText $field) }}
vo.Fields.{{ firstCap $field.ID }} = cleanUpStringField(vo.Fields.{{ firstCap $field.ID }}){{ end }}
{{ if fieldIsSymbolList $field }}
vo.Fields.{{ firstCap $field.ID }} = cleanUpStringSliceField(vo.Fields.{{ firstCap $field.ID }}){{ end }}
{{ if fieldIsRichText $field }}
vo.Fields.{{ firstCap $field.ID }} = cleanUpRichTextField(vo.Fields.{{ firstCap $field.ID }}){{ end }}
{{ end }}
}
vo.CC = cc
vos = append(vos, &vo)
}

View File

@ -4,7 +4,6 @@ import (
"encoding/json"
"flag"
"fmt"
"io/ioutil"
"os"
"os/user"
"path/filepath"
@ -46,7 +45,7 @@ func getCmaKeyFromRcFile() string {
if errGetUser != nil {
return ""
}
contentfulRcBytes, errReadFile := ioutil.ReadFile(currentUser.HomeDir + "/.contentfulrc.json")
contentfulRcBytes, errReadFile := os.ReadFile(currentUser.HomeDir + "/.contentfulrc.json")
if errReadFile != nil {
return ""
}
@ -113,5 +112,4 @@ func main() {
fatal("Something went horribly wrong...", err)
}
fmt.Println("ALL DONE!")
}

View File

@ -1,10 +1,13 @@
package test
import (
"github.com/foomo/gocontentful/test/testapi"
"github.com/stretchr/testify/require"
"testing"
"time"
"github.com/foomo/gocontentful/test/testapi"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestPublishingStatus(t *testing.T) {
@ -21,3 +24,25 @@ func TestPublishingStatus(t *testing.T) {
require.NoError(t, err)
require.Equal(t, testapi.StatusChanged, changed.GetPublishingStatus())
}
// TestCleanUpUnicode loads the offline test space with the text janitor
// enabled and checks that the product fixture's rich text, product name and
// tags come back with the expected lengths.
func TestCleanUpUnicode(t *testing.T) {
	testLogger := logrus.StandardLogger()
	cc, errClient := testapi.NewOfflineContentfulClient("./test-space-export.json",
		GetContenfulLogger(testLogger),
		LogDebug,
		true,
		true)
	require.NoError(t, errClient)
	testCleanUpUnicode, err := cc.GetProductByID("6dbjWqNd9SqccegcqYq224")
	require.NoError(t, err)
	html, err := testapi.RichTextToHtml(testCleanUpUnicode.SeoText(testapi.SpaceLocaleGerman), nil, nil, nil, nil, testapi.SpaceLocaleGerman)
	require.NoError(t, err)
	assert.Equal(t, 2109, len(html))
	assert.Equal(t, 13, len(testCleanUpUnicode.ProductName()))
	productTags := testCleanUpUnicode.Tags()
	tagLengths := make([]int, 0, len(productTags))
	for _, tag := range productTags {
		testLogger.Info(tag)
		tagLengths = append(tagLengths, len(tag))
	}
	// Expected value first, actual second, so a failure is reported correctly.
	assert.Equal(t, []int{7, 11, 5, 11, 7}, tagLengths)
}

View File

@ -57,8 +57,7 @@
"type": "Text",
"localized": false,
"required": true,
"validations": [
],
"validations": [],
"disabled": false,
"omitted": false
},
@ -68,8 +67,7 @@
"type": "Link",
"localized": false,
"required": false,
"validations": [
],
"validations": [],
"disabled": false,
"omitted": false,
"linkType": "Asset"
@ -80,8 +78,7 @@
"type": "Text",
"localized": false,
"required": false,
"validations": [
],
"validations": [],
"disabled": false,
"omitted": false
},
@ -91,8 +88,7 @@
"type": "Symbol",
"localized": false,
"required": false,
"validations": [
],
"validations": [],
"disabled": false,
"omitted": false
},
@ -102,8 +98,7 @@
"type": "Symbol",
"localized": false,
"required": false,
"validations": [
],
"validations": [],
"disabled": false,
"omitted": false
},
@ -113,8 +108,7 @@
"type": "Symbol",
"localized": false,
"required": false,
"validations": [
],
"validations": [],
"disabled": false,
"omitted": false
},
@ -124,14 +118,12 @@
"type": "Array",
"localized": false,
"required": false,
"validations": [
],
"validations": [],
"disabled": false,
"omitted": false,
"items": {
"type": "Symbol",
"validations": [
]
"validations": []
}
}
]
@ -193,8 +185,7 @@
"type": "Text",
"localized": false,
"required": true,
"validations": [
],
"validations": [],
"disabled": false,
"omitted": false
},
@ -204,8 +195,7 @@
"type": "Link",
"localized": false,
"required": false,
"validations": [
],
"validations": [],
"disabled": false,
"omitted": false,
"linkType": "Asset"
@ -216,8 +206,7 @@
"type": "Text",
"localized": false,
"required": false,
"validations": [
],
"validations": [],
"disabled": false,
"omitted": false
}
@ -280,8 +269,7 @@
"type": "Text",
"localized": false,
"required": true,
"validations": [
],
"validations": [],
"disabled": false,
"omitted": false
},
@ -291,8 +279,7 @@
"type": "Symbol",
"localized": false,
"required": false,
"validations": [
],
"validations": [],
"disabled": false,
"omitted": false
},
@ -302,8 +289,7 @@
"type": "Text",
"localized": false,
"required": false,
"validations": [
],
"validations": [],
"disabled": false,
"omitted": false
},
@ -313,8 +299,7 @@
"type": "Symbol",
"localized": false,
"required": false,
"validations": [
],
"validations": [],
"disabled": false,
"omitted": false
},
@ -324,14 +309,12 @@
"type": "Array",
"localized": false,
"required": false,
"validations": [
],
"validations": [],
"disabled": false,
"omitted": false,
"items": {
"type": "Link",
"validations": [
],
"validations": [],
"linkType": "Asset"
}
},
@ -341,14 +324,12 @@
"type": "Array",
"localized": false,
"required": false,
"validations": [
],
"validations": [],
"disabled": false,
"omitted": false,
"items": {
"type": "Symbol",
"validations": [
]
"validations": []
}
},
{
@ -357,17 +338,14 @@
"type": "Array",
"localized": false,
"required": false,
"validations": [
],
"validations": [],
"disabled": false,
"omitted": false,
"items": {
"type": "Link",
"validations": [
{
"linkContentType": [
"category"
]
"linkContentType": ["category"]
}
],
"linkType": "Entry"
@ -379,8 +357,7 @@
"type": "Number",
"localized": false,
"required": false,
"validations": [
],
"validations": [],
"disabled": false,
"omitted": false
},
@ -392,9 +369,7 @@
"required": false,
"validations": [
{
"linkContentType": [
"brand"
]
"linkContentType": ["brand"]
}
],
"disabled": false,
@ -407,8 +382,7 @@
"type": "Integer",
"localized": false,
"required": false,
"validations": [
],
"validations": [],
"disabled": false,
"omitted": false
},
@ -418,8 +392,7 @@
"type": "Symbol",
"localized": false,
"required": false,
"validations": [
],
"validations": [],
"disabled": false,
"omitted": false
},
@ -429,16 +402,62 @@
"type": "Symbol",
"localized": false,
"required": false,
"validations": [],
"disabled": false,
"omitted": false
},
{
"id": "seoText",
"name": "SEO Text",
"type": "RichText",
"localized": true,
"required": true,
"validations": [
{
"enabledMarks": ["bold", "italic", "underline", "code"],
"message": "Only bold, italic, underline, and code marks are allowed"
},
{
"enabledNodeTypes": [
"heading-1",
"heading-2",
"heading-3",
"heading-4",
"heading-5",
"heading-6",
"ordered-list",
"unordered-list",
"hr",
"blockquote",
"embedded-asset-block",
"hyperlink",
"entry-hyperlink",
"embedded-entry-block",
"table"
],
"message": "Only heading 1, heading 2, heading 3, heading 4, heading 5, heading 6, ordered list, unordered list, horizontal rule, quote, asset, link to Url, link to entry, block entry, and table nodes are allowed"
},
{
"nodes": {}
}
],
"disabled": false,
"omitted": false
},
{
"id": "nodes",
"name": "Nodes",
"type": "Object",
"localized": true,
"required": false,
"validations": [],
"disabled": false,
"omitted": false
}
]
}
],
"tags": [
],
"tags": [],
"editorInterfaces": [
{
"sys": {
@ -672,8 +691,7 @@
"entries": [
{
"metadata": {
"tags": [
]
"tags": []
},
"sys": {
"space": {
@ -729,7 +747,7 @@
},
"fields": {
"productName": {
"de": "Whisk Beater"
"de": "Whisk \u0009Beater\u2028"
},
"slug": {
"de": "whisk-beater"
@ -754,10 +772,10 @@
"tags": {
"de": [
"kitchen",
"accessories",
"accesso\u2028ries",
"whisk",
"scandinavia",
"design"
"\u0009design"
]
},
"categories": {
@ -791,13 +809,182 @@
},
"website": {
"de": "http://www.amazon.com/dp/B0081F2CCK/"
},
"seoText": {
"de": {
"nodeType": "document",
"data": {},
"content": [
{
"nodeType": "heading-1",
"content": [
{
"nodeType": "text",
"value": "Header 1",
"marks": [],
"data": {}
}
],
"data": {}
},
{
"nodeType": "heading-2",
"content": [
{
"nodeType": "text",
"value": "Header 2",
"marks": [],
"data": {}
}
],
"data": {}
},
{
"nodeType": "heading-3",
"content": [
{
"nodeType": "text",
"value": "Hello",
"marks": [],
"data": {}
}
],
"data": {}
},
{
"nodeType": "heading-4",
"content": [
{
"nodeType": "text",
"value": "Lorem \u2028ipsum (4)",
"marks": [],
"data": {}
}
],
"data": {}
},
{
"nodeType": "paragraph",
"content": [
{
"nodeType": "text",
"value": "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Morbi nisi orci, aliquam id ipsum sit amet, convallis volutpat enim. Cras vitae nibh sit amet justo lacinia posuere. Pellentesque luctus imperdiet pharetra. Etiam varius, tortor quis bibendum pharetra, dolor mi consequat quam, et pharetra nulla ante eu quam. Morbi venenatis libero nisl, ut ultricies dui interdum sit amet. Sed in tellus est. Mauris varius ante nec massa tincidunt aliquet. Nam gravida lacus arcu, ac pharetra nunc interdum et. ",
"marks": [],
"data": {}
}
],
"data": {}
},
{
"nodeType": "paragraph",
"content": [
{
"nodeType": "text",
"value": "Maecenas vehicula pretium augue eget interdumSed posuere purus quis urna dignissim, ac consectetur nisl venenatis. Sed consectetur nisl leo, sit amet pharetra enim vestibulum sed. Donec pulvinar turpis vel urna gravida, et sollicitudin velit pellentesque. Vivamus ut ex et eros iaculis efficitur nec a enim. In hac habitasse platea dictumst. Morbi semper felis id odio tempor, id blandit purus placerat. Aliquam iaculis vestibulum tortor non placerat. Pellentesque fermentum rutrum risus eget dapibus. Vestibulum aliquam aliquam velit, vel dapibus ipsum ullamcorper quis. Aliquam in velit quis nunc volutpat commodo. Nunc aliquam neque metus, a commodo turpis porttitor eget. Etiam euismod arcu nec mi maximus, et vestibulum ligula consectetur. Curabitur sit amet enim eget purus placerat dapibus tempor a elit.",
"marks": [],
"data": {}
}
],
"data": {}
},
{
"nodeType": "paragraph",
"content": [
{
"nodeType": "text",
"value": "Duis viverra rhoncus \u200bmagna at fringilla. Vivamus imperdiet tellus eu luctus aliquam. Donec ac ligula sit amet ante tincidunt pharetra. Vestibulum hendrerit commodo varius. Vivamus massa tellus, ornare id tincidunt eu, lacinia et metus. Morbi vitae finibus velit. Sed venenatis sagittis felis non tempus. Fusce ante leo, bibendum eu ante eu, blandit\n interdum ligula. Aliquam lectus tortor, gravida sit amet nisl id, pretium sodales mauris. Ut dapibus massa eget pretium porttitor. Aenean lacinia sapien id arcu vulputate malesuada. Pellentesque vitae enim sapien. Integer pulvinar eros in tincidunt semper. Nulla facilisi.",
"marks": [],
"data": {}
}
],
"data": {}
},
{
"nodeType": "unordered-list",
"content": [
{
"nodeType": "list-item",
"content": [
{
"nodeType": "paragraph",
"content": [
{
"nodeType": "text",
"value": "One\t",
"marks": [],
"data": {}
}
],
"data": {}
}
],
"data": {}
},
{
"nodeType": "list-item",
"content": [
{
"nodeType": "paragraph",
"content": [
{
"nodeType": "text",
"value": "Two",
"marks": [],
"data": {}
}
],
"data": {}
}
],
"data": {}
},
{
"nodeType": "list-item",
"content": [
{
"nodeType": "paragraph",
"content": [
{
"nodeType": "text",
"value": "Three!",
"marks": [],
"data": {}
}
],
"data": {}
}
],
"data": {}
}
],
"data": {}
},
{
"nodeType": "paragraph",
"content": [
{
"nodeType": "text",
"value": "",
"marks": [],
"data": {}
}
],
"data": {}
}
]
}
},
"nodes": {
"de": {
"destinationID": "123123",
"static": true
}
}
}
},
{
"metadata": {
"tags": [
]
"tags": []
},
"sys": {
"space": {
@ -872,8 +1059,7 @@
},
{
"metadata": {
"tags": [
]
"tags": []
},
"sys": {
"space": {
@ -953,11 +1139,7 @@
]
},
"tags": {
"de": [
"vase",
"flowers",
"accessories"
]
"de": ["vase", "flowers", "accessories"]
},
"categories": {
"de": [
@ -995,8 +1177,7 @@
},
{
"metadata": {
"tags": [
]
"tags": []
},
"sys": {
"space": {
@ -1076,13 +1257,7 @@
]
},
"tags": {
"de": [
"home décor",
"clocks",
"interior design",
"yellow",
"gifts"
]
"de": ["home décor", "clocks", "interior design", "yellow", "gifts"]
},
"categories": {
"de": [
@ -1120,8 +1295,7 @@
},
{
"metadata": {
"tags": [
]
"tags": []
},
"sys": {
"space": {
@ -1201,13 +1375,7 @@
]
},
"tags": {
"de": [
"wood",
"toy",
"car",
"sweden",
"design"
]
"de": ["wood", "toy", "car", "sweden", "design"]
},
"categories": {
"de": [
@ -1252,8 +1420,7 @@
},
{
"metadata": {
"tags": [
]
"tags": []
},
"sys": {
"space": {
@ -1331,8 +1498,7 @@
},
{
"metadata": {
"tags": [
]
"tags": []
},
"sys": {
"space": {
@ -1407,8 +1573,7 @@
},
{
"metadata": {
"tags": [
]
"tags": []
},
"sys": {
"space": {
@ -1486,16 +1651,13 @@
"de": "info@acgears.com"
},
"phone": {
"de": [
"+1 212 260 2269"
]
"de": ["+1 212 260 2269"]
}
}
},
{
"metadata": {
"tags": [
]
"tags": []
},
"sys": {
"space": {
@ -1576,9 +1738,7 @@
"de": "normann@normann-copenhagen.com"
},
"phone": {
"de": [
"+45 35 55 44 59"
]
"de": ["+45 35 55 44 59"]
}
}
}
@ -1586,8 +1746,7 @@
"assets": [
{
"metadata": {
"tags": [
]
"tags": []
},
"sys": {
"space": {
@ -1660,8 +1819,7 @@
},
{
"metadata": {
"tags": [
]
"tags": []
},
"sys": {
"space": {
@ -1734,8 +1892,7 @@
},
{
"metadata": {
"tags": [
]
"tags": []
},
"sys": {
"space": {
@ -1808,8 +1965,7 @@
},
{
"metadata": {
"tags": [
]
"tags": []
},
"sys": {
"space": {
@ -1882,8 +2038,7 @@
},
{
"metadata": {
"tags": [
]
"tags": []
},
"sys": {
"space": {
@ -1956,8 +2111,7 @@
},
{
"metadata": {
"tags": [
]
"tags": []
},
"sys": {
"space": {
@ -2030,8 +2184,7 @@
},
{
"metadata": {
"tags": [
]
"tags": []
},
"sys": {
"space": {
@ -2104,8 +2257,7 @@
},
{
"metadata": {
"tags": [
]
"tags": []
},
"sys": {
"space": {
@ -2178,8 +2330,7 @@
},
{
"metadata": {
"tags": [
]
"tags": []
},
"sys": {
"space": {
@ -2252,8 +2403,7 @@
},
{
"metadata": {
"tags": [
]
"tags": []
},
"sys": {
"space": {
@ -2326,8 +2476,7 @@
},
{
"metadata": {
"tags": [
]
"tags": []
},
"sys": {
"space": {
@ -2400,8 +2549,7 @@
},
{
"metadata": {
"tags": [
]
"tags": []
},
"sys": {
"space": {
@ -2562,8 +2710,6 @@
}
}
],
"webhooks": [
],
"roles": [
]
}
"webhooks": [],
"roles": []
}

View File

@ -1,6 +1,8 @@
package testapi
import "errors"
import (
"errors"
)
type CacheStats struct {
AssetCount int
@ -33,5 +35,4 @@ func (cc *ContentfulClient) SetProductInCache(product *CfProduct) {
defer cc.cacheMutex.parentMapGcLock.Unlock()
cc.Cache.entryMaps.product[product.Sys.ID] = product
cc.Cache.idContentTypeMap[product.Sys.ID] = product.Sys.ContentType.Sys.ID
return
}

View File

@ -89,6 +89,10 @@ type CfProductFields struct {
RWLockSku sync.RWMutex `json:"-"`
Website map[string]string `json:"website,omitempty"`
RWLockWebsite sync.RWMutex `json:"-"`
SeoText map[string]interface{} `json:"seoText,omitempty"`
RWLockSeoText sync.RWMutex `json:"-"`
Nodes map[string]interface{} `json:"nodes,omitempty"`
RWLockNodes sync.RWMutex `json:"-"`
}
type genericEntryNoFields struct {

View File

@ -9,11 +9,12 @@ import (
"fmt"
"html"
"io"
"io/ioutil"
"os"
"regexp"
"strings"
"sync"
"time"
"unicode"
"github.com/foomo/contentful"
"golang.org/x/sync/errgroup"
@ -65,12 +66,13 @@ type ContentfulClient struct {
args ...interface{},
)
logLevel int
optimisticPageSize uint16 // Start downloading entries at this page size
optimisticPageSize uint16
SpaceID string
offline bool
offlineTemp offlineTemp
sync bool
syncToken string
textJanitor bool
}
type offlineTemp struct {
@ -282,6 +284,14 @@ func (cc *ContentfulClient) DeleteAssetFromCache(key string) error {
return cc.deleteAssetFromCache(key)
}
// DisableTextJanitor switches the client's textJanitor flag off.
// The flag is a plain field assignment; no lock is taken here.
func (cc *ContentfulClient) DisableTextJanitor() {
cc.textJanitor = false
}
// EnableTextJanitor switches the client's textJanitor flag on.
// The flag is a plain field assignment; no lock is taken here.
func (cc *ContentfulClient) EnableTextJanitor() {
cc.textJanitor = true
}
func (cc *ContentfulClient) GetAllAssets() (map[string]*contentful.Asset, error) {
return cc.getAllAssets(true)
}
@ -513,7 +523,7 @@ func NewContentfulClient(spaceID string, clientMode ClientMode, clientKey string
return cc, nil
}
func NewOfflineContentfulClient(filename string, logFn func(fields map[string]interface{}, level int, args ...interface{}), logLevel int, cacheAssets bool) (*ContentfulClient, error) {
func NewOfflineContentfulClient(filename string, logFn func(fields map[string]interface{}, level int, args ...interface{}), logLevel int, cacheAssets bool, textJanitor bool) (*ContentfulClient, error) {
offlineTemp, err := getOfflineSpaceFromFile(filename)
if err != nil {
return nil, fmt.Errorf("NewOfflineContentfulClient could not parse space export file: %v", err)
@ -548,6 +558,7 @@ func NewOfflineContentfulClient(filename string, logFn func(fields map[string]in
SpaceID: "OFFLINE",
offline: true,
offlineTemp: *offlineTemp,
textJanitor: textJanitor,
}
if cc.logFn != nil && cc.logLevel <= LogInfo {
cc.logFn(map[string]interface{}{"entries": len(offlineTemp.Entries), "assets": len(offlineTemp.Assets)}, LogInfo, InfoLoadingFromFile)
@ -997,7 +1008,7 @@ func (cc *ContentfulClient) getAllAssets(tryCacheFirst bool) (map[string]*conten
}
func getOfflineSpaceFromFile(filename string) (*offlineTemp, error) {
fileBytes, err := ioutil.ReadFile(filename)
fileBytes, err := os.ReadFile(filename)
if err != nil {
return nil, fmt.Errorf("getOfflineSpaceFromFile could not read space export file: %v", err)
}
@ -1637,3 +1648,90 @@ func commonGetParents(cc *ContentfulClient, id string, contentType []string) (pa
}
return parents, nil
}
// Unicode clean-up
// cleanUpStringField returns a new map holding, for every locale of field,
// the value passed through stripInvisibleUnicodeChars. The input map is left
// untouched; a nil or empty input yields an empty, non-nil map.
func cleanUpStringField(field map[string]string) map[string]string {
	sanitized := make(map[string]string, len(field))
	for loc := range field {
		sanitized[loc] = stripInvisibleUnicodeChars(field[loc])
	}
	return sanitized
}
// cleanUpStringSliceField returns a new map holding, for every locale of
// field, a fresh slice whose elements have been passed through
// stripInvisibleUnicodeChars. Neither the input map nor its slices are
// modified. Every locale maps to a non-nil slice, even when the source slice
// is nil or empty.
func cleanUpStringSliceField(field map[string][]string) map[string][]string {
	sanitized := make(map[string][]string, len(field))
	for loc, elems := range field {
		// make always hands back a non-nil slice, including for length zero.
		cleaned := make([]string, len(elems))
		for i := range elems {
			cleaned[i] = stripInvisibleUnicodeChars(elems[i])
		}
		sanitized[loc] = cleaned
	}
	return sanitized
}
// cleanUpRichTextField returns a new map holding, for every locale of field,
// the rich text document converted to a *RichTextGenericNode tree whose text
// values have been passed through stripInvisibleUnicodeChars.
//
// Locales whose value is nil are carried over as nil: converting nil through
// JSON would otherwise produce an empty, non-nil node and turn "no content"
// into a bogus document.
//
// If any locale's value cannot be converted, the original field is returned
// unchanged (all-or-nothing), so callers never see a half-cleaned field.
func cleanUpRichTextField(field map[string]interface{}) map[string]interface{} {
	cleanField := make(map[string]interface{}, len(field))
	for locale, value := range field {
		if value == nil {
			cleanField[locale] = nil
			continue
		}
		node, err := objectToRichTextGenericNode(value)
		if err != nil {
			// Best effort: fall back to the untouched input.
			return field
		}
		cleanField[locale] = cleanUpRichTextIterateNode(node)
	}
	return cleanField
}
// cleanUpRichTextIterateNode returns a cleaned copy of the rich text tree
// rooted at node: NodeType, Data and Marks are carried over as they are,
// Value is passed through stripInvisibleUnicodeChars and every child in
// Content is cleaned recursively. The input tree is not modified.
//
// A nil node yields nil, so a nil entry in a Content slice (for example a JSON
// null child) is preserved in place rather than causing a nil dereference.
func cleanUpRichTextIterateNode(node *RichTextGenericNode) *RichTextGenericNode {
	if node == nil {
		return nil
	}
	cleanNode := &RichTextGenericNode{
		NodeType: node.NodeType,
		Data:     node.Data,
		Value:    stripInvisibleUnicodeChars(node.Value),
		Marks:    node.Marks,
	}
	// Content stays nil when there are no children, as before.
	if len(node.Content) > 0 {
		cleanNode.Content = make([]*RichTextGenericNode, 0, len(node.Content))
	}
	for _, childNode := range node.Content {
		cleanNode.Content = append(cleanNode.Content, cleanUpRichTextIterateNode(childNode))
	}
	return cleanNode
}
// isFieldRichText reports whether field looks like a localized rich text
// field. Nil locale values are skipped. It returns true as soon as a value
// converts to a node of type "document" with non-nil Content, and false as
// soon as a value fails to convert. If no value qualifies, it returns false.
func isFieldRichText(field map[string]interface{}) bool {
	for _, localized := range field {
		if localized == nil {
			continue
		}
		candidate, err := objectToRichTextGenericNode(localized)
		switch {
		case err != nil:
			return false
		case candidate.NodeType == "document" && candidate.Content != nil:
			return true
		}
	}
	return false
}
// objectToRichTextGenericNode converts an arbitrary value into a
// RichTextGenericNode by marshalling it to JSON and unmarshalling the result
// into a fresh node. It returns a nil node together with the error if either
// step fails.
func objectToRichTextGenericNode(value interface{}) (*RichTextGenericNode, error) {
	encoded, err := json.Marshal(value)
	if err != nil {
		return nil, err
	}
	var decoded RichTextGenericNode
	if err := json.Unmarshal(encoded, &decoded); err != nil {
		return nil, err
	}
	return &decoded, nil
}
// stripInvisibleUnicodeChars returns dirty with every rune removed that is
// neither graphic (unicode.IsGraphic) nor a control character
// (unicode.IsControl). Control characters such as tab and newline are
// therefore kept, while runes like U+2028 LINE SEPARATOR or U+200B ZERO WIDTH
// SPACE are dropped.
func stripInvisibleUnicodeChars(dirty string) string {
	keepGraphicOrControl := func(r rune) rune {
		if !unicode.IsGraphic(r) && !unicode.IsControl(r) {
			// A negative result tells strings.Map to drop the rune.
			return -1
		}
		return r
	}
	return strings.Map(keepGraphicOrControl, dirty)
}

View File

@ -719,6 +719,7 @@ func (cc *ContentfulClient) cacheAllBrand(ctx context.Context, resultChan chan<-
Items: []interface{}{},
}
cc.cacheMutex.sharedDataGcLock.RLock()
defer cc.cacheMutex.sharedDataGcLock.RUnlock()
if cc.offline {
for _, entry := range cc.offlineTemp.Entries {
if entry.Sys.ContentType.Sys.ID == ContentTypeBrand {
@ -731,7 +732,6 @@ func (cc *ContentfulClient) cacheAllBrand(ctx context.Context, resultChan chan<-
return nil, errors.New("optimisticPageSizeGetAll for Brand failed: " + err.Error())
}
}
cc.cacheMutex.sharedDataGcLock.RUnlock()
allBrand, err = colToCfBrand(col, cc)
if err != nil {
return nil, errors.New("colToCfBrand failed: " + err.Error())
@ -833,6 +833,21 @@ func colToCfBrand(col *contentful.Collection, cc *ContentfulClient) (vos []*CfBr
if err != nil {
break
}
if cc.textJanitor {
vo.Fields.CompanyName = cleanUpStringField(vo.Fields.CompanyName)
vo.Fields.CompanyDescription = cleanUpStringField(vo.Fields.CompanyDescription)
vo.Fields.Website = cleanUpStringField(vo.Fields.Website)
vo.Fields.Twitter = cleanUpStringField(vo.Fields.Twitter)
vo.Fields.Email = cleanUpStringField(vo.Fields.Email)
vo.Fields.Phone = cleanUpStringSliceField(vo.Fields.Phone)
}
vo.CC = cc
vos = append(vos, &vo)
}

View File

@ -483,6 +483,7 @@ func (cc *ContentfulClient) cacheAllCategory(ctx context.Context, resultChan cha
Items: []interface{}{},
}
cc.cacheMutex.sharedDataGcLock.RLock()
defer cc.cacheMutex.sharedDataGcLock.RUnlock()
if cc.offline {
for _, entry := range cc.offlineTemp.Entries {
if entry.Sys.ContentType.Sys.ID == ContentTypeCategory {
@ -495,7 +496,6 @@ func (cc *ContentfulClient) cacheAllCategory(ctx context.Context, resultChan cha
return nil, errors.New("optimisticPageSizeGetAll for Category failed: " + err.Error())
}
}
cc.cacheMutex.sharedDataGcLock.RUnlock()
allCategory, err = colToCfCategory(col, cc)
if err != nil {
return nil, errors.New("colToCfCategory failed: " + err.Error())
@ -597,6 +597,13 @@ func colToCfCategory(col *contentful.Collection, cc *ContentfulClient) (vos []*C
if err != nil {
break
}
if cc.textJanitor {
vo.Fields.Title = cleanUpStringField(vo.Fields.Title)
vo.Fields.CategoryDescription = cleanUpStringField(vo.Fields.CategoryDescription)
}
vo.CC = cc
vos = append(vos, &vo)
}

View File

@ -124,6 +124,10 @@ func NewCfProduct(contentfulClient ...*ContentfulClient) (cfProduct *CfProduct)
cfProduct.Fields.Website = map[string]string{}
cfProduct.Fields.SeoText = map[string]interface{}{}
cfProduct.Fields.Nodes = map[string]interface{}{}
cfProduct.Sys.ContentType.Sys.ID = "product"
cfProduct.Sys.ContentType.Sys.Type = FieldTypeLink
cfProduct.Sys.ContentType.Sys.LinkType = "ContentType"
@ -713,6 +717,82 @@ func (vo *CfProduct) Website(locale ...Locale) string {
return vo.Fields.Website[string(loc)]
}
// SeoText returns a pointer to a copy of the seoText field value for the
// requested locale: the first element of locale, or defaultLocale when none
// is given. When the field has no value for that locale, the locale's entry
// in localeFallback is tried instead.
//
// It returns nil when the receiver or its client (CC) is nil, when an
// explicitly requested locale is not a key of localeFallback, or when neither
// the locale nor its fallback has a value. Those nil results are reported
// through the client's logFn, subject to the logLevel checks below. The
// field's read lock is held for the duration of the lookup.
func (vo *CfProduct) SeoText(locale ...Locale) *interface{} {
	if vo == nil || vo.CC == nil {
		return nil
	}
	vo.Fields.RWLockSeoText.RLock()
	defer vo.Fields.RWLockSeoText.RUnlock()

	// report sends one message to the client's logger; callers check logFn
	// and logLevel before invoking it.
	report := func(level int, reason interface{}) {
		vo.CC.logFn(map[string]interface{}{"content type": vo.Sys.ContentType.Sys.ID, "entry ID": vo.Sys.ID, "method": "SeoText()"}, level, reason)
	}

	loc := defaultLocale
	if len(locale) > 0 {
		loc = locale[0]
		if _, supported := localeFallback[loc]; !supported {
			if vo.CC.logFn != nil && vo.CC.logLevel <= LogError {
				report(LogError, ErrLocaleUnsupported)
			}
			return nil
		}
	}

	seoText, found := vo.Fields.SeoText[string(loc)]
	if !found {
		fallback, hasFallback := localeFallback[loc]
		if !hasFallback {
			if vo.CC.logFn != nil && vo.CC.logLevel == LogDebug {
				report(LogWarn, ErrNotSet)
			}
			return nil
		}
		loc = fallback
		seoText, found = vo.Fields.SeoText[string(loc)]
		if !found {
			if vo.CC.logFn != nil && vo.CC.logLevel == LogDebug {
				report(LogWarn, ErrNotSetNoFallback)
			}
			return nil
		}
	}
	return &seoText
}
// Nodes returns a pointer to a copy of the nodes field value for the
// requested locale: the first element of locale, or defaultLocale when none
// is given. When the field has no value for that locale, the locale's entry
// in localeFallback is tried instead.
//
// It returns nil when the receiver or its client (CC) is nil, when an
// explicitly requested locale is not a key of localeFallback, or when neither
// the locale nor its fallback has a value. Those nil results are reported
// through the client's logFn, subject to the logLevel checks below. The
// field's read lock is held for the duration of the lookup.
func (vo *CfProduct) Nodes(locale ...Locale) *interface{} {
	if vo == nil || vo.CC == nil {
		return nil
	}
	vo.Fields.RWLockNodes.RLock()
	defer vo.Fields.RWLockNodes.RUnlock()

	// report sends one message to the client's logger; callers check logFn
	// and logLevel before invoking it.
	report := func(level int, reason interface{}) {
		vo.CC.logFn(map[string]interface{}{"content type": vo.Sys.ContentType.Sys.ID, "entry ID": vo.Sys.ID, "method": "Nodes()"}, level, reason)
	}

	loc := defaultLocale
	if len(locale) > 0 {
		loc = locale[0]
		if _, supported := localeFallback[loc]; !supported {
			if vo.CC.logFn != nil && vo.CC.logLevel <= LogError {
				report(LogError, ErrLocaleUnsupported)
			}
			return nil
		}
	}

	nodes, found := vo.Fields.Nodes[string(loc)]
	if !found {
		fallback, hasFallback := localeFallback[loc]
		if !hasFallback {
			if vo.CC.logFn != nil && vo.CC.logLevel == LogDebug {
				report(LogWarn, ErrNotSet)
			}
			return nil
		}
		loc = fallback
		nodes, found = vo.Fields.Nodes[string(loc)]
		if !found {
			if vo.CC.logFn != nil && vo.CC.logLevel == LogDebug {
				report(LogWarn, ErrNotSetNoFallback)
			}
			return nil
		}
	}
	return &nodes
}
// Product Field setters
func (vo *CfProduct) SetProductName(productName string, locale ...Locale) (err error) {
@ -955,6 +1035,46 @@ func (vo *CfProduct) SetWebsite(website string, locale ...Locale) (err error) {
return
}
// SetSeoText stores seoText as the seoText field value for the given locale:
// the first element of locale, or defaultLocale when none is given. It
// returns an error when the receiver is nil and ErrLocaleUnsupported when an
// explicitly requested locale is not a key of localeFallback. The field's
// write lock is held while the map is created (if nil) and updated.
func (vo *CfProduct) SetSeoText(seoText interface{}, locale ...Locale) (err error) {
	if vo == nil {
		return errors.New("SetSeoText(seoText: Value Object is nil")
	}
	target := defaultLocale
	if len(locale) > 0 {
		if _, supported := localeFallback[locale[0]]; !supported {
			return ErrLocaleUnsupported
		}
		target = locale[0]
	}
	vo.Fields.RWLockSeoText.Lock()
	defer vo.Fields.RWLockSeoText.Unlock()
	if vo.Fields.SeoText == nil {
		vo.Fields.SeoText = map[string]interface{}{}
	}
	vo.Fields.SeoText[string(target)] = seoText
	return nil
}
// SetNodes stores nodes as the nodes field value for the given locale: the
// first element of locale, or defaultLocale when none is given. It returns an
// error when the receiver is nil and ErrLocaleUnsupported when an explicitly
// requested locale is not a key of localeFallback. The field's write lock is
// held while the map is created (if nil) and updated.
func (vo *CfProduct) SetNodes(nodes interface{}, locale ...Locale) (err error) {
	if vo == nil {
		return errors.New("SetNodes(nodes: Value Object is nil")
	}
	target := defaultLocale
	if len(locale) > 0 {
		if _, supported := localeFallback[locale[0]]; !supported {
			return ErrLocaleUnsupported
		}
		target = locale[0]
	}
	vo.Fields.RWLockNodes.Lock()
	defer vo.Fields.RWLockNodes.Unlock()
	if vo.Fields.Nodes == nil {
		vo.Fields.Nodes = map[string]interface{}{}
	}
	vo.Fields.Nodes[string(target)] = nodes
	return nil
}
func (vo *CfProduct) UpsertEntry() (err error) {
if vo == nil {
return errors.New("UpsertEntry: Value Object is nil")
@ -1101,6 +1221,7 @@ func (cc *ContentfulClient) cacheAllProduct(ctx context.Context, resultChan chan
Items: []interface{}{},
}
cc.cacheMutex.sharedDataGcLock.RLock()
defer cc.cacheMutex.sharedDataGcLock.RUnlock()
if cc.offline {
for _, entry := range cc.offlineTemp.Entries {
if entry.Sys.ContentType.Sys.ID == ContentTypeProduct {
@ -1113,7 +1234,6 @@ func (cc *ContentfulClient) cacheAllProduct(ctx context.Context, resultChan chan
return nil, errors.New("optimisticPageSizeGetAll for Product failed: " + err.Error())
}
}
cc.cacheMutex.sharedDataGcLock.RUnlock()
allProduct, err = colToCfProduct(col, cc)
if err != nil {
return nil, errors.New("colToCfProduct failed: " + err.Error())
@ -1277,6 +1397,25 @@ func colToCfProduct(col *contentful.Collection, cc *ContentfulClient) (vos []*Cf
if err != nil {
break
}
if cc.textJanitor {
vo.Fields.ProductName = cleanUpStringField(vo.Fields.ProductName)
vo.Fields.Slug = cleanUpStringField(vo.Fields.Slug)
vo.Fields.ProductDescription = cleanUpStringField(vo.Fields.ProductDescription)
vo.Fields.Sizetypecolor = cleanUpStringField(vo.Fields.Sizetypecolor)
vo.Fields.Tags = cleanUpStringSliceField(vo.Fields.Tags)
vo.Fields.Sku = cleanUpStringField(vo.Fields.Sku)
vo.Fields.Website = cleanUpStringField(vo.Fields.Website)
vo.Fields.SeoText = cleanUpRichTextField(vo.Fields.SeoText)
}
vo.CC = cc
vos = append(vos, &vo)
}

View File

@ -15,7 +15,7 @@ const (
var testLogger = logrus.StandardLogger()
// getTestClient builds an offline ContentfulClient from
// ./test-space-export.json, logging through testLogger at LogDebug.
func getTestClient() (*testapi.ContentfulClient, error) {
return testapi.NewOfflineContentfulClient("./test-space-export.json", GetContenfulLogger(testLogger), LogDebug, true)
return testapi.NewOfflineContentfulClient("./test-space-export.json", GetContenfulLogger(testLogger), LogDebug, true, true)
}
func GetContenfulLogger(log *logrus.Logger) func(fields map[string]interface{}, level int, args ...interface{}) {