mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-30 12:11:44 +00:00
Docs: add zh-CN entrypoint translations (#6300)
* Docs: add zh-CN entrypoint translations * Docs: harden docs-i18n parsing
This commit is contained in:
160
scripts/docs-i18n/html_translate.go
Normal file
160
scripts/docs-i18n/html_translate.go
Normal file
@@ -0,0 +1,160 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"github.com/yuin/goldmark"
|
||||
"github.com/yuin/goldmark/ast"
|
||||
"github.com/yuin/goldmark/extension"
|
||||
"github.com/yuin/goldmark/text"
|
||||
"golang.org/x/net/html"
|
||||
"sort"
|
||||
)
|
||||
|
||||
// htmlReplacement describes a single substitution to perform on a document
// body: the bytes in body[Start:Stop] are swapped for Value.
type htmlReplacement struct {
	Start, Stop int    // byte offsets into the body; Stop is exclusive
	Value       string // text written in place of body[Start:Stop]
}
|
||||
|
||||
func translateHTMLBlocks(ctx context.Context, translator *PiTranslator, body, srcLang, tgtLang string) (string, error) {
|
||||
source := []byte(body)
|
||||
r := text.NewReader(source)
|
||||
md := goldmark.New(
|
||||
goldmark.WithExtensions(extension.GFM),
|
||||
)
|
||||
doc := md.Parser().Parse(r)
|
||||
|
||||
replacements := make([]htmlReplacement, 0, 8)
|
||||
|
||||
_ = ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
if !entering {
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
block, ok := n.(*ast.HTMLBlock)
|
||||
if !ok {
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
start, stop, ok := htmlBlockSpan(block, source)
|
||||
if !ok {
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
htmlText := string(source[start:stop])
|
||||
translated, err := translateHTMLBlock(ctx, translator, htmlText, srcLang, tgtLang)
|
||||
if err != nil {
|
||||
return ast.WalkStop, err
|
||||
}
|
||||
replacements = append(replacements, htmlReplacement{Start: start, Stop: stop, Value: translated})
|
||||
return ast.WalkSkipChildren, nil
|
||||
})
|
||||
|
||||
if len(replacements) == 0 {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
return applyHTMLReplacements(body, replacements), nil
|
||||
}
|
||||
|
||||
func htmlBlockSpan(block *ast.HTMLBlock, source []byte) (int, int, bool) {
|
||||
lines := block.Lines()
|
||||
if lines.Len() == 0 {
|
||||
return 0, 0, false
|
||||
}
|
||||
start := lines.At(0).Start
|
||||
stop := lines.At(lines.Len() - 1).Stop
|
||||
if start >= stop {
|
||||
return 0, 0, false
|
||||
}
|
||||
return start, stop, true
|
||||
}
|
||||
|
||||
func applyHTMLReplacements(body string, replacements []htmlReplacement) string {
|
||||
if len(replacements) == 0 {
|
||||
return body
|
||||
}
|
||||
sortHTMLReplacements(replacements)
|
||||
var out strings.Builder
|
||||
last := 0
|
||||
for _, rep := range replacements {
|
||||
if rep.Start < last {
|
||||
continue
|
||||
}
|
||||
out.WriteString(body[last:rep.Start])
|
||||
out.WriteString(rep.Value)
|
||||
last = rep.Stop
|
||||
}
|
||||
out.WriteString(body[last:])
|
||||
return out.String()
|
||||
}
|
||||
|
||||
func sortHTMLReplacements(replacements []htmlReplacement) {
|
||||
sort.Slice(replacements, func(i, j int) bool {
|
||||
return replacements[i].Start < replacements[j].Start
|
||||
})
|
||||
}
|
||||
|
||||
func translateHTMLBlock(ctx context.Context, translator *PiTranslator, htmlText, srcLang, tgtLang string) (string, error) {
|
||||
tokenizer := html.NewTokenizer(strings.NewReader(htmlText))
|
||||
var out strings.Builder
|
||||
skipDepth := 0
|
||||
|
||||
for {
|
||||
tt := tokenizer.Next()
|
||||
if tt == html.ErrorToken {
|
||||
if err := tokenizer.Err(); err != nil && err != io.EOF {
|
||||
return "", err
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
raw := string(tokenizer.Raw())
|
||||
tok := tokenizer.Token()
|
||||
|
||||
switch tt {
|
||||
case html.StartTagToken:
|
||||
out.WriteString(raw)
|
||||
if isSkipTag(strings.ToLower(tok.Data)) {
|
||||
skipDepth++
|
||||
}
|
||||
case html.EndTagToken:
|
||||
out.WriteString(raw)
|
||||
if isSkipTag(strings.ToLower(tok.Data)) && skipDepth > 0 {
|
||||
skipDepth--
|
||||
}
|
||||
case html.SelfClosingTagToken:
|
||||
out.WriteString(raw)
|
||||
case html.TextToken:
|
||||
if shouldTranslateHTMLText(skipDepth, raw) {
|
||||
translated, err := translator.Translate(ctx, raw, srcLang, tgtLang)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
out.WriteString(translated)
|
||||
} else {
|
||||
out.WriteString(raw)
|
||||
}
|
||||
default:
|
||||
out.WriteString(raw)
|
||||
}
|
||||
}
|
||||
|
||||
return out.String(), nil
|
||||
}
|
||||
|
||||
// shouldTranslateHTMLText reports whether a text token should be translated:
// skipDepth must be exactly zero and text must contain something other than
// whitespace.
func shouldTranslateHTMLText(skipDepth int, text string) bool {
	return skipDepth == 0 && strings.TrimSpace(text) != ""
}
|
||||
|
||||
// isSkipTag reports whether tag is one of the element names whose text content
// is left untranslated: "code", "pre", "script" or "style". The comparison is
// exact, so callers are expected to pass a lower-cased name.
func isSkipTag(tag string) bool {
	return tag == "code" || tag == "pre" || tag == "script" || tag == "style"
}
|
||||
Reference in New Issue
Block a user