Compare commits

...

9 Commits

Author SHA1 Message Date
eyedeekay
ae87d950a4 Fix auto-numbered list parsing with #. syntax 2025-07-15 15:18:57 -04:00
eyedeekay
bdbb75e3c3 fix code block parsing 2025-07-15 14:42:17 -04:00
eyedeekay
8801995d74 Implement strong text (bold) parsing with **text** syntax 2025-07-15 14:04:01 -04:00
eyedeekay
d165ce5d03 fix main.go 2025-07-15 13:58:15 -04:00
eyedeekay
817f7e0978 fix import path in readme 2025-07-15 10:46:37 -04:00
eyedeekay
9aa0032cc2 clean up test files 2025-07-15 10:44:55 -04:00
eyedeekay
78bf1464a9 Fix nil pointer dereference in translation processing 2025-07-15 10:43:46 -04:00
eyedeekay
23e3c4864b add some parts of transition implementation 2025-05-03 19:02:29 -04:00
eyedeekay
492960101f add tree describing the structure 2025-05-03 18:44:53 -04:00
13 changed files with 344 additions and 46 deletions

View File

@@ -14,7 +14,7 @@ It is mostly unrelated to previous attempts to parse restructuredText in Go.
## Installation
```bash
go get i2pgit.org/idk/go-rst
go get github.com/go-i2p/go-rst
```
## Quick Start
@@ -34,9 +34,9 @@ import (
"fmt"
"io/ioutil"
"i2pgit.org/idk/go-rst/pkg/parser"
"i2pgit.org/idk/go-rst/pkg/renderer"
"i2pgit.org/idk/go-rst/pkg/translator"
"github.com/go-i2p/go-rst/pkg/parser"
"github.com/go-i2p/go-rst/pkg/renderer"
"github.com/go-i2p/go-rst/pkg/translator"
)
func main() {

56
TREE.md Normal file
View File

@@ -0,0 +1,56 @@
pkg/
├── nodes/ # Node type definitions
│ ├── blockquote.go # Defines BlockQuoteNode for representing block quotes in RST
│ ├── code.go # Defines CodeNode for representing code blocks in RST
│ ├── comment.go # Defines CommentNode for representing comments in RST
│ ├── directive.go # Defines DirectiveNode for representing RST directives
│ ├── doc.md # Documentation for the nodes package
│ ├── doctest.go # Defines DoctestNode for representing doctest blocks
│ ├── em.go # Defines EmphasisNode for representing emphasized (italic) text
│ ├── extra_util.go # Utility functions for node operations like indentation
│ ├── heading.go # Defines HeadingNode for representing section headings
│ ├── lineblock.go # Defines LineBlockNode for representing line blocks
│ ├── link.go # Defines LinkNode for representing hyperlinks
│ ├── list.go # Defines ListNode and ListItemNode for representing lists
│ ├── meta.go # Defines MetaNode for representing metadata information
│ ├── paragraph.go # Defines ParagraphNode for representing text paragraphs
│ ├── strong.go # Defines StrongNode for representing strong (bold) text
│ ├── subtitle.go # Defines SubtitleNode for representing document subtitles
│ ├── table.go # Defines TableNode for representing table structures
│ ├── title.go # Defines TitleNode for representing document titles
│ ├── transition.go # Defines TransitionNode for representing transitions between sections
│ └── types.go # Node type enumerations and base Node interface definitions
├── parser/ # RST parsing logic
│ ├── blockquote.go # Contains logic for parsing block quotes
│ ├── code.go # Contains logic for parsing code blocks
│ ├── context.go # Manages parser context and state during parsing
│ ├── directive.go # Contains logic for parsing RST directives
│ ├── doc.md # Documentation for the parser package
│ ├── doctest.go # Contains logic for parsing doctest blocks
│ ├── emphasis.go # Contains logic for parsing emphasized text
│ ├── headiing.go # Contains logic for parsing section headings
│ ├── lexer.go # Tokenizes RST input into tokens
│ ├── lineblock.go # Contains logic for parsing line blocks
│ ├── link.go # Contains logic for parsing hyperlinks
│ ├── list.go # Contains logic for parsing lists and list items
│ ├── meta.go # Contains logic for parsing metadata
│ ├── paragraph.go # Contains logic for parsing text paragraphs
│ ├── parser.go # Main parser implementation that processes tokens into a node tree
│ ├── parser_test.go # Tests for the parser functionality
│ ├── patterns.go # Regex patterns for RST syntax recognition
│ ├── strong.go # Contains logic for parsing strong (bold) text
│ ├── subtitle.go # Contains logic for parsing document subtitles
│ ├── table.go # Contains logic for parsing tables
│ ├── title.go # Contains logic for parsing document titles
│ └── transition.go # Contains logic for parsing transitions between sections
├── renderer/ # Output rendering components
│ ├── doc.md # Documentation for the renderer package
│ ├── html.go # HTML output renderer implementation
│ ├── markdown.go # Markdown output renderer implementation
│ └── pdf.go # PDF output renderer implementation using gofpdf
└── translator/ # Translation capabilities
├── doc.md # Documentation for the translator package
└── translator.go # Handles translation of text content using PO files

View File

@@ -1,26 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
</head>
<body>
<h1>
Welcome to My Documentation
</h1>
<h2>
Translations
</h2>
<p>
Welcome to My Documentation
This is a sample RST document that demonstrates translations.
Translations
</p>
<p>
Este texto será traducido
Some regular text here.
</p>
<p>
Otra sección traducible
</p>
</body>
</html>

View File

@@ -11,16 +11,34 @@ func (p *Parser) processCodeBlock(line string, currentNode nodes.Node) nodes.Nod
return currentNode
}
p.context.buffer = append(p.context.buffer, line)
// Check if we're at the end of the code block
// RST code blocks end when we encounter a line that is not indented
// or when we hit a blank line followed by unindented content
trimmedLine := strings.TrimSpace(line)
if strings.TrimSpace(line) == "" {
codeNode := currentNode.(*nodes.CodeNode)
content := strings.Join(p.context.buffer, "\n")
codeNode.SetContent(content)
p.nodes = append(p.nodes, codeNode)
p.context.Reset()
return nil
// If this is a blank line, add it to buffer but continue
if trimmedLine == "" {
p.context.buffer = append(p.context.buffer, line)
return currentNode
}
return currentNode
// Check if line is properly indented (at least 4 spaces for code content)
if len(line) >= 4 && line[:4] == " " {
// This is indented content, add to buffer
// Remove the base indentation (4 spaces) to preserve relative indentation
p.context.buffer = append(p.context.buffer, line[4:])
return currentNode
}
// Line is not indented, this means end of code block
// Finalize the code block
codeNode := currentNode.(*nodes.CodeNode)
content := strings.Join(p.context.buffer, "\n")
codeNode.SetContent(strings.TrimSpace(content))
// Add the completed code block to nodes
p.nodes = append(p.nodes, codeNode)
// Reset context and return nil to signal completion
p.context.Reset()
return nil
}

View File

@@ -1 +1,16 @@
package parser
import (
"github.com/go-i2p/go-rst/pkg/nodes"
)
// processEmphasis builds the node for a span of emphasized (italic) text.
//
// When the parser has a translator, content is passed through it before the
// node is created; without one the content is used unchanged.
func (p *Parser) processEmphasis(content string) *nodes.EmphasisNode {
	text := content
	if tr := p.translator; tr != nil {
		text = tr.Translate(text)
	}
	return nodes.NewEmphasisNode(text)
}

View File

@@ -24,6 +24,9 @@ const (
TokenEnumList // TokenEnumList represents an enumerated list item token.
TokenDoctest // TokenDoctest represents a doctest token.
TokenLineBlock // TokenLineBlock represents a line block token.
TokenTransition // TokenTransition represents a transition token.
TokenEmphasis // TokenEmphasis represents emphasized (italic) text
TokenStrong // TokenStrong represents strong (bold) text
)
// Token represents a single token in the input text.
@@ -146,6 +149,22 @@ func (l *Lexer) Tokenize(line string) Token {
}
}
// Check for strong (bold text) - must come before emphasis to avoid conflict
if matches := l.patterns.strong.FindStringSubmatch(line); len(matches) > 1 {
return Token{
Type: TokenStrong,
Content: matches[1], // The text between double asterisks
}
}
// Check for emphasis (italic text)
if matches := l.patterns.emphasis.FindStringSubmatch(line); len(matches) > 1 {
return Token{
Type: TokenEmphasis,
Content: matches[1], // The text between asterisks
}
}
// Check for line block (poetry-style line with | prefix)
if matches := l.patterns.lineBlock.FindStringSubmatch(line); len(matches) > 0 {
return Token{
@@ -154,6 +173,15 @@ func (l *Lexer) Tokenize(line string) Token {
}
}
// Check for transitions
if l.patterns.IsTransition(line) {
transChar := l.patterns.TransitionChar(line)
return Token{
Type: TokenTransition,
Content: string(transChar),
}
}
// Regular text
return Token{
Type: TokenText,

View File

@@ -1 +1,22 @@
package parser
import (
"strings"
"github.com/go-i2p/go-rst/pkg/nodes"
)
// processLineBlock handles parsing of line block tokens.
//
// Line blocks are used for poetry-style content where line breaks are
// preserved. The whitespace-trimmed content becomes one line of the block:
//   - if currentNode is a line block, a new LineBlockNode is returned that
//     holds the existing lines followed by the new one;
//   - otherwise a new single-line LineBlockNode is returned.
//
// The node passed in is never modified.
func (p *Parser) processLineBlock(content string, currentNode nodes.Node) nodes.Node {
	line := strings.TrimSpace(content)

	// Anything that is not an existing line block starts a fresh one.
	if currentNode == nil || currentNode.Type() != nodes.NodeLineBlock {
		return nodes.NewLineBlockNode([]string{line})
	}
	// Checked assertion: a node that merely reports NodeLineBlock without
	// being a *LineBlockNode starts a new block instead of panicking.
	lineBlockNode, ok := currentNode.(*nodes.LineBlockNode)
	if !ok {
		return nodes.NewLineBlockNode([]string{line})
	}

	// Copy into a detached slice before appending. Appending directly to the
	// slice returned by Lines() could write into a backing array shared with
	// the existing node if that slice has spare capacity.
	existing := lineBlockNode.Lines()
	lines := make([]string, 0, len(existing)+1)
	lines = append(lines, existing...)
	lines = append(lines, line)
	return nodes.NewLineBlockNode(lines)
}

View File

@@ -41,7 +41,7 @@ func (p *Parser) Parse(content string) []nodes.Node {
line := scanner.Text()
token := p.lexer.Tokenize(line)
if newNode := p.processToken(token, prevToken, currentNode); newNode != nil {
if newNode := p.processToken(token, prevToken, currentNode, line); newNode != nil {
// Only append if we actually have a new node
if currentNode != nil && currentNode != newNode {
p.nodes = append(p.nodes, currentNode)
@@ -59,7 +59,7 @@ func (p *Parser) Parse(content string) []nodes.Node {
return p.nodes
}
func (p *Parser) processToken(token, prevToken Token, currentNode nodes.Node) nodes.Node {
func (p *Parser) processToken(token, prevToken Token, currentNode nodes.Node, originalLine string) nodes.Node {
// translatedContent := p.translator.Translate(token.Content)
// token.Content = translatedContent
switch token.Type {
@@ -73,8 +73,13 @@ func (p *Parser) processToken(token, prevToken Token, currentNode nodes.Node) no
return nodes.NewCommentNode(token.Content)
case TokenTransBlock:
// Always create a new node for translation blocks
translatedContent := p.translator.Translate(strings.TrimSpace(token.Content))
return nodes.NewParagraphNode(translatedContent)
content := strings.TrimSpace(token.Content)
if p.translator != nil {
translatedContent := p.translator.Translate(content)
return nodes.NewParagraphNode(translatedContent)
}
// If no translator is available, return the original content
return nodes.NewParagraphNode(content)
case TokenHeadingUnderline:
if prevToken.Type == TokenText {
return p.processHeading(prevToken.Content, token.Content)
@@ -98,6 +103,14 @@ func (p *Parser) processToken(token, prevToken Token, currentNode nodes.Node) no
p.context.currentDirective = token.Content
return nodes.NewDirectiveNode(token.Content, token.Args)
case TokenEmphasis:
// Process the emphasized text
return p.processEmphasis(token.Content)
case TokenStrong:
// Process the strong (bold) text
return p.processStrong(token.Content)
case TokenLineBlock:
// Check if we're already in a line block node
if lineBlock, ok := currentNode.(*nodes.LineBlockNode); ok {
@@ -112,7 +125,7 @@ func (p *Parser) processToken(token, prevToken Token, currentNode nodes.Node) no
case TokenText:
if p.context.inCodeBlock {
return p.processCodeBlock(token.Content, currentNode)
return p.processCodeBlock(originalLine, currentNode) // Use original line to preserve indentation
}
if p.context.inMeta {
return p.processMetaContent(token.Content, currentNode)
@@ -121,6 +134,12 @@ func (p *Parser) processToken(token, prevToken Token, currentNode nodes.Node) no
return p.processDirectiveContent(token.Content, currentNode)
}
return p.processParagraph(token.Content, currentNode)
case TokenTransition:
// For transitions, we create a new transition node with the character used
if len(token.Content) > 0 {
return p.processTransition(token.Content)
}
return nodes.NewTransitionNode('-') // Default to hyphen if empty
}
return currentNode

View File

@@ -4,8 +4,10 @@ package parser
// Use example restructuredText files embedded in the test functions
import (
"strings"
"testing"
"github.com/go-i2p/go-rst/pkg/nodes"
"github.com/go-i2p/go-rst/pkg/translator"
)
@@ -48,3 +50,79 @@ func TestParseNilTranslatorEmptyInput(t *testing.T) {
t.Errorf("Expected empty, got a document")
}
}
// TestParseNilTranslatorWithTranslationBlocks checks that a parser created
// with a nil translator can parse a {% trans %} block without panicking and
// still produces at least one node.
func TestParseNilTranslatorWithTranslationBlocks(t *testing.T) {
	const input = "{% trans %}Hello, world!{% endtrans %}"

	// Parse must cope with the missing translator instead of dereferencing it.
	got := NewParser(nil).Parse(input)
	if len(got) == 0 {
		t.Errorf("Expected parsed nodes, got empty document")
	}
}
func TestParseStrongText(t *testing.T) {
noopTranslator := translator.NewNoopTranslator()
parser := NewParser(noopTranslator)
content := "This is **bold text** in a sentence."
doc := parser.Parse(content)
if len(doc) == 0 {
t.Errorf("Expected parsed nodes, got empty document")
}
// Check that we have a strong node
foundStrong := false
for _, node := range doc {
if node.Type() == nodes.NodeStrong {
foundStrong = true
if node.Content() != "bold text" {
t.Errorf("Expected strong content to be 'bold text', got '%s'", node.Content())
}
}
}
if !foundStrong {
t.Errorf("Expected to find a strong node in parsed document")
}
}
// TestParseCodeBlock parses a "code-block" directive followed by an ordinary
// text line and checks that a code node is produced with language "python"
// and content containing the function definition and the print statement.
func TestParseCodeBlock(t *testing.T) {
noopTranslator := translator.NewNoopTranslator()
parser := NewParser(noopTranslator)
// NOTE(review): whitespace inside this raw string is part of the fixture;
// the directive body presumably needs to be indented (and separated by a
// blank line) to be treated as code content — confirm against the real file.
content := `.. code-block:: python
def hello():
print("Hello, world!")
return True
End of test.`
doc := parser.Parse(content)
// An empty result is reported here; the "find a code node" check below
// will then fail as well because the loop has nothing to visit.
if len(doc) == 0 {
t.Errorf("Expected parsed nodes, got empty document")
}
// Check that we have a code node
foundCode := false
for _, node := range doc {
if node.Type() == nodes.NodeCode {
foundCode = true
// Unchecked assertion: panics if a node reporting NodeCode is not a *nodes.CodeNode.
codeNode := node.(*nodes.CodeNode)
if codeNode.Language() != "python" {
t.Errorf("Expected code language to be 'python', got '%s'", codeNode.Language())
}
// Substring checks only: the exact layout of the stored content is not asserted.
if !strings.Contains(codeNode.Content(), "def hello():") {
t.Errorf("Expected code content to contain 'def hello():', got '%s'", codeNode.Content())
}
if !strings.Contains(codeNode.Content(), "print(\"Hello, world!\")") {
t.Errorf("Expected code content to contain print statement, got '%s'", codeNode.Content())
}
}
}
if !foundCode {
t.Errorf("Expected to find a code node in parsed document")
}
}

View File

@@ -17,6 +17,8 @@ type Patterns struct {
doctest *regexp.Regexp
doctestContinue *regexp.Regexp
doctestOutput *regexp.Regexp
emphasis *regexp.Regexp
strong *regexp.Regexp
lineBlock *regexp.Regexp
comment *regexp.Regexp
title *regexp.Regexp
@@ -33,7 +35,7 @@ func NewPatterns() *Patterns {
transBlock: regexp.MustCompile(`{%\s*trans\s*%}(.*?){%\s*endtrans\s*%}`),
meta: regexp.MustCompile(`^\.\.\s+meta::`),
directive: regexp.MustCompile(`^\.\.\s+(\w+)::`),
codeBlock: regexp.MustCompile(`^\.\.\s+code::`),
codeBlock: regexp.MustCompile(`^\.\.\s+code-block::`),
blockQuote: regexp.MustCompile(`^(\s{4,})(.*?)(?:\s*--\s*(.*))?$`),
doctest: regexp.MustCompile(`^>>> (.+)\n((?:[^>].*\n)*)`),
doctestContinue: regexp.MustCompile(`^\.\.\.(.*$)`),
@@ -44,6 +46,8 @@ func NewPatterns() *Patterns {
subtitle: regexp.MustCompile(`^(-{3,})\n(.+?)\n(?:-{3,})$`),
transition: regexp.MustCompile(`^(\-{4,}|\={4,}|\*{4,})$`),
bulletList: regexp.MustCompile(`^(\s*)([-*+])(\s+)(.+)$`),
enumList: regexp.MustCompile(`^(\s*)(\d+|[a-zA-Z]|[ivxlcdm]+|[IVXLCDM]+)(\.\s+)(.+)$`),
enumList: regexp.MustCompile(`^(\s*)(\d+|[a-zA-Z]|[ivxlcdm]+|[IVXLCDM]+|#)(\.\s+)(.+)$`),
emphasis: regexp.MustCompile(`\*([^*]+)\*`),
strong: regexp.MustCompile(`\*\*([^*]+)\*\*`),
}
}

View File

@@ -1 +1,16 @@
package parser
import (
"github.com/go-i2p/go-rst/pkg/nodes"
)
// processStrong builds the node for a span of strong (bold) text.
//
// With no translator configured the content is used as-is; otherwise the
// translated content is stored in the node.
func (p *Parser) processStrong(content string) *nodes.StrongNode {
	if p.translator == nil {
		return nodes.NewStrongNode(content)
	}
	return nodes.NewStrongNode(p.translator.Translate(content))
}

35
pkg/parser/transition.go Normal file
View File

@@ -0,0 +1,35 @@
package parser
import (
"strings"
"github.com/go-i2p/go-rst/pkg/nodes"
)
// IsTransition reports whether line is a transition marker.
//
// Surrounding whitespace is ignored. The remaining text must be at least four
// bytes long and must match the transition pattern.
func (p *Patterns) IsTransition(line string) bool {
	candidate := strings.TrimSpace(line)
	if len(candidate) < 4 {
		return false
	}
	return p.transition.MatchString(candidate)
}
// TransitionChar extracts the character used in the transition.
//
// Surrounding whitespace is ignored and the first character of what remains
// is returned. The character is decoded as a whole rune rather than taken as
// the first byte, so a multi-byte UTF-8 character is returned intact instead
// of being truncated to its lead byte. A line that is empty after trimming
// yields '-'.
func (p *Patterns) TransitionChar(line string) rune {
	// Ranging over a string yields whole runes; only the first one is needed.
	for _, r := range strings.TrimSpace(line) {
		return r
	}
	return '-' // Default to hyphen if empty (shouldn't happen)
}
// processTransition handles the parsing of transition sections.
//
// A transition is a horizontal line separator typically used between
// sections. The first character of content becomes the transition character
// of the returned node; empty content falls back to '-'. The character is
// decoded as a whole rune, so a multi-byte UTF-8 character is not truncated
// to its first byte.
func (p *Parser) processTransition(content string) *nodes.TransitionNode {
	transChar := '-' // Default to hyphen
	// Ranging over a string decodes runes; stop after the first one.
	for _, r := range content {
		transChar = r
		break
	}
	return nodes.NewTransitionNode(transChar)
}

View File

@@ -61,6 +61,10 @@ func (r *PDFRenderer) renderNode(node nodes.Node) error {
return r.renderTable(n)
case *nodes.DirectiveNode:
return r.renderDirective(n)
case *nodes.StrongNode:
return r.renderStrong(n)
//case *nodes.EmphasisNode:
//return r.renderEmphasis(n)
default:
return r.renderChildren(node)
}
@@ -216,3 +220,34 @@ func (r *PDFRenderer) renderDirective(node *nodes.DirectiveNode) error {
// renderChildren renders the children of node by handing node.Children() to
// Render and returns whatever error Render reports.
func (r *PDFRenderer) renderChildren(node nodes.Node) error {
	children := node.Children()
	return r.Render(children)
}
/* Broken but doesn't matter right now, HTML matters.
func (r *PDFRenderer) renderEmphasis(node *nodes.EmphasisNode) error {
// Save current font information
family, style, size := r.pdf.GetFontInfo()
// Set italic style for emphasis
r.pdf.SetFont(family, "i", size)
// Write the text
r.pdf.Cell(0, r.lineHeight, node.Content())
// Restore original font style
r.pdf.SetFont(family, style, size)
return r.renderChildren(node)
}
*/
// renderStrong writes the node's content in bold and then renders the node's
// children.
//
// The font is set to Arial bold at the renderer's font size for the text and
// set back to Arial regular afterwards, before the children are rendered.
func (r *PDFRenderer) renderStrong(node *nodes.StrongNode) error {
	// setStyle selects Arial at the renderer's font size with the given style.
	setStyle := func(style string) {
		r.pdf.SetFont("Arial", style, r.fontSize)
	}

	setStyle("B")
	r.pdf.Cell(0, r.lineHeight, node.Content())
	setStyle("")

	return r.renderChildren(node)
}