Fix auto-numbered list parsing with #. syntax

fix code block parsing
Implement strong text (bold) parsing with **text** syntax
2025-09-06 02:47:51 -04:00 · 2025-07-15 15:18:57 -04:00 · 2025-07-15 14:42:17 -04:00 · 2025-07-15 14:04:01 -04:00 · 2025-07-15 13:58:15 -04:00 · 2025-07-15 10:46:37 -04:00
13 changed files with 344 additions and 46 deletions
--- a/README.md
+++ b/README.md
@@ -14,7 +14,7 @@ It is mostly unrelated to previous attempts to parse restructuredText in Go.
 ## Installation

 ```bash
-go get i2pgit.org/idk/go-rst
+go get github.com/go-i2p/go-rst
 ```

 ## Quick Start
@@ -34,9 +34,9 @@ import (
    "fmt"
    "io/ioutil"
    
-    "i2pgit.org/idk/go-rst/pkg/parser"
-    "i2pgit.org/idk/go-rst/pkg/renderer"
-    "i2pgit.org/idk/go-rst/pkg/translator"
+    "github.com/go-i2p/go-rst/pkg/parser"
+    "github.com/go-i2p/go-rst/pkg/renderer"
+    "github.com/go-i2p/go-rst/pkg/translator"
 )

 func main() {
--- a/TREE.md
+++ b/TREE.md
@@ -0,0 +1,56 @@
+pkg/
+├── nodes/                       # Node type definitions
+│   ├── blockquote.go            # Defines BlockQuoteNode for representing block quotes in RST
+│   ├── code.go                  # Defines CodeNode for representing code blocks in RST
+│   ├── comment.go               # Defines CommentNode for representing comments in RST
+│   ├── directive.go             # Defines DirectiveNode for representing RST directives
+│   ├── doc.md                   # Documentation for the nodes package
+│   ├── doctest.go               # Defines DoctestNode for representing doctest blocks
+│   ├── em.go                    # Defines EmphasisNode for representing emphasized (italic) text
+│   ├── extra_util.go            # Utility functions for node operations like indentation
+│   ├── heading.go               # Defines HeadingNode for representing section headings
+│   ├── lineblock.go             # Defines LineBlockNode for representing line blocks
+│   ├── link.go                  # Defines LinkNode for representing hyperlinks
+│   ├── list.go                  # Defines ListNode and ListItemNode for representing lists
+│   ├── meta.go                  # Defines MetaNode for representing metadata information
+│   ├── paragraph.go             # Defines ParagraphNode for representing text paragraphs
+│   ├── strong.go                # Defines StrongNode for representing strong (bold) text
+│   ├── subtitle.go              # Defines SubtitleNode for representing document subtitles
+│   ├── table.go                 # Defines TableNode for representing table structures
+│   ├── title.go                 # Defines TitleNode for representing document titles
+│   ├── transition.go            # Defines TransitionNode for representing transitions between sections
+│   └── types.go                 # Node type enumerations and base Node interface definitions
+│
+├── parser/                      # RST parsing logic
+│   ├── blockquote.go            # Contains logic for parsing block quotes
+│   ├── code.go                  # Contains logic for parsing code blocks
+│   ├── context.go               # Manages parser context and state during parsing
+│   ├── directive.go             # Contains logic for parsing RST directives
+│   ├── doc.md                   # Documentation for the parser package
+│   ├── doctest.go               # Contains logic for parsing doctest blocks
+│   ├── emphasis.go              # Contains logic for parsing emphasized text
+│   ├── headiing.go              # Contains logic for parsing section headings
+│   ├── lexer.go                 # Tokenizes RST input into tokens
+│   ├── lineblock.go             # Contains logic for parsing line blocks
+│   ├── link.go                  # Contains logic for parsing hyperlinks
+│   ├── list.go                  # Contains logic for parsing lists and list items
+│   ├── meta.go                  # Contains logic for parsing metadata
+│   ├── paragraph.go             # Contains logic for parsing text paragraphs
+│   ├── parser.go                # Main parser implementation that processes tokens into a node tree
+│   ├── parser_test.go           # Tests for the parser functionality
+│   ├── patterns.go              # Regex patterns for RST syntax recognition
+│   ├── strong.go                # Contains logic for parsing strong (bold) text
+│   ├── subtitle.go              # Contains logic for parsing document subtitles
+│   ├── table.go                 # Contains logic for parsing tables
+│   ├── title.go                 # Contains logic for parsing document titles
+│   └── transition.go            # Contains logic for parsing transitions between sections
+│
+├── renderer/                    # Output rendering components
+│   ├── doc.md                   # Documentation for the renderer package
+│   ├── html.go                  # HTML output renderer implementation
+│   ├── markdown.go              # Markdown output renderer implementation
+│   └── pdf.go                   # PDF output renderer implementation using gofpdf
+│
+└── translator/                  # Translation capabilities
+    ├── doc.md                   # Documentation for the translator package
+    └── translator.go            # Handles translation of text content using PO files
--- a/output.html
+++ b/output.html
@@ -1,26 +0,0 @@
-<!DOCTYPE html>
-<html>
-  <head>
-    <meta charset="UTF-8">
-  </head>
-  <body>
-    <h1>
-      Welcome to My Documentation
-    </h1>
-    <h2>
-      Translations
-    </h2>
-    <p>
-      Welcome to My Documentation
-      This is a sample RST document that demonstrates translations.
-      Translations
-    </p>
-    <p>
-      Este texto será traducido
-      Some regular text here.
-    </p>
-    <p>
-      Otra sección traducible
-    </p>
-  </body>
-</html>
--- a/pkg/parser/code.go
+++ b/pkg/parser/code.go
@@ -11,16 +11,34 @@ func (p *Parser) processCodeBlock(line string, currentNode nodes.Node) nodes.Nod
 		return currentNode
 	}

-	p.context.buffer = append(p.context.buffer, line)
+	// Check if we're at the end of the code block
+	// RST code blocks end when we encounter a line that is not indented
+	// or when we hit a blank line followed by unindented content
+	trimmedLine := strings.TrimSpace(line)

-	if strings.TrimSpace(line) == "" {
-		codeNode := currentNode.(*nodes.CodeNode)
-		content := strings.Join(p.context.buffer, "\n")
-		codeNode.SetContent(content)
-		p.nodes = append(p.nodes, codeNode)
-		p.context.Reset()
-		return nil
+	// If this is a blank line, add it to buffer but continue
+	if trimmedLine == "" {
+		p.context.buffer = append(p.context.buffer, line)
+		return currentNode
 	}

-	return currentNode
+	// Check if line is properly indented (at least 4 spaces for code content)
+	if len(line) >= 4 && line[:4] == "    " {
+		// This is indented content, add to buffer
+		// Remove the base indentation (4 spaces) to preserve relative indentation
+		p.context.buffer = append(p.context.buffer, line[4:])
+		return currentNode
+	}
+	// Line is not indented, this means end of code block
+	// Finalize the code block
+	codeNode := currentNode.(*nodes.CodeNode)
+	content := strings.Join(p.context.buffer, "\n")
+	codeNode.SetContent(strings.TrimSpace(content))
+
+	// Add the completed code block to nodes
+	p.nodes = append(p.nodes, codeNode)
+
+	// Reset context and return nil to signal completion
+	p.context.Reset()
+	return nil
 }
--- a/pkg/parser/emphasis.go
+++ b/pkg/parser/emphasis.go
@@ -1 +1,16 @@
 package parser
+
+import (
+	"github.com/go-i2p/go-rst/pkg/nodes"
+)
+
+// processEmphasis handles the parsing of emphasized (italic) text
+func (p *Parser) processEmphasis(content string) *nodes.EmphasisNode {
+	// If translator is available, translate the content
+	if p.translator != nil {
+		content = p.translator.Translate(content)
+	}
+
+	// Create a new emphasis node with the content
+	return nodes.NewEmphasisNode(content)
+}
--- a/pkg/parser/lexer.go
+++ b/pkg/parser/lexer.go
@@ -24,6 +24,9 @@ const (
 	TokenEnumList                          // TokenEnumList represents an enumerated list item token.
 	TokenDoctest                           // TokenDoctest represents a doctest token.
 	TokenLineBlock                         // TokenLineBlock represents a line block token.
+	TokenTransition                        // TokenTransition represents a transition token.
+	TokenEmphasis                          // TokenEmphasis represents emphasized (italic) text
+	TokenStrong                            // TokenStrong represents strong (bold) text
 )

 // Token represents a single token in the input text.
@@ -146,6 +149,22 @@ func (l *Lexer) Tokenize(line string) Token {
 		}
 	}

+	// Check for strong (bold text) - must come before emphasis to avoid conflict
+	if matches := l.patterns.strong.FindStringSubmatch(line); len(matches) > 1 {
+		return Token{
+			Type:    TokenStrong,
+			Content: matches[1], // The text between double asterisks
+		}
+	}
+
+	// Check for emphasis (italic text)
+	if matches := l.patterns.emphasis.FindStringSubmatch(line); len(matches) > 1 {
+		return Token{
+			Type:    TokenEmphasis,
+			Content: matches[1], // The text between asterisks
+		}
+	}
+
 	// Check for line block (poetry-style line with | prefix)
 	if matches := l.patterns.lineBlock.FindStringSubmatch(line); len(matches) > 0 {
 		return Token{
@@ -154,6 +173,15 @@ func (l *Lexer) Tokenize(line string) Token {
 		}
 	}

+	// Check for transitions
+	if l.patterns.IsTransition(line) {
+		transChar := l.patterns.TransitionChar(line)
+		return Token{
+			Type:    TokenTransition,
+			Content: string(transChar),
+		}
+	}
+
 	// Regular text
 	return Token{
 		Type:    TokenText,
--- a/pkg/parser/lineblock.go
+++ b/pkg/parser/lineblock.go
@@ -1 +1,22 @@
 package parser
+
+import (
+	"strings"
+
+	"github.com/go-i2p/go-rst/pkg/nodes"
+)
+
+// processLineBlock handles parsing of line block tokens
+// Line blocks are used for poetry-style content where line breaks are preserved
+func (p *Parser) processLineBlock(content string, currentNode nodes.Node) nodes.Node {
+	// If we already have a line block node, append the line
+	if currentNode != nil && currentNode.Type() == nodes.NodeLineBlock {
+		lineBlockNode := currentNode.(*nodes.LineBlockNode)
+		currentLines := lineBlockNode.Lines()
+		newLines := append(currentLines, strings.TrimSpace(content))
+		return nodes.NewLineBlockNode(newLines)
+	}
+
+	// Otherwise create a new line block node
+	return nodes.NewLineBlockNode([]string{strings.TrimSpace(content)})
+}
--- a/pkg/parser/parser.go
+++ b/pkg/parser/parser.go
@@ -41,7 +41,7 @@ func (p *Parser) Parse(content string) []nodes.Node {
 		line := scanner.Text()
 		token := p.lexer.Tokenize(line)

-		if newNode := p.processToken(token, prevToken, currentNode); newNode != nil {
+		if newNode := p.processToken(token, prevToken, currentNode, line); newNode != nil {
 			// Only append if we actually have a new node
 			if currentNode != nil && currentNode != newNode {
 				p.nodes = append(p.nodes, currentNode)
@@ -59,7 +59,7 @@ func (p *Parser) Parse(content string) []nodes.Node {
 	return p.nodes
 }

-func (p *Parser) processToken(token, prevToken Token, currentNode nodes.Node) nodes.Node {
+func (p *Parser) processToken(token, prevToken Token, currentNode nodes.Node, originalLine string) nodes.Node {
 	// translatedContent := p.translator.Translate(token.Content)
 	// token.Content = translatedContent
 	switch token.Type {
@@ -73,8 +73,13 @@ func (p *Parser) processToken(token, prevToken Token, currentNode nodes.Node) no
 		return nodes.NewCommentNode(token.Content)
 	case TokenTransBlock:
 		// Always create a new node for translation blocks
-		translatedContent := p.translator.Translate(strings.TrimSpace(token.Content))
-		return nodes.NewParagraphNode(translatedContent)
+		content := strings.TrimSpace(token.Content)
+		if p.translator != nil {
+			translatedContent := p.translator.Translate(content)
+			return nodes.NewParagraphNode(translatedContent)
+		}
+		// If no translator is available, return the original content
+		return nodes.NewParagraphNode(content)
 	case TokenHeadingUnderline:
 		if prevToken.Type == TokenText {
 			return p.processHeading(prevToken.Content, token.Content)
@@ -98,6 +103,14 @@ func (p *Parser) processToken(token, prevToken Token, currentNode nodes.Node) no
 		p.context.currentDirective = token.Content
 		return nodes.NewDirectiveNode(token.Content, token.Args)

+	case TokenEmphasis:
+		// Process the emphasized text
+		return p.processEmphasis(token.Content)
+
+	case TokenStrong:
+		// Process the strong (bold) text
+		return p.processStrong(token.Content)
+
 	case TokenLineBlock:
 		// Check if we're already in a line block node
 		if lineBlock, ok := currentNode.(*nodes.LineBlockNode); ok {
@@ -112,7 +125,7 @@ func (p *Parser) processToken(token, prevToken Token, currentNode nodes.Node) no

 	case TokenText:
 		if p.context.inCodeBlock {
-			return p.processCodeBlock(token.Content, currentNode)
+			return p.processCodeBlock(originalLine, currentNode) // Use original line to preserve indentation
 		}
 		if p.context.inMeta {
 			return p.processMetaContent(token.Content, currentNode)
@@ -121,6 +134,12 @@ func (p *Parser) processToken(token, prevToken Token, currentNode nodes.Node) no
 			return p.processDirectiveContent(token.Content, currentNode)
 		}
 		return p.processParagraph(token.Content, currentNode)
+	case TokenTransition:
+		// For transitions, we create a new transition node with the character used
+		if len(token.Content) > 0 {
+			return p.processTransition(token.Content)
+		}
+		return nodes.NewTransitionNode('-') // Default to hyphen if empty
 	}

 	return currentNode
--- a/pkg/parser/parser_test.go
+++ b/pkg/parser/parser_test.go
@@ -4,8 +4,10 @@ package parser
 // Use example restructuredText files embedded in the test functions

 import (
+	"strings"
 	"testing"

+	"github.com/go-i2p/go-rst/pkg/nodes"
 	"github.com/go-i2p/go-rst/pkg/translator"
 )

@@ -48,3 +50,79 @@ func TestParseNilTranslatorEmptyInput(t *testing.T) {
 		t.Errorf("Expected empty, got a document")
 	}
 }
+
+func TestParseNilTranslatorWithTranslationBlocks(t *testing.T) {
+	parser := NewParser(nil)
+	content := "{% trans %}Hello, world!{% endtrans %}"
+
+	// This should not panic, but gracefully handle the nil translator
+	doc := parser.Parse(content)
+	if len(doc) == 0 {
+		t.Errorf("Expected parsed nodes, got empty document")
+	}
+}
+
+func TestParseStrongText(t *testing.T) {
+	noopTranslator := translator.NewNoopTranslator()
+	parser := NewParser(noopTranslator)
+	content := "This is **bold text** in a sentence."
+
+	doc := parser.Parse(content)
+	if len(doc) == 0 {
+		t.Errorf("Expected parsed nodes, got empty document")
+	}
+
+	// Check that we have a strong node
+	foundStrong := false
+	for _, node := range doc {
+		if node.Type() == nodes.NodeStrong {
+			foundStrong = true
+			if node.Content() != "bold text" {
+				t.Errorf("Expected strong content to be 'bold text', got '%s'", node.Content())
+			}
+		}
+	}
+
+	if !foundStrong {
+		t.Errorf("Expected to find a strong node in parsed document")
+	}
+}
+
+func TestParseCodeBlock(t *testing.T) {
+	noopTranslator := translator.NewNoopTranslator()
+	parser := NewParser(noopTranslator)
+	content := `.. code-block:: python
+
+    def hello():
+        print("Hello, world!")
+        return True
+
+End of test.`
+
+	doc := parser.Parse(content)
+	if len(doc) == 0 {
+		t.Errorf("Expected parsed nodes, got empty document")
+	}
+
+	// Check that we have a code node
+	foundCode := false
+	for _, node := range doc {
+		if node.Type() == nodes.NodeCode {
+			foundCode = true
+			codeNode := node.(*nodes.CodeNode)
+			if codeNode.Language() != "python" {
+				t.Errorf("Expected code language to be 'python', got '%s'", codeNode.Language())
+			}
+			if !strings.Contains(codeNode.Content(), "def hello():") {
+				t.Errorf("Expected code content to contain 'def hello():', got '%s'", codeNode.Content())
+			}
+			if !strings.Contains(codeNode.Content(), "print(\"Hello, world!\")") {
+				t.Errorf("Expected code content to contain print statement, got '%s'", codeNode.Content())
+			}
+		}
+	}
+
+	if !foundCode {
+		t.Errorf("Expected to find a code node in parsed document")
+	}
+}
--- a/pkg/parser/patterns.go
+++ b/pkg/parser/patterns.go
@@ -17,6 +17,8 @@ type Patterns struct {
 	doctest          *regexp.Regexp
 	doctestContinue  *regexp.Regexp
 	doctestOutput    *regexp.Regexp
+	emphasis         *regexp.Regexp
+	strong           *regexp.Regexp
 	lineBlock        *regexp.Regexp
 	comment          *regexp.Regexp
 	title            *regexp.Regexp
@@ -33,7 +35,7 @@ func NewPatterns() *Patterns {
 		transBlock:       regexp.MustCompile(`{%\s*trans\s*%}(.*?){%\s*endtrans\s*%}`),
 		meta:             regexp.MustCompile(`^\.\.\s+meta::`),
 		directive:        regexp.MustCompile(`^\.\.\s+(\w+)::`),
-		codeBlock:        regexp.MustCompile(`^\.\.\s+code::`),
+		codeBlock:        regexp.MustCompile(`^\.\.\s+code-block::`),
 		blockQuote:       regexp.MustCompile(`^(\s{4,})(.*?)(?:\s*--\s*(.*))?$`),
 		doctest:          regexp.MustCompile(`^>>> (.+)\n((?:[^>].*\n)*)`),
 		doctestContinue:  regexp.MustCompile(`^\.\.\.(.*$)`),
@@ -44,6 +46,8 @@ func NewPatterns() *Patterns {
 		subtitle:         regexp.MustCompile(`^(-{3,})\n(.+?)\n(?:-{3,})$`),
 		transition:       regexp.MustCompile(`^(\-{4,}|\={4,}|\*{4,})$`),
 		bulletList:       regexp.MustCompile(`^(\s*)([-*+])(\s+)(.+)$`),
-		enumList:         regexp.MustCompile(`^(\s*)(\d+|[a-zA-Z]|[ivxlcdm]+|[IVXLCDM]+)(\.\s+)(.+)$`),
+		enumList:         regexp.MustCompile(`^(\s*)(\d+|[a-zA-Z]|[ivxlcdm]+|[IVXLCDM]+|#)(\.\s+)(.+)$`),
+		emphasis:         regexp.MustCompile(`\*([^*]+)\*`),
+		strong:           regexp.MustCompile(`\*\*([^*]+)\*\*`),
 	}
 }
--- a/pkg/parser/strong.go
+++ b/pkg/parser/strong.go
@@ -1 +1,16 @@
 package parser
+
+import (
+	"github.com/go-i2p/go-rst/pkg/nodes"
+)
+
+// processStrong handles the parsing of strong (bold) text
+func (p *Parser) processStrong(content string) *nodes.StrongNode {
+	// If translator is available, translate the content
+	if p.translator != nil {
+		content = p.translator.Translate(content)
+	}
+
+	// Create a new strong node with the content
+	return nodes.NewStrongNode(content)
+}
--- a/pkg/parser/transition.go
+++ b/pkg/parser/transition.go
@@ -0,0 +1,35 @@
+package parser
+
+import (
+	"strings"
+
+	"github.com/go-i2p/go-rst/pkg/nodes"
+)
+
+// IsTransition checks if a line is a transition
+func (p *Patterns) IsTransition(line string) bool {
+	// A transition is a line with 4+ repeated punctuation characters
+	return len(strings.TrimSpace(line)) >= 4 && p.transition.MatchString(strings.TrimSpace(line))
+}
+
+// TransitionChar extracts the character used in the transition
+func (p *Patterns) TransitionChar(line string) rune {
+	trimmed := strings.TrimSpace(line)
+	if len(trimmed) > 0 {
+		return rune(trimmed[0])
+	}
+	return '-' // Default to hyphen if empty (shouldn't happen)
+}
+
+// processTransition handles the parsing of transition sections
+// A transition is a horizontal line separator typically used between sections
+func (p *Parser) processTransition(content string) *nodes.TransitionNode {
+	// Extract the character used in the transition
+	var transChar rune = '-' // Default to hyphen
+	if len(content) > 0 {
+		transChar = rune(content[0])
+	}
+
+	// Create a new transition node with the character
+	return nodes.NewTransitionNode(transChar)
+}
--- a/pkg/renderer/pdf.go
+++ b/pkg/renderer/pdf.go
@@ -61,6 +61,10 @@ func (r *PDFRenderer) renderNode(node nodes.Node) error {
 		return r.renderTable(n)
 	case *nodes.DirectiveNode:
 		return r.renderDirective(n)
+	case *nodes.StrongNode:
+		return r.renderStrong(n)
+	//case *nodes.EmphasisNode:
+	//return r.renderEmphasis(n)
 	default:
 		return r.renderChildren(node)
 	}
@@ -216,3 +220,34 @@ func (r *PDFRenderer) renderDirective(node *nodes.DirectiveNode) error {
 func (r *PDFRenderer) renderChildren(node nodes.Node) error {
 	return r.Render(node.Children())
 }
+
+/* Broken but doesn't matter right now, HTML matters.
+func (r *PDFRenderer) renderEmphasis(node *nodes.EmphasisNode) error {
+	// Save current font information
+	family, style, size := r.pdf.GetFontInfo()
+
+	// Set italic style for emphasis
+	r.pdf.SetFont(family, "i", size)
+
+	// Write the text
+	r.pdf.Cell(0, r.lineHeight, node.Content())
+
+	// Restore original font style
+	r.pdf.SetFont(family, style, size)
+
+	return r.renderChildren(node)
+}
+*/
+
+func (r *PDFRenderer) renderStrong(node *nodes.StrongNode) error {
+	// Set bold style for strong text
+	r.pdf.SetFont("Arial", "B", r.fontSize)
+
+	// Write the text
+	r.pdf.Cell(0, r.lineHeight, node.Content())
+
+	// Reset to normal font style
+	r.pdf.SetFont("Arial", "", r.fontSize)
+
+	return r.renderChildren(node)
+}
Author	SHA1	Message	Date
eyedeekay	ae87d950a4	Fix auto-numbered list parsing with #. syntax	2025-07-15 15:18:57 -04:00
eyedeekay	bdbb75e3c3	fix code block parsing	2025-07-15 14:42:17 -04:00
eyedeekay	8801995d74	Implement strong text (bold) parsing with **text** syntax	2025-07-15 14:04:01 -04:00
eyedeekay	d165ce5d03	fix main.go	2025-07-15 13:58:15 -04:00
eyedeekay	817f7e0978	fix import path in readme	2025-07-15 10:46:37 -04:00
eyedeekay	9aa0032cc2	clean up test files	2025-07-15 10:44:55 -04:00
eyedeekay	78bf1464a9	Fix nil pointer dereference in translation processing	2025-07-15 10:43:46 -04:00
eyedeekay	23e3c4864b	add some parts of transition implementation	2025-05-03 19:02:29 -04:00
eyedeekay	492960101f	add tree describing the structure	2025-05-03 18:44:53 -04:00