Skip to content

Commit 0573041

Browse files
authored
Merge pull request #263 from kellemNegasi/us-777-search-replace-example
[US-777]Example Code for Search and Replace Usages
2 parents 982a1b3 + 904a18b commit 0573041

File tree

5 files changed

+196
-0
lines changed

5 files changed

+196
-0
lines changed

search-and-replace/READ.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# PDF Text Search and Replace
2+
3+
This example shows how to do text search and replacement on a PDF file using unipdf.
4+
5+
## Examples
6+
- [search_text.go](search_text.go) This example shows how to search for text using unipdf by providing the pattern string and the pages to search on.
7+
- [replace_text.go](replace_text.go) This example shows how to replace a given text by searching for it using a pattern and a replacement string.
8+
A list of pages is also provided as a parameter to specify the pages on which to do the replacement.

search-and-replace/replace_text.go

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
/*
2+
* This example code shows how to perform a search-and-replace operation on a PDF using unipdf
3+
*
4+
* Run as: go run replace_text.go <pattern> <replacement> <pages> <input> <output>
5+
*
6+
* example: go run replace_text.go "Australia" "America" "1,2" ./test-data/file1.pdf ./test-data/result.pdf
7+
*/
8+
package main
9+
10+
import (
11+
"fmt"
12+
"os"
13+
"strconv"
14+
"strings"
15+
16+
"github.com/unidoc/unipdf/v3/common/license"
17+
"github.com/unidoc/unipdf/v3/extractor"
18+
"github.com/unidoc/unipdf/v3/model"
19+
)
20+
21+
func init() {
22+
// Make sure to load your metered License API key prior to using the library.
23+
// If you need a key, you can sign up and create a free one at https://cloud.unidoc.io
24+
err := license.SetMeteredKey(os.Getenv(`UNIDOC_LICENSE_API_KEY`))
25+
if err != nil {
26+
panic(err)
27+
}
28+
}
29+
30+
func main() {
31+
// Ensure enough arguments are provided
32+
if len(os.Args) < 5 {
33+
fmt.Println("Usage: go run replace_text.go <pattern> <replacement> <pages> <input> <output>")
34+
os.Exit(1)
35+
}
36+
37+
// Parse positional arguments
38+
pattern := os.Args[1]
39+
replacement := os.Args[2]
40+
pagesArg := os.Args[3]
41+
filePath := os.Args[4]
42+
outputPath := os.Args[5]
43+
44+
// Convert pages string to a slice of integers
45+
pageStrings := strings.Split(pagesArg, ",")
46+
pageList := []int{}
47+
for _, pageStr := range pageStrings {
48+
page, err := strconv.Atoi(pageStr)
49+
if err != nil {
50+
fmt.Printf("Invalid page number: %s\n", pageStr)
51+
os.Exit(1)
52+
}
53+
pageList = append(pageList, page)
54+
}
55+
56+
reader, _, err := model.NewPdfReaderFromFile(filePath, nil)
57+
if err != nil {
58+
fmt.Printf("Failed to create PDF reader: %v", err)
59+
os.Exit(1)
60+
}
61+
editor := extractor.NewEditor(reader)
62+
63+
err = editor.Replace(pattern, replacement, pageList)
64+
if err != nil {
65+
fmt.Printf("Failed to search pattern: %v\n", err)
66+
os.Exit(1)
67+
}
68+
69+
err = editor.WriteToFile(outputPath)
70+
if err != nil {
71+
fmt.Printf("Failed to write to file: %v", err)
72+
os.Exit(1)
73+
}
74+
75+
fmt.Printf("Finished replacing %s by %s and saved the output file at %s\n", pattern, replacement, filePath)
76+
}

search-and-replace/search_text.go

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
/*
2+
* This example code shows how to do text searching on pdf using unipdf
3+
*
4+
* Run as: go run search_text.go <pattern> <pages> <input>
5+
*
6+
* Example: go run search_text.go "copyright law" "1,2" ./test-data/file1.pdf
7+
*/
8+
9+
package main
10+
11+
import (
12+
"fmt"
13+
"os"
14+
"strconv"
15+
"strings"
16+
17+
"github.com/unidoc/unipdf/v3/common/license"
18+
"github.com/unidoc/unipdf/v3/extractor"
19+
"github.com/unidoc/unipdf/v3/model"
20+
)
21+
22+
func init() {
23+
// Make sure to load your metered License API key prior to using the library.
24+
// If you need a key, you can sign up and create a free one at https://cloud.unidoc.io
25+
err := license.SetMeteredKey(os.Getenv(`UNIDOC_LICENSE_API_KEY`))
26+
if err != nil {
27+
panic(err)
28+
}
29+
}
30+
31+
func main() {
32+
// Ensure enough arguments are provided
33+
if len(os.Args) < 4 {
34+
fmt.Println("Usage: go run main.go <pattern> <pages> <input>")
35+
os.Exit(1)
36+
}
37+
38+
// Parse positional arguments
39+
pattern := os.Args[1]
40+
pagesArg := os.Args[2]
41+
filePath := os.Args[3]
42+
43+
// Convert pages string to a slice of integers
44+
pageStrings := strings.Split(pagesArg, ",")
45+
pageList := []int{}
46+
for _, pageStr := range pageStrings {
47+
page, err := strconv.Atoi(pageStr)
48+
if err != nil {
49+
fmt.Printf("Invalid page number: %s\n", pageStr)
50+
os.Exit(1)
51+
}
52+
pageList = append(pageList, page)
53+
}
54+
55+
// Create a new PDF reader
56+
reader, _, err := model.NewPdfReaderFromFile(filePath, nil)
57+
if err != nil {
58+
fmt.Printf("Failed to create PDF reader: %v\n", err)
59+
os.Exit(1)
60+
}
61+
62+
// Create an Editor object for searching
63+
editor := extractor.NewEditor(reader)
64+
65+
// Perform the search for the specified pattern on the given pages
66+
matchesPerPage, err := editor.Search(pattern, pageList)
67+
if err != nil {
68+
fmt.Printf("Failed to search pattern: %v\n", err)
69+
os.Exit(1)
70+
}
71+
72+
// Print formatted search results
73+
printSearchResults(matchesPerPage, pageList, pattern)
74+
}
75+
76+
// printSearchResults formats and prints the search results.
77+
// It displays indexes as [beg:end] and locations as {Llx Lly Urx Ury}.
78+
// If no matches are found for a page, it prints a not found message.
79+
func printSearchResults(matchesPerPage map[int]extractor.Match, pages []int, pattern string) {
80+
foundAny := false // Flag to check if any match is found across all pages
81+
82+
for _, page := range pages {
83+
result, exists := matchesPerPage[page]
84+
if exists && len(result.Indexes) > 0 {
85+
foundAny = true
86+
fmt.Printf("Page %d:\n", page)
87+
88+
// Prepare index strings
89+
var indexStrings []string
90+
for _, idx := range result.Indexes {
91+
indexStrings = append(indexStrings, fmt.Sprintf("[%d:%d]", idx[0], idx[1]))
92+
}
93+
fmt.Printf("indexes: %s\n", strings.Join(indexStrings, ", "))
94+
95+
// Prepare location strings
96+
var locationStrings []string
97+
for _, box := range result.Locations {
98+
locationStrings = append(locationStrings, fmt.Sprintf("{%.2f %.2f %.2f %.2f}", box.BBox.Llx, box.BBox.Lly, box.BBox.Urx, box.BBox.Ury))
99+
}
100+
fmt.Printf("locations: %s\n\n", strings.Join(locationStrings, ", "))
101+
} else {
102+
// If no matches found for the current page
103+
fmt.Printf("Page %d:\n", page)
104+
fmt.Println("pattern didn't match any text\n")
105+
}
106+
}
107+
108+
if !foundAny {
109+
// If no matches found in any of the pages
110+
fmt.Println("pattern didn't match any text in the specified pages.")
111+
}
112+
}
198 KB
Binary file not shown.
196 KB
Binary file not shown.

0 commit comments

Comments
 (0)