Learn how to extract text from any kind of file or URL with the crawler.dev Go SDK.
Prerequisites
To get the most out of this guide, you'll need to:
- Create a free crawler.dev account
- Create an API key
Installation
Install the crawler.dev Go SDK using go get:
go get github.com/crawler-dot-dev/api-sdk-go
Quick Start
Here's how to get started with text extraction using Go:
package main
import (
"bytes"
"context"
"fmt"
"io"
"os"
crawlerdev "github.com/crawler-dot-dev/api-sdk-go"
"github.com/crawler-dot-dev/api-sdk-go/option"
)
// main shows the minimal extraction flow: build a client from the API key in
// the environment, upload an in-memory document, and print the detected
// content type and the extracted text.
func main() {
	client := crawlerdev.NewClient(
		option.WithAPIKey(os.Getenv("API_CRAWLER_DEV_SDKS_API_KEY")), // defaults to os.LookupEnv("API_CRAWLER_DEV_SDKS_API_KEY")
	)

	// Extract text from a file.
	// File is given a *bytes.Buffer here and an *os.File in the file example
	// of this guide, so it is presumably an io.Reader. The explicit conversion
	// spells that out and keeps the "io" import in use; Go refuses to compile
	// a file that contains an unused import.
	fileContent := []byte("file content here")
	response, err := client.Extract.FromFile(context.TODO(), crawlerdev.ExtractFromFileParams{
		File: io.Reader(bytes.NewBuffer(fileContent)),
	})
	if err != nil {
		panic(err.Error())
	}

	fmt.Printf("Content type: %s\n", response.ContentType)
	fmt.Printf("Text: %s\n", response.Text)
}
Features
- High-performance SDK with strong typing
- Excellent concurrency support
- Comprehensive error handling
- Context support for timeouts and cancellation
- Built-in retry logic
- Full Go module support
Repository
The SDK source code is hosted at https://github.com/crawler-dot-dev/api-sdk-go.
Examples
package main
import (
"context"
"fmt"
"os"
crawlerdev "github.com/crawler-dot-dev/api-sdk-go"
"github.com/crawler-dot-dev/api-sdk-go/option"
)
// main opens document.pdf from the working directory, sends it to the
// extraction endpoint, and prints the text that comes back.
func main() {
	apiKey := os.Getenv("API_CRAWLER_DEV_SDKS_API_KEY")
	client := crawlerdev.NewClient(option.WithAPIKey(apiKey))

	// Open the document; the handle is released when main returns.
	doc, openErr := os.Open("document.pdf")
	if openErr != nil {
		panic(openErr)
	}
	defer doc.Close()

	// Pass the open file straight to the extraction call.
	params := crawlerdev.ExtractFromFileParams{File: doc}
	extracted, extractErr := client.Extract.FromFile(context.TODO(), params)
	if extractErr != nil {
		panic(extractErr)
	}

	fmt.Println(extracted.Text)
}
package main
import (
"context"
"fmt"
"os"
"sync"
crawlerdev "github.com/crawler-dot-dev/api-sdk-go"
"github.com/crawler-dot-dev/api-sdk-go/option"
)
// main extracts text from several URLs at once, one goroutine per URL, and
// then reports each outcome in the same order as the input list.
func main() {
	apiKey := os.Getenv("API_CRAWLER_DEV_SDKS_API_KEY")
	client := crawlerdev.NewClient(option.WithAPIKey(apiKey))

	urls := []string{
		"https://example.com/page1",
		"https://example.com/page2",
		"https://example.com/page3",
	}

	ctx := context.TODO()

	// One slot per URL in each slice: every goroutine writes only to its own
	// index, so no mutex is needed around these writes.
	responses := make([]*crawlerdev.ExtractFromUrlResponse, len(urls))
	failures := make([]error, len(urls))

	var wg sync.WaitGroup
	wg.Add(len(urls))
	for i := range urls {
		go func(slot int, target string) {
			defer wg.Done()

			resp, err := client.Extract.FromUrl(ctx, crawlerdev.ExtractFromUrlParams{
				Url: target,
			})
			if err != nil {
				failures[slot] = err
				return
			}
			responses[slot] = resp
		}(i, urls[i])
	}

	// Block until every request has either succeeded or failed.
	wg.Wait()

	for i, target := range urls {
		switch {
		case failures[i] != nil:
			fmt.Printf("Error extracting from %s: %v\n", target, failures[i])
		case responses[i] != nil:
			fmt.Printf("Text from %s: %s\n", target, responses[i].Text)
		}
	}
}
Using Context for Timeouts
package main
import (
"context"
"fmt"
"os"
"time"
crawlerdev "github.com/crawler-dot-dev/api-sdk-go"
"github.com/crawler-dot-dev/api-sdk-go/option"
)
// main extracts text from a URL using a context that expires after 30
// seconds, and prints the extracted text.
func main() {
	apiKey := os.Getenv("API_CRAWLER_DEV_SDKS_API_KEY")
	client := crawlerdev.NewClient(option.WithAPIKey(apiKey))

	// Derive a context whose deadline is 30 seconds from now. cancel is
	// deferred so the context's resources are released when main returns.
	const requestTimeout = 30 * time.Second
	ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
	defer cancel()

	// Pass the deadline-carrying context to the extraction call.
	params := crawlerdev.ExtractFromUrlParams{Url: "https://example.com"}
	page, err := client.Extract.FromUrl(ctx, params)
	if err != nil {
		panic(err)
	}

	fmt.Println(page.Text)
}
Error Handling
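Failed requests return an error. When that error is a *crawlerdev.APIError, you can inspect its StatusCode and Message to react to specific failures: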
package main
import (
	"context"
	"errors"
	"fmt"
	"os"

	crawlerdev "github.com/crawler-dot-dev/api-sdk-go"
	"github.com/crawler-dot-dev/api-sdk-go/option"
)
// main extracts text from a URL and shows how to tell API-level failures
// (identified by HTTP status code) apart from any other error.
func main() {
	client := crawlerdev.NewClient(
		option.WithAPIKey(os.Getenv("API_CRAWLER_DEV_SDKS_API_KEY")),
	)

	result, err := client.Extract.FromUrl(context.TODO(), crawlerdev.ExtractFromUrlParams{
		Url: "https://example.com",
	})
	if err != nil {
		// errors.As walks the whole error chain, so the API error is still
		// found when it has been wrapped with extra context. A direct type
		// assertion only matches when the outermost error has that type.
		var apiErr *crawlerdev.APIError
		if !errors.As(err, &apiErr) {
			fmt.Printf("An error occurred: %v\n", err)
			return
		}

		// Branch on the HTTP status code carried by the API error.
		switch apiErr.StatusCode {
		case 401:
			fmt.Println("Invalid API key")
		case 429:
			fmt.Println("Rate limit exceeded")
		default:
			fmt.Printf("API error: %s\n", apiErr.Message)
		}
		return
	}

	fmt.Println(result.Text)
}