Extract plain text from various document formats including PDF, DOCX, TXT, and more. This example shows you how to upload a file and extract its text content.
Overview
The /extract/file endpoint accepts file uploads and returns the extracted text content. Supported formats include PDF, Word documents, text files, and many other document types.
{ "filename": "document.pdf", "contentType": "application/pdf", "size": 245760, "text": "This is the extracted text content from the document..."}
JavaScript Example
Using Fetch API
Code
/**
 * Uploads a file to the /extract/file endpoint and returns the extracted text.
 *
 * @param {File|Blob} file - File to upload, e.g. taken from an <input type="file">.
 * @returns {Promise<string>} The `text` field of the API response.
 * @throws {Error} When the API answers with a non-2xx status. The message is the
 *   API's `error.message` when available, otherwise 'Failed to extract text'.
 */
async function extractTextFromFile(file) {
  const formData = new FormData();
  formData.append('file', file);

  const response = await fetch('https://api.crawler.dev/v1/extract/file', {
    method: 'POST',
    headers: {
      'x-api-key': 'YOUR_API_KEY',
    },
    body: formData,
  });

  if (!response.ok) {
    // An error body is not guaranteed to be JSON (it may be empty or an HTML
    // page), so a parse failure must not hide the HTTP failure itself.
    let error = null;
    try {
      error = await response.json();
    } catch {
      // Not JSON: fall through to the generic message below.
    }
    throw new Error(error?.error?.message || 'Failed to extract text');
  }

  const result = await response.json();
  return result.text;
}

// Usage with file input.
// Optional chaining keeps the snippet from throwing when the page has no
// file input, or when it is loaded where no `document` exists.
const fileInput = globalThis.document?.querySelector('input[type="file"]');
fileInput?.addEventListener('change', async (e) => {
  const file = e.target.files[0];
  if (!file) {
    return;
  }

  try {
    const text = await extractTextFromFile(file);
    console.log('Extracted text:', text);
  } catch (error) {
    console.error('Error:', error.message);
  }
});
Using Node.js with FormData
Code
const FormData = require('form-data');
const fs = require('fs');
const fetch = require('node-fetch');

/**
 * Streams a local file to the /extract/file endpoint and returns the parsed
 * API response.
 *
 * @param {string} filePath - Path of the file to upload.
 * @returns {Promise<object>} The parsed JSON response; the usage below reads
 *   its `text` and `filename` fields.
 * @throws {Error} When CRAWLER_API_KEY is not set, or when the API answers
 *   with a non-2xx status.
 */
async function extractTextFromFile(filePath) {
  // Fail early with a clear message instead of sending a request without a
  // usable key.
  const apiKey = process.env.CRAWLER_API_KEY;
  if (!apiKey) {
    throw new Error('CRAWLER_API_KEY environment variable not set');
  }

  const form = new FormData();
  form.append('file', fs.createReadStream(filePath));

  const response = await fetch('https://api.crawler.dev/v1/extract/file', {
    method: 'POST',
    headers: {
      'x-api-key': apiKey,
      ...form.getHeaders(),
    },
    body: form,
  });

  if (!response.ok) {
    // An error body is not guaranteed to be JSON (it may be empty or an HTML
    // page), so a parse failure must not hide the HTTP failure itself.
    let error = null;
    try {
      error = await response.json();
    } catch {
      // Not JSON: fall through to the generic message below.
    }
    throw new Error(error?.error?.message || 'Failed to extract text');
  }

  return response.json();
}

// Usage
extractTextFromFile('./document.pdf')
  .then((result) => {
    console.log(`Extracted ${result.text.length} characters from ${result.filename}`);
    console.log(result.text);
  })
  .catch((error) => console.error('Error:', error.message));
Python Example
Code
import os

import requests


def extract_text_from_file(file_path, timeout=120):
    """
    Extract text from a file using the crawler.dev API.

    Args:
        file_path: Path to the file to extract text from
        timeout: Seconds to wait for the server before giving up; passed
            straight through to ``requests.post``. Pass ``None`` to wait
            indefinitely.

    Returns:
        dict: API response containing extracted text and metadata

    Raises:
        ValueError: If the CRAWLER_API_KEY environment variable is not set.
        OSError: If the file cannot be opened.
        requests.exceptions.RequestException: If the request fails, times
            out, or the API responds with an error status.
    """
    api_key = os.getenv('CRAWLER_API_KEY')
    if not api_key:
        raise ValueError("CRAWLER_API_KEY environment variable not set")

    url = "https://api.crawler.dev/v1/extract/file"
    headers = {
        "x-api-key": api_key
    }

    with open(file_path, 'rb') as file:
        files = {'file': file}
        # Without a timeout, requests waits forever on a stalled connection.
        response = requests.post(url, headers=headers, files=files, timeout=timeout)

    response.raise_for_status()
    return response.json()


# Usage
try:
    result = extract_text_from_file('document.pdf')
    print(f"Extracted {len(result['text'])} characters from {result['filename']}")
    print(result['text'])
except (requests.exceptions.RequestException, OSError, ValueError) as e:
    # ValueError covers the missing API key; OSError covers an unreadable file.
    print(f"Error: {e}")
Error Handling
Always handle potential errors when extracting text from files:
Code
/**
 * Uploads a file to the /extract/file endpoint and turns each failure mode
 * into a descriptive error.
 *
 * @param {File|Blob} file - File to upload.
 * @returns {Promise<object>} The parsed API response, with a non-empty `text`.
 * @throws {Error} On network failure, on a non-2xx status (400, 401 and 413
 *   get dedicated messages), or when the response contains no text.
 */
async function extractTextWithErrorHandling(file) {
  const formData = new FormData();
  formData.append('file', file);

  let response;
  try {
    response = await fetch('https://api.crawler.dev/v1/extract/file', {
      method: 'POST',
      headers: {
        'x-api-key': 'YOUR_API_KEY',
      },
      body: formData,
    });
  } catch (error) {
    // fetch() rejects with a TypeError on network failure. The message text
    // differs between runtimes, so only the error type is checked.
    if (error.name === 'TypeError') {
      throw new Error('Network error: Check your internet connection', { cause: error });
    }
    throw error;
  }

  if (!response.ok) {
    // Handle API errors. The body is read defensively because an error
    // response is not guaranteed to be JSON; the status code must still
    // select the right message when parsing fails.
    let apiMessage;
    try {
      const body = await response.json();
      apiMessage = body?.error?.message;
    } catch {
      // Not JSON: the status code alone decides the message.
    }

    if (response.status === 400) {
      throw new Error(apiMessage ? `Invalid request: ${apiMessage}` : 'Invalid request');
    }
    if (response.status === 401) {
      throw new Error('Invalid API key');
    }
    if (response.status === 413) {
      throw new Error('File too large');
    }
    throw new Error(apiMessage || 'Unknown error');
  }

  const data = await response.json();
  if (!data?.text) {
    throw new Error('No text extracted from file');
  }

  return data;
}
Supported File Types
The API supports a wide range of file formats. See the Supported File Types page for a complete list.