Appearance
API documentation for LLMCrawl
Servers
https://api.llmcrawl.dev — Production server
POST /v1/scrape
POST
/v1/scrape
Scrape a single webpage
Authorizations
bearerAuth
Type: HTTP (bearer)
Request Body
application/json
{
"formats": [
"markdown",
"html"
],
"headers": {
"additionalProperties": "string"
},
"includeTags": [
"h1",
"h2",
"p",
"article"
],
"excludeTags": [
"nav",
"footer",
"script",
"style"
],
"timeout": 30000,
"waitFor": 3000,
"extract": {
"mode": "string",
"schema": {
"type": "object",
"properties": {
"title": {
"type": "string"
},
"price": {
"type": "number"
},
"description": {
"type": "string"
}
},
"required": [
"title",
"price"
]
},
"systemPrompt": "Based on the information on the page, extract all the information from the schema. Try to extract all the fields even those that might not be marked as required.",
"prompt": "Extract the main article title and author from this page"
},
"url": "string",
"origin": "api",
"webhookUrls": [
"https://your-webhook.com/endpoint"
],
"metadata": {
"userId": "12345",
"source": "api"
}
}
Responses
Successful response — Schema | JSON
application/json
{
"success": true,
"warning": "string",
"data": {
"markdown": "string",
"extract": "string",
"html": "string",
"rawHtml": "string",
"links": [
"string"
],
"screenshot": "string",
"metadata": {
"additionalProperties": {
}
}
},
"scrape_id": "string"
}
POST /v1/crawl
POST
/v1/crawl
Crawl a website
Authorizations
bearerAuth
Type: HTTP (bearer)
Request Body
application/json
{
"includePaths": [
"/blog/*",
"/articles/*",
"/docs/*"
],
"excludePaths": [
"/admin/*",
"/private/*",
"/api/*"
],
"maxDepth": 3,
"limit": 500,
"allowBackwardLinks": false,
"allowExternalLinks": false,
"ignoreSitemap": true,
"url": "string",
"origin": "api",
"scrapeOptions": {
"formats": [
"markdown",
"rawHtml"
],
"headers": {
"additionalProperties": "string"
},
"includeTags": [
"h1",
"h2",
"p",
"article"
],
"excludeTags": [
"nav",
"footer",
"script",
"style"
],
"waitFor": 3000,
"extract": {
"mode": "string",
"schema": {
"type": "object",
"properties": {
"title": {
"type": "string"
},
"price": {
"type": "number"
},
"description": {
"type": "string"
}
},
"required": [
"title",
"price"
]
},
"systemPrompt": "Based on the information on the page, extract all the information from the schema. Try to extract all the fields even those that might not be marked as required.",
"prompt": "Extract the main article title and author from this page"
}
},
"webhookUrls": [
"https://your-webhook.com/crawl-status"
],
"webhookMetadata": {
"crawlId": "crawl_123",
"userId": "user_456"
}
}
Responses
Successful response — Schema | JSON
application/json
{
"success": true,
"id": "crawl_123e4567-e89b-12d3-a456-426614174000",
"url": "https://firecrawl.dev"
}
GET /v1/crawl/{id}
GET
/v1/crawl/{id}
Get crawl job status
Authorizations
bearerAuth
Type: HTTP (bearer)
Parameters
Path Parameters
id*
Type: string
Required
Responses
Successful response — Schema | JSON
application/json
{
"success": true,
"status": "string",
"completed": 0,
"total": 0,
"expiresAt": "string",
"next": "string",
"data": [
{
"markdown": "string",
"extract": "string",
"html": "string",
"rawHtml": "string",
"links": [
"string"
],
"screenshot": "string",
"metadata": {
"additionalProperties": {
}
}
}
]
}
DELETE /v1/crawl/{id}/cancel
POST /v1/map
POST
/v1/map
Map a website to get all URLs
Authorizations
bearerAuth
Type: HTTP (bearer)
Request Body
application/json
{
"includePaths": [
"/blog/*",
"/articles/*",
"/docs/*"
],
"excludePaths": [
"/admin/*",
"/private/*",
"/api/*"
],
"maxDepth": 3,
"limit": 1000,
"allowBackwardLinks": false,
"allowExternalLinks": false,
"ignoreSitemap": true,
"url": "string",
"origin": "api",
"includeSubdomains": true,
"search": "documentation"
}
Responses
Successful response — Schema | JSON
application/json
{
"success": true,
"links": [
"https://firecrawl.dev/pricing",
"https://firecrawl.dev/docs",
"https://firecrawl.dev/blog",
"https://firecrawl.dev/contact"
],
"scrape_id": "map_987fcdeb-51a2-4bc3-8765-fedcba098765"
}