11/**
22 * Firecrawl Research tools (experimental).
33 *
4- * Thin MCP wrappers over the `/v2/research/*` endpoints (arXiv papers + GitHub
5- * history/readmes). These tools are hidden unless research is enabled for the
6- * session — locally via `FIRECRAWL_RESEARCH=true`, or remotely via the
7- * `?research=true` query param on the MCP endpoint (see `isResearchEnabled` in
8- * index.ts, which sets `session.research`).
4+ * Thin MCP wrappers over the `/v2/search/research/*` endpoints (arXiv papers + GitHub
5+ * history/readmes).
96 *
107 * The installed `@mendable/firecrawl-js` predates the SDK's `research` client,
118 * so we call the endpoints directly through the SDK's HTTP layer (auth +
@@ -18,7 +15,6 @@ import { z } from 'zod';
1815
/**
 * Per-session data passed to each tool's `execute` callback and to the
 * `getClient` factory.
 */
interface SessionData {
  // NOTE(review): presumably the caller's Firecrawl API key; confirm where it is set.
  firecrawlApiKey?: string;
  /** Other session fields are allowed but untyped here. */
  [key: string]: unknown;
}
2420
@@ -36,7 +32,7 @@ type ClientLike = {
3632// the callback loosely and narrow to `ClientLike` at each call site.
3733type GetClient = ( session ?: SessionData ) => unknown ;
3834
/** Path prefix shared by every research endpoint called from this file. */
const BASE = '/v2/search/research';
4036
4137/** Append a value (or repeated array values) to a URLSearchParams instance. */
4238function appendParam (
@@ -73,18 +69,22 @@ const MAX_AFFIL_CHARS = 60;
7369const MAX_AUTHORS_LINE_CHARS = 400 ;
7470
/** A paper record as it appears in research endpoint responses. */
interface PaperHit {
  /** Canonical paper id. */
  paperId?: string;
  /** Display id supplied by the API, already ordered for citation/fetch use. */
  primaryId?: string;
  /** Source ids grouped by namespace. */
  ids?: Record<string, string[]>;
  title?: string;
  abstract?: string;
  // Search/metadata responses give a comma-joined string; some shapes give the
  // structured form — handle both.
  authors?: string | { name: string; affiliation?: string }[];
  categories?: string[];
  createdDate?: string;
  updateDate?: string;
}

/**
 * Pick the id to show for a paper.
 *
 * Prefers the API-supplied `primaryId`. When that is missing or empty, falls
 * back to the canonical `paperId` so the caller still gets an id it can pass
 * on, and only then to a fixed placeholder.
 *
 * @param p - Paper record to label.
 * @returns `primaryId`, else `paperId`, else `'missing-primary-id'`.
 */
function displayId(p: PaperHit): string {
  // `||` rather than `??`: an empty-string id is as useless as a missing one.
  return p.primaryId || p.paperId || 'missing-primary-id';
}
8989
9090/** Format the authors line, accepting either the string or structured form. */
@@ -122,7 +122,7 @@ function fmtHits(results?: PaperHit[]): string {
122122 if ( ! results || results . length === 0 ) return '(no results)' ;
123123 return results
124124 . map ( ( r ) => {
125- const lines = [ `[${ displayId ( r ) } ] ${ r . title ?? '(untitled)' } ` ] ;
125+ const lines = [ `## [${ displayId ( r ) } ] ${ r . title ?? '(untitled)' } ` ] ;
126126 const authors = fmtAuthors ( r . authors ) ;
127127 if ( authors ) lines . push ( authors ) ;
128128 lines . push (
@@ -135,6 +135,40 @@ function fmtHits(results?: PaperHit[]): string {
135135 . join ( '\n\n' ) ;
136136}
137137
/**
 * Render one paper's metadata as a markdown document.
 *
 * Layout: a `# title` heading, a blank line, a `Paper ID` line, then optional
 * `IDs`, authors, `Categories` and `Dates` lines (each emitted only when it
 * has content), followed by a blank line and a `## Abstract` section.
 *
 * @param paper - Paper record taken from the API response; may be absent.
 * @returns The markdown text, or `'(paper not found)'` when `paper` is missing.
 */
function fmtPaperMetadata(paper?: PaperHit): string {
  if (!paper) return '(paper not found)';

  const lines = [
    `# ${paper.title ?? '(untitled)'}`,
    '',
    `Paper ID: ${paper.paperId ?? '?'}`,
  ];

  // Flatten { namespace: [value, ...] } into "namespace:value" pairs.
  const ids = Object.entries(paper.ids ?? {})
    .flatMap(([namespace, values]) => {
      // The response body is not validated, so tolerate a scalar or null
      // where the type promises an array instead of throwing on `.map`.
      const raw: unknown = values;
      const list: unknown[] = Array.isArray(raw)
        ? raw
        : raw == null
          ? []
          : [raw];
      return list.map((value) => `${namespace}:${String(value)}`);
    })
    .join(', ');
  if (ids) lines.push(`IDs: ${ids}`);

  const authors = fmtAuthors(paper.authors);
  if (authors) lines.push(authors);

  if (paper.categories?.length) {
    lines.push(`Categories: ${paper.categories.join(', ')}`);
  }

  const dates = [
    paper.createdDate ? `created ${paper.createdDate}` : '',
    paper.updateDate ? `updated ${paper.updateDate}` : '',
  ]
    .filter(Boolean)
    .join('; ');
  if (dates) lines.push(`Dates: ${dates}`);

  // Collapse whitespace runs to single spaces, then trim, so a
  // whitespace-only abstract falls through to the placeholder.
  const abstract = (paper.abstract ?? '').replace(/\s+/g, ' ').trim();
  lines.push('', '## Abstract', abstract || '(no abstract)');

  return lines.join('\n');
}
171+
138172// Cap GitHub matched content so a page of results stays within the MCP
139173// output-token limit. Higher than abstracts since issue/PR threads carry the
140174// signal (repro steps, stack traces) the agent actually needs to verify.
@@ -193,18 +227,13 @@ function fmtGithub(results?: GitHubItem[]): string {
193227 . join ( '\n\n' ) ;
194228}
195229
196- /** Only present these tools when the session has research enabled. */
197- const canAccess = ( session ?: SessionData ) : boolean =>
198- session ?. research === true ;
199-
200230export function registerResearchTools (
201231 server : FastMCP < SessionData > ,
202232 getClient : GetClient
203233) : void {
204234 // --- search_papers ---
205235 server . addTool ( {
206236 name : 'firecrawl_research_search_papers' ,
207- canAccess,
208237 annotations : {
209238 title : 'Search arXiv papers' ,
210239 readOnlyHint : true ,
@@ -270,10 +299,42 @@ export function registerResearchTools(
270299 } ,
271300 } ) ;
272301
302+ // --- inspect_paper ---
303+ server . addTool ( {
304+ name : 'firecrawl_research_inspect_paper' ,
305+ annotations : {
306+ title : 'Inspect a paper' ,
307+ readOnlyHint : true ,
308+ openWorldHint : true ,
309+ } ,
310+ description :
311+ 'Fetch canonical metadata for one paper by primaryId or canonical paperId. ' +
312+ 'Use this after search/related results when you need the full title, abstract, authors, ' +
313+ 'categories, source ids, and dates rendered as markdown.' ,
314+ parameters : z . object ( {
315+ paperId : z
316+ . string ( )
317+ . min ( 1 )
318+ . describe (
319+ 'Canonical paperId or primaryId such as `arxiv:1706.03762`, `pmcid:PMC12530322`, `pmid:40953549`, or `doi:10.1016/j.neunet.2025.108095`.'
320+ ) ,
321+ } ) ,
322+ execute : async (
323+ args : unknown ,
324+ { session } : { session ?: SessionData ; log : Logger }
325+ ) : Promise < string > => {
326+ const { paperId } = args as { paperId : string } ;
327+ const client = getClient ( session ) as ClientLike ;
328+ const res = await client . http . get < { paper ?: PaperHit } > (
329+ `${ BASE } /papers/${ encodeURIComponent ( paperId ) } `
330+ ) ;
331+ return fmtPaperMetadata ( res . data ?. paper ) ;
332+ } ,
333+ } ) ;
334+
273335 // --- related_papers ---
274336 server . addTool ( {
275337 name : 'firecrawl_research_related_papers' ,
276- canAccess,
277338 annotations : {
278339 title : 'Find related arXiv papers' ,
279340 readOnlyHint : true ,
@@ -322,7 +383,7 @@ export function registerResearchTools(
322383 const client = getClient ( session ) as ClientLike ;
323384 const res = await client . http . get < {
324385 results ?: PaperHit [ ] ;
325- pool_size ?: number ;
386+ poolSize ?: number ;
326387 note ?: string | null ;
327388 } > (
328389 withQuery (
@@ -331,16 +392,15 @@ export function registerResearchTools(
331392 )
332393 ) ;
333394 const note = res . data ?. note ? `\nnote: ${ res . data . note } ` : '' ;
334- return `${ fmtHits ( res . data ?. results ) } \n(pool_size =${ res . data ?. pool_size ?? 0 } )${ note } ` ;
395+ return `${ fmtHits ( res . data ?. results ) } \n(poolSize =${ res . data ?. poolSize ?? 0 } )${ note } ` ;
335396 } ,
336397 } ) ;
337398
338399 // --- read_paper ---
339400 server . addTool ( {
340401 name : 'firecrawl_research_read_paper' ,
341- canAccess,
342402 annotations : {
343- title : 'Read an arXiv paper' ,
403+ title : 'Read a paper' ,
344404 readOnlyHint : true ,
345405 openWorldHint : true ,
346406 destructiveHint : false ,
@@ -351,7 +411,12 @@ export function registerResearchTools(
351411 "reject it (e.g. 'does this paper actually use technique X / report a score on benchmark Y'). " +
352412 "Returns the best-matching passages, or a notice if the paper's full text is unavailable." ,
353413 parameters : z . object ( {
354- arxiv_id : z . string ( ) . min ( 1 ) ,
414+ paperId : z
415+ . string ( )
416+ . min ( 1 )
417+ . describe (
418+ 'Canonical paperId or primaryId such as `arxiv:1706.03762`, `pmcid:PMC12530322`, `pmid:40953549`, or `doi:10.1016/j.neunet.2025.108095`.'
419+ ) ,
355420 question : z . string ( ) . min ( 1 ) ,
356421 k : z
357422 . number ( )
@@ -365,8 +430,8 @@ export function registerResearchTools(
365430 args : unknown ,
366431 { session } : { session ?: SessionData ; log : Logger }
367432 ) : Promise < string > => {
368- const { arxiv_id , question, k } = args as {
369- arxiv_id : string ;
433+ const { paperId , question, k } = args as {
434+ paperId : string ;
370435 question : string ;
371436 k ?: number ;
372437 } ;
@@ -375,7 +440,7 @@ export function registerResearchTools(
375440 appendParam ( params , 'k' , k ) ;
376441 const client = getClient ( session ) as ClientLike ;
377442 const res = await client . http . get < { passages ?: { text : string } [ ] } > (
378- withQuery ( `${ BASE } /papers/${ encodeURIComponent ( arxiv_id ) } ` , params )
443+ withQuery ( `${ BASE } /papers/${ encodeURIComponent ( paperId ) } ` , params )
379444 ) ;
380445 const passages = res . data ?. passages ?? [ ] ;
381446 return passages . length
@@ -387,7 +452,6 @@ export function registerResearchTools(
387452 // --- search_github ---
388453 server . addTool ( {
389454 name : 'firecrawl_research_search_github' ,
390- canAccess,
391455 annotations : {
392456 title : 'Search GitHub history' ,
393457 readOnlyHint : true ,
0 commit comments