Lesson 10: Structured Extraction from Text

Learn a powerful pattern for extracting structured data from unstructured text by defining a tool with a specific JSON schema and instructing the model to populate it.

Code: lesson_10_structured_extraction.mjs

In this pattern, the tool's execute function is not needed. We define the tool solely for its parameters schema. We then manually process the stream, intercept the tool_calls event, and parse the arguments to get our structured data, without ever executing the tool itself.

// lesson_10_structured_extraction_from_text.mjs
// Merci SDK Tutorial: Lesson 10 - Structured Extraction from Text

// --- IMPORTS ---
import { MerciClient, createUserMessage } from '../lib/merci.2.14.0.mjs';
import { token } from '../secret/token.mjs';

const MODEL = 'google-chat-gemini-flash-2.5';

// --- TOOL DEFINITION (SCHEMA ONLY) ---
// NOTE: No `execute` function is needed for this pattern!
const actionItemExtractorTool = {
    name: 'extract_action_items',
    parameters: {
        schema: {
            type: 'object',
            properties: {
                action_items: {
                    type: 'array',
                    description: 'An array of all action items found in the text.',
                    items: {
                        type: 'object',
                        properties: {
                            task: { type: 'string', description: 'A clear description of the action to be taken.' },
                            assigned_to: { type: 'string', description: 'The name of the person responsible for the task.' },
                            due_date: { type: 'string', description: 'The deadline for the task, in YYYY-MM-DD format.' },
                            priority: {
                                type: 'string',
                                description: 'The priority level of the task.',
                                enum: ['High', 'Medium', 'Low']
                            }
                        },
                        required: ['task', 'assigned_to', 'due_date', 'priority']
                    }
                }
            },
            required: ['action_items']
        }
    },
};

async function main() {
    console.log(`--- Merci SDK Lesson 10: Structured Extraction (Model: ${MODEL}) ---`);

    try {
        // --- STEP 1: INITIALIZE THE CLIENT ---
        console.log('[STEP 1] Initializing MerciClient...');
        const client = new MerciClient({ token });

        // --- STEP 2: DEFINE PROMPT AND INPUT DATA ---
        console.log('[STEP 2] Preparing prompt and input data...');
        const meetingMinutesContent = `
    Meeting Notes: Q4 Strategy Kick-off - 2024-10-28
    Attendees: David, Eve, Frank
    Action Items:
    - Frank is responsible for drafting the initial project specification. This is a High priority task and must be completed by 2024-11-04.
    - Eve will coordinate with the design team to get the new branding assets. This is a Medium priority item with a deadline of 2024-11-08.
    - David needs to book a venue for the end-of-year party. This is a Low priority task, due by 2024-11-15.
    `;
        const userPrompt = `Use the 'extract_action_items' tool to extract all action items from the following text.\n\nTEXT:\n---\n${meetingMinutesContent}\n---`;

        // --- STEP 3: CONFIGURE THE CHAT SESSION ---
        console.log('[STEP 3] Configuring the chat session with the extraction tool...');
        const chatSession = client.chat.session(MODEL).withTools([actionItemExtractorTool]);

        // --- STEP 4: PREPARE THE MESSAGE PAYLOAD ---
        console.log('[STEP 4] Creating the message payload...');
        const messages = [createUserMessage(userPrompt)];

        // --- STEP 5: EXECUTE AND INTERCEPT THE TOOL CALL ---
        console.log('[STEP 5] Sending request and intercepting tool call...');
        let extractedData = null;
        for await (const event of chatSession.stream(messages)) {
            if (event.type === 'tool_calls') {
                console.log('\n[AGENT ACTION] Model wants to call a tool. Intercepting the call...');
                const argumentsJson = event.calls[0].arguments;
                extractedData = JSON.parse(argumentsJson);
                break; // We have our data, no need to continue.
            }
        }
        console.log('\n[INFO] Stream finished. Response fully received.');

        // --- STEP 6: USE THE EXTRACTED DATA ---
        console.log('\n[STEP 6] Displaying extracted data...');
        console.log('\n\n--- FINAL RESULT ---');
        if (extractedData && extractedData.action_items.length > 0) {
            console.log('✅ Successfully extracted and validated structured data from the text block:');
            console.log(JSON.stringify(extractedData, null, 2));
            const nonHighPriorityTasks = extractedData.action_items.filter(item => item.priority !== 'High');
            console.log(`\nFound ${nonHighPriorityTasks.length} non-high-priority tasks.`);
        } else {
            console.error('❌ Extraction failed. The model did not return any action items.');
            console.log('Received data:', JSON.stringify(extractedData, null, 2));
        }
        console.log('--------------------');

    } catch (error) {
        console.error('\n\n[FATAL ERROR] An error occurred during the operation.');
        console.error('  Message:', error.message);
        if (error.status) {
            console.error('  API Status:', error.status);
        }
        if (error.details) {
            console.error('  Details:', JSON.stringify(error.details, null, 2));
        }
        if (error.stack) {
            console.error('  Stack:', error.stack);
        }
        console.error('\n  Possible causes: Invalid token, network issues, or an API service problem.');
        process.exit(1); // Exit with a non-zero code to indicate failure.
    }
}

main().catch(console.error);

Expected Output

The model will populate the JSON schema defined in the tool's parameters with the information it finds in the text, resulting in a clean, structured object.

✅ Successfully extracted and validated structured data from the text block:
{
  "action_items": [
    {
      "task": "Draft the initial project specification",
      "assigned_to": "Frank",
      "due_date": "2024-11-04",
      "priority": "High"
    },
    {
      "task": "Coordinate with the design team to get the new branding assets",
      "assigned_to": "Eve",
      "due_date": "2024-11-08",
      "priority": "Medium"
    },
    {
      "task": "Book a venue for the end-of-year party",
      "assigned_to": "David",
      "due_date": "2024-11-15",
      "priority": "Low"
    }
  ]
}

Found 2 non-high-priority tasks.