mirror of
https://gitcode.com/ageerle/ruoyi-ai.git
synced 2026-04-16 21:33:40 +00:00
v3.0.0 init
This commit is contained in:
@@ -1,21 +1,20 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<artifactId>ruoyi-ai</artifactId>
|
||||
<groupId>org.ruoyi</groupId>
|
||||
<version>${revision}</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<artifactId>ruoyi-extend</artifactId>
|
||||
<packaging>pom</packaging>
|
||||
|
||||
<modules>
|
||||
<module>ruoyi-mcp-server</module>
|
||||
<module>ruoyi-ai-copilot</module>
|
||||
<module>ruoyi-monitor-admin</module>
|
||||
<module>ruoyi-snailjob-server</module>
|
||||
</modules>
|
||||
|
||||
</project>
|
||||
|
||||
@@ -17,8 +17,8 @@
|
||||
<maven.compiler.source>17</maven.compiler.source>
|
||||
<maven.compiler.target>17</maven.compiler.target>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<spring-boot.version>3.4.5</spring-boot.version>
|
||||
<spring-ai.version>1.0.0</spring-ai.version>
|
||||
<spring-boot.version>4.0.1</spring-boot.version>
|
||||
<spring-ai.version>2.0.0-M2</spring-ai.version>
|
||||
</properties>
|
||||
|
||||
<dependencyManagement>
|
||||
@@ -47,27 +47,6 @@
|
||||
<artifactId>spring-boot-starter-web</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-thymeleaf</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-aop</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- AspectJ Runtime -->
|
||||
<dependency>
|
||||
<groupId>org.aspectj</groupId>
|
||||
<artifactId>aspectjrt</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.aspectj</groupId>
|
||||
<artifactId>aspectjweaver</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Spring AI -->
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
@@ -79,6 +58,12 @@
|
||||
<artifactId>spring-ai-starter-mcp-client</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springaicommunity</groupId>
|
||||
<artifactId>spring-ai-agent-utils</artifactId>
|
||||
<version>0.4.2</version>
|
||||
</dependency>
|
||||
|
||||
<!-- JSON Processing -->
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
@@ -105,60 +90,11 @@
|
||||
<artifactId>commons-lang3</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Logging -->
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-logging</artifactId>
|
||||
<artifactId>spring-boot-starter-thymeleaf</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Test Dependencies -->
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-test</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<!-- 添加Maven编译器插件配置 -->
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.11.0</version>
|
||||
<configuration>
|
||||
<source>17</source>
|
||||
<target>17</target>
|
||||
<parameters>true</parameters>
|
||||
<compilerArgs>
|
||||
<arg>-parameters</arg>
|
||||
</compilerArgs>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
||||
<plugin>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-maven-plugin</artifactId>
|
||||
<version>${spring-boot.version}</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
<goal>repackage</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
<repositories>
|
||||
<repository>
|
||||
<id>spring-milestones</id>
|
||||
<name>Spring Milestones</name>
|
||||
<url>https://repo.spring.io/milestone</url>
|
||||
<snapshots>
|
||||
<enabled>false</enabled>
|
||||
</snapshots>
|
||||
</repository>
|
||||
</repositories>
|
||||
</project>
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
package com.example.demo;
|
||||
|
||||
import com.example.demo.config.AppProperties;
|
||||
import com.example.demo.util.BrowserUtil;
|
||||
import com.example.demo.utils.BrowserUtil;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
@@ -9,7 +9,6 @@ import org.springframework.boot.SpringApplication;
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
import org.springframework.boot.context.event.ApplicationReadyEvent;
|
||||
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||
import org.springframework.context.annotation.EnableAspectJAutoProxy;
|
||||
import org.springframework.context.event.EventListener;
|
||||
import org.springframework.core.env.Environment;
|
||||
|
||||
@@ -21,7 +20,6 @@ import org.springframework.core.env.Environment;
|
||||
*/
|
||||
@SpringBootApplication
|
||||
@EnableConfigurationProperties(AppProperties.class)
|
||||
@EnableAspectJAutoProxy
|
||||
public class CopilotApplication {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(CopilotApplication.class);
|
||||
@@ -33,7 +31,7 @@ public class CopilotApplication {
|
||||
private Environment environment;
|
||||
|
||||
public static void main(String[] args) {
|
||||
SpringApplication.run(CopilotApplication.class, args);
|
||||
SpringApplication.run(CopilotApplication.class, args);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -1,104 +0,0 @@
|
||||
package com.example.demo.config;
|
||||
|
||||
import com.example.demo.service.ToolExecutionLogger;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
/**
|
||||
* 自定义工具执行监听器
|
||||
* 提供中文日志和详细的文件操作信息记录
|
||||
* <p>
|
||||
* 注意:Spring AI 1.0.0使用@Tool注解来定义工具,不需要ToolCallbackProvider接口
|
||||
* 这个类主要用于工具执行的日志记录和监控
|
||||
*/
|
||||
@Component
|
||||
public class CustomToolExecutionMonitor {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(CustomToolExecutionMonitor.class);
|
||||
|
||||
@Autowired
|
||||
private ToolExecutionLogger executionLogger;
|
||||
|
||||
/**
|
||||
* 记录工具执行开始
|
||||
*/
|
||||
public long logToolStart(String toolName, String description, String parameters) {
|
||||
String fileInfo = extractFileInfo(toolName, parameters);
|
||||
long callId = executionLogger.logToolStart(toolName, description,
|
||||
String.format("参数: %s | 文件信息: %s", parameters, fileInfo));
|
||||
|
||||
logger.debug("🚀 [Spring AI] 开始执行工具: {} | 文件/目录: {}", toolName, fileInfo);
|
||||
return callId;
|
||||
}
|
||||
|
||||
/**
|
||||
* 记录工具执行成功
|
||||
*/
|
||||
public void logToolSuccess(long callId, String toolName, String result, long executionTime, String parameters) {
|
||||
String fileInfo = extractFileInfo(toolName, parameters);
|
||||
logger.debug("✅ [Spring AI] 工具执行成功: {} | 耗时: {}ms | 文件/目录: {}",
|
||||
toolName, executionTime, fileInfo);
|
||||
executionLogger.logToolSuccess(callId, toolName, result, executionTime);
|
||||
}
|
||||
|
||||
/**
|
||||
* 记录工具执行失败
|
||||
*/
|
||||
public void logToolError(long callId, String toolName, String errorMessage, long executionTime, String parameters) {
|
||||
String fileInfo = extractFileInfo(toolName, parameters);
|
||||
logger.error("❌ [Spring AI] 工具执行失败: {} | 耗时: {}ms | 文件/目录: {} | 错误: {}",
|
||||
toolName, executionTime, fileInfo, errorMessage);
|
||||
executionLogger.logToolError(callId, toolName, errorMessage, executionTime);
|
||||
}
|
||||
|
||||
/**
|
||||
* 提取文件信息用于日志记录
|
||||
*/
|
||||
private String extractFileInfo(String toolName, String arguments) {
|
||||
try {
|
||||
switch (toolName) {
|
||||
case "readFile":
|
||||
case "read_file":
|
||||
return extractPathFromArgs(arguments, "absolutePath", "filePath");
|
||||
case "writeFile":
|
||||
case "write_file":
|
||||
return extractPathFromArgs(arguments, "filePath");
|
||||
case "editFile":
|
||||
case "edit_file":
|
||||
return extractPathFromArgs(arguments, "filePath");
|
||||
case "listDirectory":
|
||||
return extractPathFromArgs(arguments, "directoryPath", "path");
|
||||
case "analyzeProject":
|
||||
case "analyze_project":
|
||||
return extractPathFromArgs(arguments, "projectPath");
|
||||
case "scaffoldProject":
|
||||
case "scaffold_project":
|
||||
return extractPathFromArgs(arguments, "projectPath");
|
||||
case "smartEdit":
|
||||
case "smart_edit":
|
||||
return extractPathFromArgs(arguments, "projectPath");
|
||||
default:
|
||||
return "未知文件路径";
|
||||
}
|
||||
} catch (Exception e) {
|
||||
return "解析文件路径失败: " + e.getMessage();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 从参数中提取路径
|
||||
*/
|
||||
private String extractPathFromArgs(String arguments, String... pathKeys) {
|
||||
for (String key : pathKeys) {
|
||||
String pattern = "\"" + key + "\"\\s*:\\s*\"([^\"]+)\"";
|
||||
java.util.regex.Pattern p = java.util.regex.Pattern.compile(pattern);
|
||||
java.util.regex.Matcher m = p.matcher(arguments);
|
||||
if (m.find()) {
|
||||
return m.group(1);
|
||||
}
|
||||
}
|
||||
return "未找到路径参数";
|
||||
}
|
||||
}
|
||||
@@ -1,12 +1,14 @@
|
||||
package com.example.demo.config;
|
||||
|
||||
import com.example.demo.schema.SchemaValidator;
|
||||
import com.example.demo.tools.*;
|
||||
import org.springaicommunity.agent.tools.FileSystemTools;
|
||||
import org.springaicommunity.agent.tools.ShellTools;
|
||||
import org.springaicommunity.agent.tools.SkillsTool;
|
||||
import org.springframework.ai.chat.client.ChatClient;
|
||||
import org.springframework.ai.chat.model.ChatModel;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.core.io.Resource;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@@ -16,80 +18,22 @@ import java.util.List;
|
||||
@Configuration
|
||||
public class SpringAIConfiguration {
|
||||
|
||||
@Value("${agent.skills.dirs:Unknown}") List<Resource> agentSkillsDirs;
|
||||
|
||||
@Bean
|
||||
public ChatClient chatClient(ChatModel chatModel,
|
||||
FileOperationTools fileOperationTools,
|
||||
SmartEditTool smartEditTool,
|
||||
AnalyzeProjectTool analyzeProjectTool,
|
||||
ProjectScaffoldTool projectScaffoldTool,
|
||||
AppProperties appProperties) {
|
||||
public ChatClient chatClient(ChatModel chatModel, AppProperties appProperties) {
|
||||
// 动态获取工作目录路径
|
||||
String workspaceDir = appProperties.getWorkspace().getRootDirectory();
|
||||
ChatClient.Builder chatClientBuilder = ChatClient.builder(chatModel);
|
||||
|
||||
return ChatClient.builder(chatModel)
|
||||
.defaultSystem("""
|
||||
You are an expert software development assistant with access to file system tools.
|
||||
You excel at creating complete, well-structured projects through systematic execution of multiple related tasks.
|
||||
|
||||
# CORE BEHAVIOR:
|
||||
- When given a complex task (like "create a web project"), break it down into ALL necessary steps
|
||||
- Execute MULTIPLE tool calls in sequence to complete the entire task
|
||||
- Don't stop after just one file - create the complete project structure
|
||||
- Always verify your work by reading files after creating them
|
||||
- Continue working until the ENTIRE task is complete
|
||||
|
||||
# TASK EXECUTION STRATEGY:
|
||||
1. **Plan First**: Mentally outline all files and directories needed
|
||||
2. **Execute Systematically**: Use tools in logical sequence to build the complete solution
|
||||
3. **Verify Progress**: Read files after creation to ensure correctness
|
||||
4. **Continue Until Complete**: Don't stop until the entire requested project/task is finished
|
||||
5. **Signal Continuation**: Use phrases like "Next, I will...", "Now I'll...", "Let me..." to indicate ongoing work
|
||||
|
||||
# AVAILABLE TOOLS:
|
||||
- readFile: Read file contents (supports pagination)
|
||||
- writeFile: Create or overwrite files
|
||||
- editFile: Edit files by replacing specific text
|
||||
- listDirectory: List directory contents (supports recursive)
|
||||
- analyzeProject: Analyze existing projects to understand structure and dependencies
|
||||
- smartEdit: Intelligently edit projects based on natural language descriptions
|
||||
- scaffoldProject: Create new projects with standard structure and templates
|
||||
|
||||
# CRITICAL RULES:
|
||||
- ALWAYS use absolute paths starting with the workspace directory: """ + workspaceDir + """
|
||||
- Use proper path separators for the current operating system
|
||||
- For complex requests, execute 5-15 tool calls to create a complete solution
|
||||
- Use continuation phrases to signal you have more work to do
|
||||
- If creating a project, make it production-ready with proper structure
|
||||
- Continue working until you've delivered a complete, functional result
|
||||
- Only say "completed" or "finished" when the ENTIRE task is truly done
|
||||
- The tools will show both full paths and relative paths - this helps users locate files
|
||||
- Always mention the full path when describing what you've created
|
||||
|
||||
# PATH EXAMPLES:
|
||||
- Correct absolute path format:+ workspaceDir + + file separator + filename
|
||||
- Always ensure paths are within the workspace directory
|
||||
- Use the system's native path separators
|
||||
|
||||
# CONTINUATION SIGNALS:
|
||||
Use these phrases when you have more work to do:
|
||||
- "Next, I will create..."
|
||||
- "Now I'll add..."
|
||||
- "Let me now..."
|
||||
- "Moving on to..."
|
||||
- "I'll proceed to..."
|
||||
|
||||
Remember: Your goal is to deliver COMPLETE solutions through continuous execution!
|
||||
""")
|
||||
.defaultTools(fileOperationTools, smartEditTool, analyzeProjectTool, projectScaffoldTool)
|
||||
return chatClientBuilder
|
||||
.defaultSystem("Always use the available skills to assist the user in their requests.")
|
||||
// Skills tool callbacks
|
||||
.defaultToolCallbacks(SkillsTool.builder().addSkillsResources(agentSkillsDirs).build())
|
||||
// Built-in tools
|
||||
.defaultTools(
|
||||
// FileSystemTools.builder().build(),
|
||||
ShellTools.builder().build()
|
||||
)
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 为所有工具注入Schema验证器
|
||||
*/
|
||||
@Autowired
|
||||
public void configureTools(List<BaseTool<?>> tools, SchemaValidator schemaValidator) {
|
||||
tools.forEach(tool -> tool.setSchemaValidator(schemaValidator));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,313 +0,0 @@
|
||||
package com.example.demo.config;
|
||||
|
||||
import com.example.demo.service.LogStreamService;
|
||||
import com.example.demo.service.ToolExecutionLogger;
|
||||
import org.aspectj.lang.ProceedingJoinPoint;
|
||||
import org.aspectj.lang.annotation.Around;
|
||||
import org.aspectj.lang.annotation.Aspect;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* 工具调用日志切面
|
||||
* 拦截 Spring AI 的工具调用并提供中文日志
|
||||
*/
|
||||
@Aspect
|
||||
@Component
|
||||
public class ToolCallLoggingAspect {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(ToolCallLoggingAspect.class);
|
||||
|
||||
@Autowired
|
||||
private ToolExecutionLogger executionLogger;
|
||||
|
||||
@Autowired
|
||||
private LogStreamService logStreamService;
|
||||
|
||||
/**
|
||||
* 拦截使用@Tool注解的方法执行
|
||||
*/
|
||||
@Around("@annotation(org.springframework.ai.tool.annotation.Tool)")
|
||||
public Object interceptToolAnnotation(ProceedingJoinPoint joinPoint) throws Throwable {
|
||||
Object[] args = joinPoint.getArgs();
|
||||
String methodName = joinPoint.getSignature().getName();
|
||||
String className = joinPoint.getSignature().getDeclaringType().getSimpleName();
|
||||
|
||||
// 详细的参数信息
|
||||
String parametersInfo = formatMethodParameters(args);
|
||||
String fileInfo = extractFileInfoFromMethodArgs(methodName, args);
|
||||
|
||||
logger.debug("🚀 [Spring AI @Tool] 执行工具: {}.{} | 参数: {} | 文件/目录: {}",
|
||||
className, methodName, parametersInfo, fileInfo);
|
||||
|
||||
// 获取当前任务ID (从线程本地变量或其他方式)
|
||||
String taskId = getCurrentTaskId();
|
||||
|
||||
// 推送工具开始执行事件
|
||||
if (taskId != null) {
|
||||
String startMessage = generateStartMessage(methodName, fileInfo);
|
||||
logStreamService.pushToolStart(taskId, methodName, fileInfo, startMessage);
|
||||
}
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
try {
|
||||
Object result = joinPoint.proceed();
|
||||
long executionTime = System.currentTimeMillis() - startTime;
|
||||
|
||||
logger.debug("✅ [Spring AI @Tool] 工具执行成功: {}.{} | 耗时: {}ms | 文件/目录: {} | 参数: {}",
|
||||
className, methodName, executionTime, fileInfo, parametersInfo);
|
||||
|
||||
// 推送工具执行成功事件
|
||||
if (taskId != null) {
|
||||
String successMessage = generateSuccessMessage(methodName, fileInfo, result, executionTime);
|
||||
logStreamService.pushToolSuccess(taskId, methodName, fileInfo, successMessage, executionTime);
|
||||
}
|
||||
|
||||
return result;
|
||||
} catch (Throwable e) {
|
||||
long executionTime = System.currentTimeMillis() - startTime;
|
||||
|
||||
logger.error("❌ [Spring AI @Tool] 工具执行失败: {}.{} | 耗时: {}ms | 文件/目录: {} | 参数: {} | 错误: {}",
|
||||
className, methodName, executionTime, fileInfo, parametersInfo, e.getMessage());
|
||||
|
||||
// 推送工具执行失败事件
|
||||
if (taskId != null) {
|
||||
String errorMessage = generateErrorMessage(methodName, fileInfo, e.getMessage());
|
||||
logStreamService.pushToolError(taskId, methodName, fileInfo, errorMessage, executionTime);
|
||||
}
|
||||
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 格式化方法参数为可读字符串
|
||||
*/
|
||||
private String formatMethodParameters(Object[] args) {
|
||||
if (args == null || args.length == 0) {
|
||||
return "无参数";
|
||||
}
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int i = 0; i < args.length; i++) {
|
||||
if (i > 0) sb.append(", ");
|
||||
Object arg = args[i];
|
||||
if (arg == null) {
|
||||
sb.append("null");
|
||||
} else if (arg instanceof String) {
|
||||
String str = (String) arg;
|
||||
// 如果字符串太长,截断显示
|
||||
if (str.length() > 100) {
|
||||
sb.append("\"").append(str.substring(0, 100)).append("...\"");
|
||||
} else {
|
||||
sb.append("\"").append(str).append("\"");
|
||||
}
|
||||
} else {
|
||||
sb.append(arg.toString());
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* 从方法参数中直接提取文件信息
|
||||
*/
|
||||
private String extractFileInfoFromMethodArgs(String methodName, Object[] args) {
|
||||
if (args == null || args.length == 0) {
|
||||
return "无参数";
|
||||
}
|
||||
|
||||
try {
|
||||
switch (methodName) {
|
||||
case "readFile":
|
||||
// readFile(String absolutePath, Integer offset, Integer limit)
|
||||
return args.length > 0 && args[0] != null ? args[0].toString() : "未指定路径";
|
||||
|
||||
case "writeFile":
|
||||
// writeFile(String filePath, String content)
|
||||
return args.length > 0 && args[0] != null ? args[0].toString() : "未指定路径";
|
||||
|
||||
case "editFile":
|
||||
// editFile(String filePath, String oldText, String newText)
|
||||
return args.length > 0 && args[0] != null ? args[0].toString() : "未指定路径";
|
||||
|
||||
case "listDirectory":
|
||||
// listDirectory(String directoryPath, Boolean recursive)
|
||||
return args.length > 0 && args[0] != null ? args[0].toString() : "未指定路径";
|
||||
|
||||
case "analyzeProject":
|
||||
// analyzeProject(String projectPath, ...)
|
||||
return args.length > 0 && args[0] != null ? args[0].toString() : "未指定路径";
|
||||
|
||||
case "scaffoldProject":
|
||||
// scaffoldProject(String projectName, String projectType, String projectPath, ...)
|
||||
return args.length > 2 && args[2] != null ? args[2].toString() : "未指定路径";
|
||||
|
||||
case "smartEdit":
|
||||
// smartEdit(String projectPath, ...)
|
||||
return args.length > 0 && args[0] != null ? args[0].toString() : "未指定路径";
|
||||
|
||||
default:
|
||||
// 对于未知方法,尝试从第一个参数中提取路径
|
||||
if (args.length > 0 && args[0] != null) {
|
||||
String firstArg = args[0].toString();
|
||||
if (firstArg.contains("/") || firstArg.contains("\\")) {
|
||||
return firstArg;
|
||||
}
|
||||
}
|
||||
return "未识别的工具类型";
|
||||
}
|
||||
} catch (Exception e) {
|
||||
return "解析参数失败: " + e.getMessage();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 从参数字符串中提取文件信息(备用方法)
|
||||
*/
|
||||
private String extractFileInfoFromArgs(String toolName, String arguments) {
|
||||
try {
|
||||
switch (toolName) {
|
||||
case "readFile":
|
||||
case "read_file":
|
||||
return extractPathFromString(arguments, "absolutePath", "filePath");
|
||||
case "writeFile":
|
||||
case "write_file":
|
||||
case "editFile":
|
||||
case "edit_file":
|
||||
return extractPathFromString(arguments, "filePath");
|
||||
case "listDirectory":
|
||||
return extractPathFromString(arguments, "directoryPath", "path");
|
||||
case "analyzeProject":
|
||||
case "analyze_project":
|
||||
case "scaffoldProject":
|
||||
case "scaffold_project":
|
||||
case "smartEdit":
|
||||
case "smart_edit":
|
||||
return extractPathFromString(arguments, "projectPath");
|
||||
default:
|
||||
return "未指定文件路径";
|
||||
}
|
||||
} catch (Exception e) {
|
||||
return "解析文件路径失败";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 从字符串中提取路径
|
||||
*/
|
||||
private String extractPathFromString(String text, String... pathKeys) {
|
||||
for (String key : pathKeys) {
|
||||
// JSON 格式
|
||||
Pattern jsonPattern = Pattern.compile("\"" + key + "\"\\s*:\\s*\"([^\"]+)\"");
|
||||
Matcher jsonMatcher = jsonPattern.matcher(text);
|
||||
if (jsonMatcher.find()) {
|
||||
return jsonMatcher.group(1);
|
||||
}
|
||||
|
||||
// 键值对格式
|
||||
Pattern kvPattern = Pattern.compile(key + "=([^,\\s\\]]+)");
|
||||
Matcher kvMatcher = kvPattern.matcher(text);
|
||||
if (kvMatcher.find()) {
|
||||
return kvMatcher.group(1);
|
||||
}
|
||||
}
|
||||
|
||||
return "未找到路径";
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取当前任务ID
|
||||
* 从线程本地变量或请求上下文中获取
|
||||
*/
|
||||
private String getCurrentTaskId() {
|
||||
// 这里需要从某个地方获取当前任务ID
|
||||
// 可以从ThreadLocal、RequestAttributes或其他方式获取
|
||||
try {
|
||||
// 临时实现:从线程名或其他方式获取
|
||||
return TaskContextHolder.getCurrentTaskId();
|
||||
} catch (Exception e) {
|
||||
logger.debug("无法获取当前任务ID: {}", e.getMessage());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 生成工具开始执行消息
|
||||
*/
|
||||
private String generateStartMessage(String toolName, String fileInfo) {
|
||||
switch (toolName) {
|
||||
case "readFile":
|
||||
return "正在读取文件: " + getFileName(fileInfo);
|
||||
case "writeFile":
|
||||
return "正在写入文件: " + getFileName(fileInfo);
|
||||
case "editFile":
|
||||
return "正在编辑文件: " + getFileName(fileInfo);
|
||||
case "listDirectory":
|
||||
return "正在列出目录: " + fileInfo;
|
||||
case "analyzeProject":
|
||||
return "正在分析项目: " + fileInfo;
|
||||
case "scaffoldProject":
|
||||
return "正在创建项目脚手架: " + fileInfo;
|
||||
case "smartEdit":
|
||||
return "正在智能编辑项目: " + fileInfo;
|
||||
default:
|
||||
return "正在执行工具: " + toolName;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 生成工具执行成功消息
|
||||
*/
|
||||
private String generateSuccessMessage(String toolName, String fileInfo, Object result, long executionTime) {
|
||||
String fileName = getFileName(fileInfo);
|
||||
switch (toolName) {
|
||||
case "readFile":
|
||||
return String.format("已读取文件 %s (耗时 %dms)", fileName, executionTime);
|
||||
case "writeFile":
|
||||
return String.format("已写入文件 %s (耗时 %dms)", fileName, executionTime);
|
||||
case "editFile":
|
||||
return String.format("已编辑文件 %s (耗时 %dms)", fileName, executionTime);
|
||||
case "listDirectory":
|
||||
return String.format("已列出目录 %s (耗时 %dms)", fileInfo, executionTime);
|
||||
case "analyzeProject":
|
||||
return String.format("已分析项目 %s (耗时 %dms)", fileInfo, executionTime);
|
||||
case "scaffoldProject":
|
||||
return String.format("已创建项目脚手架 %s (耗时 %dms)", fileInfo, executionTime);
|
||||
case "smartEdit":
|
||||
return String.format("已智能编辑项目 %s (耗时 %dms)", fileInfo, executionTime);
|
||||
default:
|
||||
return String.format("工具 %s 执行成功 (耗时 %dms)", toolName, executionTime);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 生成工具执行失败消息
|
||||
*/
|
||||
private String generateErrorMessage(String toolName, String fileInfo, String errorMsg) {
|
||||
String fileName = getFileName(fileInfo);
|
||||
return String.format("工具 %s 执行失败: %s (文件: %s)", toolName, errorMsg, fileName);
|
||||
}
|
||||
|
||||
/**
|
||||
* 从文件路径中提取文件名
|
||||
*/
|
||||
private String getFileName(String filePath) {
|
||||
if (filePath == null || filePath.isEmpty()) {
|
||||
return "未知文件";
|
||||
}
|
||||
|
||||
// 处理Windows和Unix路径
|
||||
int lastSlash = Math.max(filePath.lastIndexOf('/'), filePath.lastIndexOf('\\'));
|
||||
if (lastSlash >= 0 && lastSlash < filePath.length() - 1) {
|
||||
return filePath.substring(lastSlash + 1);
|
||||
}
|
||||
|
||||
return filePath;
|
||||
}
|
||||
}
|
||||
@@ -16,7 +16,6 @@ import reactor.core.publisher.Mono;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
|
||||
/**
|
||||
* 聊天控制器
|
||||
@@ -43,84 +42,11 @@ public class ChatController {
|
||||
}
|
||||
|
||||
/**
|
||||
* 发送消息给AI - 支持连续工具调用
|
||||
* 流式聊天 - 直接返回流式数据
|
||||
*/
|
||||
// 在现有ChatController中修改sendMessage方法
|
||||
@PostMapping("/message")
|
||||
public Mono<ChatResponseDto> sendMessage(@RequestBody ChatRequestDto request) {
|
||||
return Mono.fromCallable(() -> {
|
||||
try {
|
||||
logger.info("💬 ========== 新的聊天请求 ==========");
|
||||
logger.info("📝 用户消息: {}", request.getMessage());
|
||||
logger.info("🕐 请求时间: {}", java.time.LocalDateTime.now());
|
||||
|
||||
// 智能判断是否需要工具调用
|
||||
boolean needsToolExecution = continuousConversationService.isLikelyToNeedTools(request.getMessage());
|
||||
logger.info("🔍 工具需求分析: {}", needsToolExecution ? "可能需要工具" : "简单对话");
|
||||
|
||||
if (needsToolExecution) {
|
||||
// 需要工具调用的复杂任务 - 使用异步模式
|
||||
String taskId = continuousConversationService.startTask(request.getMessage());
|
||||
logger.info("🆔 任务ID: {}", taskId);
|
||||
|
||||
// 记录任务开始
|
||||
executionLogger.logToolStatistics(); // 显示当前统计
|
||||
|
||||
// 异步执行连续对话
|
||||
CompletableFuture.runAsync(() -> {
|
||||
try {
|
||||
logger.info("🚀 开始异步执行连续对话任务: {}", taskId);
|
||||
continuousConversationService.executeContinuousConversation(
|
||||
taskId, request.getMessage(), conversationHistory
|
||||
);
|
||||
logger.info("✅ 连续对话任务完成: {}", taskId);
|
||||
} catch (Exception e) {
|
||||
logger.error("❌ 异步对话执行错误: {}", e.getMessage(), e);
|
||||
}
|
||||
});
|
||||
|
||||
// 返回异步任务响应
|
||||
ChatResponseDto responseDto = new ChatResponseDto();
|
||||
responseDto.setTaskId(taskId);
|
||||
responseDto.setMessage("任务已启动,正在处理中...");
|
||||
responseDto.setSuccess(true);
|
||||
responseDto.setAsyncTask(true);
|
||||
|
||||
logger.info("📤 返回响应: taskId={}, 异步任务已启动", taskId);
|
||||
return responseDto;
|
||||
} else {
|
||||
// 简单对话 - 使用流式模式
|
||||
logger.info("🔄 执行流式对话处理");
|
||||
|
||||
// 返回流式响应标识,让前端建立流式连接
|
||||
ChatResponseDto responseDto = new ChatResponseDto();
|
||||
responseDto.setMessage("开始流式对话...");
|
||||
responseDto.setSuccess(true);
|
||||
responseDto.setAsyncTask(false); // 关键:设置为false,表示不是工具任务
|
||||
responseDto.setStreamResponse(true); // 新增:标识为流式响应
|
||||
responseDto.setTotalTurns(1);
|
||||
|
||||
logger.info("📤 返回流式响应标识");
|
||||
return responseDto;
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error("Error processing chat message", e);
|
||||
ChatResponseDto errorResponse = new ChatResponseDto();
|
||||
errorResponse.setMessage("Error: " + e.getMessage());
|
||||
errorResponse.setSuccess(false);
|
||||
return errorResponse;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 流式聊天 - 真正的流式实现
|
||||
*/
|
||||
@PostMapping(value = "/stream", produces = MediaType.TEXT_EVENT_STREAM_VALUE)
|
||||
@PostMapping(value = "/message", produces = MediaType.TEXT_EVENT_STREAM_VALUE)
|
||||
public Flux<String> streamMessage(@RequestBody ChatRequestDto request) {
|
||||
logger.info("🌊 开始流式对话: {}", request.getMessage());
|
||||
logger.info("📨 开始流式聊天: {}", request.getMessage());
|
||||
|
||||
return Flux.create(sink -> {
|
||||
try {
|
||||
@@ -137,27 +63,29 @@ public class ChatController {
|
||||
contentStream
|
||||
.doOnNext(content -> {
|
||||
logger.debug("📨 流式内容片段: {}", content);
|
||||
// 发送SSE格式的数据
|
||||
sink.next("data: " + content + "\n\n");
|
||||
// 发送内容片段(SSE格式会自动添加 "data: " 前缀)
|
||||
sink.next(content);
|
||||
})
|
||||
.doOnComplete(() -> {
|
||||
logger.info("✅ 流式对话完成");
|
||||
sink.next("data: [DONE]\n\n");
|
||||
logger.info("✅ 流式聊天完成");
|
||||
// 发送完成标记
|
||||
sink.next("[DONE]");
|
||||
sink.complete();
|
||||
})
|
||||
.doOnError(error -> {
|
||||
logger.error("❌ 流式对话错误: {}", error.getMessage());
|
||||
logger.error("❌ 流式聊天错误: {}", error.getMessage());
|
||||
sink.error(error);
|
||||
})
|
||||
.subscribe();
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error("❌ 流式对话启动失败: {}", e.getMessage());
|
||||
logger.error("❌ 流式聊天启动失败: {}", e.getMessage());
|
||||
sink.error(e);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 清除对话历史
|
||||
*/
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,125 +0,0 @@
|
||||
package com.example.demo.tools;
|
||||
|
||||
import com.example.demo.schema.JsonSchema;
|
||||
import com.example.demo.schema.SchemaValidator;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
|
||||
/**
|
||||
* Base abstract class for tools
|
||||
* All tools should inherit from this class
|
||||
*/
|
||||
public abstract class BaseTool<P> {
|
||||
|
||||
protected final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
protected final String name;
|
||||
protected final String displayName;
|
||||
protected final String description;
|
||||
protected final JsonSchema parameterSchema;
|
||||
protected final boolean isOutputMarkdown;
|
||||
protected final boolean canUpdateOutput;
|
||||
|
||||
protected SchemaValidator schemaValidator;
|
||||
|
||||
public BaseTool(String name, String displayName, String description, JsonSchema parameterSchema) {
|
||||
this(name, displayName, description, parameterSchema, true, false);
|
||||
}
|
||||
|
||||
public BaseTool(String name, String displayName, String description, JsonSchema parameterSchema,
|
||||
boolean isOutputMarkdown, boolean canUpdateOutput) {
|
||||
this.name = name;
|
||||
this.displayName = displayName;
|
||||
this.description = description;
|
||||
this.parameterSchema = parameterSchema;
|
||||
this.isOutputMarkdown = isOutputMarkdown;
|
||||
this.canUpdateOutput = canUpdateOutput;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set Schema validator (through dependency injection)
|
||||
*/
|
||||
public void setSchemaValidator(SchemaValidator schemaValidator) {
|
||||
this.schemaValidator = schemaValidator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate tool parameters
|
||||
*
|
||||
* @param params Parameter object
|
||||
* @return Validation error message, null means validation passed
|
||||
*/
|
||||
public String validateToolParams(P params) {
|
||||
if (schemaValidator == null || parameterSchema == null) {
|
||||
logger.warn("Schema validator or parameter schema is null, skipping validation");
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
return schemaValidator.validate(parameterSchema, params);
|
||||
} catch (Exception e) {
|
||||
logger.error("Parameter validation failed", e);
|
||||
return "Parameter validation error: " + e.getMessage();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Confirm whether user approval is needed for execution
|
||||
*
|
||||
* @param params Parameter object
|
||||
* @return Confirmation details, null means no confirmation needed
|
||||
*/
|
||||
public CompletableFuture<ToolConfirmationDetails> shouldConfirmExecute(P params) {
|
||||
return CompletableFuture.completedFuture(null); // Default no confirmation needed
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute tool
|
||||
*
|
||||
* @param params Parameter object
|
||||
* @return Execution result
|
||||
*/
|
||||
public abstract CompletableFuture<ToolResult> execute(P params);
|
||||
|
||||
/**
|
||||
* Get tool description (for AI understanding)
|
||||
*
|
||||
* @param params Parameter object
|
||||
* @return Description information
|
||||
*/
|
||||
public String getDescription(P params) {
|
||||
return description;
|
||||
}
|
||||
|
||||
// Getters
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public String getDisplayName() {
|
||||
return displayName;
|
||||
}
|
||||
|
||||
public String getDescription() {
|
||||
return description;
|
||||
}
|
||||
|
||||
public JsonSchema getParameterSchema() {
|
||||
return parameterSchema;
|
||||
}
|
||||
|
||||
public boolean isOutputMarkdown() {
|
||||
return isOutputMarkdown;
|
||||
}
|
||||
|
||||
public boolean canUpdateOutput() {
|
||||
return canUpdateOutput;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("Tool{name='%s', displayName='%s'}", name, displayName);
|
||||
}
|
||||
}
|
||||
@@ -1,466 +0,0 @@
|
||||
package com.example.demo.tools;
|
||||
|
||||
import com.example.demo.config.AppProperties;
|
||||
import com.example.demo.schema.JsonSchema;
|
||||
import com.example.demo.service.ToolExecutionLogger;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.github.difflib.DiffUtils;
|
||||
import com.github.difflib.UnifiedDiffUtils;
|
||||
import com.github.difflib.patch.Patch;
|
||||
import org.springframework.ai.tool.annotation.Tool;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
|
||||
/**
|
||||
* File editing tool
|
||||
* Supports file editing based on string replacement, automatically shows differences
|
||||
*/
|
||||
@Component
|
||||
public class EditFileTool extends BaseTool<EditFileTool.EditFileParams> {
|
||||
|
||||
private final String rootDirectory;
|
||||
private final AppProperties appProperties;
|
||||
|
||||
@Autowired
|
||||
private ToolExecutionLogger executionLogger;
|
||||
|
||||
public EditFileTool(AppProperties appProperties) {
|
||||
super(
|
||||
"edit_file",
|
||||
"EditFile",
|
||||
"Edits a file by replacing specified text with new text. " +
|
||||
"Shows a diff of the changes before applying them. " +
|
||||
"Supports both exact string matching and line-based editing. " +
|
||||
"Use absolute paths within the workspace directory.",
|
||||
createSchema()
|
||||
);
|
||||
this.appProperties = appProperties;
|
||||
this.rootDirectory = appProperties.getWorkspace().getRootDirectory();
|
||||
}
|
||||
|
||||
private static String getWorkspaceBasePath() {
|
||||
return Paths.get(System.getProperty("user.dir"), "workspace").toString();
|
||||
}
|
||||
|
||||
private static String getPathExample(String subPath) {
|
||||
return "Example: \"" + Paths.get(getWorkspaceBasePath(), subPath).toString() + "\"";
|
||||
}
|
||||
|
||||
private static JsonSchema createSchema() {
|
||||
return JsonSchema.object()
|
||||
.addProperty("file_path", JsonSchema.string(
|
||||
"MUST be an absolute path to the file to edit. Path must be within the workspace directory (" +
|
||||
getWorkspaceBasePath() + "). " +
|
||||
getPathExample("project/src/main.java") + ". " +
|
||||
"Relative paths are NOT allowed."
|
||||
))
|
||||
.addProperty("old_str", JsonSchema.string(
|
||||
"The exact string to find and replace. Must match exactly including whitespace and newlines."
|
||||
))
|
||||
.addProperty("new_str", JsonSchema.string(
|
||||
"The new string to replace the old string with. Can be empty to delete the old string."
|
||||
))
|
||||
.addProperty("start_line", JsonSchema.integer(
|
||||
"Optional: 1-based line number where the old_str starts. Helps with disambiguation."
|
||||
).minimum(1))
|
||||
.addProperty("end_line", JsonSchema.integer(
|
||||
"Optional: 1-based line number where the old_str ends. Must be >= start_line."
|
||||
).minimum(1))
|
||||
.required("file_path", "old_str", "new_str");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String validateToolParams(EditFileParams params) {
|
||||
String baseValidation = super.validateToolParams(params);
|
||||
if (baseValidation != null) {
|
||||
return baseValidation;
|
||||
}
|
||||
|
||||
// 验证路径
|
||||
if (params.filePath == null || params.filePath.trim().isEmpty()) {
|
||||
return "File path cannot be empty";
|
||||
}
|
||||
|
||||
if (params.oldStr == null) {
|
||||
return "Old string cannot be null";
|
||||
}
|
||||
|
||||
if (params.newStr == null) {
|
||||
return "New string cannot be null";
|
||||
}
|
||||
|
||||
Path filePath = Paths.get(params.filePath);
|
||||
|
||||
// Validate if it's an absolute path
|
||||
if (!filePath.isAbsolute()) {
|
||||
return "File path must be absolute: " + params.filePath;
|
||||
}
|
||||
|
||||
// 验证是否在工作目录内
|
||||
if (!isWithinWorkspace(filePath)) {
|
||||
return "File path must be within the workspace directory (" + rootDirectory + "): " + params.filePath;
|
||||
}
|
||||
|
||||
// 验证行号
|
||||
if (params.startLine != null && params.endLine != null) {
|
||||
if (params.endLine < params.startLine) {
|
||||
return "End line must be >= start line";
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public CompletableFuture<ToolConfirmationDetails> shouldConfirmExecute(EditFileParams params) {
|
||||
// Decide whether confirmation is needed based on configuration
|
||||
if (appProperties.getSecurity().getApprovalMode() == AppProperties.ApprovalMode.AUTO_EDIT ||
|
||||
appProperties.getSecurity().getApprovalMode() == AppProperties.ApprovalMode.YOLO) {
|
||||
return CompletableFuture.completedFuture(null);
|
||||
}
|
||||
|
||||
return CompletableFuture.supplyAsync(() -> {
|
||||
try {
|
||||
Path filePath = Paths.get(params.filePath);
|
||||
|
||||
if (!Files.exists(filePath)) {
|
||||
return null; // 文件不存在,无法预览差异
|
||||
}
|
||||
|
||||
String currentContent = Files.readString(filePath, StandardCharsets.UTF_8);
|
||||
String newContent = performEdit(currentContent, params);
|
||||
|
||||
if (newContent == null) {
|
||||
return null; // Edit failed, cannot preview differences
|
||||
}
|
||||
|
||||
// 生成差异显示
|
||||
String diff = generateDiff(filePath.getFileName().toString(), currentContent, newContent);
|
||||
String title = "Confirm Edit: " + getRelativePath(filePath);
|
||||
|
||||
return ToolConfirmationDetails.edit(title, filePath.getFileName().toString(), diff);
|
||||
|
||||
} catch (IOException e) {
|
||||
logger.warn("Could not read file for edit preview: " + params.filePath, e);
|
||||
return null;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Edit file tool method for Spring AI integration
|
||||
*/
|
||||
@Tool(name = "edit_file", description = "Edits a file by replacing specified text with new text")
|
||||
public String editFile(String filePath, String oldStr, String newStr, Integer startLine, Integer endLine) {
|
||||
long callId = executionLogger.logToolStart("edit_file", "编辑文件内容",
|
||||
String.format("文件=%s, 替换文本长度=%d->%d, 行号范围=%s-%s",
|
||||
filePath, oldStr != null ? oldStr.length() : 0,
|
||||
newStr != null ? newStr.length() : 0, startLine, endLine));
|
||||
long startTime = System.currentTimeMillis();
|
||||
|
||||
try {
|
||||
EditFileParams params = new EditFileParams();
|
||||
params.setFilePath(filePath);
|
||||
params.setOldStr(oldStr);
|
||||
params.setNewStr(newStr);
|
||||
params.setStartLine(startLine);
|
||||
params.setEndLine(endLine);
|
||||
|
||||
executionLogger.logToolStep(callId, "edit_file", "参数验证", "验证文件路径和替换内容");
|
||||
|
||||
// Validate parameters
|
||||
String validation = validateToolParams(params);
|
||||
if (validation != null) {
|
||||
long executionTime = System.currentTimeMillis() - startTime;
|
||||
executionLogger.logToolError(callId, "edit_file", "参数验证失败: " + validation, executionTime);
|
||||
return "Error: " + validation;
|
||||
}
|
||||
|
||||
String editDetails = startLine != null && endLine != null ?
|
||||
String.format("行号范围编辑: %d-%d行", startLine, endLine) : "字符串替换编辑";
|
||||
executionLogger.logFileOperation(callId, "编辑文件", filePath, editDetails);
|
||||
|
||||
// Execute the tool
|
||||
ToolResult result = execute(params).join();
|
||||
|
||||
long executionTime = System.currentTimeMillis() - startTime;
|
||||
|
||||
if (result.isSuccess()) {
|
||||
executionLogger.logToolSuccess(callId, "edit_file", "文件编辑成功", executionTime);
|
||||
return result.getLlmContent();
|
||||
} else {
|
||||
executionLogger.logToolError(callId, "edit_file", result.getErrorMessage(), executionTime);
|
||||
return "Error: " + result.getErrorMessage();
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
long executionTime = System.currentTimeMillis() - startTime;
|
||||
executionLogger.logToolError(callId, "edit_file", "工具执行异常: " + e.getMessage(), executionTime);
|
||||
logger.error("Error in edit file tool", e);
|
||||
return "Error: " + e.getMessage();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public CompletableFuture<ToolResult> execute(EditFileParams params) {
|
||||
return CompletableFuture.supplyAsync(() -> {
|
||||
try {
|
||||
Path filePath = Paths.get(params.filePath);
|
||||
|
||||
// Check if file exists
|
||||
if (!Files.exists(filePath)) {
|
||||
return ToolResult.error("File not found: " + params.filePath);
|
||||
}
|
||||
|
||||
// Check if it's a file
|
||||
if (!Files.isRegularFile(filePath)) {
|
||||
return ToolResult.error("Path is not a regular file: " + params.filePath);
|
||||
}
|
||||
|
||||
// 读取原始内容
|
||||
String originalContent = Files.readString(filePath, StandardCharsets.UTF_8);
|
||||
|
||||
// 执行编辑
|
||||
String newContent = performEdit(originalContent, params);
|
||||
if (newContent == null) {
|
||||
return ToolResult.error("Could not find the specified text to replace in file: " + params.filePath);
|
||||
}
|
||||
|
||||
// 创建备份
|
||||
if (shouldCreateBackup()) {
|
||||
createBackup(filePath, originalContent);
|
||||
}
|
||||
|
||||
// Write new content
|
||||
Files.writeString(filePath, newContent, StandardCharsets.UTF_8);
|
||||
|
||||
// Generate differences and results
|
||||
String diff = generateDiff(filePath.getFileName().toString(), originalContent, newContent);
|
||||
String relativePath = getRelativePath(filePath);
|
||||
String successMessage = String.format("Successfully edited file: %s", params.filePath);
|
||||
|
||||
return ToolResult.success(successMessage, new FileDiff(diff, filePath.getFileName().toString()));
|
||||
|
||||
} catch (IOException e) {
|
||||
logger.error("Error editing file: " + params.filePath, e);
|
||||
return ToolResult.error("Error editing file: " + e.getMessage());
|
||||
} catch (Exception e) {
|
||||
logger.error("Unexpected error editing file: " + params.filePath, e);
|
||||
return ToolResult.error("Unexpected error: " + e.getMessage());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private String performEdit(String content, EditFileParams params) {
|
||||
// If line numbers are specified, use line numbers to assist in finding
|
||||
if (params.startLine != null && params.endLine != null) {
|
||||
return performEditWithLineNumbers(content, params);
|
||||
} else {
|
||||
return performSimpleEdit(content, params);
|
||||
}
|
||||
}
|
||||
|
||||
private String performSimpleEdit(String content, EditFileParams params) {
|
||||
// Simple string replacement
|
||||
if (!content.contains(params.oldStr)) {
|
||||
return null; // Cannot find string to replace
|
||||
}
|
||||
|
||||
// Only replace the first match to avoid unexpected multiple replacements
|
||||
int index = content.indexOf(params.oldStr);
|
||||
if (index == -1) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return content.substring(0, index) + params.newStr + content.substring(index + params.oldStr.length());
|
||||
}
|
||||
|
||||
private String performEditWithLineNumbers(String content, EditFileParams params) {
|
||||
String[] lines = content.split("\n", -1); // -1 preserve trailing empty lines
|
||||
|
||||
// Validate line number range
|
||||
if (params.startLine > lines.length || params.endLine > lines.length) {
|
||||
return null; // Line number out of range
|
||||
}
|
||||
|
||||
// Extract content from specified line range
|
||||
StringBuilder targetContent = new StringBuilder();
|
||||
for (int i = params.startLine - 1; i < params.endLine; i++) {
|
||||
if (i > params.startLine - 1) {
|
||||
targetContent.append("\n");
|
||||
}
|
||||
targetContent.append(lines[i]);
|
||||
}
|
||||
|
||||
// 检查是否匹配
|
||||
if (!targetContent.toString().equals(params.oldStr)) {
|
||||
return null; // 指定行范围的内容与old_str不匹配
|
||||
}
|
||||
|
||||
// 执行替换
|
||||
StringBuilder result = new StringBuilder();
|
||||
|
||||
// 添加前面的行
|
||||
for (int i = 0; i < params.startLine - 1; i++) {
|
||||
if (i > 0) result.append("\n");
|
||||
result.append(lines[i]);
|
||||
}
|
||||
|
||||
// 添加新内容
|
||||
if (params.startLine > 1) result.append("\n");
|
||||
result.append(params.newStr);
|
||||
|
||||
// 添加后面的行
|
||||
for (int i = params.endLine; i < lines.length; i++) {
|
||||
result.append("\n");
|
||||
result.append(lines[i]);
|
||||
}
|
||||
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
private String generateDiff(String fileName, String oldContent, String newContent) {
|
||||
try {
|
||||
List<String> oldLines = Arrays.asList(oldContent.split("\n"));
|
||||
List<String> newLines = Arrays.asList(newContent.split("\n"));
|
||||
|
||||
Patch<String> patch = DiffUtils.diff(oldLines, newLines);
|
||||
List<String> unifiedDiff = UnifiedDiffUtils.generateUnifiedDiff(
|
||||
fileName + " (Original)",
|
||||
fileName + " (Edited)",
|
||||
oldLines,
|
||||
patch,
|
||||
3 // context lines
|
||||
);
|
||||
|
||||
return String.join("\n", unifiedDiff);
|
||||
} catch (Exception e) {
|
||||
logger.warn("Could not generate diff", e);
|
||||
return "Diff generation failed: " + e.getMessage();
|
||||
}
|
||||
}
|
||||
|
||||
private void createBackup(Path filePath, String content) throws IOException {
|
||||
String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss"));
|
||||
String backupFileName = filePath.getFileName().toString() + ".backup." + timestamp;
|
||||
Path backupPath = filePath.getParent().resolve(backupFileName);
|
||||
|
||||
Files.writeString(backupPath, content, StandardCharsets.UTF_8);
|
||||
logger.info("Created backup: {}", backupPath);
|
||||
}
|
||||
|
||||
private boolean shouldCreateBackup() {
|
||||
return true; // 总是创建备份
|
||||
}
|
||||
|
||||
private boolean isWithinWorkspace(Path filePath) {
|
||||
try {
|
||||
Path workspaceRoot = Paths.get(rootDirectory).toRealPath();
|
||||
Path normalizedPath = filePath.normalize();
|
||||
return normalizedPath.startsWith(workspaceRoot.normalize());
|
||||
} catch (IOException e) {
|
||||
logger.warn("Could not resolve workspace path", e);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private String getRelativePath(Path filePath) {
|
||||
try {
|
||||
Path workspaceRoot = Paths.get(rootDirectory);
|
||||
return workspaceRoot.relativize(filePath).toString();
|
||||
} catch (Exception e) {
|
||||
return filePath.toString();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 编辑文件参数
|
||||
*/
|
||||
public static class EditFileParams {
|
||||
@JsonProperty("file_path")
|
||||
private String filePath;
|
||||
|
||||
@JsonProperty("old_str")
|
||||
private String oldStr;
|
||||
|
||||
@JsonProperty("new_str")
|
||||
private String newStr;
|
||||
|
||||
@JsonProperty("start_line")
|
||||
private Integer startLine;
|
||||
|
||||
@JsonProperty("end_line")
|
||||
private Integer endLine;
|
||||
|
||||
// 构造器
|
||||
public EditFileParams() {
|
||||
}
|
||||
|
||||
public EditFileParams(String filePath, String oldStr, String newStr) {
|
||||
this.filePath = filePath;
|
||||
this.oldStr = oldStr;
|
||||
this.newStr = newStr;
|
||||
}
|
||||
|
||||
// Getters and Setters
|
||||
public String getFilePath() {
|
||||
return filePath;
|
||||
}
|
||||
|
||||
public void setFilePath(String filePath) {
|
||||
this.filePath = filePath;
|
||||
}
|
||||
|
||||
public String getOldStr() {
|
||||
return oldStr;
|
||||
}
|
||||
|
||||
public void setOldStr(String oldStr) {
|
||||
this.oldStr = oldStr;
|
||||
}
|
||||
|
||||
public String getNewStr() {
|
||||
return newStr;
|
||||
}
|
||||
|
||||
public void setNewStr(String newStr) {
|
||||
this.newStr = newStr;
|
||||
}
|
||||
|
||||
public Integer getStartLine() {
|
||||
return startLine;
|
||||
}
|
||||
|
||||
public void setStartLine(Integer startLine) {
|
||||
this.startLine = startLine;
|
||||
}
|
||||
|
||||
public Integer getEndLine() {
|
||||
return endLine;
|
||||
}
|
||||
|
||||
public void setEndLine(Integer endLine) {
|
||||
this.endLine = endLine;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("EditFileParams{path='%s', oldStrLength=%d, newStrLength=%d, lines=%s-%s}",
|
||||
filePath,
|
||||
oldStr != null ? oldStr.length() : 0,
|
||||
newStr != null ? newStr.length() : 0,
|
||||
startLine, endLine);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,409 +0,0 @@
|
||||
package com.example.demo.tools;
|
||||
|
||||
import com.example.demo.config.AppProperties;
|
||||
import com.example.demo.utils.PathUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.ai.tool.annotation.Tool;
|
||||
import org.springframework.ai.tool.annotation.ToolParam;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
* 文件操作工具类 - 使用Spring AI 1.0.0 @Tool注解
|
||||
*/
|
||||
@Component
|
||||
public class FileOperationTools {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(FileOperationTools.class);
|
||||
|
||||
private final String rootDirectory;
|
||||
private final AppProperties appProperties;
|
||||
|
||||
// 在构造函数中
|
||||
public FileOperationTools(AppProperties appProperties) {
|
||||
this.appProperties = appProperties;
|
||||
// 使用规范化的路径
|
||||
this.rootDirectory = PathUtils.normalizePath(appProperties.getWorkspace().getRootDirectory());
|
||||
}
|
||||
|
||||
@Tool(description = "Read the content of a file from the local filesystem. Supports pagination for large files.")
|
||||
public String readFile(
|
||||
@ToolParam(description = "The absolute path to the file to read. Must be within the workspace directory.")
|
||||
String absolutePath,
|
||||
@ToolParam(description = "Optional: For text files, the 0-based line number to start reading from.", required = false)
|
||||
Integer offset,
|
||||
@ToolParam(description = "Optional: For text files, the number of lines to read from the offset.", required = false)
|
||||
Integer limit) {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
try {
|
||||
logger.debug("Starting readFile operation for: {}", absolutePath);
|
||||
// 验证路径
|
||||
String validationError = validatePath(absolutePath);
|
||||
if (validationError != null) {
|
||||
return "Error: " + validationError;
|
||||
}
|
||||
|
||||
Path filePath = Paths.get(absolutePath);
|
||||
|
||||
// 检查文件是否存在
|
||||
if (!Files.exists(filePath)) {
|
||||
return "Error: File not found: " + absolutePath;
|
||||
}
|
||||
|
||||
// 检查是否为文件
|
||||
if (!Files.isRegularFile(filePath)) {
|
||||
return "Error: Path is not a regular file: " + absolutePath;
|
||||
}
|
||||
|
||||
// 检查文件大小
|
||||
long fileSize = Files.size(filePath);
|
||||
if (fileSize > appProperties.getWorkspace().getMaxFileSize()) {
|
||||
return "Error: File too large: " + fileSize + " bytes. Maximum allowed: " +
|
||||
appProperties.getWorkspace().getMaxFileSize() + " bytes";
|
||||
}
|
||||
|
||||
// 检查文件扩展名
|
||||
String fileName = filePath.getFileName().toString();
|
||||
if (!isAllowedFileType(fileName)) {
|
||||
return "Error: File type not allowed: " + fileName +
|
||||
". Allowed extensions: " + appProperties.getWorkspace().getAllowedExtensions();
|
||||
}
|
||||
|
||||
// 读取文件
|
||||
if (offset != null && limit != null) {
|
||||
return readFileWithPagination(filePath, offset, limit);
|
||||
} else {
|
||||
return readFullFile(filePath);
|
||||
}
|
||||
|
||||
} catch (IOException e) {
|
||||
long duration = System.currentTimeMillis() - startTime;
|
||||
logger.error("Error reading file: {} (duration: {}ms)", absolutePath, duration, e);
|
||||
return String.format("❌ Error reading file: %s\n⏱️ Duration: %dms\n🔍 Details: %s",
|
||||
absolutePath, duration, e.getMessage());
|
||||
} catch (Exception e) {
|
||||
long duration = System.currentTimeMillis() - startTime;
|
||||
logger.error("Unexpected error reading file: {} (duration: {}ms)", absolutePath, duration, e);
|
||||
return String.format("❌ Unexpected error reading file: %s\n⏱️ Duration: %dms\n🔍 Details: %s",
|
||||
absolutePath, duration, e.getMessage());
|
||||
} finally {
|
||||
long duration = System.currentTimeMillis() - startTime;
|
||||
logger.debug("Completed readFile operation for: {} (duration: {}ms)", absolutePath, duration);
|
||||
}
|
||||
}
|
||||
|
||||
@Tool(description = "Write content to a file. Creates new file or overwrites existing file.")
|
||||
public String writeFile(
|
||||
@ToolParam(description = "The absolute path to the file to write. Must be within the workspace directory.")
|
||||
String filePath,
|
||||
@ToolParam(description = "The content to write to the file")
|
||||
String content) {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
try {
|
||||
logger.debug("Starting writeFile operation for: {}", filePath);
|
||||
// 验证路径
|
||||
String validationError = validatePath(filePath);
|
||||
if (validationError != null) {
|
||||
return "Error: " + validationError;
|
||||
}
|
||||
|
||||
// 验证内容大小
|
||||
byte[] contentBytes = content.getBytes(StandardCharsets.UTF_8);
|
||||
if (contentBytes.length > appProperties.getWorkspace().getMaxFileSize()) {
|
||||
return "Error: Content too large: " + contentBytes.length + " bytes. Maximum allowed: " +
|
||||
appProperties.getWorkspace().getMaxFileSize() + " bytes";
|
||||
}
|
||||
|
||||
Path path = Paths.get(filePath);
|
||||
boolean isNewFile = !Files.exists(path);
|
||||
|
||||
// 确保父目录存在
|
||||
Files.createDirectories(path.getParent());
|
||||
|
||||
// 写入文件
|
||||
Files.writeString(path, content, StandardCharsets.UTF_8,
|
||||
StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
|
||||
|
||||
long lineCount = content.lines().count();
|
||||
String absolutePath = path.toAbsolutePath().toString();
|
||||
String relativePath = getRelativePath(path);
|
||||
|
||||
if (isNewFile) {
|
||||
return String.format("Successfully created file:\n📁 Full path: %s\n📂 Relative path: %s\n📊 Stats: %d lines, %d bytes",
|
||||
absolutePath, relativePath, lineCount, contentBytes.length);
|
||||
} else {
|
||||
return String.format("Successfully wrote to file:\n📁 Full path: %s\n📂 Relative path: %s\n📊 Stats: %d lines, %d bytes",
|
||||
absolutePath, relativePath, lineCount, contentBytes.length);
|
||||
}
|
||||
|
||||
} catch (IOException e) {
|
||||
long duration = System.currentTimeMillis() - startTime;
|
||||
logger.error("Error writing file: {} (duration: {}ms)", filePath, duration, e);
|
||||
return String.format("❌ Error writing file: %s\n⏱️ Duration: %dms\n🔍 Details: %s",
|
||||
filePath, duration, e.getMessage());
|
||||
} catch (Exception e) {
|
||||
long duration = System.currentTimeMillis() - startTime;
|
||||
logger.error("Unexpected error writing file: {} (duration: {}ms)", filePath, duration, e);
|
||||
return String.format("❌ Unexpected error writing file: %s\n⏱️ Duration: %dms\n🔍 Details: %s",
|
||||
filePath, duration, e.getMessage());
|
||||
} finally {
|
||||
long duration = System.currentTimeMillis() - startTime;
|
||||
logger.debug("Completed writeFile operation for: {} (duration: {}ms)", filePath, duration);
|
||||
}
|
||||
}
|
||||
|
||||
@Tool(description = "Edit a file by replacing specific text content.")
|
||||
public String editFile(
|
||||
@ToolParam(description = "The absolute path to the file to edit. Must be within the workspace directory.")
|
||||
String filePath,
|
||||
@ToolParam(description = "The text to find and replace in the file")
|
||||
String oldText,
|
||||
@ToolParam(description = "The new text to replace the old text with")
|
||||
String newText) {
|
||||
|
||||
try {
|
||||
// 验证路径
|
||||
String validationError = validatePath(filePath);
|
||||
if (validationError != null) {
|
||||
return "Error: " + validationError;
|
||||
}
|
||||
|
||||
Path path = Paths.get(filePath);
|
||||
|
||||
// 检查文件是否存在
|
||||
if (!Files.exists(path)) {
|
||||
return "Error: File not found: " + filePath;
|
||||
}
|
||||
|
||||
// 检查是否为文件
|
||||
if (!Files.isRegularFile(path)) {
|
||||
return "Error: Path is not a regular file: " + filePath;
|
||||
}
|
||||
|
||||
// 读取原始内容
|
||||
String originalContent = Files.readString(path, StandardCharsets.UTF_8);
|
||||
|
||||
// 执行替换
|
||||
if (!originalContent.contains(oldText)) {
|
||||
return "Error: Could not find the specified text to replace in file: " + filePath;
|
||||
}
|
||||
|
||||
String newContent = originalContent.replace(oldText, newText);
|
||||
|
||||
// 写入新内容
|
||||
Files.writeString(path, newContent, StandardCharsets.UTF_8);
|
||||
|
||||
String absolutePath = path.toAbsolutePath().toString();
|
||||
String relativePath = getRelativePath(path);
|
||||
return String.format("Successfully edited file:\n📁 Full path: %s\n📂 Relative path: %s\n✏️ Replaced text successfully",
|
||||
absolutePath, relativePath);
|
||||
|
||||
} catch (IOException e) {
|
||||
logger.error("Error editing file: " + filePath, e);
|
||||
return "Error editing file: " + e.getMessage();
|
||||
} catch (Exception e) {
|
||||
logger.error("Unexpected error editing file: " + filePath, e);
|
||||
return "Unexpected error: " + e.getMessage();
|
||||
}
|
||||
}
|
||||
|
||||
@Tool(description = "List the contents of a directory.")
|
||||
public String listDirectory(
|
||||
@ToolParam(description = "The absolute path to the directory to list. Must be within the workspace directory.")
|
||||
String directoryPath,
|
||||
@ToolParam(description = "Whether to list contents recursively", required = false)
|
||||
Boolean recursive) {
|
||||
|
||||
try {
|
||||
// 验证路径
|
||||
String validationError = validatePath(directoryPath);
|
||||
if (validationError != null) {
|
||||
return "Error: " + validationError;
|
||||
}
|
||||
|
||||
Path path = Paths.get(directoryPath);
|
||||
|
||||
// 检查目录是否存在
|
||||
if (!Files.exists(path)) {
|
||||
return "Error: Directory not found: " + directoryPath;
|
||||
}
|
||||
|
||||
// 检查是否为目录
|
||||
if (!Files.isDirectory(path)) {
|
||||
return "Error: Path is not a directory: " + directoryPath;
|
||||
}
|
||||
|
||||
boolean isRecursive = recursive != null && recursive;
|
||||
String absolutePath = path.toAbsolutePath().toString();
|
||||
String relativePath = getRelativePath(path);
|
||||
|
||||
if (isRecursive) {
|
||||
return listDirectoryRecursive(path, absolutePath, relativePath);
|
||||
} else {
|
||||
return listDirectorySimple(path, absolutePath, relativePath);
|
||||
}
|
||||
|
||||
} catch (IOException e) {
|
||||
logger.error("Error listing directory: " + directoryPath, e);
|
||||
return "Error listing directory: " + e.getMessage();
|
||||
} catch (Exception e) {
|
||||
logger.error("Unexpected error listing directory: " + directoryPath, e);
|
||||
return "Unexpected error: " + e.getMessage();
|
||||
}
|
||||
}
|
||||
|
||||
// 辅助方法
|
||||
private String validatePath(String path) {
|
||||
if (path == null || path.trim().isEmpty()) {
|
||||
return "Path cannot be empty";
|
||||
}
|
||||
|
||||
Path filePath = Paths.get(path);
|
||||
|
||||
// 验证是否为绝对路径
|
||||
if (!filePath.isAbsolute()) {
|
||||
return "Path must be absolute: " + path;
|
||||
}
|
||||
|
||||
// 验证是否在工作目录内
|
||||
if (!isWithinWorkspace(filePath)) {
|
||||
return "Path must be within the workspace directory (" + rootDirectory + "): " + path;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private boolean isWithinWorkspace(Path path) {
|
||||
try {
|
||||
Path workspacePath = Paths.get(rootDirectory).toRealPath();
|
||||
Path targetPath = path.toRealPath();
|
||||
return targetPath.startsWith(workspacePath);
|
||||
} catch (IOException e) {
|
||||
// 如果路径不存在,检查其父目录
|
||||
try {
|
||||
Path workspacePath = Paths.get(rootDirectory).toRealPath();
|
||||
Path normalizedPath = path.normalize();
|
||||
return normalizedPath.startsWith(workspacePath.normalize());
|
||||
} catch (IOException ex) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isAllowedFileType(String fileName) {
|
||||
List<String> allowedExtensions = appProperties.getWorkspace().getAllowedExtensions();
|
||||
return allowedExtensions.stream()
|
||||
.anyMatch(ext -> fileName.toLowerCase().endsWith(ext.toLowerCase()));
|
||||
}
|
||||
|
||||
private String getRelativePath(Path path) {
|
||||
try {
|
||||
Path workspacePath = Paths.get(rootDirectory);
|
||||
return workspacePath.relativize(path).toString();
|
||||
} catch (Exception e) {
|
||||
return path.toString();
|
||||
}
|
||||
}
|
||||
|
||||
private String readFullFile(Path filePath) throws IOException {
|
||||
String content = Files.readString(filePath, StandardCharsets.UTF_8);
|
||||
String absolutePath = filePath.toAbsolutePath().toString();
|
||||
String relativePath = getRelativePath(filePath);
|
||||
|
||||
long lineCount = content.lines().count();
|
||||
return String.format("📁 Full path: %s\n📂 Relative path: %s\n📊 Stats: %d lines, %d bytes\n\n📄 Content:\n%s",
|
||||
absolutePath, relativePath, lineCount, content.getBytes(StandardCharsets.UTF_8).length, content);
|
||||
}
|
||||
|
||||
private String readFileWithPagination(Path filePath, int offset, int limit) throws IOException {
|
||||
List<String> allLines = Files.readAllLines(filePath, StandardCharsets.UTF_8);
|
||||
|
||||
if (offset >= allLines.size()) {
|
||||
return "Error: Offset " + offset + " is beyond file length (" + allLines.size() + " lines)";
|
||||
}
|
||||
|
||||
int endIndex = Math.min(offset + limit, allLines.size());
|
||||
List<String> selectedLines = allLines.subList(offset, endIndex);
|
||||
String content = String.join("\n", selectedLines);
|
||||
|
||||
String absolutePath = filePath.toAbsolutePath().toString();
|
||||
String relativePath = getRelativePath(filePath);
|
||||
return String.format("📁 Full path: %s\n📂 Relative path: %s\n📊 Showing lines %d-%d of %d total\n\n📄 Content:\n%s",
|
||||
absolutePath, relativePath, offset + 1, endIndex, allLines.size(), content);
|
||||
}
|
||||
|
||||
private String listDirectorySimple(Path path, String absolutePath, String relativePath) throws IOException {
|
||||
StringBuilder result = new StringBuilder();
|
||||
result.append("📁 Full path: ").append(absolutePath).append("\n");
|
||||
result.append("📂 Relative path: ").append(relativePath).append("\n\n");
|
||||
result.append("📋 Directory contents:\n");
|
||||
|
||||
try (Stream<Path> entries = Files.list(path)) {
|
||||
List<Path> sortedEntries = entries.sorted().collect(Collectors.toList());
|
||||
|
||||
for (Path entry : sortedEntries) {
|
||||
String name = entry.getFileName().toString();
|
||||
String entryAbsolutePath = entry.toAbsolutePath().toString();
|
||||
if (Files.isDirectory(entry)) {
|
||||
result.append("📁 [DIR] ").append(name).append("/\n");
|
||||
result.append(" └─ ").append(entryAbsolutePath).append("\n");
|
||||
} else {
|
||||
long size = Files.size(entry);
|
||||
result.append("📄 [FILE] ").append(name).append(" (").append(size).append(" bytes)\n");
|
||||
result.append(" └─ ").append(entryAbsolutePath).append("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
/**
 * Renders a recursive tree listing rooted at {@code path}.
 * Each entry prints its workspace-relative path (indented by depth below the
 * root) followed by a line with its absolute path.
 *
 * NOTE(review): Files.walk has no explicit depth limit here, and it may throw
 * UncheckedIOException mid-traversal on unreadable subtrees — confirm both
 * are acceptable to callers.
 *
 * @throws IOException if the walk cannot be started
 */
private String listDirectoryRecursive(Path path, String absolutePath, String relativePath) throws IOException {
    StringBuilder result = new StringBuilder();
    result.append("📁 Full path: ").append(absolutePath).append("\n");
    result.append("📂 Relative path: ").append(relativePath).append("\n\n");
    result.append("🌳 Directory tree (recursive):\n");

    try (Stream<Path> entries = Files.walk(path)) {
        entries.sorted()
            .forEach(entry -> {
                // Skip the root itself; only children are listed.
                if (!entry.equals(path)) {
                    String entryAbsolutePath = entry.toAbsolutePath().toString();
                    String entryRelativePath = getRelativePath(entry);

                    // Compute the indent level from the path element count.
                    int depth = entry.getNameCount() - path.getNameCount();
                    String indent = " ".repeat(depth);

                    if (Files.isDirectory(entry)) {
                        result.append(indent).append("📁 ").append(entryRelativePath).append("/\n");
                        result.append(indent).append(" └─ ").append(entryAbsolutePath).append("\n");
                    } else {
                        try {
                            long size = Files.size(entry);
                            result.append(indent).append("📄 ").append(entryRelativePath).append(" (").append(size).append(" bytes)\n");
                            result.append(indent).append(" └─ ").append(entryAbsolutePath).append("\n");
                        } catch (IOException e) {
                            // Size lookup failed (e.g. entry vanished) — still list it.
                            result.append(indent).append("📄 ").append(entryRelativePath).append(" (size unknown)\n");
                            result.append(indent).append(" └─ ").append(entryAbsolutePath).append("\n");
                        }
                    }
                }
            });
    }

    return result.toString();
}
|
||||
}
|
||||
@@ -1,394 +0,0 @@
|
||||
package com.example.demo.tools;
|
||||
|
||||
import com.example.demo.config.AppProperties;
|
||||
import com.example.demo.schema.JsonSchema;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.ZoneId;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
 * Directory listing tool.
 * Lists files and subdirectories of a workspace path, with optional bounded
 * recursion, hidden-file filtering, and a table-style text report showing
 * sizes and modification times.
 */
@Component
public class ListDirectoryTool extends BaseTool<ListDirectoryTool.ListDirectoryParams> {

    // Workspace root every listed path must stay inside (from AppProperties).
    private final String rootDirectory;
    private final AppProperties appProperties;

    /**
     * Registers the tool metadata (name, display name, description, schema)
     * with the BaseTool superclass and captures the workspace root.
     */
    public ListDirectoryTool(AppProperties appProperties) {
        super(
            "list_directory",
            "ListDirectory",
            "Lists files and directories in the specified path. " +
            "Supports recursive listing and filtering. " +
            "Shows file sizes, modification times, and types. " +
            "Use absolute paths within the workspace directory.",
            createSchema()
        );
        this.appProperties = appProperties;
        this.rootDirectory = appProperties.getWorkspace().getRootDirectory();
    }

    // Static because createSchema() runs during construction, before instance
    // fields exist. NOTE(review): this assumes the workspace is
    // <user.dir>/workspace, which may differ from AppProperties.rootDirectory
    // — confirm the two stay consistent.
    private static String getWorkspaceBasePath() {
        return Paths.get(System.getProperty("user.dir"), "workspace").toString();
    }

    // Builds an example absolute path string for schema documentation.
    private static String getPathExample(String subPath) {
        return "Example: \"" + Paths.get(getWorkspaceBasePath(), subPath).toString() + "\"";
    }

    // JSON schema for the tool parameters; only "path" is required.
    private static JsonSchema createSchema() {
        return JsonSchema.object()
            .addProperty("path", JsonSchema.string(
                "MUST be an absolute path to the directory to list. Path must be within the workspace directory (" +
                getWorkspaceBasePath() + "). " +
                getPathExample("project/src") + ". " +
                "Relative paths are NOT allowed."
            ))
            .addProperty("recursive", JsonSchema.bool(
                "Optional: Whether to list files recursively in subdirectories. Default: false"
            ))
            .addProperty("max_depth", JsonSchema.integer(
                "Optional: Maximum depth for recursive listing. Default: 3, Maximum: 10"
            ).minimum(1).maximum(10))
            .addProperty("show_hidden", JsonSchema.bool(
                "Optional: Whether to show hidden files (starting with '.'). Default: false"
            ))
            .required("path");
    }

    /**
     * Validates parameters; returns a human-readable error message, or
     * {@code null} when the parameters are valid.
     */
    @Override
    public String validateToolParams(ListDirectoryParams params) {
        String baseValidation = super.validateToolParams(params);
        if (baseValidation != null) {
            return baseValidation;
        }

        // Validate path presence
        if (params.path == null || params.path.trim().isEmpty()) {
            return "Directory path cannot be empty";
        }

        Path dirPath = Paths.get(params.path);

        // Must be an absolute path
        if (!dirPath.isAbsolute()) {
            return "Directory path must be absolute: " + params.path;
        }

        // Must be contained in the workspace
        if (!isWithinWorkspace(dirPath)) {
            return "Directory path must be within the workspace directory (" + rootDirectory + "): " + params.path;
        }

        // Recursion depth bound
        if (params.maxDepth != null && (params.maxDepth < 1 || params.maxDepth > 10)) {
            return "Max depth must be between 1 and 10";
        }

        return null;
    }

    /**
     * Lists the directory asynchronously and formats the result as a text
     * table wrapped in a ToolResult.
     */
    @Override
    public CompletableFuture<ToolResult> execute(ListDirectoryParams params) {
        return CompletableFuture.supplyAsync(() -> {
            try {
                Path dirPath = Paths.get(params.path);

                // The directory must exist
                if (!Files.exists(dirPath)) {
                    return ToolResult.error("Directory not found: " + params.path);
                }

                // ...and actually be a directory
                if (!Files.isDirectory(dirPath)) {
                    return ToolResult.error("Path is not a directory: " + params.path);
                }

                // Collect entries (optionally recursive)
                List<FileInfo> fileInfos = listFiles(dirPath, params);

                // Render the report
                String content = formatFileList(fileInfos, params);
                String relativePath = getRelativePath(dirPath);
                String displayMessage = String.format("Listed directory: %s (%d items)",
                    relativePath, fileInfos.size());

                return ToolResult.success(content, displayMessage);

            } catch (IOException e) {
                logger.error("Error listing directory: " + params.path, e);
                return ToolResult.error("Error listing directory: " + e.getMessage());
            } catch (Exception e) {
                logger.error("Unexpected error listing directory: " + params.path, e);
                return ToolResult.error("Unexpected error: " + e.getMessage());
            }
        });
    }

    // Gathers entries (flat or recursive), then sorts: directories first,
    // then alphabetically by name.
    private List<FileInfo> listFiles(Path dirPath, ListDirectoryParams params) throws IOException {
        List<FileInfo> fileInfos = new ArrayList<>();

        if (params.recursive != null && params.recursive) {
            int maxDepth = params.maxDepth != null ? params.maxDepth : 3;
            listFilesRecursive(dirPath, fileInfos, 0, maxDepth, params);
        } else {
            listFilesInDirectory(dirPath, fileInfos, params);
        }

        // Sort: directories first, then by name
        fileInfos.sort(Comparator
            .comparing((FileInfo f) -> !f.isDirectory())
            .thenComparing(FileInfo::getName));

        return fileInfos;
    }

    // Single-level listing; per-entry IO failures are logged and skipped.
    private void listFilesInDirectory(Path dirPath, List<FileInfo> fileInfos, ListDirectoryParams params) throws IOException {
        try (Stream<Path> stream = Files.list(dirPath)) {
            stream.forEach(path -> {
                try {
                    String fileName = path.getFileName().toString();

                    // Skip hidden files unless explicitly requested.
                    // NOTE(review): params.showHidden is a Boolean — unboxing
                    // here throws NPE when the client omits show_hidden;
                    // confirm a default is applied upstream.
                    if (!params.showHidden && fileName.startsWith(".")) {
                        return;
                    }

                    FileInfo fileInfo = createFileInfo(path, dirPath);
                    fileInfos.add(fileInfo);
                } catch (IOException e) {
                    logger.warn("Could not get info for file: " + path, e);
                }
            });
        }
    }

    // Depth-limited recursive listing. currentDepth starts at 0; recursion
    // stops once it reaches maxDepth.
    private void listFilesRecursive(Path dirPath, List<FileInfo> fileInfos, int currentDepth, int maxDepth, ListDirectoryParams params) throws IOException {
        if (currentDepth >= maxDepth) {
            return;
        }

        try (Stream<Path> stream = Files.list(dirPath)) {
            List<Path> paths = stream.collect(Collectors.toList());

            for (Path path : paths) {
                String fileName = path.getFileName().toString();

                // Skip hidden files unless explicitly requested.
                // NOTE(review): same Boolean-unboxing NPE risk as above.
                if (!params.showHidden && fileName.startsWith(".")) {
                    continue;
                }

                try {
                    // Entries are recorded relative to the originally
                    // requested root (params.path), not the current subtree.
                    FileInfo fileInfo = createFileInfo(path, Paths.get(params.path));
                    fileInfos.add(fileInfo);

                    // Recurse into subdirectories
                    if (Files.isDirectory(path)) {
                        listFilesRecursive(path, fileInfos, currentDepth + 1, maxDepth, params);
                    }
                } catch (IOException e) {
                    logger.warn("Could not get info for file: " + path, e);
                }
            }
        }
    }

    // Snapshot of one entry: name, base-relative path, type, size (0 for
    // directories), and last-modified time in the system default zone.
    private FileInfo createFileInfo(Path path, Path basePath) throws IOException {
        String name = path.getFileName().toString();
        boolean isDirectory = Files.isDirectory(path);
        long size = isDirectory ? 0 : Files.size(path);

        LocalDateTime lastModified = LocalDateTime.ofInstant(
            Files.getLastModifiedTime(path).toInstant(),
            ZoneId.systemDefault()
        );

        String relativePath = basePath.relativize(path).toString();

        return new FileInfo(name, relativePath, isDirectory, size, lastModified);
    }

    // Renders the collected entries as a fixed-width text table.
    private String formatFileList(List<FileInfo> fileInfos, ListDirectoryParams params) {
        if (fileInfos.isEmpty()) {
            return "Directory is empty.";
        }

        StringBuilder sb = new StringBuilder();
        sb.append(String.format("Directory listing for: %s\n", getRelativePath(Paths.get(params.path))));
        sb.append(String.format("Total items: %d\n\n", fileInfos.size()));

        // Table header
        sb.append(String.format("%-4s %-40s %-12s %-20s %s\n",
            "Type", "Name", "Size", "Modified", "Path"));
        sb.append("-".repeat(80)).append("\n");

        DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

        for (FileInfo fileInfo : fileInfos) {
            String type = fileInfo.isDirectory() ? "DIR" : "FILE";
            String sizeStr = fileInfo.isDirectory() ? "-" : formatFileSize(fileInfo.getSize());
            String modifiedStr = fileInfo.getLastModified().format(formatter);

            sb.append(String.format("%-4s %-40s %-12s %-20s %s\n",
                type,
                truncate(fileInfo.getName(), 40),
                sizeStr,
                modifiedStr,
                fileInfo.getRelativePath()
            ));
        }

        return sb.toString();
    }

    // Human-readable byte size (B / KB / MB / GB, one decimal place).
    private String formatFileSize(long bytes) {
        if (bytes < 1024) return bytes + " B";
        if (bytes < 1024 * 1024) return String.format("%.1f KB", bytes / 1024.0);
        if (bytes < 1024 * 1024 * 1024) return String.format("%.1f MB", bytes / (1024.0 * 1024));
        return String.format("%.1f GB", bytes / (1024.0 * 1024 * 1024));
    }

    // Truncates a string to maxLength characters, ending with "...".
    private String truncate(String str, int maxLength) {
        if (str.length() <= maxLength) {
            return str;
        }
        return str.substring(0, maxLength - 3) + "...";
    }

    // Lexical containment check against the (real) workspace root.
    // NOTE(review): the target is only normalize()d, not toRealPath()d, so a
    // symlink inside the workspace could point outside it — confirm this is
    // acceptable for a read-only listing.
    private boolean isWithinWorkspace(Path dirPath) {
        try {
            Path workspaceRoot = Paths.get(rootDirectory).toRealPath();
            Path normalizedPath = dirPath.normalize();
            return normalizedPath.startsWith(workspaceRoot.normalize());
        } catch (IOException e) {
            logger.warn("Could not resolve workspace path", e);
            return false;
        }
    }

    // Workspace-relative form of a path; falls back to the path itself when
    // relativization fails.
    private String getRelativePath(Path dirPath) {
        try {
            Path workspaceRoot = Paths.get(rootDirectory);
            return workspaceRoot.relativize(dirPath).toString();
        } catch (Exception e) {
            return dirPath.toString();
        }
    }

    /**
     * Immutable snapshot of a single directory entry.
     */
    public static class FileInfo {
        private final String name;
        private final String relativePath;
        private final boolean isDirectory;
        private final long size;               // bytes; 0 for directories
        private final LocalDateTime lastModified;

        public FileInfo(String name, String relativePath, boolean isDirectory, long size, LocalDateTime lastModified) {
            this.name = name;
            this.relativePath = relativePath;
            this.isDirectory = isDirectory;
            this.size = size;
            this.lastModified = lastModified;
        }

        // Getters
        public String getName() {
            return name;
        }

        public String getRelativePath() {
            return relativePath;
        }

        public boolean isDirectory() {
            return isDirectory;
        }

        public long getSize() {
            return size;
        }

        public LocalDateTime getLastModified() {
            return lastModified;
        }
    }

    /**
     * Parameters for the list_directory tool (bound from the JSON schema).
     */
    public static class ListDirectoryParams {
        private String path;          // required absolute directory path
        private Boolean recursive;    // optional; default false

        @JsonProperty("max_depth")
        private Integer maxDepth;     // optional; default 3, max 10

        @JsonProperty("show_hidden")
        private Boolean showHidden;   // optional; default false

        // Constructors
        public ListDirectoryParams() {
        }

        public ListDirectoryParams(String path) {
            this.path = path;
        }

        // Getters and setters
        public String getPath() {
            return path;
        }

        public void setPath(String path) {
            this.path = path;
        }

        public Boolean getRecursive() {
            return recursive;
        }

        public void setRecursive(Boolean recursive) {
            this.recursive = recursive;
        }

        public Integer getMaxDepth() {
            return maxDepth;
        }

        public void setMaxDepth(Integer maxDepth) {
            this.maxDepth = maxDepth;
        }

        public Boolean getShowHidden() {
            return showHidden;
        }

        public void setShowHidden(Boolean showHidden) {
            this.showHidden = showHidden;
        }

        @Override
        public String toString() {
            return String.format("ListDirectoryParams{path='%s', recursive=%s, maxDepth=%d}",
                path, recursive, maxDepth);
        }
    }
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,325 +0,0 @@
|
||||
package com.example.demo.tools;
|
||||
|
||||
import com.example.demo.config.AppProperties;
|
||||
import com.example.demo.schema.JsonSchema;
|
||||
import com.example.demo.service.ToolExecutionLogger;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import org.springframework.ai.tool.annotation.Tool;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
|
||||
/**
 * File reading tool.
 * Reads text files from inside the workspace, optionally paginated via
 * offset/limit for large files. Enforces workspace containment, a maximum
 * file size, and an extension allow-list.
 */
@Component
public class ReadFileTool extends BaseTool<ReadFileTool.ReadFileParams> {

    // Workspace root every readable file must live under (from AppProperties).
    private final String rootDirectory;
    private final AppProperties appProperties;

    // NOTE(review): field injection; constructor injection would be easier to
    // test and keeps the dependency explicit — consider migrating.
    @Autowired
    private ToolExecutionLogger executionLogger;

    /**
     * Registers the tool metadata with BaseTool and captures the workspace
     * root from configuration.
     */
    public ReadFileTool(AppProperties appProperties) {
        super(
            "read_file",
            "ReadFile",
            "Reads and returns the content of a specified file from the local filesystem. " +
            "Handles text files and supports pagination for large files. " +
            "Always use absolute paths within the workspace directory.",
            createSchema()
        );
        this.appProperties = appProperties;
        this.rootDirectory = appProperties.getWorkspace().getRootDirectory();
    }

    // Static because createSchema() runs during construction.
    // NOTE(review): assumes <user.dir>/workspace — confirm it matches
    // AppProperties.rootDirectory.
    private static String getWorkspaceBasePath() {
        return Paths.get(System.getProperty("user.dir"), "workspace").toString();
    }

    // Builds an example absolute path string for schema documentation.
    private static String getPathExample(String subPath) {
        return "Example: \"" + Paths.get(getWorkspaceBasePath(), subPath).toString() + "\"";
    }

    // JSON schema: required absolute_path, optional offset/limit pagination.
    private static JsonSchema createSchema() {
        return JsonSchema.object()
            .addProperty("absolute_path", JsonSchema.string(
                "MUST be an absolute path to the file to read. Path must be within the workspace directory (" +
                getWorkspaceBasePath() + "). " +
                getPathExample("project/src/main.java") + ". " +
                "Relative paths are NOT allowed."
            ))
            .addProperty("offset", JsonSchema.integer(
                "Optional: For text files, the 0-based line number to start reading from. " +
                "Requires 'limit' to be set. Use for paginating through large files."
            ).minimum(0))
            .addProperty("limit", JsonSchema.integer(
                "Optional: For text files, the number of lines to read from the offset. " +
                "Use for paginating through large files."
            ).minimum(1))
            .required("absolute_path");
    }

    /**
     * Validates parameters; returns a human-readable error message, or
     * {@code null} when valid.
     */
    @Override
    public String validateToolParams(ReadFileParams params) {
        String baseValidation = super.validateToolParams(params);
        if (baseValidation != null) {
            return baseValidation;
        }

        // Validate path presence
        if (params.absolutePath == null || params.absolutePath.trim().isEmpty()) {
            return "File path cannot be empty";
        }

        Path filePath = Paths.get(params.absolutePath);

        // Must be an absolute path
        if (!filePath.isAbsolute()) {
            return "File path must be absolute: " + params.absolutePath;
        }

        // Must be contained in the workspace
        if (!isWithinWorkspace(filePath)) {
            return "File path must be within the workspace directory (" + rootDirectory + "): " + params.absolutePath;
        }

        // Pagination invariants: offset requires limit; both must be in range
        if (params.offset != null && params.limit == null) {
            return "When 'offset' is specified, 'limit' must also be specified";
        }

        if (params.offset != null && params.offset < 0) {
            return "Offset must be non-negative";
        }

        if (params.limit != null && params.limit <= 0) {
            return "Limit must be positive";
        }

        return null;
    }

    /**
     * Spring AI entry point: validates, logs each phase through
     * ToolExecutionLogger, delegates to execute(), and blocks on the result.
     * Returns either the file content or a string starting with "Error: ".
     */
    @Tool(name = "read_file", description = "Reads and returns the content of a specified file from the local filesystem")
    public String readFile(String absolutePath, Integer offset, Integer limit) {
        long callId = executionLogger.logToolStart("read_file", "读取文件内容",
            String.format("文件路径=%s, 偏移量=%s, 限制行数=%s", absolutePath, offset, limit));
        long startTime = System.currentTimeMillis();

        try {
            ReadFileParams params = new ReadFileParams();
            params.setAbsolutePath(absolutePath);
            params.setOffset(offset);
            params.setLimit(limit);

            executionLogger.logToolStep(callId, "read_file", "参数验证", "验证文件路径和分页参数");

            // Validate parameters before touching the filesystem
            String validation = validateToolParams(params);
            if (validation != null) {
                long executionTime = System.currentTimeMillis() - startTime;
                executionLogger.logToolError(callId, "read_file", "参数验证失败: " + validation, executionTime);
                return "Error: " + validation;
            }

            executionLogger.logFileOperation(callId, "读取文件", absolutePath,
                offset != null ? String.format("分页读取: 偏移=%d, 限制=%d", offset, limit) : "完整读取");

            // Execute the tool.
            // NOTE(review): join() blocks the calling thread on the async
            // pipeline started in execute() — confirm acceptable for the
            // Spring AI calling context.
            ToolResult result = execute(params).join();

            long executionTime = System.currentTimeMillis() - startTime;

            if (result.isSuccess()) {
                executionLogger.logToolSuccess(callId, "read_file", "文件读取成功", executionTime);
                return result.getLlmContent();
            } else {
                executionLogger.logToolError(callId, "read_file", result.getErrorMessage(), executionTime);
                return "Error: " + result.getErrorMessage();
            }

        } catch (Exception e) {
            long executionTime = System.currentTimeMillis() - startTime;
            executionLogger.logToolError(callId, "read_file", "工具执行异常: " + e.getMessage(), executionTime);
            logger.error("Error in read file tool", e);
            return "Error: " + e.getMessage();
        }
    }

    /**
     * Asynchronously validates the target file (existence, regular file, size
     * cap, extension allow-list) and reads it, fully or paginated.
     */
    @Override
    public CompletableFuture<ToolResult> execute(ReadFileParams params) {
        return CompletableFuture.supplyAsync(() -> {
            try {
                Path filePath = Paths.get(params.absolutePath);

                // The file must exist
                if (!Files.exists(filePath)) {
                    return ToolResult.error("File not found: " + params.absolutePath);
                }

                // ...and be a regular file (not a directory/special file)
                if (!Files.isRegularFile(filePath)) {
                    return ToolResult.error("Path is not a regular file: " + params.absolutePath);
                }

                // Enforce the configured size cap
                long fileSize = Files.size(filePath);
                if (fileSize > appProperties.getWorkspace().getMaxFileSize()) {
                    return ToolResult.error("File too large: " + fileSize + " bytes. Maximum allowed: " +
                        appProperties.getWorkspace().getMaxFileSize() + " bytes");
                }

                // Enforce the extension allow-list
                String fileName = filePath.getFileName().toString();
                if (!isAllowedFileType(fileName)) {
                    return ToolResult.error("File type not allowed: " + fileName +
                        ". Allowed extensions: " + appProperties.getWorkspace().getAllowedExtensions());
                }

                // Read the file — paginated when both offset and limit given
                if (params.offset != null && params.limit != null) {
                    return readFileWithPagination(filePath, params.offset, params.limit);
                } else {
                    return readFullFile(filePath);
                }

            } catch (IOException e) {
                logger.error("Error reading file: " + params.absolutePath, e);
                return ToolResult.error("Error reading file: " + e.getMessage());
            } catch (Exception e) {
                logger.error("Unexpected error reading file: " + params.absolutePath, e);
                return ToolResult.error("Unexpected error: " + e.getMessage());
            }
        });
    }

    // Reads the whole file as UTF-8; display message carries line/byte stats.
    private ToolResult readFullFile(Path filePath) throws IOException {
        String content = Files.readString(filePath, StandardCharsets.UTF_8);
        String relativePath = getRelativePath(filePath);

        long lineCount = content.lines().count();
        String displayMessage = String.format("Read file: %s (%d lines, %d bytes)",
            relativePath, lineCount, content.getBytes(StandardCharsets.UTF_8).length);

        return ToolResult.success(content, displayMessage);
    }

    // Reads lines [offset, offset+limit) (0-based); errors when offset is
    // past the end of the file.
    private ToolResult readFileWithPagination(Path filePath, int offset, int limit) throws IOException {
        List<String> allLines = Files.readAllLines(filePath, StandardCharsets.UTF_8);

        if (offset >= allLines.size()) {
            return ToolResult.error("Offset " + offset + " is beyond file length (" + allLines.size() + " lines)");
        }

        int endIndex = Math.min(offset + limit, allLines.size());
        List<String> selectedLines = allLines.subList(offset, endIndex);
        String content = String.join("\n", selectedLines);

        String relativePath = getRelativePath(filePath);
        String displayMessage = String.format("Read file: %s (lines %d-%d of %d total)",
            relativePath, offset + 1, endIndex, allLines.size());

        return ToolResult.success(content, displayMessage);
    }

    // Containment check: real paths (symlinks resolved) when the target
    // exists; lexical fallback for not-yet-existing targets.
    private boolean isWithinWorkspace(Path filePath) {
        try {
            Path workspaceRoot = Paths.get(rootDirectory).toRealPath();
            Path resolvedPath = filePath.toRealPath();
            return resolvedPath.startsWith(workspaceRoot);
        } catch (IOException e) {
            // Path does not exist — fall back to a normalized prefix check
            try {
                Path workspaceRoot = Paths.get(rootDirectory).toRealPath();
                Path normalizedPath = filePath.normalize();
                return normalizedPath.startsWith(workspaceRoot.normalize());
            } catch (IOException ex) {
                logger.warn("Could not resolve workspace path", ex);
                return false;
            }
        }
    }

    // Case-insensitive suffix match against the configured extension list.
    // NOTE(review): toLowerCase() uses the default locale — consider
    // Locale.ROOT or regionMatches for locale independence.
    private boolean isAllowedFileType(String fileName) {
        List<String> allowedExtensions = appProperties.getWorkspace().getAllowedExtensions();
        return allowedExtensions.stream()
            .anyMatch(ext -> fileName.toLowerCase().endsWith(ext.toLowerCase()));
    }

    // Workspace-relative form of a path; falls back to the path itself.
    private String getRelativePath(Path filePath) {
        try {
            Path workspaceRoot = Paths.get(rootDirectory);
            return workspaceRoot.relativize(filePath).toString();
        } catch (Exception e) {
            return filePath.toString();
        }
    }

    /**
     * Parameters for the read_file tool (bound from the JSON schema).
     */
    public static class ReadFileParams {
        @JsonProperty("absolute_path")
        private String absolutePath;   // required absolute file path

        private Integer offset;        // optional 0-based start line
        private Integer limit;         // optional max number of lines

        // Constructors
        public ReadFileParams() {
        }

        public ReadFileParams(String absolutePath) {
            this.absolutePath = absolutePath;
        }

        public ReadFileParams(String absolutePath, Integer offset, Integer limit) {
            this.absolutePath = absolutePath;
            this.offset = offset;
            this.limit = limit;
        }

        // Getters and setters
        public String getAbsolutePath() {
            return absolutePath;
        }

        public void setAbsolutePath(String absolutePath) {
            this.absolutePath = absolutePath;
        }

        public Integer getOffset() {
            return offset;
        }

        public void setOffset(Integer offset) {
            this.offset = offset;
        }

        public Integer getLimit() {
            return limit;
        }

        public void setLimit(Integer limit) {
            this.limit = limit;
        }

        @Override
        public String toString() {
            return String.format("ReadFileParams{path='%s', offset=%d, limit=%d}",
                absolutePath, offset, limit);
        }
    }
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,132 +0,0 @@
|
||||
package com.example.demo.tools;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||
|
||||
/**
 * Result of a tool execution.
 */
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public class ToolResult {

    private final boolean success;
    private final String llmContent;
    private final Object returnDisplay;
    private final String errorMessage;

    /** All construction goes through the static factories below. */
    private ToolResult(boolean ok, String content, Object display, String error) {
        this.success = ok;
        this.llmContent = content;
        this.returnDisplay = display;
        this.errorMessage = error;
    }

    /** Successful result whose display payload is the content itself. */
    public static ToolResult success(String llmContent) {
        return success(llmContent, llmContent);
    }

    /** Successful result with a distinct display payload. */
    public static ToolResult success(String llmContent, Object returnDisplay) {
        return new ToolResult(true, llmContent, returnDisplay, null);
    }

    /** Failed result; content and display are both "Error: " + message. */
    public static ToolResult error(String errorMessage) {
        String prefixed = "Error: " + errorMessage;
        return new ToolResult(false, prefixed, prefixed, errorMessage);
    }

    // Accessors
    public boolean isSuccess() {
        return success;
    }

    public String getLlmContent() {
        return llmContent;
    }

    public Object getReturnDisplay() {
        return returnDisplay;
    }

    public String getErrorMessage() {
        return errorMessage;
    }

    @Override
    public String toString() {
        return success
            ? "ToolResult{success=true, content='" + llmContent + "'}"
            : "ToolResult{success=false, error='" + errorMessage + "'}";
    }
}
|
||||
|
||||
/**
 * File diff result.
 */
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
class FileDiff {
    private final String fileDiff;
    private final String fileName;

    /** Pairs a unified-diff string with the name of the file it describes. */
    public FileDiff(String diffText, String name) {
        this.fileDiff = diffText;
        this.fileName = name;
    }

    public String getFileDiff() {
        return fileDiff;
    }

    public String getFileName() {
        return fileName;
    }

    @Override
    public String toString() {
        return String.format("FileDiff{fileName='%s'}", fileName);
    }
}
|
||||
|
||||
/**
 * Tool confirmation details.
 */
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
class ToolConfirmationDetails {
|
||||
private final String type;
|
||||
private final String title;
|
||||
private final String description;
|
||||
private final Object details;
|
||||
|
||||
public ToolConfirmationDetails(String type, String title, String description, Object details) {
|
||||
this.type = type;
|
||||
this.title = title;
|
||||
this.description = description;
|
||||
this.details = details;
|
||||
}
|
||||
|
||||
public static ToolConfirmationDetails edit(String title, String fileName, String fileDiff) {
|
||||
return new ToolConfirmationDetails("edit", title, "File edit confirmation",
|
||||
new FileDiff(fileDiff, fileName));
|
||||
}
|
||||
|
||||
public static ToolConfirmationDetails exec(String title, String command) {
|
||||
return new ToolConfirmationDetails("exec", title, "Command execution confirmation", command);
|
||||
}
|
||||
|
||||
// Getters
|
||||
public String getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
public String getTitle() {
|
||||
return title;
|
||||
}
|
||||
|
||||
public String getDescription() {
|
||||
return description;
|
||||
}
|
||||
|
||||
public Object getDetails() {
|
||||
return details;
|
||||
}
|
||||
}
|
||||
@@ -1,359 +0,0 @@
|
||||
package com.example.demo.tools;
|
||||
|
||||
import com.example.demo.config.AppProperties;
|
||||
import com.example.demo.schema.JsonSchema;
|
||||
import com.example.demo.service.ToolExecutionLogger;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.github.difflib.DiffUtils;
|
||||
import com.github.difflib.UnifiedDiffUtils;
|
||||
import com.github.difflib.patch.Patch;
|
||||
import org.springframework.ai.tool.annotation.Tool;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
|
||||
/**
|
||||
* 文件写入工具
|
||||
* 支持创建新文件或覆盖现有文件,自动显示差异
|
||||
*/
|
||||
@Component
|
||||
public class WriteFileTool extends BaseTool<WriteFileTool.WriteFileParams> {
|
||||
|
||||
private final String rootDirectory;
|
||||
private final AppProperties appProperties;
|
||||
|
||||
@Autowired
|
||||
private ToolExecutionLogger executionLogger;
|
||||
|
||||
public WriteFileTool(AppProperties appProperties) {
|
||||
super(
|
||||
"write_file",
|
||||
"WriteFile",
|
||||
"Writes content to a file. Creates new files or overwrites existing ones. " +
|
||||
"Always shows a diff before writing. Automatically creates parent directories if needed. " +
|
||||
"Use absolute paths within the workspace directory.",
|
||||
createSchema()
|
||||
);
|
||||
this.appProperties = appProperties;
|
||||
this.rootDirectory = appProperties.getWorkspace().getRootDirectory();
|
||||
}
|
||||
|
||||
private static String getWorkspaceBasePath() {
|
||||
return Paths.get(System.getProperty("user.dir"), "workspace").toString();
|
||||
}
|
||||
|
||||
private static String getPathExample(String subPath) {
|
||||
return "Example: \"" + Paths.get(getWorkspaceBasePath(), subPath).toString() + "\"";
|
||||
}
|
||||
|
||||
private static JsonSchema createSchema() {
|
||||
return JsonSchema.object()
|
||||
.addProperty("file_path", JsonSchema.string(
|
||||
"MUST be an absolute path to the file to write to. Path must be within the workspace directory (" +
|
||||
getWorkspaceBasePath() + "). " +
|
||||
getPathExample("project/src/main.java") + ". " +
|
||||
"Relative paths are NOT allowed."
|
||||
))
|
||||
.addProperty("content", JsonSchema.string(
|
||||
"The content to write to the file. Will completely replace existing content if file exists."
|
||||
))
|
||||
.required("file_path", "content");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String validateToolParams(WriteFileParams params) {
|
||||
String baseValidation = super.validateToolParams(params);
|
||||
if (baseValidation != null) {
|
||||
return baseValidation;
|
||||
}
|
||||
|
||||
// 验证路径
|
||||
if (params.filePath == null || params.filePath.trim().isEmpty()) {
|
||||
return "File path cannot be empty";
|
||||
}
|
||||
|
||||
if (params.content == null) {
|
||||
return "Content cannot be null";
|
||||
}
|
||||
|
||||
Path filePath = Paths.get(params.filePath);
|
||||
|
||||
// 验证是否为绝对路径
|
||||
if (!filePath.isAbsolute()) {
|
||||
return "File path must be absolute: " + params.filePath;
|
||||
}
|
||||
|
||||
// 验证是否在工作目录内
|
||||
if (!isWithinWorkspace(filePath)) {
|
||||
return "File path must be within the workspace directory (" + rootDirectory + "): " + params.filePath;
|
||||
}
|
||||
|
||||
// 验证文件扩展名
|
||||
String fileName = filePath.getFileName().toString();
|
||||
if (!isAllowedFileType(fileName)) {
|
||||
return "File type not allowed: " + fileName +
|
||||
". Allowed extensions: " + appProperties.getWorkspace().getAllowedExtensions();
|
||||
}
|
||||
|
||||
// 验证内容大小
|
||||
byte[] contentBytes = params.content.getBytes(StandardCharsets.UTF_8);
|
||||
if (contentBytes.length > appProperties.getWorkspace().getMaxFileSize()) {
|
||||
return "Content too large: " + contentBytes.length + " bytes. Maximum allowed: " +
|
||||
appProperties.getWorkspace().getMaxFileSize() + " bytes";
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public CompletableFuture<ToolConfirmationDetails> shouldConfirmExecute(WriteFileParams params) {
|
||||
// 根据配置决定是否需要确认
|
||||
if (appProperties.getSecurity().getApprovalMode() == AppProperties.ApprovalMode.AUTO_EDIT ||
|
||||
appProperties.getSecurity().getApprovalMode() == AppProperties.ApprovalMode.YOLO) {
|
||||
return CompletableFuture.completedFuture(null);
|
||||
}
|
||||
|
||||
return CompletableFuture.supplyAsync(() -> {
|
||||
try {
|
||||
Path filePath = Paths.get(params.filePath);
|
||||
String currentContent = "";
|
||||
boolean isNewFile = !Files.exists(filePath);
|
||||
|
||||
if (!isNewFile) {
|
||||
currentContent = Files.readString(filePath, StandardCharsets.UTF_8);
|
||||
}
|
||||
|
||||
// 生成差异显示
|
||||
String diff = generateDiff(
|
||||
filePath.getFileName().toString(),
|
||||
currentContent,
|
||||
params.content
|
||||
);
|
||||
|
||||
String title = isNewFile ?
|
||||
"Confirm Create: " + getRelativePath(filePath) :
|
||||
"Confirm Write: " + getRelativePath(filePath);
|
||||
|
||||
return ToolConfirmationDetails.edit(title, filePath.getFileName().toString(), diff);
|
||||
|
||||
} catch (IOException e) {
|
||||
logger.warn("Could not read existing file for diff: " + params.filePath, e);
|
||||
return null; // 如果无法读取文件,直接执行
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Write file tool method for Spring AI integration
|
||||
*/
|
||||
@Tool(name = "write_file", description = "Creates a new file or overwrites an existing file with the specified content")
|
||||
public String writeFile(String filePath, String content) {
|
||||
long callId = executionLogger.logToolStart("write_file", "写入文件内容",
|
||||
String.format("文件路径=%s, 内容长度=%d字符", filePath, content != null ? content.length() : 0));
|
||||
long startTime = System.currentTimeMillis();
|
||||
|
||||
try {
|
||||
WriteFileParams params = new WriteFileParams();
|
||||
params.setFilePath(filePath);
|
||||
params.setContent(content);
|
||||
|
||||
executionLogger.logToolStep(callId, "write_file", "参数验证", "验证文件路径和内容");
|
||||
|
||||
// Validate parameters
|
||||
String validation = validateToolParams(params);
|
||||
if (validation != null) {
|
||||
long executionTime = System.currentTimeMillis() - startTime;
|
||||
executionLogger.logToolError(callId, "write_file", "参数验证失败: " + validation, executionTime);
|
||||
return "Error: " + validation;
|
||||
}
|
||||
|
||||
executionLogger.logFileOperation(callId, "写入文件", filePath,
|
||||
String.format("内容长度: %d字符", content != null ? content.length() : 0));
|
||||
|
||||
// Execute the tool
|
||||
ToolResult result = execute(params).join();
|
||||
|
||||
long executionTime = System.currentTimeMillis() - startTime;
|
||||
|
||||
if (result.isSuccess()) {
|
||||
executionLogger.logToolSuccess(callId, "write_file", "文件写入成功", executionTime);
|
||||
return result.getLlmContent();
|
||||
} else {
|
||||
executionLogger.logToolError(callId, "write_file", result.getErrorMessage(), executionTime);
|
||||
return "Error: " + result.getErrorMessage();
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
long executionTime = System.currentTimeMillis() - startTime;
|
||||
executionLogger.logToolError(callId, "write_file", "工具执行异常: " + e.getMessage(), executionTime);
|
||||
logger.error("Error in write file tool", e);
|
||||
return "Error: " + e.getMessage();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public CompletableFuture<ToolResult> execute(WriteFileParams params) {
|
||||
return CompletableFuture.supplyAsync(() -> {
|
||||
try {
|
||||
Path filePath = Paths.get(params.filePath);
|
||||
boolean isNewFile = !Files.exists(filePath);
|
||||
String originalContent = "";
|
||||
|
||||
// 读取原始内容(用于备份和差异显示)
|
||||
if (!isNewFile) {
|
||||
originalContent = Files.readString(filePath, StandardCharsets.UTF_8);
|
||||
}
|
||||
|
||||
// 创建备份(如果启用)
|
||||
if (!isNewFile && shouldCreateBackup()) {
|
||||
createBackup(filePath, originalContent);
|
||||
}
|
||||
|
||||
// 确保父目录存在
|
||||
Files.createDirectories(filePath.getParent());
|
||||
|
||||
// 写入文件
|
||||
Files.writeString(filePath, params.content, StandardCharsets.UTF_8,
|
||||
StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
|
||||
|
||||
// 生成结果
|
||||
String relativePath = getRelativePath(filePath);
|
||||
long lineCount = params.content.lines().count();
|
||||
long byteCount = params.content.getBytes(StandardCharsets.UTF_8).length;
|
||||
|
||||
if (isNewFile) {
|
||||
String successMessage = String.format("Successfully created file: %s (%d lines, %d bytes)",
|
||||
params.filePath, lineCount, byteCount);
|
||||
String displayMessage = String.format("Created %s (%d lines)", relativePath, lineCount);
|
||||
return ToolResult.success(successMessage, displayMessage);
|
||||
} else {
|
||||
String diff = generateDiff(filePath.getFileName().toString(), originalContent, params.content);
|
||||
String successMessage = String.format("Successfully wrote to file: %s (%d lines, %d bytes)",
|
||||
params.filePath, lineCount, byteCount);
|
||||
return ToolResult.success(successMessage, new FileDiff(diff, filePath.getFileName().toString()));
|
||||
}
|
||||
|
||||
} catch (IOException e) {
|
||||
logger.error("Error writing file: " + params.filePath, e);
|
||||
return ToolResult.error("Error writing file: " + e.getMessage());
|
||||
} catch (Exception e) {
|
||||
logger.error("Unexpected error writing file: " + params.filePath, e);
|
||||
return ToolResult.error("Unexpected error: " + e.getMessage());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private String generateDiff(String fileName, String oldContent, String newContent) {
|
||||
try {
|
||||
List<String> oldLines = Arrays.asList(oldContent.split("\n"));
|
||||
List<String> newLines = Arrays.asList(newContent.split("\n"));
|
||||
|
||||
Patch<String> patch = DiffUtils.diff(oldLines, newLines);
|
||||
List<String> unifiedDiff = UnifiedDiffUtils.generateUnifiedDiff(
|
||||
fileName + " (Original)",
|
||||
fileName + " (New)",
|
||||
oldLines,
|
||||
patch,
|
||||
3 // context lines
|
||||
);
|
||||
|
||||
return String.join("\n", unifiedDiff);
|
||||
} catch (Exception e) {
|
||||
logger.warn("Could not generate diff", e);
|
||||
return "Diff generation failed: " + e.getMessage();
|
||||
}
|
||||
}
|
||||
|
||||
private void createBackup(Path filePath, String content) throws IOException {
|
||||
String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss"));
|
||||
String backupFileName = filePath.getFileName().toString() + ".backup." + timestamp;
|
||||
Path backupPath = filePath.getParent().resolve(backupFileName);
|
||||
|
||||
Files.writeString(backupPath, content, StandardCharsets.UTF_8);
|
||||
logger.info("Created backup: {}", backupPath);
|
||||
}
|
||||
|
||||
private boolean shouldCreateBackup() {
|
||||
// 可以从配置中读取,这里简化为总是创建备份
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean isWithinWorkspace(Path filePath) {
|
||||
try {
|
||||
Path workspaceRoot = Paths.get(rootDirectory).toRealPath();
|
||||
Path normalizedPath = filePath.normalize();
|
||||
return normalizedPath.startsWith(workspaceRoot.normalize());
|
||||
} catch (IOException e) {
|
||||
logger.warn("Could not resolve workspace path", e);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isAllowedFileType(String fileName) {
|
||||
List<String> allowedExtensions = appProperties.getWorkspace().getAllowedExtensions();
|
||||
return allowedExtensions.stream()
|
||||
.anyMatch(ext -> fileName.toLowerCase().endsWith(ext.toLowerCase()));
|
||||
}
|
||||
|
||||
private String getRelativePath(Path filePath) {
|
||||
try {
|
||||
Path workspaceRoot = Paths.get(rootDirectory);
|
||||
return workspaceRoot.relativize(filePath).toString();
|
||||
} catch (Exception e) {
|
||||
return filePath.toString();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 写入文件参数
|
||||
*/
|
||||
public static class WriteFileParams {
|
||||
@JsonProperty("file_path")
|
||||
private String filePath;
|
||||
|
||||
private String content;
|
||||
|
||||
// 构造器
|
||||
public WriteFileParams() {
|
||||
}
|
||||
|
||||
public WriteFileParams(String filePath, String content) {
|
||||
this.filePath = filePath;
|
||||
this.content = content;
|
||||
}
|
||||
|
||||
// Getters and Setters
|
||||
public String getFilePath() {
|
||||
return filePath;
|
||||
}
|
||||
|
||||
public void setFilePath(String filePath) {
|
||||
this.filePath = filePath;
|
||||
}
|
||||
|
||||
public String getContent() {
|
||||
return content;
|
||||
}
|
||||
|
||||
public void setContent(String content) {
|
||||
this.content = content;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("WriteFileParams{path='%s', contentLength=%d}",
|
||||
filePath, content != null ? content.length() : 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.example.demo.util;
|
||||
package com.example.demo.utils;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
@@ -0,0 +1,201 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
@@ -0,0 +1,31 @@
|
||||
# ai-tutor-skill
|
||||
|
||||
A Claude skill for explaining complex AI and ML concepts in accessible, plain English. This skill transforms abstract technical ideas into clear explanations using structured narrative frameworks, making it ideal for teaching and learning technical topics.
|
||||
|
||||
Resources:
|
||||
- [YouTube Explainer](https://youtu.be/vEvytl7wrGM)
|
||||
|
||||
## Requirements
|
||||
|
||||
- **Python**: >= 3.12
|
||||
- **Package Manager**: [uv](https://github.com/astral-sh/uv)
|
||||
- **Dependencies**: youtube-transcript-api (installed automatically)
|
||||
|
||||
## Setup
|
||||
|
||||
1. Clone the repository:
|
||||
```bash
|
||||
git clone <repository-url>
|
||||
cd ai-tutor-skill
|
||||
```
|
||||
|
||||
2. Install dependencies:
|
||||
```bash
|
||||
uv sync
|
||||
```
|
||||
|
||||
## Important Note: YouTube Transcript Limitations
|
||||
|
||||
> **The YouTube transcript functionality only works when Claude Code is running locally.**
|
||||
>
|
||||
> YouTube blocks requests from Claude's servers, so transcript extraction will fail when using Claude Code in cloud/remote mode. To use this feature, ensure you're running Claude Code on your local machine.
|
||||
@@ -0,0 +1,131 @@
|
||||
---
|
||||
name: ai-tutor
|
||||
description: Use when user asks to explain, break down, or help understand technical concepts (AI, ML, or other technical topics). Makes complex ideas accessible through plain English and narrative structure. Use the provided scripts to transcribe videos
|
||||
---
|
||||
|
||||
# AI Tutor
|
||||
|
||||
Transform complex technical concepts into clear, accessible explanations using narrative storytelling frameworks.
|
||||
|
||||
## Before Responding: Think Hard
|
||||
|
||||
Before crafting your explanation:
|
||||
|
||||
1. **Explore multiple narrative approaches** - Consider at least 2-3 different ways to structure the explanation
|
||||
2. **Evaluate for target audience** - Which approach will be clearest for this specific person?
|
||||
3. **Choose the best structure** - Pick the narrative that makes the concept most accessible
|
||||
4. **Plan your examples** - Identify concrete, specific examples before writing
|
||||
|
||||
Take time to think through these options. A well-chosen structure is more valuable than a quick response.
|
||||
|
||||
**If concept is unfamiliar or requires research:** Load `research_methodology.md` for detailed guidance.
|
||||
**If user provides YouTube video:** Call `uv run scripts/get_youtube_transcript.py <video_url_or_id>` for video's transcript.
|
||||
|
||||
## Core Teaching Framework
|
||||
|
||||
Use one of three narrative structures:
|
||||
|
||||
### Status Quo → Problem → Solution
|
||||
1. **Status Quo**: Describe the existing situation or baseline approach
|
||||
2. **Problem**: Explain what's broken, inefficient, or limiting
|
||||
3. **Solution**: Show how the concept solves the problem
|
||||
|
||||
This is the primary go-to structure.
|
||||
|
||||
### What → Why → How
|
||||
1. **What**: Define the concept in simple terms (what it is)
|
||||
2. **Why**: Explain the motivation and importance (why it matters)
|
||||
3. **How**: Break down the mechanics (how it works)
|
||||
|
||||
### What → So What → What Now
|
||||
1. **What**: State the situation or finding
|
||||
2. **So What**: Explain the implications or impact
|
||||
3. **What Now**: Describe next steps or actions
|
||||
|
||||
Use for business contexts and practical applications.
|
||||
|
||||
## Teaching Principles
|
||||
|
||||
### Plain English First
|
||||
Replace technical jargon with clear, direct explanations of the core concept.
|
||||
|
||||
**Example:**
|
||||
- ❌ "The gradient descent algorithm optimizes the loss function via backpropagation"
|
||||
- ✅ "Gradient descent is a way to find the model parameters that make the best predictions based on real-world data"
|
||||
|
||||
Plain English means explaining the concept directly without jargon—not just using analogies.
|
||||
|
||||
### Concrete Examples Ground Abstract Ideas
|
||||
Always provide at least one concrete example with specific details, numbers, or real instances.
|
||||
|
||||
**Example:**
|
||||
- Abstract: "Features are things we use to make predictions"
|
||||
- Concrete: "For our customer churn model, features include age of account and number of logins in the past 90 days"
|
||||
|
||||
### Use Analogies Judiciously
|
||||
Analogies map the unfamiliar to the familiar, but use them sparingly and strategically—not as the primary explanation method.
|
||||
|
||||
**When to use:**
|
||||
- After explaining the concept in plain English
|
||||
- When the technical concept has a strong parallel to everyday experience
|
||||
- To create memorable mental models
|
||||
|
||||
Avoid over-relying on analogies. Start with direct, plain English explanations.
|
||||
|
||||
### Progressive Complexity
|
||||
- Start with the intuition and big picture
|
||||
- Add details layer by layer
|
||||
- Use concrete examples before abstractions
|
||||
- Build from familiar to unfamiliar
|
||||
|
||||
### Less is More
|
||||
Attention and mental effort are finite. Be economical with your audience's cognitive resources.
|
||||
- Cut unnecessary fluff
|
||||
- Every word should earn its place
|
||||
- Focus attention on key information
|
||||
|
||||
### Use Numbered Lists Strategically
|
||||
Numbers help navigate information and make it more digestible (e.g., "3 ways to fine-tune", "System 1 and System 2").
|
||||
|
||||
### Know Thy Audience
|
||||
Adjust technical depth, terminology, and focus based on who you're talking to.
|
||||
|
||||
**C-Suite / Business Leaders:**
|
||||
- Use high-level terms (e.g., "AI")
|
||||
- Focus on what and why, emphasize business impact
|
||||
- Keep it high-level, skip implementation details
|
||||
|
||||
**BI Analysts / Technical Adjacent:**
|
||||
- Use more specific terms (e.g., "LLM")
|
||||
- Cover what and why with more technical context
|
||||
- Discuss workflow relevance, include moderate technical details
|
||||
|
||||
**Data Scientists / Technical Peers:**
|
||||
- Use precise terminology (e.g., "Llama 3 8B")
|
||||
- Cover what, why, AND how
|
||||
- Dive into technical details, discuss specific implementation
|
||||
- Still emphasize business impact (everyone wants to know why)
|
||||
|
||||
**If audience level is unclear:** Assume the lowest level of understanding and explain accordingly. Don't ask the user to clarify—just start with fundamentals. You can always go deeper if they ask for more detail.
|
||||
|
||||
## Response Style
|
||||
|
||||
- Start with the big picture before diving into details
|
||||
- Use conversational, friendly tone
|
||||
- Offer to explain subsections in more depth
|
||||
- Use bullet points sparingly—prefer flowing narrative prose
|
||||
- Include concrete examples with specific details
|
||||
- Connect concepts to real-world applications
|
||||
- Be economical with words—every sentence should add value
|
||||
|
||||
## Workflow Summary
|
||||
|
||||
1. **Think hard**: Explore 2-3 narrative structures, choose the clearest for the audience
|
||||
2. **Identify audience**: Assess knowledge level (if unclear, assume beginner level)
|
||||
3. **Check if research needed**:
|
||||
- Can you explain this with your existing knowledge? → Proceed to step 4
|
||||
- Unfamiliar/cutting-edge topic? → Load `research_methodology.md` first
|
||||
4. **Craft explanation**: Plain English first, no jargon
|
||||
5. **Add concrete example**: Specific details, numbers, real instances
|
||||
6. **Optional analogy**: Only if it adds value beyond direct explanation
|
||||
7. **Offer to dive deeper**: Invite questions on specific aspects
|
||||
@@ -0,0 +1,9 @@
|
||||
[project]
|
||||
name = "ai-tutor"
|
||||
version = "0.1.0"
|
||||
description = "AI tutor skill with YouTube transcript extraction for research"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.12"
|
||||
dependencies = [
|
||||
"youtube-transcript-api>=1.2.3",
|
||||
]
|
||||
@@ -0,0 +1,218 @@
|
||||
# Research Methodology for Explaining Unfamiliar Concepts
|
||||
|
||||
Use this guide when you encounter concepts outside your reliable knowledge or when explaining cutting-edge developments.
|
||||
|
||||
## When to Research
|
||||
|
||||
**Always research when:**
|
||||
- Concept is unfamiliar or outside your training data
|
||||
- Topic involves developments after early 2025
|
||||
- User references specific papers, articles, or sources
|
||||
- Explaining cutting-edge techniques or recent breakthroughs
|
||||
- You're uncertain about technical accuracy
|
||||
- User asks "what's new" or "recent developments"
|
||||
|
||||
**Don't research when:**
|
||||
- Explaining well-established, fundamental concepts (e.g., gradient descent, neural networks)
|
||||
- You have high confidence in your knowledge
|
||||
- Topic is clearly pre-2025 and stable
|
||||
|
||||
## Research Strategy
|
||||
|
||||
### 1. Start with Broad Context (web_search)
|
||||
|
||||
**Effective search queries:**
|
||||
- For new concepts: `"{concept name}" explained tutorial`
|
||||
- For recent developments: `"{concept}" 2024 2025 latest`
|
||||
- For comparisons: `"{concept A}" vs "{concept B}" differences`
|
||||
- For practical use: `"{concept}" real world applications examples`
|
||||
|
||||
**Evaluate search results:**
|
||||
- Prioritize: Official documentation, academic institutions, reputable tech blogs
|
||||
- Look for: Recent dates, author credentials, technical depth
|
||||
- Avoid: Marketing content, SEO spam, unsourced claims
|
||||
|
||||
**Extract from results:**
|
||||
- Core definition in plain language
|
||||
- Key motivations (what problem it solves)
|
||||
- Main components or mechanisms
|
||||
- Concrete examples or applications
|
||||
- Common misconceptions
|
||||
|
||||
### 2. Deep Dive on Best Sources (web_fetch)
|
||||
|
||||
**When to fetch full content:**
|
||||
- Found a particularly clear explanation
|
||||
- Need technical details for accuracy
|
||||
- Source is academic paper or official documentation
|
||||
- Initial search didn't provide sufficient depth
|
||||
|
||||
**What to extract from full articles:**
|
||||
- The author's own plain English summary (often in intro/conclusion)
|
||||
- Concrete examples with specific numbers or data
|
||||
- Diagrams or visual explanations (note what they show)
|
||||
- Comparison to previous/alternative approaches
|
||||
- Practical applications or use cases
|
||||
|
||||
**Reading academic papers:**
|
||||
- Start with abstract and conclusion
|
||||
- Look for "In this paper, we..." statements for plain English summary
|
||||
- Check "Related Work" section to understand context
|
||||
- Extract key innovation/contribution in one sentence
|
||||
- Find any "intuition" or "motivation" sections
|
||||
|
||||
### 3. Synthesize Multiple Sources
|
||||
|
||||
**When sources agree:**
|
||||
- Use the clearest explanation as your base
|
||||
- Incorporate best concrete examples from various sources
|
||||
- Combine different perspectives for completeness
|
||||
|
||||
**When sources conflict:**
|
||||
- Identify what they disagree about
|
||||
- Look for authoritative sources (original papers, official docs)
|
||||
- Note the disagreement in your explanation if significant
|
||||
- Don't hide uncertainty - acknowledge different perspectives
|
||||
|
||||
**Red flags to watch for:**
|
||||
- Single source makes claims not found elsewhere
|
||||
- Marketing language disguised as technical explanation
|
||||
- Overly simplified analogies that mislead
|
||||
- Cherry-picked benchmarks or examples
|
||||
|
||||
### 4. Extract from YouTube Videos
|
||||
|
||||
**When to use YouTube transcripts:**
|
||||
- User directly references a video
|
||||
- Video is from reputable educator/researcher
|
||||
- Need concrete examples from tutorial content
|
||||
- Want to see how concept is explained to learners
|
||||
|
||||
**Extracting from transcripts:**
|
||||
```bash
|
||||
# Basic usage - returns full transcript
|
||||
uv run scripts/get_youtube_transcript.py <video_url_or_id>
|
||||
|
||||
# With timestamps for reference
|
||||
uv run scripts/get_youtube_transcript.py <video_url_or_id> --timestamps
|
||||
|
||||
# Supports multiple URL formats
|
||||
uv run scripts/get_youtube_transcript.py dQw4w9WgXcQ
|
||||
uv run scripts/get_youtube_transcript.py https://www.youtube.com/watch?v=dQw4w9WgXcQ
|
||||
uv run scripts/get_youtube_transcript.py https://youtu.be/dQw4w9WgXcQ
|
||||
```
|
||||
|
||||
**What to look for in transcripts:**
|
||||
- The educator's own analogies (often well-tested)
|
||||
- Concrete examples with walkthroughs
|
||||
- Common student questions addressed
|
||||
- Simpler explanations before technical ones
|
||||
- Visual descriptions ("as you can see in this diagram...")
|
||||
|
||||
**Transcript limitations:**
|
||||
- May include verbal fillers and repetition
|
||||
- Missing visual context (slides, diagrams)
|
||||
- Informal language may need translation to written form
|
||||
|
||||
## Source Quality Hierarchy
|
||||
|
||||
**Tier 1 (Highest Trust):**
|
||||
- Original research papers from reputable venues
|
||||
- Official documentation from source (e.g., OpenAI docs for GPT)
|
||||
- University course materials
|
||||
- Books from established publishers
|
||||
|
||||
**Tier 2 (High Trust):**
|
||||
- Technical blogs from recognized experts
|
||||
- Conference presentations and talks
|
||||
- Reputable tech news sites (with technical depth)
|
||||
- Well-maintained wikis with citations
|
||||
|
||||
**Tier 3 (Use with Caution):**
|
||||
- Medium articles (verify author credentials)
|
||||
- Stack Overflow (good for practical issues, not concepts)
|
||||
- Reddit discussions (good for perspectives, not authority)
|
||||
- Tutorial sites (verify accuracy against Tier 1/2 sources)
|
||||
|
||||
**Tier 4 (Avoid):**
|
||||
- Marketing materials posing as education
|
||||
- Uncited claims
|
||||
- Sensationalized headlines
|
||||
- Anonymous sources without verifiable expertise
|
||||
|
||||
## Handling Uncertainty
|
||||
|
||||
**When research reveals gaps:**
|
||||
- Be explicit: "Based on the sources I found..."
|
||||
- Explain what you learned and what remains unclear
|
||||
- Offer to research specific aspects more deeply
|
||||
- Don't fill gaps with speculation
|
||||
|
||||
**When sources are insufficient:**
|
||||
- State what you know with confidence
|
||||
- Acknowledge limitations: "The available sources don't provide clear information on..."
|
||||
- Suggest where user might find more detail
|
||||
- Offer to continue researching if user wants
|
||||
|
||||
**When completely unfamiliar:**
|
||||
- Don't hide it: "This is a cutting-edge concept I need to research"
|
||||
- Do thorough research before explaining
|
||||
- Synthesize from multiple high-quality sources
|
||||
- Be clear about confidence level in your explanation
|
||||
|
||||
## Common Research Mistakes to Avoid
|
||||
|
||||
❌ **Relying on single source** - Always cross-reference
|
||||
❌ **Using first search result** - Evaluate multiple sources
|
||||
❌ **Ignoring publication date** - Recent developments need recent sources
|
||||
❌ **Accepting marketing claims** - Verify with technical sources
|
||||
❌ **Skipping paper abstracts** - Authors' own summaries are gold
|
||||
❌ **Over-trusting tutorials** - Verify technical accuracy
|
||||
❌ **Hiding uncertainty** - Better to acknowledge gaps
|
||||
|
||||
## Research Workflow Example
|
||||
|
||||
**User asks:** "Explain mixture of experts in LLMs"
|
||||
|
||||
**Step 1 - Quick assessment:**
|
||||
- Topic: Recent development (2023-2024)
|
||||
- Confidence: Medium (know concept but not latest implementations)
|
||||
- Decision: Research needed
|
||||
|
||||
**Step 2 - Broad search:**
|
||||
```
|
||||
web_search: "mixture of experts LLMs 2024 explained"
|
||||
```
|
||||
- Find: Mixtral announcement, technical blog posts, comparisons
|
||||
- Note: Different from traditional MoE in NLP
|
||||
- Extract: Core idea, recent models using it, key benefits
|
||||
|
||||
**Step 3 - Deep dive:**
|
||||
```
|
||||
web_fetch: [Best technical blog or paper URL]
|
||||
```
|
||||
- Extract: Technical details, architecture specifics
|
||||
- Find: Concrete comparison (Mixtral 8x7B vs GPT-3.5)
|
||||
- Note: Load balancing, routing mechanisms
|
||||
|
||||
**Step 4 - Synthesize:**
|
||||
- Core concept: Sparse activation of expert networks
|
||||
- Problem it solves: Scaling without proportional compute increase
|
||||
- How it works: Router selects subset of experts per token
|
||||
- Example: Mixtral uses 8 experts, activates 2 per token
|
||||
- Result: 47B parameters, 13B active per token
|
||||
|
||||
**Step 5 - Explain:**
|
||||
Use Status Quo → Problem → Solution structure with researched content.
|
||||
|
||||
## Integration with Teaching Principles
|
||||
|
||||
After research, apply teaching framework:
|
||||
|
||||
1. **Choose narrative structure** based on concept nature
|
||||
2. **Plain English first** - use clearest definition found
|
||||
3. **Concrete examples** - use specific instances from research
|
||||
4. **Strategic analogies** - adopt effective ones from sources
|
||||
5. **Cite implicitly** - "Recent research shows..." not "According to source X..."
|
||||
|
||||
Research informs content; teaching principles guide delivery.
|
||||
@@ -0,0 +1,116 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
YouTube Transcript Extractor
|
||||
|
||||
Extracts transcripts from YouTube videos using video IDs or URLs.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import re
|
||||
from youtube_transcript_api import YouTubeTranscriptApi
|
||||
from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound
|
||||
|
||||
|
||||
def extract_video_id(url_or_id):
    """
    Extract a YouTube video ID from a URL, or return it if already a bare ID.

    Supports:
    - https://www.youtube.com/watch?v=VIDEO_ID (v= in any query position)
    - https://youtu.be/VIDEO_ID
    - https://www.youtube.com/embed/VIDEO_ID
    - https://www.youtube.com/shorts/VIDEO_ID
    - VIDEO_ID (direct 11-character ID)

    Args:
        url_or_id: A YouTube URL in one of the supported formats, or a video ID.

    Returns:
        The 11-character video ID string, or None if none could be extracted.
    """
    # Video IDs are exactly 11 characters drawn from [A-Za-z0-9_-].
    patterns = [
        # watch URLs; allow v= anywhere in the query string, not only first
        r'(?:youtube\.com\/watch\?(?:[^#\s]*&)?v=)([a-zA-Z0-9_-]{11})',
        r'(?:youtu\.be\/)([a-zA-Z0-9_-]{11})',           # short links
        r'(?:youtube\.com\/embed\/)([a-zA-Z0-9_-]{11})',   # embed player URLs
        r'(?:youtube\.com\/shorts\/)([a-zA-Z0-9_-]{11})',  # shorts URLs
        r'^([a-zA-Z0-9_-]{11})$',                        # bare video ID
    ]

    for pattern in patterns:
        match = re.search(pattern, url_or_id)
        if match:
            return match.group(1)

    return None
|
||||
|
||||
|
||||
def get_transcript(video_id, language='en'):
    """
    Retrieve the transcript for a YouTube video.

    Args:
        video_id: YouTube video ID.
        language: Preferred transcript language code (default: 'en').

    Returns:
        The fetched transcript: a sequence of entries exposing
        'text', 'start', and 'duration'.

    Raises:
        Exception: if transcripts are disabled for the video, no transcript
            exists in the requested language, or the fetch fails for any
            other reason.
    """
    try:
        # Library-specific errors are translated into plain Exceptions with
        # human-readable messages; the catch-all wraps anything else.
        return YouTubeTranscriptApi().fetch(video_id, languages=[language])
    except TranscriptsDisabled:
        raise Exception(f"Transcripts are disabled for video: {video_id}")
    except NoTranscriptFound:
        raise Exception(f"No transcript found for video: {video_id}")
    except Exception as e:
        raise Exception(f"Error fetching transcript: {str(e)}")
|
||||
|
||||
|
||||
def format_transcript(transcript, include_timestamps=False):
    """
    Format transcript entries into readable text.

    Args:
        transcript: Iterable of transcript entries exposing 'text' and 'start'.
        include_timestamps: When True, prefix each line with [M:SS]; otherwise
            join all entry texts into a single space-separated string.

    Returns:
        Formatted transcript text.
    """
    if not include_timestamps:
        # Plain mode: one flat string of all entry texts.
        return ' '.join(entry.text for entry in transcript)

    lines = []
    for entry in transcript:
        # entry.start is seconds from video start; render as M:SS.
        minutes, seconds = divmod(int(entry.start), 60)
        lines.append(f"[{minutes}:{seconds:02d}] {entry.text}")
    return '\n'.join(lines)
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse argv, resolve the video ID, fetch and print the transcript."""
    if len(sys.argv) < 2:
        # No argument given: show usage and bail out.
        print("Usage: python get_youtube_transcript.py <video_url_or_id> [--timestamps]")
        print("\nExamples:")
        print("  uv run get_youtube_transcript.py dQw4w9WgXcQ")
        print("  uv run get_youtube_transcript.py https://www.youtube.com/watch?v=dQw4w9WgXcQ")
        print("  uv run get_youtube_transcript.py dQw4w9WgXcQ --timestamps")
        sys.exit(1)

    target = sys.argv[1]
    with_timestamps = '--timestamps' in sys.argv

    # Normalize URL or bare ID to an 11-character video ID.
    video_id = extract_video_id(target)
    if not video_id:
        print(f"Error: Could not extract video ID from: {target}")
        sys.exit(1)

    try:
        # Fetch, format, and emit in one pass; errors go to stderr.
        print(format_transcript(get_transcript(video_id), with_timestamps))
    except Exception as e:
        print(f"Error: {str(e)}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
135
ruoyi-extend/ruoyi-ai-copilot/src/main/resources/.claude/skills/ai-tutor/uv.lock
generated
Normal file
135
ruoyi-extend/ruoyi-ai-copilot/src/main/resources/.claude/skills/ai-tutor/uv.lock
generated
Normal file
@@ -0,0 +1,135 @@
|
||||
version = 1
|
||||
revision = 3
|
||||
requires-python = ">=3.12"
|
||||
|
||||
[[package]]
|
||||
name = "ai-tutor"
|
||||
version = "0.1.0"
|
||||
source = { virtual = "." }
|
||||
dependencies = [
|
||||
{ name = "youtube-transcript-api" },
|
||||
]
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [{ name = "youtube-transcript-api", specifier = ">=1.2.3" }]
|
||||
|
||||
[[package]]
|
||||
name = "certifi"
|
||||
version = "2025.11.12"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/a2/8c/58f469717fa48465e4a50c014a0400602d3c437d7c0c468e17ada824da3a/certifi-2025.11.12.tar.gz", hash = "sha256:d8ab5478f2ecd78af242878415affce761ca6bc54a22a27e026d7c25357c3316", size = 160538, upload-time = "2025-11-12T02:54:51.517Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/70/7d/9bc192684cea499815ff478dfcdc13835ddf401365057044fb721ec6bddb/certifi-2025.11.12-py3-none-any.whl", hash = "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b", size = 159438, upload-time = "2025-11-12T02:54:49.735Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "charset-normalizer"
|
||||
version = "3.4.4"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418, upload-time = "2025-10-14T04:42:32.879Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425, upload-time = "2025-10-14T04:40:53.353Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162, upload-time = "2025-10-14T04:40:54.558Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558, upload-time = "2025-10-14T04:40:55.677Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497, upload-time = "2025-10-14T04:40:57.217Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240, upload-time = "2025-10-14T04:40:58.358Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471, upload-time = "2025-10-14T04:40:59.468Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864, upload-time = "2025-10-14T04:41:00.623Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647, upload-time = "2025-10-14T04:41:01.754Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110, upload-time = "2025-10-14T04:41:03.231Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839, upload-time = "2025-10-14T04:41:04.715Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667, upload-time = "2025-10-14T04:41:05.827Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535, upload-time = "2025-10-14T04:41:06.938Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816, upload-time = "2025-10-14T04:41:08.101Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694, upload-time = "2025-10-14T04:41:09.23Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131, upload-time = "2025-10-14T04:41:10.467Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390, upload-time = "2025-10-14T04:41:11.915Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091, upload-time = "2025-10-14T04:41:13.346Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936, upload-time = "2025-10-14T04:41:14.461Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180, upload-time = "2025-10-14T04:41:15.588Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346, upload-time = "2025-10-14T04:41:16.738Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874, upload-time = "2025-10-14T04:41:17.923Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076, upload-time = "2025-10-14T04:41:19.106Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601, upload-time = "2025-10-14T04:41:20.245Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376, upload-time = "2025-10-14T04:41:21.398Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825, upload-time = "2025-10-14T04:41:22.583Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583, upload-time = "2025-10-14T04:41:23.754Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366, upload-time = "2025-10-14T04:41:25.27Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300, upload-time = "2025-10-14T04:41:26.725Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465, upload-time = "2025-10-14T04:41:28.322Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/89/66/c7a9e1b7429be72123441bfdbaf2bc13faab3f90b933f664db506dea5915/charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", size = 99404, upload-time = "2025-10-14T04:41:29.95Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c4/26/b9924fa27db384bdcd97ab83b4f0a8058d96ad9626ead570674d5e737d90/charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", size = 107092, upload-time = "2025-10-14T04:41:31.188Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/af/8f/3ed4bfa0c0c72a7ca17f0380cd9e4dd842b09f664e780c13cff1dcf2ef1b/charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", size = 100408, upload-time = "2025-10-14T04:41:32.624Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746, upload-time = "2025-10-14T04:41:33.773Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889, upload-time = "2025-10-14T04:41:34.897Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641, upload-time = "2025-10-14T04:41:36.116Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779, upload-time = "2025-10-14T04:41:37.229Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035, upload-time = "2025-10-14T04:41:38.368Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542, upload-time = "2025-10-14T04:41:39.862Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524, upload-time = "2025-10-14T04:41:41.319Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395, upload-time = "2025-10-14T04:41:42.539Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680, upload-time = "2025-10-14T04:41:43.661Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045, upload-time = "2025-10-14T04:41:44.821Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687, upload-time = "2025-10-14T04:41:46.442Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014, upload-time = "2025-10-14T04:41:47.631Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044, upload-time = "2025-10-14T04:41:48.81Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b0/6f/8f7af07237c34a1defe7defc565a9bc1807762f672c0fde711a4b22bf9c0/charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", size = 99940, upload-time = "2025-10-14T04:41:49.946Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/4b/51/8ade005e5ca5b0d80fb4aff72a3775b325bdc3d27408c8113811a7cbe640/charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", size = 107104, upload-time = "2025-10-14T04:41:51.051Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/da/5f/6b8f83a55bb8278772c5ae54a577f3099025f9ade59d0136ac24a0df4bde/charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", size = 100743, upload-time = "2025-10-14T04:41:52.122Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "defusedxml"
|
||||
version = "0.7.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520, upload-time = "2021-03-08T10:59:26.269Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "idna"
|
||||
version = "3.11"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "requests"
|
||||
version = "2.32.5"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "certifi" },
|
||||
{ name = "charset-normalizer" },
|
||||
{ name = "idna" },
|
||||
{ name = "urllib3" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "urllib3"
|
||||
version = "2.6.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/1e/24/a2a2ed9addd907787d7aa0355ba36a6cadf1768b934c652ea78acbd59dcd/urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797", size = 432930, upload-time = "2025-12-11T15:56:40.252Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/6d/b9/4095b668ea3678bf6a0af005527f39de12fb026516fb3df17495a733b7f8/urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd", size = 131182, upload-time = "2025-12-11T15:56:38.584Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "youtube-transcript-api"
|
||||
version = "1.2.3"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "defusedxml" },
|
||||
{ name = "requests" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/87/03/68c69b2d3e282d45cb3c07e5836a9146ff9574cde720570ffc7eb124e56b/youtube_transcript_api-1.2.3.tar.gz", hash = "sha256:76016b71b410b124892c74df24b07b052702cf3c53afb300d0a2c547c0b71b68", size = 469757, upload-time = "2025-10-13T15:57:17.532Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/ef/75/a861661b73d862e323c12af96ecfb237fb4d1433e551183d4172d39d5275/youtube_transcript_api-1.2.3-py3-none-any.whl", hash = "sha256:0c1b32ea5e739f9efde8c42e3d43e67df475185af6f820109607577b83768375", size = 485140, upload-time = "2025-10-13T15:57:16.034Z" },
|
||||
]
|
||||
@@ -0,0 +1,30 @@
|
||||
© 2025 Anthropic, PBC. All rights reserved.
|
||||
|
||||
LICENSE: Use of these materials (including all code, prompts, assets, files,
|
||||
and other components of this Skill) is governed by your agreement with
|
||||
Anthropic regarding use of Anthropic's services. If no separate agreement
|
||||
exists, use is governed by Anthropic's Consumer Terms of Service or
|
||||
Commercial Terms of Service, as applicable:
|
||||
https://www.anthropic.com/legal/consumer-terms
|
||||
https://www.anthropic.com/legal/commercial-terms
|
||||
Your applicable agreement is referred to as the "Agreement." "Services" are
|
||||
as defined in the Agreement.
|
||||
|
||||
ADDITIONAL RESTRICTIONS: Notwithstanding anything in the Agreement to the
|
||||
contrary, users may not:
|
||||
|
||||
- Extract these materials from the Services or retain copies of these
|
||||
materials outside the Services
|
||||
- Reproduce or copy these materials, except for temporary copies created
|
||||
automatically during authorized use of the Services
|
||||
- Create derivative works based on these materials
|
||||
- Distribute, sublicense, or transfer these materials to any third party
|
||||
- Make, offer to sell, sell, or import any inventions embodied in these
|
||||
materials
|
||||
- Reverse engineer, decompile, or disassemble these materials
|
||||
|
||||
The receipt, viewing, or possession of these materials does not convey or
|
||||
imply any license or right beyond those expressly granted above.
|
||||
|
||||
Anthropic retains all right, title, and interest in these materials,
|
||||
including all copyrights, patents, and other intellectual property rights.
|
||||
@@ -0,0 +1,294 @@
|
||||
---
|
||||
name: pdf
|
||||
description: Comprehensive PDF manipulation toolkit for extracting text and tables, creating new PDFs, merging/splitting documents, and handling forms. When Claude needs to fill in a PDF form or programmatically process, generate, or analyze PDF documents at scale.
|
||||
license: Proprietary. LICENSE.txt has complete terms
|
||||
---
|
||||
|
||||
# PDF Processing Guide
|
||||
|
||||
## Overview
|
||||
|
||||
This guide covers essential PDF processing operations using Python libraries and command-line tools. For advanced features, JavaScript libraries, and detailed examples, see reference.md. If you need to fill out a PDF form, read forms.md and follow its instructions.
|
||||
|
||||
## Quick Start
|
||||
|
||||
```python
|
||||
from pypdf import PdfReader, PdfWriter
|
||||
|
||||
# Read a PDF
|
||||
reader = PdfReader("document.pdf")
|
||||
print(f"Pages: {len(reader.pages)}")
|
||||
|
||||
# Extract text
|
||||
text = ""
|
||||
for page in reader.pages:
|
||||
text += page.extract_text()
|
||||
```
|
||||
|
||||
## Python Libraries
|
||||
|
||||
### pypdf - Basic Operations
|
||||
|
||||
#### Merge PDFs
|
||||
```python
|
||||
from pypdf import PdfWriter, PdfReader
|
||||
|
||||
writer = PdfWriter()
|
||||
for pdf_file in ["doc1.pdf", "doc2.pdf", "doc3.pdf"]:
|
||||
reader = PdfReader(pdf_file)
|
||||
for page in reader.pages:
|
||||
writer.add_page(page)
|
||||
|
||||
with open("merged.pdf", "wb") as output:
|
||||
writer.write(output)
|
||||
```
|
||||
|
||||
#### Split PDF
|
||||
```python
|
||||
reader = PdfReader("input.pdf")
|
||||
for i, page in enumerate(reader.pages):
|
||||
writer = PdfWriter()
|
||||
writer.add_page(page)
|
||||
with open(f"page_{i+1}.pdf", "wb") as output:
|
||||
writer.write(output)
|
||||
```
|
||||
|
||||
#### Extract Metadata
|
||||
```python
|
||||
reader = PdfReader("document.pdf")
|
||||
meta = reader.metadata
|
||||
print(f"Title: {meta.title}")
|
||||
print(f"Author: {meta.author}")
|
||||
print(f"Subject: {meta.subject}")
|
||||
print(f"Creator: {meta.creator}")
|
||||
```
|
||||
|
||||
#### Rotate Pages
|
||||
```python
|
||||
reader = PdfReader("input.pdf")
|
||||
writer = PdfWriter()
|
||||
|
||||
page = reader.pages[0]
|
||||
page.rotate(90) # Rotate 90 degrees clockwise
|
||||
writer.add_page(page)
|
||||
|
||||
with open("rotated.pdf", "wb") as output:
|
||||
writer.write(output)
|
||||
```
|
||||
|
||||
### pdfplumber - Text and Table Extraction
|
||||
|
||||
#### Extract Text with Layout
|
||||
```python
|
||||
import pdfplumber
|
||||
|
||||
with pdfplumber.open("document.pdf") as pdf:
|
||||
for page in pdf.pages:
|
||||
text = page.extract_text()
|
||||
print(text)
|
||||
```
|
||||
|
||||
#### Extract Tables
|
||||
```python
|
||||
with pdfplumber.open("document.pdf") as pdf:
|
||||
for i, page in enumerate(pdf.pages):
|
||||
tables = page.extract_tables()
|
||||
for j, table in enumerate(tables):
|
||||
print(f"Table {j+1} on page {i+1}:")
|
||||
for row in table:
|
||||
print(row)
|
||||
```
|
||||
|
||||
#### Advanced Table Extraction
|
||||
```python
|
||||
import pandas as pd
|
||||
|
||||
with pdfplumber.open("document.pdf") as pdf:
|
||||
all_tables = []
|
||||
for page in pdf.pages:
|
||||
tables = page.extract_tables()
|
||||
for table in tables:
|
||||
if table: # Check if table is not empty
|
||||
df = pd.DataFrame(table[1:], columns=table[0])
|
||||
all_tables.append(df)
|
||||
|
||||
# Combine all tables
|
||||
if all_tables:
|
||||
combined_df = pd.concat(all_tables, ignore_index=True)
|
||||
combined_df.to_excel("extracted_tables.xlsx", index=False)
|
||||
```
|
||||
|
||||
### reportlab - Create PDFs
|
||||
|
||||
#### Basic PDF Creation
|
||||
```python
|
||||
from reportlab.lib.pagesizes import letter
|
||||
from reportlab.pdfgen import canvas
|
||||
|
||||
c = canvas.Canvas("hello.pdf", pagesize=letter)
|
||||
width, height = letter
|
||||
|
||||
# Add text
|
||||
c.drawString(100, height - 100, "Hello World!")
|
||||
c.drawString(100, height - 120, "This is a PDF created with reportlab")
|
||||
|
||||
# Add a line
|
||||
c.line(100, height - 140, 400, height - 140)
|
||||
|
||||
# Save
|
||||
c.save()
|
||||
```
|
||||
|
||||
#### Create PDF with Multiple Pages
|
||||
```python
|
||||
from reportlab.lib.pagesizes import letter
|
||||
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
|
||||
from reportlab.lib.styles import getSampleStyleSheet
|
||||
|
||||
doc = SimpleDocTemplate("report.pdf", pagesize=letter)
|
||||
styles = getSampleStyleSheet()
|
||||
story = []
|
||||
|
||||
# Add content
|
||||
title = Paragraph("Report Title", styles['Title'])
|
||||
story.append(title)
|
||||
story.append(Spacer(1, 12))
|
||||
|
||||
body = Paragraph("This is the body of the report. " * 20, styles['Normal'])
|
||||
story.append(body)
|
||||
story.append(PageBreak())
|
||||
|
||||
# Page 2
|
||||
story.append(Paragraph("Page 2", styles['Heading1']))
|
||||
story.append(Paragraph("Content for page 2", styles['Normal']))
|
||||
|
||||
# Build PDF
|
||||
doc.build(story)
|
||||
```
|
||||
|
||||
## Command-Line Tools
|
||||
|
||||
### pdftotext (poppler-utils)
|
||||
```bash
|
||||
# Extract text
|
||||
pdftotext input.pdf output.txt
|
||||
|
||||
# Extract text preserving layout
|
||||
pdftotext -layout input.pdf output.txt
|
||||
|
||||
# Extract specific pages
|
||||
pdftotext -f 1 -l 5 input.pdf output.txt # Pages 1-5
|
||||
```
|
||||
|
||||
### qpdf
|
||||
```bash
|
||||
# Merge PDFs
|
||||
qpdf --empty --pages file1.pdf file2.pdf -- merged.pdf
|
||||
|
||||
# Split pages
|
||||
qpdf input.pdf --pages . 1-5 -- pages1-5.pdf
|
||||
qpdf input.pdf --pages . 6-10 -- pages6-10.pdf
|
||||
|
||||
# Rotate pages
|
||||
qpdf input.pdf output.pdf --rotate=+90:1 # Rotate page 1 by 90 degrees
|
||||
|
||||
# Remove password
|
||||
qpdf --password=mypassword --decrypt encrypted.pdf decrypted.pdf
|
||||
```
|
||||
|
||||
### pdftk (if available)
|
||||
```bash
|
||||
# Merge
|
||||
pdftk file1.pdf file2.pdf cat output merged.pdf
|
||||
|
||||
# Split
|
||||
pdftk input.pdf burst
|
||||
|
||||
# Rotate
|
||||
pdftk input.pdf rotate 1east output rotated.pdf
|
||||
```
|
||||
|
||||
## Common Tasks
|
||||
|
||||
### Extract Text from Scanned PDFs
|
||||
```python
|
||||
# Requires: pip install pytesseract pdf2image
|
||||
import pytesseract
|
||||
from pdf2image import convert_from_path
|
||||
|
||||
# Convert PDF to images
|
||||
images = convert_from_path('scanned.pdf')
|
||||
|
||||
# OCR each page
|
||||
text = ""
|
||||
for i, image in enumerate(images):
|
||||
text += f"Page {i+1}:\n"
|
||||
text += pytesseract.image_to_string(image)
|
||||
text += "\n\n"
|
||||
|
||||
print(text)
|
||||
```
|
||||
|
||||
### Add Watermark
|
||||
```python
|
||||
from pypdf import PdfReader, PdfWriter
|
||||
|
||||
# Create watermark (or load existing)
|
||||
watermark = PdfReader("watermark.pdf").pages[0]
|
||||
|
||||
# Apply to all pages
|
||||
reader = PdfReader("document.pdf")
|
||||
writer = PdfWriter()
|
||||
|
||||
for page in reader.pages:
|
||||
page.merge_page(watermark)
|
||||
writer.add_page(page)
|
||||
|
||||
with open("watermarked.pdf", "wb") as output:
|
||||
writer.write(output)
|
||||
```
|
||||
|
||||
### Extract Images
|
||||
```bash
|
||||
# Using pdfimages (poppler-utils)
|
||||
pdfimages -j input.pdf output_prefix
|
||||
|
||||
# This extracts all images as output_prefix-000.jpg, output_prefix-001.jpg, etc.
|
||||
```
|
||||
|
||||
### Password Protection
|
||||
```python
|
||||
from pypdf import PdfReader, PdfWriter
|
||||
|
||||
reader = PdfReader("input.pdf")
|
||||
writer = PdfWriter()
|
||||
|
||||
for page in reader.pages:
|
||||
writer.add_page(page)
|
||||
|
||||
# Add password
|
||||
writer.encrypt("userpassword", "ownerpassword")
|
||||
|
||||
with open("encrypted.pdf", "wb") as output:
|
||||
writer.write(output)
|
||||
```
|
||||
|
||||
## Quick Reference
|
||||
|
||||
| Task | Best Tool | Command/Code |
|
||||
|------|-----------|--------------|
|
||||
| Merge PDFs | pypdf | `writer.add_page(page)` |
|
||||
| Split PDFs | pypdf | One page per file |
|
||||
| Extract text | pdfplumber | `page.extract_text()` |
|
||||
| Extract tables | pdfplumber | `page.extract_tables()` |
|
||||
| Create PDFs | reportlab | Canvas or Platypus |
|
||||
| Command line merge | qpdf | `qpdf --empty --pages ...` |
|
||||
| OCR scanned PDFs | pytesseract | Convert to image first |
|
||||
| Fill PDF forms | pdf-lib or pypdf (see forms.md) | See forms.md |
|
||||
|
||||
## Next Steps
|
||||
|
||||
- For advanced pypdfium2 usage, see reference.md
|
||||
- For JavaScript libraries (pdf-lib), see reference.md
|
||||
- If you need to fill out a PDF form, follow the instructions in forms.md
|
||||
- For troubleshooting guides, see reference.md
|
||||
@@ -0,0 +1,205 @@
|
||||
**CRITICAL: You MUST complete these steps in order. Do not skip ahead to writing code.**
|
||||
|
||||
If you need to fill out a PDF form, first check to see if the PDF has fillable form fields. Run this script from this file's directory:
|
||||
`python scripts/check_fillable_fields.py <file.pdf>`, and depending on the result go to either the "Fillable fields" or "Non-fillable fields" section and follow its instructions.
|
||||
|
||||
# Fillable fields
|
||||
If the PDF has fillable form fields:
|
||||
- Run this script from this file's directory: `python scripts/extract_form_field_info.py <input.pdf> <field_info.json>`. It will create a JSON file with a list of fields in this format:
|
||||
```
|
||||
[
|
||||
{
|
||||
"field_id": (unique ID for the field),
|
||||
"page": (page number, 1-based),
|
||||
"rect": ([left, bottom, right, top] bounding box in PDF coordinates, y=0 is the bottom of the page),
|
||||
"type": ("text", "checkbox", "radio_group", or "choice"),
|
||||
},
|
||||
// Checkboxes have "checked_value" and "unchecked_value" properties:
|
||||
{
|
||||
"field_id": (unique ID for the field),
|
||||
"page": (page number, 1-based),
|
||||
"type": "checkbox",
|
||||
"checked_value": (Set the field to this value to check the checkbox),
|
||||
"unchecked_value": (Set the field to this value to uncheck the checkbox),
|
||||
},
|
||||
// Radio groups have a "radio_options" list with the possible choices.
|
||||
{
|
||||
"field_id": (unique ID for the field),
|
||||
"page": (page number, 1-based),
|
||||
"type": "radio_group",
|
||||
"radio_options": [
|
||||
{
|
||||
"value": (set the field to this value to select this radio option),
|
||||
"rect": (bounding box for the radio button for this option)
|
||||
},
|
||||
// Other radio options
|
||||
]
|
||||
},
|
||||
// Multiple choice fields have a "choice_options" list with the possible choices:
|
||||
{
|
||||
"field_id": (unique ID for the field),
|
||||
"page": (page number, 1-based),
|
||||
"type": "choice",
|
||||
"choice_options": [
|
||||
{
|
||||
"value": (set the field to this value to select this option),
|
||||
"text": (display text of the option)
|
||||
},
|
||||
// Other choice options
|
||||
],
|
||||
}
|
||||
]
|
||||
```
|
||||
- Convert the PDF to PNGs (one image for each page) with this script (run from this file's directory):
|
||||
`python scripts/convert_pdf_to_images.py <file.pdf> <output_directory>`
|
||||
Then analyze the images to determine the purpose of each form field (make sure to convert the bounding box PDF coordinates to image coordinates).
|
||||
- Create a `field_values.json` file in this format with the values to be entered for each field:
|
||||
```
|
||||
[
|
||||
{
|
||||
"field_id": "last_name", // Must match the field_id from `extract_form_field_info.py`
|
||||
"description": "The user's last name",
|
||||
"page": 1, // Must match the "page" value in field_info.json
|
||||
"value": "Simpson"
|
||||
},
|
||||
{
|
||||
"field_id": "Checkbox12",
|
||||
"description": "Checkbox to be checked if the user is 18 or over",
|
||||
"page": 1,
|
||||
"value": "/On" // If this is a checkbox, use its "checked_value" value to check it. If it's a radio button group, use one of the "value" values in "radio_options".
|
||||
},
|
||||
// more fields
|
||||
]
|
||||
```
|
||||
- Run the `fill_fillable_fields.py` script from this file's directory to create a filled-in PDF:
|
||||
`python scripts/fill_fillable_fields.py <input pdf> <field_values.json> <output pdf>`
|
||||
This script will verify that the field IDs and values you provide are valid; if it prints error messages, correct the appropriate fields and try again.
|
||||
|
||||
# Non-fillable fields
|
||||
If the PDF doesn't have fillable form fields, you'll need to visually determine where the data should be added and create text annotations. Follow the below steps *exactly*. You MUST perform all of these steps to ensure that the form is accurately completed. Details for each step are below.
|
||||
- Convert the PDF to PNG images and determine field bounding boxes.
|
||||
- Create a JSON file with field information and validation images showing the bounding boxes.
|
||||
- Validate the bounding boxes.
|
||||
- Use the bounding boxes to fill in the form.
|
||||
|
||||
## Step 1: Visual Analysis (REQUIRED)
|
||||
- Convert the PDF to PNG images. Run this script from this file's directory:
|
||||
`python scripts/convert_pdf_to_images.py <file.pdf> <output_directory>`
|
||||
The script will create a PNG image for each page in the PDF.
|
||||
- Carefully examine each PNG image and identify all form fields and areas where the user should enter data. For each form field where the user should enter text, determine bounding boxes for both the form field label, and the area where the user should enter text. The label and entry bounding boxes MUST NOT INTERSECT; the text entry box should only include the area where data should be entered. Usually this area will be immediately to the side, above, or below its label. Entry bounding boxes must be tall and wide enough to contain their text.
|
||||
|
||||
These are some examples of form structures that you might see:
|
||||
|
||||
*Label inside box*
|
||||
```
|
||||
┌────────────────────────┐
|
||||
│ Name: │
|
||||
└────────────────────────┘
|
||||
```
|
||||
The input area should be to the right of the "Name" label and extend to the edge of the box.
|
||||
|
||||
*Label before line*
|
||||
```
|
||||
Email: _______________________
|
||||
```
|
||||
The input area should be above the line and include its entire width.
|
||||
|
||||
*Label under line*
|
||||
```
|
||||
_________________________
|
||||
Name
|
||||
```
|
||||
The input area should be above the line and include the entire width of the line. This is common for signature and date fields.
|
||||
|
||||
*Label above line*
|
||||
```
|
||||
Please enter any special requests:
|
||||
________________________________________________
|
||||
```
|
||||
The input area should extend from the bottom of the label to the line, and should include the entire width of the line.
|
||||
|
||||
*Checkboxes*
|
||||
```
|
||||
Are you a US citizen? Yes □ No □
|
||||
```
|
||||
For checkboxes:
|
||||
- Look for small square boxes (□) - these are the actual checkboxes to target. They may be to the left or right of their labels.
|
||||
- Distinguish between label text ("Yes", "No") and the clickable checkbox squares.
|
||||
- The entry bounding box should cover ONLY the small square, not the text label.
|
||||
|
||||
### Step 2: Create fields.json and validation images (REQUIRED)
|
||||
- Create a file named `fields.json` with information for the form fields and bounding boxes in this format:
|
||||
```
|
||||
{
|
||||
"pages": [
|
||||
{
|
||||
"page_number": 1,
|
||||
"image_width": (first page image width in pixels),
|
||||
"image_height": (first page image height in pixels),
|
||||
},
|
||||
{
|
||||
"page_number": 2,
|
||||
"image_width": (second page image width in pixels),
|
||||
"image_height": (second page image height in pixels),
|
||||
}
|
||||
// additional pages
|
||||
],
|
||||
"form_fields": [
|
||||
// Example for a text field.
|
||||
{
|
||||
"page_number": 1,
|
||||
"description": "The user's last name should be entered here",
|
||||
// Bounding boxes are [left, top, right, bottom]. The bounding boxes for the label and text entry should not overlap.
|
||||
"field_label": "Last name",
|
||||
"label_bounding_box": [30, 125, 95, 142],
|
||||
"entry_bounding_box": [100, 125, 280, 142],
|
||||
"entry_text": {
|
||||
"text": "Johnson", // This text will be added as an annotation at the entry_bounding_box location
|
||||
"font_size": 14, // optional, defaults to 14
|
||||
"font_color": "000000", // optional, RRGGBB format, defaults to 000000 (black)
|
||||
}
|
||||
},
|
||||
// Example for a checkbox. TARGET THE SQUARE for the entry bounding box, NOT THE TEXT
|
||||
{
|
||||
"page_number": 2,
|
||||
"description": "Checkbox that should be checked if the user is over 18",
|
||||
"entry_bounding_box": [140, 525, 155, 540], // Small box over checkbox square
|
||||
"field_label": "Yes",
|
||||
"label_bounding_box": [100, 525, 132, 540], // Box containing "Yes" text
|
||||
// Use "X" to check a checkbox.
|
||||
"entry_text": {
|
||||
"text": "X",
|
||||
}
|
||||
}
|
||||
// additional form field entries
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Create validation images by running this script from this file's directory for each page:
|
||||
`python scripts/create_validation_image.py <page_number> <path_to_fields.json> <input_image_path> <output_image_path>`
|
||||
|
||||
The validation images will have red rectangles where text should be entered, and blue rectangles covering label text.
|
||||
|
||||
### Step 3: Validate Bounding Boxes (REQUIRED)
|
||||
#### Automated intersection check
|
||||
- Verify that none of the bounding boxes intersect and that the entry bounding boxes are tall enough by checking the fields.json file with the `check_bounding_boxes.py` script (run from this file's directory):
|
||||
`python scripts/check_bounding_boxes.py <JSON file>`
|
||||
|
||||
If there are errors, reanalyze the relevant fields, adjust the bounding boxes, and iterate until there are no remaining errors. Remember: label (blue) bounding boxes should contain text labels, entry (red) boxes should not.
|
||||
|
||||
#### Manual image inspection
|
||||
**CRITICAL: Do not proceed without visually inspecting validation images**
|
||||
- Red rectangles must ONLY cover input areas
|
||||
- Red rectangles MUST NOT contain any text
|
||||
- Blue rectangles should contain label text
|
||||
- For checkboxes:
|
||||
- Red rectangle MUST be centered on the checkbox square
|
||||
- Blue rectangle should cover the text label for the checkbox
|
||||
|
||||
- If any rectangles look wrong, fix fields.json, regenerate the validation images, and verify again. Repeat this process until the bounding boxes are fully accurate.
|
||||
|
||||
|
||||
### Step 4: Add annotations to the PDF
|
||||
Run this script from this file's directory to create a filled-out PDF using the information in fields.json:
|
||||
`python scripts/fill_pdf_form_with_annotations.py <input_pdf_path> <path_to_fields.json> <output_pdf_path>`
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,70 @@
|
||||
from dataclasses import dataclass
|
||||
import json
|
||||
import sys
|
||||
|
||||
|
||||
# Script to check that the `fields.json` file that Claude creates when analyzing PDFs
|
||||
# does not have overlapping bounding boxes. See forms.md.
|
||||
|
||||
|
||||
@dataclass
class RectAndField:
    # Pairs a bounding box with the form field it belongs to, plus whether it
    # is the field's "label" box or its "entry" (text input) box.
    rect: list[float]
    rect_type: str
    field: dict


# Returns a list of messages that are printed to stdout for Claude to read.
def get_bounding_box_messages(fields_json_stream) -> list[str]:
    """Validate the bounding boxes in a fields.json stream (see forms.md).

    Checks that no two boxes on the same page intersect and that every entry
    box is tall enough for its configured font size. Returns human-readable
    status messages, aborting early once roughly 20 have accumulated.
    """
    messages = []
    fields = json.load(fields_json_stream)
    messages.append(f"Read {len(fields['form_fields'])} fields")

    def overlaps(a, b):
        # Axis-aligned [left, top, right, bottom] boxes intersect unless they
        # are disjoint along at least one axis; shared edges do not count.
        if a[0] >= b[2] or a[2] <= b[0]:
            return False
        if a[1] >= b[3] or a[3] <= b[1]:
            return False
        return True

    boxes = []
    for field in fields["form_fields"]:
        boxes.append(RectAndField(field["label_bounding_box"], "label", field))
        boxes.append(RectAndField(field["entry_bounding_box"], "entry", field))

    has_error = False
    for i, current in enumerate(boxes):
        # This is O(N^2); we can optimize if it becomes a problem.
        for other in boxes[i + 1:]:
            if current.field["page_number"] != other.field["page_number"]:
                continue
            if not overlaps(current.rect, other.rect):
                continue
            has_error = True
            if current.field is other.field:
                messages.append(f"FAILURE: intersection between label and entry bounding boxes for `{current.field['description']}` ({current.rect}, {other.rect})")
            else:
                messages.append(f"FAILURE: intersection between {current.rect_type} bounding box for `{current.field['description']}` ({current.rect}) and {other.rect_type} bounding box for `{other.field['description']}` ({other.rect})")
            if len(messages) >= 20:
                messages.append("Aborting further checks; fix bounding boxes and try again")
                return messages
        # Height check applies only to entry boxes that will receive text.
        if current.rect_type == "entry" and "entry_text" in current.field:
            font_size = current.field["entry_text"].get("font_size", 14)
            entry_height = current.rect[3] - current.rect[1]
            if entry_height < font_size:
                has_error = True
                messages.append(f"FAILURE: entry bounding box height ({entry_height}) for `{current.field['description']}` is too short for the text content (font size: {font_size}). Increase the box height or decrease the font size.")
                if len(messages) >= 20:
                    messages.append("Aborting further checks; fix bounding boxes and try again")
                    return messages

    if not has_error:
        messages.append("SUCCESS: All bounding boxes are valid")
    return messages
|
||||
|
||||
if __name__ == "__main__":
    # Require exactly one argument: the path to a fields.json file in the
    # format described in forms.md.
    if len(sys.argv) != 2:
        print("Usage: check_bounding_boxes.py [fields.json]")
        sys.exit(1)
    with open(sys.argv[1]) as f:
        for msg in get_bounding_box_messages(f):
            print(msg)
|
||||
@@ -0,0 +1,226 @@
|
||||
import unittest
|
||||
import json
|
||||
import io
|
||||
from check_bounding_boxes import get_bounding_box_messages
|
||||
|
||||
|
||||
# Currently this is not run automatically in CI; it's just for documentation and manual checking.
|
||||
class TestGetBoundingBoxMessages(unittest.TestCase):
    """Unit tests for get_bounding_box_messages (check_bounding_boxes.py)."""

    def create_json_stream(self, data):
        """Helper to create a JSON stream from data"""
        return io.StringIO(json.dumps(data))

    def _check(self, form_fields):
        # Runs the checker over a list of form-field dicts and returns its messages.
        stream = self.create_json_stream({"form_fields": form_fields})
        return get_bounding_box_messages(stream)

    def _assert_valid(self, messages):
        # Expects a SUCCESS message and no FAILURE messages.
        self.assertTrue(any("SUCCESS" in msg for msg in messages))
        self.assertFalse(any("FAILURE" in msg for msg in messages))

    def _assert_invalid(self, messages, keyword):
        # Expects at least one FAILURE message mentioning `keyword`, and no SUCCESS.
        self.assertTrue(any("FAILURE" in msg and keyword in msg for msg in messages))
        self.assertFalse(any("SUCCESS" in msg for msg in messages))

    def test_no_intersections(self):
        """Test case with no bounding box intersections"""
        messages = self._check([
            {"description": "Name", "page_number": 1,
             "label_bounding_box": [10, 10, 50, 30],
             "entry_bounding_box": [60, 10, 150, 30]},
            {"description": "Email", "page_number": 1,
             "label_bounding_box": [10, 40, 50, 60],
             "entry_bounding_box": [60, 40, 150, 60]},
        ])
        self.assertTrue(any("SUCCESS" in msg for msg in messages))
        self.assertFalse(any("FAILURE" in msg for msg in messages))

    def test_label_entry_intersection_same_field(self):
        """Test intersection between label and entry of the same field"""
        messages = self._check([
            {"description": "Name", "page_number": 1,
             "label_bounding_box": [10, 10, 60, 30],
             "entry_bounding_box": [50, 10, 150, 30]},  # Overlaps with label
        ])
        self._assert_invalid(messages, "intersection")

    def test_intersection_between_different_fields(self):
        """Test intersection between bounding boxes of different fields"""
        messages = self._check([
            {"description": "Name", "page_number": 1,
             "label_bounding_box": [10, 10, 50, 30],
             "entry_bounding_box": [60, 10, 150, 30]},
            {"description": "Email", "page_number": 1,
             "label_bounding_box": [40, 20, 80, 40],  # Overlaps with Name's boxes
             "entry_bounding_box": [160, 10, 250, 30]},
        ])
        self._assert_invalid(messages, "intersection")

    def test_different_pages_no_intersection(self):
        """Test that boxes on different pages don't count as intersecting"""
        messages = self._check([
            {"description": "Name", "page_number": 1,
             "label_bounding_box": [10, 10, 50, 30],
             "entry_bounding_box": [60, 10, 150, 30]},
            {"description": "Email", "page_number": 2,
             # Same coordinates but different page
             "label_bounding_box": [10, 10, 50, 30],
             "entry_bounding_box": [60, 10, 150, 30]},
        ])
        self._assert_valid(messages)

    def test_entry_height_too_small(self):
        """Test that entry box height is checked against font size"""
        messages = self._check([
            {"description": "Name", "page_number": 1,
             "label_bounding_box": [10, 10, 50, 30],
             "entry_bounding_box": [60, 10, 150, 20],  # Height is 10
             "entry_text": {"font_size": 14}},  # Font size larger than height
        ])
        self._assert_invalid(messages, "height")

    def test_entry_height_adequate(self):
        """Test that adequate entry box height passes"""
        messages = self._check([
            {"description": "Name", "page_number": 1,
             "label_bounding_box": [10, 10, 50, 30],
             "entry_bounding_box": [60, 10, 150, 30],  # Height is 20
             "entry_text": {"font_size": 14}},  # Font size smaller than height
        ])
        self._assert_valid(messages)

    def test_default_font_size(self):
        """Test that default font size is used when not specified"""
        messages = self._check([
            {"description": "Name", "page_number": 1,
             "label_bounding_box": [10, 10, 50, 30],
             "entry_bounding_box": [60, 10, 150, 20],  # Height is 10
             "entry_text": {}},  # No font_size specified, should use default 14
        ])
        self._assert_invalid(messages, "height")

    def test_no_entry_text(self):
        """Test that missing entry_text doesn't cause height check"""
        messages = self._check([
            {"description": "Name", "page_number": 1,
             "label_bounding_box": [10, 10, 50, 30],
             "entry_bounding_box": [60, 10, 150, 20]},  # Small height but no entry_text
        ])
        self._assert_valid(messages)

    def test_multiple_errors_limit(self):
        """Test that error messages are limited to prevent excessive output"""
        # Create many overlapping fields
        fields = [
            {"description": f"Field{i}", "page_number": 1,
             "label_bounding_box": [10, 10, 50, 30],  # All overlap
             "entry_bounding_box": [20, 15, 60, 35]}  # All overlap
            for i in range(25)
        ]
        messages = self._check(fields)
        # Should abort after ~20 messages
        self.assertTrue(any("Aborting" in msg for msg in messages))
        # Should have some FAILURE messages but not hundreds
        failure_count = sum(1 for msg in messages if "FAILURE" in msg)
        self.assertGreater(failure_count, 0)
        self.assertLess(len(messages), 30)  # Should be limited

    def test_edge_touching_boxes(self):
        """Test that boxes touching at edges don't count as intersecting"""
        messages = self._check([
            {"description": "Name", "page_number": 1,
             "label_bounding_box": [10, 10, 50, 30],
             "entry_bounding_box": [50, 10, 150, 30]},  # Touches at x=50
        ])
        self._assert_valid(messages)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
@@ -0,0 +1,12 @@
|
||||
import sys

from pypdf import PdfReader


# Script for Claude to run to determine whether a PDF has fillable form fields. See forms.md.

# Note: get_fields() returns None or an empty mapping when no AcroForm fields exist.
has_fields = bool(PdfReader(sys.argv[1]).get_fields())
if has_fields:
    print("This PDF has fillable form fields")
else:
    print("This PDF does not have fillable form fields; you will need to visually determine where to enter data")
|
||||
@@ -0,0 +1,35 @@
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pdf2image import convert_from_path
|
||||
|
||||
|
||||
# Converts each page of a PDF to a PNG image.
|
||||
|
||||
|
||||
def convert(pdf_path, output_dir, max_dim=1000):
    """Convert each page of a PDF to a PNG image in `output_dir`.

    Pages are rendered at 200 DPI and then downscaled (aspect-ratio preserved)
    so neither dimension exceeds `max_dim` pixels.

    Args:
        pdf_path: Path to the input PDF file.
        output_dir: Directory where page_<n>.png files are written.
        max_dim: Maximum width/height in pixels for the saved images.
    """
    # Bug fix: image.save() fails if the output directory does not exist yet.
    os.makedirs(output_dir, exist_ok=True)

    images = convert_from_path(pdf_path, dpi=200)

    for i, image in enumerate(images):
        # Scale image if needed to keep width/height under `max_dim`
        width, height = image.size
        if width > max_dim or height > max_dim:
            scale_factor = min(max_dim / width, max_dim / height)
            new_width = int(width * scale_factor)
            new_height = int(height * scale_factor)
            image = image.resize((new_width, new_height))

        image_path = os.path.join(output_dir, f"page_{i+1}.png")
        image.save(image_path)
        print(f"Saved page {i+1} as {image_path} (size: {image.size})")

    print(f"Converted {len(images)} pages to PNG images")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: convert_pdf_to_images.py [input pdf] [output directory]")
        sys.exit(1)
    # argv[1]: input PDF, argv[2]: directory for the PNG pages.
    convert(sys.argv[1], sys.argv[2])
|
||||
@@ -0,0 +1,41 @@
|
||||
import json
|
||||
import sys
|
||||
|
||||
from PIL import Image, ImageDraw
|
||||
|
||||
|
||||
# Creates "validation" images with rectangles for the bounding box information that
|
||||
# Claude creates when determining where to add text annotations in PDFs. See forms.md.
|
||||
|
||||
|
||||
def create_validation_image(page_number, fields_json_path, input_path, output_path):
    """Draw the bounding boxes for one page onto a copy of the page image.

    Entry boxes are outlined in red and label boxes in blue so they can be
    visually verified against the rendered page.
    """
    # Input file should be in the `fields.json` format described in forms.md.
    with open(fields_json_path, 'r') as f:
        data = json.load(f)

    img = Image.open(input_path)
    draw = ImageDraw.Draw(img)

    num_boxes = 0
    for field in data["form_fields"]:
        if field["page_number"] != page_number:
            continue
        # Draw red rectangle over entry bounding box and blue rectangle over the label.
        draw.rectangle(field['entry_bounding_box'], outline='red', width=2)
        draw.rectangle(field['label_bounding_box'], outline='blue', width=2)
        num_boxes += 2

    img.save(output_path)
    print(f"Created validation image at {output_path} with {num_boxes} bounding boxes")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    if len(sys.argv) != 5:
        print("Usage: create_validation_image.py [page number] [fields.json file] [input image path] [output image path]")
        sys.exit(1)
    # argv: page number (1-based), fields.json path, page image, output image.
    create_validation_image(int(sys.argv[1]), sys.argv[2], sys.argv[3], sys.argv[4])
|
||||
@@ -0,0 +1,152 @@
|
||||
import json
|
||||
import sys
|
||||
|
||||
from pypdf import PdfReader
|
||||
|
||||
|
||||
# Extracts data for the fillable form fields in a PDF and outputs JSON that
|
||||
# Claude uses to fill the fields. See forms.md.
|
||||
|
||||
|
||||
# This matches the format used by PdfReader `get_fields` and `update_page_form_field_values` methods.
|
||||
def get_full_annotation_field_id(annotation):
    """Return the fully qualified field name for a widget annotation.

    Walks the `/Parent` chain collecting each `/T` (partial name) and joins
    them root-first with dots. Returns None when no name component exists.
    This matches the format used by PdfReader `get_fields` and
    `update_page_form_field_values` methods.
    """
    names = []
    node = annotation
    while node:
        partial = node.get('/T')
        if partial:
            names.append(partial)
        node = node.get('/Parent')
    if not names:
        return None
    names.reverse()
    return ".".join(names)
|
||||
|
||||
|
||||
def make_field_dict(field, field_id):
    """Build a JSON-serializable description of one fillable form field.

    Args:
        field: pypdf field dictionary (supports `.get` with PDF keys like /FT).
        field_id: Fully qualified field name.

    Returns:
        A dict with "field_id" and "type" ("text", "checkbox", "choice", or
        "unknown (...)"), plus type-specific keys:
        - checkbox: "checked_value" / "unchecked_value"
        - choice:   "choice_options" -> list of {"value", "text"}
    """
    field_dict = {"field_id": field_id}
    ft = field.get('/FT')
    if ft == "/Tx":
        field_dict["type"] = "text"
    elif ft == "/Btn":
        field_dict["type"] = "checkbox"  # radio groups handled separately
        states = field.get("/_States_", [])
        if len(states) == 2:
            # "/Off" seems to always be the unchecked value, as suggested by
            # https://opensource.adobe.com/dc-acrobat-sdk-docs/standards/pdfstandards/pdf/PDF32000_2008.pdf#page=448
            # It can be either first or second in the "/_States_" list.
            if "/Off" in states:
                field_dict["checked_value"] = states[0] if states[0] != "/Off" else states[1]
                field_dict["unchecked_value"] = "/Off"
            else:
                # Bug fix: the original message used JS-style `${field_id}` inside a
                # Python f-string, printing a stray "$" before the field id.
                print(f"Unexpected state values for checkbox `{field_id}`. Its checked and unchecked values may not be correct; if you're trying to check it, visually verify the results.")
                field_dict["checked_value"] = states[0]
                field_dict["unchecked_value"] = states[1]
    elif ft == "/Ch":
        field_dict["type"] = "choice"
        states = field.get("/_States_", [])
        # Each state is a [value, display text] pair.
        field_dict["choice_options"] = [{
            "value": state[0],
            "text": state[1],
        } for state in states]
    else:
        field_dict["type"] = f"unknown ({ft})"
    return field_dict
|
||||
|
||||
|
||||
# Returns a list of fillable PDF fields:
|
||||
# [
|
||||
# {
|
||||
# "field_id": "name",
|
||||
# "page": 1,
|
||||
# "type": ("text", "checkbox", "radio_group", or "choice")
|
||||
# // Per-type additional fields described in forms.md
|
||||
# },
|
||||
# ]
|
||||
def get_field_info(reader: PdfReader):
    """Collect location and type info for every fillable field in the PDF.

    Returns a list of dicts (see the comment block above this function for
    the schema), sorted by page, then top-to-bottom, then left-to-right.
    Fields whose page/rect cannot be determined are skipped with a warning.
    """
    fields = reader.get_fields()

    # field_id -> dict built by make_field_dict; "page"/"rect" filled in later
    # from the page annotations.
    field_info_by_id = {}
    # Button fields with /Kids may be radio groups; collect their names so the
    # annotation pass below can assemble their options.
    possible_radio_names = set()

    for field_id, field in fields.items():
        # Skip if this is a container field with children, except that it might be
        # a parent group for radio button options.
        if field.get("/Kids"):
            if field.get("/FT") == "/Btn":
                possible_radio_names.add(field_id)
            continue
        field_info_by_id[field_id] = make_field_dict(field, field_id)

    # Bounding rects are stored in annotations in page objects.

    # Radio button options have a separate annotation for each choice;
    # all choices have the same field name.
    # See https://westhealth.github.io/exploring-fillable-forms-with-pdfrw.html
    radio_fields_by_id = {}

    for page_index, page in enumerate(reader.pages):
        annotations = page.get('/Annots', [])
        for ann in annotations:
            field_id = get_full_annotation_field_id(ann)
            if field_id in field_info_by_id:
                # Ordinary field: record its 1-based page and bounding rect.
                field_info_by_id[field_id]["page"] = page_index + 1
                field_info_by_id[field_id]["rect"] = ann.get('/Rect')
            elif field_id in possible_radio_names:
                try:
                    # ann['/AP']['/N'] should have two items. One of them is '/Off',
                    # the other is the active value.
                    on_values = [v for v in ann["/AP"]["/N"] if v != "/Off"]
                except KeyError:
                    continue
                if len(on_values) == 1:
                    rect = ann.get("/Rect")
                    # Lazily create the radio-group entry on first option seen.
                    if field_id not in radio_fields_by_id:
                        radio_fields_by_id[field_id] = {
                            "field_id": field_id,
                            "type": "radio_group",
                            "page": page_index + 1,
                            "radio_options": [],
                        }
                    # Note: at least on macOS 15.7, Preview.app doesn't show selected
                    # radio buttons correctly. (It does if you remove the leading slash
                    # from the value, but that causes them not to appear correctly in
                    # Chrome/Firefox/Acrobat/etc).
                    radio_fields_by_id[field_id]["radio_options"].append({
                        "value": on_values[0],
                        "rect": rect,
                    })

    # Some PDFs have form field definitions without corresponding annotations,
    # so we can't tell where they are. Ignore these fields for now.
    fields_with_location = []
    for field_info in field_info_by_id.values():
        if "page" in field_info:
            fields_with_location.append(field_info)
        else:
            print(f"Unable to determine location for field id: {field_info.get('field_id')}, ignoring")

    # Sort by page number, then Y position (flipped in PDF coordinate system), then X.
    def sort_key(f):
        # Radio groups carry their rect on the first option; other fields on "rect".
        # `or [0, 0, 0, 0]` guards against a missing/None rect.
        if "radio_options" in f:
            rect = f["radio_options"][0]["rect"] or [0, 0, 0, 0]
        else:
            rect = f.get("rect") or [0, 0, 0, 0]
        adjusted_position = [-rect[1], rect[0]]
        return [f.get("page"), adjusted_position]

    sorted_fields = fields_with_location + list(radio_fields_by_id.values())
    sorted_fields.sort(key=sort_key)

    return sorted_fields
|
||||
|
||||
|
||||
def write_field_info(pdf_path: str, json_output_path: str):
    """Extract field info from `pdf_path` and dump it as JSON to `json_output_path`."""
    field_info = get_field_info(PdfReader(pdf_path))
    with open(json_output_path, "w") as out:
        json.dump(field_info, out, indent=2)
    print(f"Wrote {len(field_info)} fields to {json_output_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: extract_form_field_info.py [input pdf] [output json]")
        sys.exit(1)
    # argv[1]: input PDF, argv[2]: output JSON path.
    write_field_info(sys.argv[1], sys.argv[2])
|
||||
@@ -0,0 +1,114 @@
|
||||
import json
|
||||
import sys
|
||||
|
||||
from pypdf import PdfReader, PdfWriter
|
||||
|
||||
from extract_form_field_info import get_field_info
|
||||
|
||||
|
||||
# Fills fillable form fields in a PDF. See forms.md.
|
||||
|
||||
|
||||
def fill_pdf_fields(input_pdf_path: str, fields_json_path: str, output_pdf_path: str):
    """Fill fillable form fields in a PDF using values from a JSON file.

    The JSON file holds a list of field dicts (the format produced by
    extract_form_field_info.py, optionally with a "value" added). All fields
    are validated against the PDF's actual fields first; on any validation
    error the process prints the errors and exits with status 1 without
    writing output.
    """
    with open(fields_json_path) as f:
        fields = json.load(f)
    # Group by page number.
    fields_by_page = {}
    for field in fields:
        # Only fields with an explicit "value" are written.
        if "value" in field:
            field_id = field["field_id"]
            page = field["page"]
            if page not in fields_by_page:
                fields_by_page[page] = {}
            fields_by_page[page][field_id] = field["value"]

    reader = PdfReader(input_pdf_path)

    # Validate every requested field (id, page, and value) before writing
    # anything, so a partially-filled output is never produced.
    has_error = False
    field_info = get_field_info(reader)
    fields_by_ids = {f["field_id"]: f for f in field_info}
    for field in fields:
        existing_field = fields_by_ids.get(field["field_id"])
        if not existing_field:
            has_error = True
            print(f"ERROR: `{field['field_id']}` is not a valid field ID")
        elif field["page"] != existing_field["page"]:
            has_error = True
            print(f"ERROR: Incorrect page number for `{field['field_id']}` (got {field['page']}, expected {existing_field['page']})")
        else:
            if "value" in field:
                err = validation_error_for_field_value(existing_field, field["value"])
                if err:
                    print(err)
                    has_error = True
    if has_error:
        sys.exit(1)

    # Clone the source document and apply the values page by page.
    writer = PdfWriter(clone_from=reader)
    for page, field_values in fields_by_page.items():
        writer.update_page_form_field_values(writer.pages[page - 1], field_values, auto_regenerate=False)

    # This seems to be necessary for many PDF viewers to format the form values correctly.
    # It may cause the viewer to show a "save changes" dialog even if the user doesn't make any changes.
    writer.set_need_appearances_writer(True)

    with open(output_pdf_path, "wb") as f:
        writer.write(f)
|
||||
|
||||
|
||||
def validation_error_for_field_value(field_info, field_value):
    """Check `field_value` against the allowed values for a field.

    Returns an error-message string if the value is invalid for the field's
    type (checkbox, radio_group, choice), otherwise None. Text and unknown
    field types accept any value.
    """
    field_type = field_info["type"]
    field_id = field_info["field_id"]

    if field_type == "checkbox":
        checked_val = field_info["checked_value"]
        unchecked_val = field_info["unchecked_value"]
        if field_value not in (checked_val, unchecked_val):
            return f'ERROR: Invalid value "{field_value}" for checkbox field "{field_id}". The checked value is "{checked_val}" and the unchecked value is "{unchecked_val}"'
        return None

    if field_type == "radio_group":
        option_values = [opt["value"] for opt in field_info["radio_options"]]
        if field_value not in option_values:
            return f'ERROR: Invalid value "{field_value}" for radio group field "{field_id}". Valid values are: {option_values}'
        return None

    if field_type == "choice":
        choice_values = [opt["value"] for opt in field_info["choice_options"]]
        if field_value not in choice_values:
            return f'ERROR: Invalid value "{field_value}" for choice field "{field_id}". Valid values are: {choice_values}'
        return None

    return None
|
||||
|
||||
|
||||
# pypdf (at least version 5.7.0) has a bug when setting the value for a selection list field.
|
||||
# In _writer.py around line 966:
|
||||
#
|
||||
# if field.get(FA.FT, "/Tx") == "/Ch" and field_flags & FA.FfBits.Combo == 0:
|
||||
# txt = "\n".join(annotation.get_inherited(FA.Opt, []))
|
||||
#
|
||||
# The problem is that for selection lists, `get_inherited` returns a list of two-element lists like
|
||||
# [["value1", "Text 1"], ["value2", "Text 2"], ...]
|
||||
# This causes `join` to throw a TypeError because it expects an iterable of strings.
|
||||
# The horrible workaround is to patch `get_inherited` to return a list of the value strings.
|
||||
# We call the original method and adjust the return value only if the argument to `get_inherited`
|
||||
# is `FA.Opt` and if the return value is a list of two-element lists.
|
||||
def monkeypatch_pydpf_method():
    """Patch pypdf's DictionaryObject.get_inherited to work around a selection-list bug.

    See the comment block above for details: for non-combo /Ch fields, pypdf
    joins the inherited /Opt entries with "\n", but /Opt may be a list of
    [value, text] pairs, which makes the join raise TypeError. The patch
    flattens such pairs to their value strings, and only for the /Opt key.
    NOTE(review): function name has a typo ("pydpf"); kept because callers use it.
    """
    from pypdf.generic import DictionaryObject
    from pypdf.constants import FieldDictionaryAttributes

    # Keep a reference so the patched version can delegate to the original.
    original_get_inherited = DictionaryObject.get_inherited

    def patched_get_inherited(self, key: str, default = None):
        result = original_get_inherited(self, key, default)
        if key == FieldDictionaryAttributes.Opt:
            # Only rewrite when the result is exactly a list of 2-element lists,
            # i.e. the selection-list shape that triggers the upstream bug.
            if isinstance(result, list) and all(isinstance(v, list) and len(v) == 2 for v in result):
                result = [r[0] for r in result]
        return result

    DictionaryObject.get_inherited = patched_get_inherited
|
||||
|
||||
|
||||
if __name__ == "__main__":
    if len(sys.argv) != 4:
        print("Usage: fill_fillable_fields.py [input pdf] [field_values.json] [output pdf]")
        sys.exit(1)
    # Apply the pypdf workaround before any form-field writing happens.
    monkeypatch_pydpf_method()
    fill_pdf_fields(sys.argv[1], sys.argv[2], sys.argv[3])
|
||||
@@ -0,0 +1,108 @@
|
||||
import json
|
||||
import sys
|
||||
|
||||
from pypdf import PdfReader, PdfWriter
|
||||
from pypdf.annotations import FreeText
|
||||
|
||||
|
||||
# Fills a PDF by adding text annotations defined in `fields.json`. See forms.md.
|
||||
|
||||
|
||||
def transform_coordinates(bbox, image_width, image_height, pdf_width, pdf_height):
    """Transform a [left, top, right, bottom] box from image to PDF coordinates.

    Image coordinates have the origin at the top-left with y increasing
    downward; PDF coordinates have the origin at the bottom-left with y
    increasing upward. Returns (left, bottom, right, top) in PDF units.
    """
    x_scale = pdf_width / image_width
    y_scale = pdf_height / image_height

    # X just scales; Y scales and is flipped about the page height.
    left, right = bbox[0] * x_scale, bbox[2] * x_scale
    top = pdf_height - bbox[1] * y_scale
    bottom = pdf_height - bbox[3] * y_scale

    return left, bottom, right, top
|
||||
|
||||
|
||||
def fill_pdf_form(input_pdf_path, fields_json_path, output_pdf_path):
    """Fill the PDF form by adding FreeText annotations from fields.json.

    For each field with non-empty entry text, the entry bounding box (given in
    page-image coordinates) is converted to PDF coordinates and a FreeText
    annotation is placed there.
    """

    # `fields.json` format described in forms.md.
    with open(fields_json_path, "r") as f:
        fields_data = json.load(f)

    # Open the PDF
    reader = PdfReader(input_pdf_path)
    writer = PdfWriter()

    # Copy all pages to writer
    writer.append(reader)

    # Get PDF dimensions for each page (keyed by 1-based page number).
    pdf_dimensions = {}
    for i, page in enumerate(reader.pages):
        mediabox = page.mediabox
        pdf_dimensions[i + 1] = [mediabox.width, mediabox.height]

    # Process each form field
    annotations = []
    for field in fields_data["form_fields"]:
        # Bug fix: skip empty fields FIRST. Previously the page-info lookup and
        # coordinate transform ran before this check, wasting work and raising
        # StopIteration for skipped fields whose page had no "pages" entry.
        if "entry_text" not in field or "text" not in field["entry_text"]:
            continue
        entry_text = field["entry_text"]
        text = entry_text["text"]
        if not text:
            continue

        page_num = field["page_number"]

        # Get page dimensions and transform coordinates.
        page_info = next(p for p in fields_data["pages"] if p["page_number"] == page_num)
        image_width = page_info["image_width"]
        image_height = page_info["image_height"]
        pdf_width, pdf_height = pdf_dimensions[page_num]

        transformed_entry_box = transform_coordinates(
            field["entry_bounding_box"],
            image_width, image_height,
            pdf_width, pdf_height
        )

        font_name = entry_text.get("font", "Arial")
        font_size = str(entry_text.get("font_size", 14)) + "pt"
        font_color = entry_text.get("font_color", "000000")

        # Font size/color seems to not work reliably across viewers:
        # https://github.com/py-pdf/pypdf/issues/2084
        annotation = FreeText(
            text=text,
            rect=transformed_entry_box,
            font=font_name,
            font_size=font_size,
            font_color=font_color,
            border_color=None,
            background_color=None,
        )
        annotations.append(annotation)
        # page_number is 0-based for pypdf
        writer.add_annotation(page_number=page_num - 1, annotation=annotation)

    # Save the filled PDF
    with open(output_pdf_path, "wb") as output:
        writer.write(output)

    print(f"Successfully filled PDF form and saved to {output_pdf_path}")
    print(f"Added {len(annotations)} text annotations")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    if len(sys.argv) != 4:
        print("Usage: fill_pdf_form_with_annotations.py [input pdf] [fields.json] [output pdf]")
        sys.exit(1)
    # argv: input PDF, fields.json, output PDF.
    fill_pdf_form(sys.argv[1], sys.argv[2], sys.argv[3])
|
||||
@@ -0,0 +1,30 @@
|
||||
© 2025 Anthropic, PBC. All rights reserved.
|
||||
|
||||
LICENSE: Use of these materials (including all code, prompts, assets, files,
|
||||
and other components of this Skill) is governed by your agreement with
|
||||
Anthropic regarding use of Anthropic's services. If no separate agreement
|
||||
exists, use is governed by Anthropic's Consumer Terms of Service or
|
||||
Commercial Terms of Service, as applicable:
|
||||
https://www.anthropic.com/legal/consumer-terms
|
||||
https://www.anthropic.com/legal/commercial-terms
|
||||
Your applicable agreement is referred to as the "Agreement." "Services" are
|
||||
as defined in the Agreement.
|
||||
|
||||
ADDITIONAL RESTRICTIONS: Notwithstanding anything in the Agreement to the
|
||||
contrary, users may not:
|
||||
|
||||
- Extract these materials from the Services or retain copies of these
|
||||
materials outside the Services
|
||||
- Reproduce or copy these materials, except for temporary copies created
|
||||
automatically during authorized use of the Services
|
||||
- Create derivative works based on these materials
|
||||
- Distribute, sublicense, or transfer these materials to any third party
|
||||
- Make, offer to sell, sell, or import any inventions embodied in these
|
||||
materials
|
||||
- Reverse engineer, decompile, or disassemble these materials
|
||||
|
||||
The receipt, viewing, or possession of these materials does not convey or
|
||||
imply any license or right beyond those expressly granted above.
|
||||
|
||||
Anthropic retains all right, title, and interest in these materials,
|
||||
including all copyrights, patents, and other intellectual property rights.
|
||||
@@ -0,0 +1,362 @@
|
||||
---
|
||||
name: xlsx
|
||||
description: "Comprehensive spreadsheet creation, editing, and analysis with support for formulas, formatting, data analysis, and visualization. When Claude needs to work with spreadsheets (.xlsx, .xlsm, .csv, .tsv, etc) for: (1) Creating new spreadsheets with formulas and formatting, (2) Reading or analyzing data, (3) Modifying existing spreadsheets while preserving formulas, (4) Data analysis and visualization in spreadsheets, or (5) Recalculating formulas"
|
||||
license: Proprietary. LICENSE.txt has complete terms
|
||||
---
|
||||
|
||||
# Requirements for Outputs
|
||||
|
||||
## All Excel files
|
||||
|
||||
### Zero Formula Errors
|
||||
- Every Excel model MUST be delivered with ZERO formula errors (#REF!, #DIV/0!, #VALUE!, #N/A, #NAME?)
|
||||
|
||||
### Preserve Existing Templates (when updating templates)
|
||||
- Study and EXACTLY match existing format, style, and conventions when modifying files
|
||||
- Never impose standardized formatting on files with established patterns
|
||||
- Existing template conventions ALWAYS override these guidelines
|
||||
|
||||
## Financial models
|
||||
|
||||
### Color Coding Standards
|
||||
Unless otherwise stated by the user or existing template
|
||||
|
||||
#### Industry-Standard Color Conventions
|
||||
- **Blue text (RGB: 0,0,255)**: Hardcoded inputs, and numbers users will change for scenarios
|
||||
- **Black text (RGB: 0,0,0)**: ALL formulas and calculations
|
||||
- **Green text (RGB: 0,128,0)**: Links pulling from other worksheets within same workbook
|
||||
- **Red text (RGB: 255,0,0)**: External links to other files
|
||||
- **Yellow background (RGB: 255,255,0)**: Key assumptions needing attention or cells that need to be updated
|
||||
|
||||
### Number Formatting Standards
|
||||
|
||||
#### Required Format Rules
|
||||
- **Years**: Format as text strings (e.g., "2024" not "2,024")
|
||||
- **Currency**: Use $#,##0 format; ALWAYS specify units in headers ("Revenue ($mm)")
|
||||
- **Zeros**: Use number formatting to make all zeros "-", including percentages (e.g., "$#,##0;($#,##0);-")
|
||||
- **Percentages**: Default to 0.0% format (one decimal)
|
||||
- **Multiples**: Format as 0.0x for valuation multiples (EV/EBITDA, P/E)
|
||||
- **Negative numbers**: Use parentheses (123) not minus -123
|
||||
|
||||
### Formula Construction Rules
|
||||
|
||||
#### Assumptions Placement
|
||||
- Place ALL assumptions (growth rates, margins, multiples, etc.) in separate assumption cells
|
||||
- Use cell references instead of hardcoded values in formulas
|
||||
- Example: Use =B5*(1+$B$6) instead of =B5*1.05
|
||||
|
||||
#### Formula Error Prevention
|
||||
- Verify all cell references are correct
|
||||
- Check for off-by-one errors in ranges
|
||||
- Ensure consistent formulas across all projection periods
|
||||
- Test with edge cases (zero values, negative numbers)
|
||||
- Verify no unintended circular references
|
||||
|
||||
#### Documentation Requirements for Hardcodes
|
||||
- Document in a cell comment, or in the cell beside the value (if at the end of a table). Format: "Source: [System/Document], [Date], [Specific Reference], [URL if applicable]"
|
||||
- Examples:
|
||||
- "Source: Company 10-K, FY2024, Page 45, Revenue Note, [SEC EDGAR URL]"
|
||||
- "Source: Company 10-Q, Q2 2025, Exhibit 99.1, [SEC EDGAR URL]"
|
||||
- "Source: Bloomberg Terminal, 8/15/2025, AAPL US Equity"
|
||||
- "Source: FactSet, 8/20/2025, Consensus Estimates Screen"
|
||||
|
||||
# XLSX creation, editing, and analysis
|
||||
|
||||
## Overview
|
||||
|
||||
A user may ask you to create, edit, or analyze the contents of an .xlsx file. You have different tools and workflows available for different tasks.
|
||||
|
||||
## Important Requirements
|
||||
|
||||
**LibreOffice Required for Formula Recalculation**: You can assume LibreOffice is installed for recalculating formula values using the `recalc.py` script. The script automatically configures LibreOffice on first run
|
||||
|
||||
## Reading and analyzing data
|
||||
|
||||
### Data analysis with pandas
|
||||
For data analysis, visualization, and basic operations, use **pandas** which provides powerful data manipulation capabilities:
|
||||
|
||||
```python
|
||||
import pandas as pd
|
||||
|
||||
# Read Excel
|
||||
df = pd.read_excel('file.xlsx') # Default: first sheet
|
||||
all_sheets = pd.read_excel('file.xlsx', sheet_name=None) # All sheets as dict
|
||||
|
||||
# Analyze
|
||||
df.head() # Preview data
|
||||
df.info() # Column info
|
||||
df.describe() # Statistics
|
||||
|
||||
# Write Excel
|
||||
df.to_excel('output.xlsx', index=False)
|
||||
```
|
||||
|
||||
## Excel File Workflows
|
||||
|
||||
## CRITICAL: Use Formulas, Not Hardcoded Values
|
||||
|
||||
**Always use Excel formulas instead of calculating values in Python and hardcoding them.** This ensures the spreadsheet remains dynamic and updateable.
|
||||
|
||||
### ❌ WRONG - Hardcoding Calculated Values
|
||||
```python
|
||||
# Bad: Calculating in Python and hardcoding result
|
||||
total = df['Sales'].sum()
|
||||
sheet['B10'] = total # Hardcodes 5000
|
||||
|
||||
# Bad: Computing growth rate in Python
|
||||
growth = (df.iloc[-1]['Revenue'] - df.iloc[0]['Revenue']) / df.iloc[0]['Revenue']
|
||||
sheet['C5'] = growth # Hardcodes 0.15
|
||||
|
||||
# Bad: Python calculation for average
|
||||
avg = sum(values) / len(values)
|
||||
sheet['D20'] = avg # Hardcodes 42.5
|
||||
```
|
||||
|
||||
### ✅ CORRECT - Using Excel Formulas
|
||||
```python
|
||||
# Good: Let Excel calculate the sum
|
||||
sheet['B10'] = '=SUM(B2:B9)'
|
||||
|
||||
# Good: Growth rate as Excel formula
|
||||
sheet['C5'] = '=(C4-C2)/C2'
|
||||
|
||||
# Good: Average using Excel function
|
||||
sheet['D20'] = '=AVERAGE(D2:D19)'
|
||||
```
|
||||
|
||||
This applies to ALL calculations - totals, percentages, ratios, differences, etc. The spreadsheet should be able to recalculate when source data changes.
|
||||
|
||||
## Common Workflow
|
||||
1. **Choose tool**: pandas for data, openpyxl for formulas/formatting
|
||||
2. **Create/Load**: Create new workbook or load existing file
|
||||
3. **Modify**: Add/edit data, formulas, and formatting
|
||||
4. **Save**: Write to file
|
||||
5. **Recalculate formulas (MANDATORY IF USING FORMULAS)**: Use the recalc.py script
|
||||
```bash
|
||||
python recalc.py output.xlsx
|
||||
```
|
||||
6. **Verify and fix any errors**:
|
||||
- The script returns JSON with error details
|
||||
- If `status` is `errors_found`, check `error_summary` for specific error types and locations
|
||||
- Fix the identified errors and recalculate again
|
||||
- Common errors to fix:
|
||||
- `#REF!`: Invalid cell references
|
||||
- `#DIV/0!`: Division by zero
|
||||
- `#VALUE!`: Wrong data type in formula
|
||||
- `#NAME?`: Unrecognized formula name
|
||||
|
||||
### Creating new Excel files
|
||||
|
||||
```python
|
||||
# Using openpyxl for formulas and formatting
|
||||
from openpyxl import Workbook
|
||||
from openpyxl.styles import Font, PatternFill, Alignment
|
||||
|
||||
wb = Workbook()
|
||||
sheet = wb.active
|
||||
|
||||
# Add data
|
||||
sheet['A1'] = 'Hello'
|
||||
sheet['B1'] = 'World'
|
||||
sheet.append(['Row', 'of', 'data'])
|
||||
|
||||
# Add formula
|
||||
sheet['B2'] = '=SUM(A1:A10)'
|
||||
|
||||
# Formatting
|
||||
sheet['A1'].font = Font(bold=True, color='FF0000')
|
||||
sheet['A1'].fill = PatternFill('solid', start_color='FFFF00')
|
||||
sheet['A1'].alignment = Alignment(horizontal='center')
|
||||
|
||||
# Column width
|
||||
sheet.column_dimensions['A'].width = 20
|
||||
|
||||
wb.save('output.xlsx')
|
||||
```
|
||||
|
||||
### Editing existing Excel files
|
||||
|
||||
```python
|
||||
# Using openpyxl to preserve formulas and formatting
|
||||
from openpyxl import load_workbook
|
||||
|
||||
# Load existing file
|
||||
wb = load_workbook('existing.xlsx')
|
||||
sheet = wb.active # or wb['SheetName'] for specific sheet
|
||||
|
||||
# Working with multiple sheets
|
||||
for sheet_name in wb.sheetnames:
|
||||
sheet = wb[sheet_name]
|
||||
print(f"Sheet: {sheet_name}")
|
||||
|
||||
# Modify cells
|
||||
sheet['A1'] = 'New Value'
|
||||
sheet.insert_rows(2) # Insert row at position 2
|
||||
sheet.delete_cols(3) # Delete column 3
|
||||
|
||||
# Add new sheet
|
||||
new_sheet = wb.create_sheet('NewSheet')
|
||||
new_sheet['A1'] = 'Data'
|
||||
|
||||
wb.save('modified.xlsx')
|
||||
```
|
||||
|
||||
## Recalculating formulas
|
||||
|
||||
Excel files created or modified by openpyxl contain formulas as strings but not calculated values. Use the provided `recalc.py` script to recalculate formulas:
|
||||
|
||||
```bash
|
||||
# Recalculate existing Excel file
|
||||
python recalc.py <excel_file> [timeout_seconds]
|
||||
|
||||
# Create new Excel workbook
|
||||
python recalc.py --create <output_file> '<json_data>'
|
||||
```
|
||||
|
||||
Examples:
|
||||
```bash
|
||||
python recalc.py output.xlsx 30
|
||||
|
||||
python recalc.py --create output.xlsx '{"Sheet1": {"data": [["Name", "Value"], ["Test", 100]]}}'
|
||||
```
|
||||
|
||||
The script:
|
||||
- Automatically sets up LibreOffice macro on first run
|
||||
- Recalculates all formulas in all sheets
|
||||
- Scans ALL cells for Excel errors (#REF!, #DIV/0!, etc.)
|
||||
- Returns JSON with detailed error locations and counts
|
||||
- Works on Windows, Linux, and macOS
|
||||
- Can create new workbooks from JSON configuration
|
||||
|
||||
## Creating workbooks with recalc.py
|
||||
|
||||
The `--create` mode allows you to create new Excel workbooks directly from JSON configuration:
|
||||
|
||||
```bash
|
||||
python recalc.py --create <output_file> '<json_data>'
|
||||
```
|
||||
|
||||
**Default Behavior**: If you provide a relative path (e.g., `test.xlsx`), the file will be created in the **project root** directory (`ruoyi-ai-v3/`). To save to a specific directory, use an absolute path.
|
||||
|
||||
### JSON Format
|
||||
|
||||
```json
|
||||
{
|
||||
"Sheet1": {
|
||||
"data": [
|
||||
["Header1", "Header2", "Header3"],
|
||||
["Value1", "Value2", "=A2*B2"],
|
||||
["Value3", "Value4", "=SUM(C2:C10)"]
|
||||
]
|
||||
},
|
||||
"Sheet2": {
|
||||
"data": [
|
||||
["Name", "Score"],
|
||||
["Alice", 95],
|
||||
["Bob", 87]
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Examples
|
||||
|
||||
**Simple data table (saves to project root)**:
|
||||
```bash
|
||||
python recalc.py --create data.xlsx '{"Sheet1": {"data": [["Name", "Age"], ["John", 30], ["Jane", 28]]}}'
|
||||
# Creates: ruoyi-ai-v3/data.xlsx
|
||||
```
|
||||
|
||||
**With formulas (saves to project root)**:
|
||||
```bash
|
||||
python recalc.py --create model.xlsx '{"Sheet1": {"data": [["A", "B", "Sum"], [10, 20, "=A2+B2"], [5, 15, "=A3+B3"]]}}'
|
||||
# Creates: ruoyi-ai-v3/model.xlsx
|
||||
```
|
||||
|
||||
**Save to custom directory (use absolute path)**:
|
||||
```bash
|
||||
python recalc.py --create "D:\Downloads\custom.xlsx" '{"Sheet1": {"data": [["A", "B"], [1, 2]]}}'
|
||||
# Creates: D:\Downloads\custom.xlsx
|
||||
```
|
||||
|
||||
### File Path Options
|
||||
|
||||
| Input | Output Location |
|
||||
|-------|-----------------|
|
||||
| `test.xlsx` | `ruoyi-ai-v3/test.xlsx` (project root) |
|
||||
| `output/test.xlsx` | `ruoyi-ai-v3/output/test.xlsx` |
|
||||
| `D:\Downloads\test.xlsx` | `D:\Downloads\test.xlsx` (absolute path) |
|
||||
|
||||
### Features
|
||||
|
||||
- First row is automatically formatted as bold header
|
||||
- Supports both values and Excel formulas (strings starting with `=`)
|
||||
- Multiple sheets can be created in one command
|
||||
- Output file extension determines format (.xlsx, .xlsm, etc.)
|
||||
|
||||
## Formula Verification Checklist
|
||||
|
||||
Quick checks to ensure formulas work correctly:
|
||||
|
||||
### Essential Verification
|
||||
- [ ] **Test 2-3 sample references**: Verify they pull correct values before building full model
|
||||
- [ ] **Column mapping**: Confirm Excel columns match (e.g., column 64 = BL, not BK)
|
||||
- [ ] **Row offset**: Remember Excel rows are 1-indexed (DataFrame row 5 = Excel row 6)
|
||||
|
||||
### Common Pitfalls
|
||||
- [ ] **NaN handling**: Check for null values with `pd.notna()`
|
||||
- [ ] **Far-right columns**: FY data often in columns 50+
|
||||
- [ ] **Multiple matches**: Search all occurrences, not just first
|
||||
- [ ] **Division by zero**: Check denominators before using `/` in formulas (#DIV/0!)
|
||||
- [ ] **Wrong references**: Verify all cell references point to intended cells (#REF!)
|
||||
- [ ] **Cross-sheet references**: Use correct format (Sheet1!A1) for linking sheets
|
||||
|
||||
### Formula Testing Strategy
|
||||
- [ ] **Start small**: Test formulas on 2-3 cells before applying broadly
|
||||
- [ ] **Verify dependencies**: Check all cells referenced in formulas exist
|
||||
- [ ] **Test edge cases**: Include zero, negative, and very large values
|
||||
|
||||
### Interpreting recalc.py Output
|
||||
The script returns JSON with error details:
|
||||
```json
|
||||
{
|
||||
"status": "success", // or "errors_found"
|
||||
"total_errors": 0, // Total error count
|
||||
"total_formulas": 42, // Number of formulas in file
|
||||
"error_summary": { // Only present if errors found
|
||||
"#REF!": {
|
||||
"count": 2,
|
||||
"locations": ["Sheet1!B5", "Sheet1!C10"]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
### Library Selection
|
||||
- **pandas**: Best for data analysis, bulk operations, and simple data export
|
||||
- **openpyxl**: Best for complex formatting, formulas, and Excel-specific features
|
||||
|
||||
### Working with openpyxl
|
||||
- Cell indices are 1-based (row=1, column=1 refers to cell A1)
|
||||
- Use `data_only=True` to read calculated values: `load_workbook('file.xlsx', data_only=True)`
|
||||
- **Warning**: If opened with `data_only=True` and saved, formulas are replaced with values and permanently lost
|
||||
- For large files: Use `read_only=True` for reading or `write_only=True` for writing
|
||||
- Formulas are preserved but not evaluated - use recalc.py to update values
|
||||
|
||||
### Working with pandas
|
||||
- Specify data types to avoid inference issues: `pd.read_excel('file.xlsx', dtype={'id': str})`
|
||||
- For large files, read specific columns: `pd.read_excel('file.xlsx', usecols=['A', 'C', 'E'])`
|
||||
- Handle dates properly: `pd.read_excel('file.xlsx', parse_dates=['date_column'])`
|
||||
|
||||
## Code Style Guidelines
|
||||
**IMPORTANT**: When generating Python code for Excel operations:
|
||||
- Write minimal, concise Python code without unnecessary comments
|
||||
- Avoid verbose variable names and redundant operations
|
||||
- Avoid unnecessary print statements
|
||||
|
||||
**For Excel files themselves**:
|
||||
- Add comments to cells with complex formulas or important assumptions
|
||||
- Document data sources for hardcoded values
|
||||
- Include notes for key calculations and model sections
|
||||
@@ -0,0 +1,318 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Excel Formula Recalculation Script
|
||||
Recalculates all formulas in an Excel file using LibreOffice
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
import subprocess
|
||||
import os
|
||||
import platform
|
||||
from pathlib import Path
|
||||
from openpyxl import load_workbook
|
||||
|
||||
|
||||
def get_soffice_path():
    """Locate the LibreOffice ``soffice`` executable for the current OS.

    Returns:
        str | None: Full path to the executable (Windows), the command name
        or resolved path on Linux/macOS, or None when it cannot be found on
        Windows.
    """
    import shutil

    system = platform.system()

    if system == 'Windows':
        # Probe the common LibreOffice install locations first.
        possible_paths = [
            Path(os.environ.get('PROGRAMFILES', 'C:\\Program Files')) / 'LibreOffice' / 'program' / 'soffice.exe',
            Path(os.environ.get('PROGRAMFILES(X86)', 'C:\\Program Files (x86)')) / 'LibreOffice' / 'program' / 'soffice.exe',
            Path(os.path.expanduser('~')) / 'AppData' / 'Local' / 'LibreOffice' / 'program' / 'soffice.exe',
        ]

        for path in possible_paths:
            if path.exists():
                return str(path)

        # Fall back to a PATH lookup. shutil.which replaces the previous
        # hand-rolled `where soffice.exe` subprocess call (same result,
        # no child process).
        return shutil.which('soffice.exe')
    else:
        # Linux / macOS: prefer the resolved PATH entry; keep the bare
        # 'soffice' fallback so behavior matches the original when the
        # binary is not on PATH at lookup time.
        return shutil.which('soffice') or 'soffice'
|
||||
|
||||
|
||||
def setup_libreoffice_macro():
    """Install the 'RecalculateAndSave' Basic macro into the LibreOffice
    user profile, if it is not already present.

    Returns:
        bool: True when the macro already exists or was written successfully,
        False when LibreOffice cannot be found or the macro file cannot be
        written.
    """
    system = platform.system()

    # The user-profile macro directory differs per OS.
    if system == 'Darwin':
        macro_dir = os.path.expanduser('~/Library/Application Support/LibreOffice/4/user/basic/Standard')
    elif system == 'Windows':
        # Windows path for LibreOffice config
        appdata = os.path.expanduser('~\\AppData\\Roaming\\LibreOffice\\4\\user\\basic\\Standard')
        macro_dir = appdata
    else:
        # Linux
        macro_dir = os.path.expanduser('~/.config/libreoffice/4/user/basic/Standard')

    macro_file = os.path.join(macro_dir, 'Module1.xba')

    # Fast path: macro already installed. Read errors are ignored and fall
    # through to (re)writing the file below.
    if os.path.exists(macro_file):
        try:
            with open(macro_file, 'r') as f:
                if 'RecalculateAndSave' in f.read():
                    return True
        except Exception:
            pass

    # If the profile directory does not exist yet, launch LibreOffice once
    # headlessly so it bootstraps its user profile; failures here are
    # best-effort (the directory is created explicitly afterwards).
    if not os.path.exists(macro_dir):
        soffice_path = get_soffice_path()
        if not soffice_path:
            return False

        try:
            subprocess.run([soffice_path, '--headless', '--terminate_after_init'],
                           capture_output=True, timeout=10)
        except Exception:
            pass

    os.makedirs(macro_dir, exist_ok=True)

    # StarBasic module: recalculate every formula, save, and close the doc.
    # NOTE: this is a runtime payload — do not reformat its contents.
    macro_content = '''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE script:module PUBLIC "-//OpenOffice.org//DTD OfficeDocument 1.0//EN" "module.dtd">
<script:module xmlns:script="http://openoffice.org/2000/script" script:name="Module1" script:language="StarBasic">
Sub RecalculateAndSave()
    ThisComponent.calculateAll()
    ThisComponent.store()
    ThisComponent.close(True)
End Sub
</script:module>'''

    try:
        with open(macro_file, 'w') as f:
            f.write(macro_content)
        return True
    except Exception:
        return False
|
||||
|
||||
|
||||
def recalc(filename, timeout=30):
    """Recalculate formulas in an Excel file via headless LibreOffice, then
    scan the recalculated file for Excel error values.

    Args:
        filename: Path to the Excel file (modified in place by LibreOffice).
        timeout: Maximum time to wait for the recalculation, in seconds.

    Returns:
        dict: Either ``{'error': message}`` on failure, or a summary with
        ``status`` ('success' / 'errors_found'), ``total_errors``,
        ``error_summary`` (per error type: count + up to 20 locations) and
        ``total_formulas``.
    """
    if not Path(filename).exists():
        # Bug fix: the message previously read "File (unknown) does not
        # exist" — the filename was never interpolated into the f-string.
        return {'error': f'File {filename} does not exist'}

    abs_path = str(Path(filename).absolute())

    # Recalculation is driven by a Basic macro installed in the user profile.
    if not setup_libreoffice_macro():
        return {'error': 'Failed to setup LibreOffice macro'}

    soffice_path = get_soffice_path()
    if not soffice_path:
        return {'error': 'LibreOffice not found. Please install LibreOffice.'}

    cmd = [
        soffice_path, '--headless', '--norestore',
        'vnd.sun.star.script:Standard.Module1.RecalculateAndSave?language=Basic&location=application',
        abs_path
    ]

    system = platform.system()

    # Enforce the timeout in an OS-appropriate way.
    if system == 'Windows':
        # Windows: rely on subprocess's own timeout handling.
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
        except subprocess.TimeoutExpired:
            return {'error': f'LibreOffice recalculation timed out after {timeout} seconds'}
    elif system == 'Linux':
        # Linux: coreutils `timeout` is assumed to be available.
        cmd = ['timeout', str(timeout)] + cmd
        result = subprocess.run(cmd, capture_output=True, text=True)
    elif system == 'Darwin':
        # macOS: prefer GNU coreutils `gtimeout` when installed, else fall
        # back to subprocess's timeout handling.
        timeout_cmd = None
        try:
            subprocess.run(['gtimeout', '--version'], capture_output=True, timeout=1, check=False)
            timeout_cmd = 'gtimeout'
        except (FileNotFoundError, subprocess.TimeoutExpired):
            pass

        if timeout_cmd:
            cmd = [timeout_cmd, str(timeout)] + cmd
            result = subprocess.run(cmd, capture_output=True, text=True)
        else:
            try:
                result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
            except subprocess.TimeoutExpired:
                return {'error': f'LibreOffice recalculation timed out after {timeout} seconds'}
    else:
        result = subprocess.run(cmd, capture_output=True, text=True)

    if result.returncode != 0 and result.returncode != 124:  # 124 is the `timeout` utility's exit code
        error_msg = result.stderr or 'Unknown error during recalculation'
        if 'Module1' in error_msg or 'RecalculateAndSave' not in error_msg:
            return {'error': 'LibreOffice macro not configured properly. Error: ' + error_msg}
        else:
            return {'error': error_msg}

    # Check for Excel errors in the recalculated file — scan ALL cells.
    try:
        wb = load_workbook(filename, data_only=True)

        excel_errors = ['#VALUE!', '#DIV/0!', '#REF!', '#NAME?', '#NULL!', '#NUM!', '#N/A']
        error_details = {err: [] for err in excel_errors}
        total_errors = 0

        for sheet_name in wb.sheetnames:
            ws = wb[sheet_name]
            # Check ALL rows and columns — no limits.
            for row in ws.iter_rows():
                for cell in row:
                    if cell.value is not None and isinstance(cell.value, str):
                        for err in excel_errors:
                            if err in cell.value:
                                location = f"{sheet_name}!{cell.coordinate}"
                                error_details[err].append(location)
                                total_errors += 1
                                break  # count each cell at most once

        wb.close()

        # Build result summary.
        result = {
            'status': 'success' if total_errors == 0 else 'errors_found',
            'total_errors': total_errors,
            'error_summary': {}
        }

        # Add non-empty error categories only.
        for err_type, locations in error_details.items():
            if locations:
                result['error_summary'][err_type] = {
                    'count': len(locations),
                    'locations': locations[:20]  # cap the report at 20 locations
                }

        # Add formula count for context — data_only=False preserves formula
        # text, so cells starting with '=' are formulas.
        wb_formulas = load_workbook(filename, data_only=False)
        formula_count = 0
        for sheet_name in wb_formulas.sheetnames:
            ws = wb_formulas[sheet_name]
            for row in ws.iter_rows():
                for cell in row:
                    if cell.value and isinstance(cell.value, str) and cell.value.startswith('='):
                        formula_count += 1
        wb_formulas.close()

        result['total_formulas'] = formula_count

        return result

    except Exception as e:
        return {'error': str(e)}
|
||||
|
||||
|
||||
def get_project_root():
    """Return the project root directory (ruoyi-ai-v3) as an absolute path.

    The script is assumed to live 8 directory levels below the project root
    (…/xlsx/recalc.py), so the path is stripped of 9 trailing components:
    one for the file itself plus 8 parent directories.
    """
    path = os.path.abspath(__file__)
    for _ in range(9):
        path = os.path.dirname(path)
    return path
|
||||
|
||||
|
||||
def create_workbook(output_path, sheets_config):
    """Create a new Excel workbook with the given sheets and cell data.

    Args:
        output_path: Destination path for the workbook.
        sheets_config: Mapping of sheet name -> config dict. Each config may
            carry a 'data' key holding a list of rows (lists of cell values;
            strings starting with '=' are written as formulas), e.g.::

                {'Sheet1': {'data': [['Header1', 'Header2'],
                                     ['Value1', '=B2*2']],
                            'formulas': False}}

    Returns:
        dict: {'status': 'success'|'error', 'message': ...}
    """
    try:
        from openpyxl import Workbook
        from openpyxl.styles import Font, Alignment

        wb = Workbook()
        wb.remove(wb.active)  # drop the auto-created default sheet

        for sheet_name, config in sheets_config.items():
            ws = wb.create_sheet(sheet_name)
            rows = config.get('data', [])

            for r, row_values in enumerate(rows, start=1):
                for c, value in enumerate(row_values, start=1):
                    written = ws.cell(row=r, column=c, value=value)
                    if r == 1:
                        # First row is treated as the header: render it bold.
                        written.font = Font(bold=True)

        wb.save(output_path)
        return {'status': 'success', 'message': f'Workbook created at {output_path}'}

    except Exception as e:
        return {'status': 'error', 'message': str(e)}
|
||||
|
||||
|
||||
def main():
    """CLI entry point: recalculate an existing workbook or create a new one."""
    argv = sys.argv

    if len(argv) < 2:
        # No arguments: print usage for both sub-commands and exit non-zero.
        print("Usage:")
        print("  python recalc.py <excel_file> [timeout_seconds]  # Recalculate formulas")
        print("  python recalc.py --create <output_file> <data_json>  # Create new workbook")
        print("\nRecalculate formulas:")
        print("  Recalculates all formulas in an Excel file using LibreOffice")
        print("  Returns JSON with error details")
        print("\nCreate workbook:")
        print("  data_json format: '{\"Sheet1\": {\"data\": [[\"A\", \"B\"], [1, 2]]}}'")
        sys.exit(1)

    if argv[1] == '--create':
        if len(argv) < 4:
            print("Error: --create requires output_file and data_json")
            sys.exit(1)

        output_file = argv[2]
        # Relative output paths are resolved against the project root.
        if not os.path.isabs(output_file):
            output_file = os.path.join(get_project_root(), output_file)

        try:
            sheets_config = json.loads(argv[3])
        except json.JSONDecodeError as e:
            print(json.dumps({'status': 'error', 'message': f'Invalid JSON: {str(e)}'}, indent=2))
        else:
            print(json.dumps(create_workbook(output_file, sheets_config), indent=2))
    else:
        # Default sub-command: recalculate, with an optional timeout override.
        timeout = int(argv[2]) if len(argv) > 2 else 30
        print(json.dumps(recalc(argv[1], timeout), indent=2))
|
||||
@@ -3,11 +3,11 @@ spring:
|
||||
name: springAI-alibaba-copilot
|
||||
ai:
|
||||
openai:
|
||||
base-url: https://dashscope.aliyuncs.com/compatible-mode
|
||||
api-key: xx
|
||||
base-url: https://api.ppio.com/openai
|
||||
api-key: sk_xx
|
||||
chat:
|
||||
options:
|
||||
model: qwen-plus
|
||||
model: zai-org/glm-4.7-flash
|
||||
server:
|
||||
port: 8080
|
||||
|
||||
@@ -85,3 +85,7 @@ logging:
|
||||
file: "%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{50} - %msg%n"
|
||||
file:
|
||||
name: logs/copilot-file-ops.log
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -35,56 +35,11 @@ async function sendMessage() {
|
||||
setButtonsEnabled(false);
|
||||
|
||||
try {
|
||||
const response = await fetch('/api/chat/message', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({message: message})
|
||||
});
|
||||
|
||||
const data = await response.json();
|
||||
|
||||
if (data.success) {
|
||||
// 如果是异步任务(工具调用),建立SSE连接
|
||||
if (data.taskId && data.asyncTask) {
|
||||
// 先显示等待状态的工具卡片
|
||||
showWaitingToolCard();
|
||||
logStreamManager.startLogStream(data.taskId);
|
||||
showStatus('任务已启动,正在建立实时连接...', 'success');
|
||||
} else if (data.streamResponse) {
|
||||
// 流式对话响应
|
||||
handleStreamResponse(message);
|
||||
showStatus('开始流式对话...', 'success');
|
||||
} else {
|
||||
// 同步任务,直接显示结果
|
||||
addMessage('assistant', data.message);
|
||||
|
||||
// 显示连续对话统计信息
|
||||
let statusMessage = 'Message sent successfully';
|
||||
if (data.totalTurns && data.totalTurns > 1) {
|
||||
statusMessage += ` (${data.totalTurns} turns`;
|
||||
if (data.totalDurationMs) {
|
||||
statusMessage += `, ${(data.totalDurationMs / 1000).toFixed(1)}s`;
|
||||
}
|
||||
statusMessage += ')';
|
||||
|
||||
if (data.reachedMaxTurns) {
|
||||
statusMessage += ' - Reached max turns limit';
|
||||
}
|
||||
if (data.stopReason) {
|
||||
statusMessage += ` - ${data.stopReason}`;
|
||||
}
|
||||
}
|
||||
showStatus(statusMessage, 'success');
|
||||
}
|
||||
} else {
|
||||
addMessage('assistant', data.message);
|
||||
showStatus('Error: ' + data.message, 'error');
|
||||
}
|
||||
// 直接处理流式响应
|
||||
handleStreamResponse(message);
|
||||
showStatus('开始流式对话...', 'success');
|
||||
} catch (error) {
|
||||
console.error('Error:', error);
|
||||
// 更安全的错误处理
|
||||
const errorMessage = error && error.message ? error.message : 'Unknown error occurred';
|
||||
addMessage('assistant', 'Sorry, there was an error processing your request: ' + errorMessage);
|
||||
showStatus('Network error: ' + errorMessage, 'error');
|
||||
@@ -190,6 +145,19 @@ function showWaitingToolCard() {
|
||||
messagesContainer.scrollTop = messagesContainer.scrollHeight;
|
||||
}
|
||||
|
||||
// 解析SSE格式的数据 (data: content)
|
||||
function parseSseData(line) {
|
||||
const trimmedLine = line.trim();
|
||||
if (!trimmedLine) return null;
|
||||
|
||||
// SSE格式: data: content
|
||||
if (trimmedLine.startsWith('data:')) {
|
||||
return trimmedLine.substring(5).trim(); // 去掉 "data:" 前缀
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
// 处理流式响应
|
||||
function handleStreamResponse(userMessage) {
|
||||
console.log('🌊 开始处理流式响应,消息:', userMessage);
|
||||
@@ -227,7 +195,7 @@ function handleStreamResponse(userMessage) {
|
||||
const streamIndicator = streamContainer.querySelector('.stream-indicator');
|
||||
let fullContent = '';
|
||||
|
||||
fetch('/api/chat/stream', {
|
||||
fetch('/api/chat/message', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
@@ -236,50 +204,79 @@ function handleStreamResponse(userMessage) {
|
||||
})
|
||||
.then(response => {
|
||||
if (!response.ok) {
|
||||
throw new Error('Network response was not ok');
|
||||
throw new Error(`HTTP error! status: ${response.status}`);
|
||||
}
|
||||
|
||||
// 获取响应体的ReadableStream
|
||||
const reader = response.body.getReader();
|
||||
const decoder = new TextDecoder();
|
||||
let buffer = ''; // 用于缓存不完整的数据
|
||||
|
||||
function readStream() {
|
||||
return reader.read().then(({done, value}) => {
|
||||
// 处理流式数据
|
||||
const processStream = () => {
|
||||
return reader.read().then(({ done, value }) => {
|
||||
if (done) {
|
||||
console.log('✅ 流式响应完成');
|
||||
streamIndicator.style.display = 'none';
|
||||
streamContainer.classList.remove('streaming');
|
||||
showStatus('流式对话完成', 'success');
|
||||
// 处理剩余的缓存数据
|
||||
if (buffer.trim()) {
|
||||
const remainingLines = buffer.split('\n');
|
||||
for (const line of remainingLines) {
|
||||
const content = parseSseData(line);
|
||||
if (content && content !== '[DONE]') {
|
||||
fullContent += content;
|
||||
}
|
||||
}
|
||||
}
|
||||
// 移除加载指示器
|
||||
streamIndicator.remove();
|
||||
// 格式化最终内容
|
||||
streamContent.innerHTML = formatMessage(fullContent);
|
||||
return;
|
||||
}
|
||||
|
||||
const chunk = decoder.decode(value, {stream: true});
|
||||
console.log('📨 收到流式数据块:', chunk);
|
||||
// 解码数据块并添加到缓存
|
||||
buffer += decoder.decode(value, { stream: true });
|
||||
console.log('📨 收到数据块,缓存内容:', buffer);
|
||||
|
||||
// 按行处理数据(SSE格式为逐行)
|
||||
const lines = buffer.split('\n');
|
||||
|
||||
// 最后一行可能不完整,保留在缓存中
|
||||
buffer = lines.pop() || '';
|
||||
|
||||
// 处理SSE格式的数据
|
||||
const lines = chunk.split('\n');
|
||||
for (const line of lines) {
|
||||
if (line.startsWith('data: ')) {
|
||||
const data = line.substring(6);
|
||||
if (data === '[DONE]') {
|
||||
console.log('✅ 流式响应完成');
|
||||
streamIndicator.style.display = 'none';
|
||||
streamContainer.classList.remove('streaming');
|
||||
showStatus('流式对话完成', 'success');
|
||||
return;
|
||||
}
|
||||
// 解析SSE格式的数据 (data: content)
|
||||
const content = parseSseData(line);
|
||||
|
||||
// 追加内容
|
||||
fullContent += data;
|
||||
streamContent.textContent = fullContent;
|
||||
messagesContainer.scrollTop = messagesContainer.scrollHeight;
|
||||
if (!content) continue;
|
||||
|
||||
// 检查是否是完成标记
|
||||
if (content === '[DONE]') {
|
||||
console.log('✅ 收到完成信号');
|
||||
streamIndicator.remove();
|
||||
streamContent.innerHTML = formatMessage(fullContent);
|
||||
return Promise.resolve();
|
||||
}
|
||||
|
||||
// 添加内容到全局变量
|
||||
fullContent += content;
|
||||
|
||||
// 实时更新UI(删除之前的加载指示器并显示内容)
|
||||
if (streamIndicator.parentNode) {
|
||||
streamIndicator.style.display = 'none';
|
||||
}
|
||||
streamContent.innerHTML = formatMessage(fullContent);
|
||||
}
|
||||
|
||||
return readStream();
|
||||
});
|
||||
}
|
||||
// 滚动到最新位置
|
||||
messagesContainer.scrollTop = messagesContainer.scrollHeight;
|
||||
|
||||
return readStream();
|
||||
// 继续读取下一个数据块
|
||||
return processStream();
|
||||
});
|
||||
};
|
||||
|
||||
return processStream();
|
||||
})
|
||||
.catch(error => {
|
||||
console.error('❌ 流式响应错误:', error);
|
||||
|
||||
@@ -3,13 +3,13 @@
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta content="width=device-width, initial-scale=1.0" name="viewport">
|
||||
<title>SpringAI Alibaba Copilot</title>
|
||||
<title>Copilot</title>
|
||||
<link href="/css/main.css" rel="stylesheet">
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<div class="header">
|
||||
<h1>🤖 SpringAI Alibaba 编码助手</h1>
|
||||
<h1>🤖编码助手</h1>
|
||||
<p>AI助手将分析您的需求,制定执行计划,并逐步完成任务</p>
|
||||
</div>
|
||||
|
||||
@@ -78,7 +78,7 @@
|
||||
onclick="quickAction('Create a complete React project with components, styles, and package.json')">
|
||||
⚛️ Create React project
|
||||
</div>
|
||||
<div class="quick-action"
|
||||
<div class="quick-action"/>
|
||||
onclick="quickAction('Create a full-stack todo app with HTML, CSS, JavaScript frontend and Node.js backend')">
|
||||
📋 Create Todo App
|
||||
</div>
|
||||
|
||||
@@ -1,106 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-parent</artifactId>
|
||||
<version>3.4.4</version>
|
||||
<relativePath/> <!-- lookup parent from repository -->
|
||||
</parent>
|
||||
<groupId>org.ruoyi</groupId>
|
||||
<artifactId>ruoyi-mcp-server</artifactId>
|
||||
<version>0.0.1-SNAPSHOT</version>
|
||||
<name>ruoyi-mcp-serve</name>
|
||||
<description>ruoyi-mcp-serve</description>
|
||||
<url/>
|
||||
<licenses>
|
||||
<license/>
|
||||
</licenses>
|
||||
<developers>
|
||||
<developer/>
|
||||
</developers>
|
||||
<scm>
|
||||
<connection/>
|
||||
<developerConnection/>
|
||||
<tag/>
|
||||
<url/>
|
||||
</scm>
|
||||
<properties>
|
||||
<java.version>17</java.version>
|
||||
<spring-ai.version>1.0.0</spring-ai.version>
|
||||
</properties>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-web</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-starter-mcp-server-webmvc</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-test</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- Hutool 工具类 -->
|
||||
<dependency>
|
||||
<groupId>cn.hutool</groupId>
|
||||
<artifactId>hutool-all</artifactId>
|
||||
<version>5.8.25</version>
|
||||
</dependency>
|
||||
|
||||
<!-- OkHttp for HTTP requests -->
|
||||
<dependency>
|
||||
<groupId>com.squareup.okhttp3</groupId>
|
||||
<artifactId>okhttp</artifactId>
|
||||
<version>4.12.0</version>
|
||||
</dependency>
|
||||
|
||||
<!-- PlantUML -->
|
||||
<dependency>
|
||||
<groupId>net.sourceforge.plantuml</groupId>
|
||||
<artifactId>plantuml</artifactId>
|
||||
<version>1.2024.3</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Spring AI Tika for document parsing -->
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-tika-document-reader</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Lombok -->
|
||||
<dependency>
|
||||
<groupId>org.projectlombok</groupId>
|
||||
<artifactId>lombok</artifactId>
|
||||
<optional>true</optional>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
<dependencyManagement>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-bom</artifactId>
|
||||
<version>${spring-ai.version}</version>
|
||||
<type>pom</type>
|
||||
<scope>import</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</dependencyManagement>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-maven-plugin</artifactId>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
</project>
|
||||
@@ -1,19 +0,0 @@
|
||||
package org.ruoyi.mcpserve;
|
||||
|
||||
import org.springframework.boot.SpringApplication;
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
|
||||
/**
 * MCP Server application entry point.
 * Tools are loaded dynamically through DynamicToolCallbackProvider.
 *
 * @author ageer
 */
@SpringBootApplication
public class RuoyiMcpServeApplication {

    /**
     * Boots the Spring application context.
     *
     * @param args command-line arguments passed through to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication.run(RuoyiMcpServeApplication.class, args);
    }

}
|
||||
@@ -1,104 +0,0 @@
|
||||
package org.ruoyi.mcpserve.config;
|
||||
|
||||
import org.ruoyi.mcpserve.tools.McpTool;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.ai.tool.ToolCallback;
|
||||
import org.springframework.ai.tool.ToolCallbackProvider;
|
||||
import org.springframework.ai.tool.method.MethodToolCallbackProvider;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
 * Dynamic tool callback provider.
 * Filters the discovered MCP tools by the enable/disable flags in
 * {@link McpToolsConfig} and exposes the enabled ones as Spring AI
 * {@link ToolCallback}s, caching the built array until refreshed.
 *
 * @author OpenX
 */
@Component
public class DynamicToolCallbackProvider implements ToolCallbackProvider {

    private static final Logger log = LoggerFactory.getLogger(DynamicToolCallbackProvider.class);

    private final McpToolsConfig mcpToolsConfig;
    private final List<McpTool> allTools;
    // Lazily built; volatile because getToolCallbacks() uses double-checked locking.
    private volatile ToolCallback[] cachedCallbacks;

    public DynamicToolCallbackProvider(McpToolsConfig mcpToolsConfig, List<McpTool> allTools) {
        this.mcpToolsConfig = mcpToolsConfig;
        this.allTools = allTools;
        log.info("发现 {} 个MCP工具", allTools.size());
    }

    /**
     * Returns the tool callbacks, building and caching them on first use.
     * Double-checked locking keeps the build single-shot across threads.
     */
    @Override
    public ToolCallback[] getToolCallbacks() {
        if (cachedCallbacks == null) {
            synchronized (this) {
                if (cachedCallbacks == null) {
                    cachedCallbacks = buildToolCallbacks();
                }
            }
        }
        return cachedCallbacks;
    }

    /**
     * Builds the callback array from the tools currently enabled in config.
     */
    private ToolCallback[] buildToolCallbacks() {
        List<Object> enabledTools = allTools.stream()
                .filter(tool -> {
                    boolean enabled = mcpToolsConfig.isToolEnabled(tool.getToolName());
                    if (enabled) {
                        log.info("启用工具: {}", tool.getToolName());
                    } else {
                        log.info("禁用工具: {}", tool.getToolName());
                    }
                    return enabled;
                })
                .collect(Collectors.toList());

        if (enabledTools.isEmpty()) {
            log.warn("没有启用任何MCP工具");
            return new ToolCallback[0];
        }

        // Delegate reflection over the enabled tool beans to MethodToolCallbackProvider.
        MethodToolCallbackProvider provider = MethodToolCallbackProvider.builder()
                .toolObjects(enabledTools.toArray())
                .build();

        return provider.getToolCallbacks();
    }

    /**
     * Clears the cached callbacks so they are rebuilt on the next call.
     * Call after the tool configuration changes.
     */
    public void refreshTools() {
        synchronized (this) {
            cachedCallbacks = null;
            log.info("工具缓存已清除,将在下次调用时重新加载");
        }
    }

    /**
     * Returns the names of all registered tools.
     */
    public List<String> getRegisteredToolNames() {
        return allTools.stream()
                .map(McpTool::getToolName)
                .collect(Collectors.toList());
    }

    /**
     * Returns the names of the currently enabled tools.
     */
    public List<String> getEnabledToolNames() {
        return allTools.stream()
                .filter(tool -> mcpToolsConfig.isToolEnabled(tool.getToolName()))
                .map(McpTool::getToolName)
                .collect(Collectors.toList());
    }
}
|
||||
@@ -1,75 +0,0 @@
|
||||
package org.ruoyi.mcpserve.config;
|
||||
|
||||
import lombok.Data;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* MCP工具动态配置类
|
||||
* 支持通过配置文件启用/禁用各个工具
|
||||
*
|
||||
* @author OpenX
|
||||
*/
|
||||
@Data
|
||||
@Component
|
||||
@ConfigurationProperties(prefix = "mcp.tools")
|
||||
public class McpToolsConfig {
|
||||
|
||||
/**
|
||||
* 工具启用配置
|
||||
* key: 工具名称
|
||||
* value: 是否启用
|
||||
*/
|
||||
private Map<String, Boolean> enabled = new HashMap<>();
|
||||
|
||||
/**
|
||||
* 检查工具是否启用
|
||||
* 默认情况下,如果未配置则启用
|
||||
*
|
||||
* @param toolName 工具名称
|
||||
* @return 是否启用
|
||||
*/
|
||||
public boolean isToolEnabled(String toolName) {
|
||||
return enabled.getOrDefault(toolName, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* 动态启用工具
|
||||
*
|
||||
* @param toolName 工具名称
|
||||
*/
|
||||
public void enableTool(String toolName) {
|
||||
enabled.put(toolName, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* 动态禁用工具
|
||||
*
|
||||
* @param toolName 工具名称
|
||||
*/
|
||||
public void disableTool(String toolName) {
|
||||
enabled.put(toolName, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* 动态设置工具启用状态
|
||||
*
|
||||
* @param toolName 工具名称
|
||||
* @param enable 是否启用
|
||||
*/
|
||||
public void setToolEnabled(String toolName, boolean enable) {
|
||||
enabled.put(toolName, enable);
|
||||
}
|
||||
|
||||
/**
|
||||
* 批量设置工具启用状态
|
||||
*
|
||||
* @param toolStates 工具状态映射
|
||||
*/
|
||||
public void setToolsEnabled(Map<String, Boolean> toolStates) {
|
||||
enabled.putAll(toolStates);
|
||||
}
|
||||
}
|
||||
@@ -1,65 +0,0 @@
|
||||
package org.ruoyi.mcpserve.config;
|
||||
|
||||
import lombok.Data;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
/**
 * Tool configuration properties bound from the {@code tools} prefix.
 *
 * @author OpenX
 */
@Data
@Component
@ConfigurationProperties(prefix = "tools")
public class ToolsProperties {

    /**
     * Pexels image-search settings.
     */
    private Pexels pexels = new Pexels();

    /**
     * Tavily search settings.
     */
    private Tavily tavily = new Tavily();

    /**
     * File-operation settings.
     */
    private FileConfig file = new FileConfig();

    /** Pexels image-search configuration. */
    @Data
    public static class Pexels {
        /**
         * Pexels API key.
         */
        private String apiKey;

        /**
         * API endpoint URL.
         */
        private String apiUrl;
    }

    /** Tavily search configuration. */
    @Data
    public static class Tavily {
        /**
         * Tavily API key.
         */
        private String apiKey;

        /**
         * API base URL.
         */
        private String baseUrl;
    }

    /** File-operation configuration. */
    @Data
    public static class FileConfig {
        /**
         * Directory where generated files are saved.
         */
        private String saveDir;
    }
}
|
||||
@@ -1,128 +0,0 @@
|
||||
package org.ruoyi.mcpserve.controller;
|
||||
import org.ruoyi.mcpserve.config.DynamicToolCallbackProvider;
|
||||
import org.ruoyi.mcpserve.config.McpToolsConfig;
|
||||
import org.springframework.ai.tool.ToolCallback;
|
||||
import org.springframework.web.bind.annotation.*;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
* MCP工具测试Controller
|
||||
* 用于查看已加载的工具信息
|
||||
*
|
||||
* @author OpenX
|
||||
*/
|
||||
@RestController
|
||||
@RequestMapping("/tools")
|
||||
public class ToolsController {
|
||||
|
||||
private final DynamicToolCallbackProvider toolCallbackProvider;
|
||||
private final McpToolsConfig mcpToolsConfig;
|
||||
|
||||
public ToolsController(DynamicToolCallbackProvider toolCallbackProvider, McpToolsConfig mcpToolsConfig) {
|
||||
this.toolCallbackProvider = toolCallbackProvider;
|
||||
this.mcpToolsConfig = mcpToolsConfig;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取所有工具信息
|
||||
*/
|
||||
@GetMapping
|
||||
public Map<String, Object> getToolsInfo() {
|
||||
Map<String, Object> result = new HashMap<>();
|
||||
|
||||
// 所有已注册的工具
|
||||
result.put("registered", toolCallbackProvider.getRegisteredToolNames());
|
||||
|
||||
// 已加载的工具回调详情
|
||||
List<Map<String, String>> callbacks = Stream.of(toolCallbackProvider.getToolCallbacks())
|
||||
.map(callback -> {
|
||||
Map<String, String> info = new HashMap<>();
|
||||
info.put("name", callback.getToolDefinition().name());
|
||||
info.put("description", callback.getToolDefinition().description());
|
||||
return info;
|
||||
})
|
||||
.collect(Collectors.toList());
|
||||
result.put("callbacks", callbacks);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* 刷新工具缓存
|
||||
*/
|
||||
@PostMapping("/refresh")
|
||||
public Map<String, String> refreshTools() {
|
||||
toolCallbackProvider.refreshTools();
|
||||
Map<String, String> result = new HashMap<>();
|
||||
result.put("message", "工具缓存已刷新");
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* 启用指定工具
|
||||
*/
|
||||
@PostMapping("/enable/{toolName}")
|
||||
public Map<String, Object> enableTool(@PathVariable String toolName) {
|
||||
mcpToolsConfig.enableTool(toolName);
|
||||
toolCallbackProvider.refreshTools();
|
||||
|
||||
Map<String, Object> result = new HashMap<>();
|
||||
result.put("toolName", toolName);
|
||||
result.put("enabled", true);
|
||||
result.put("message", "工具已启用");
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* 禁用指定工具
|
||||
*/
|
||||
@PostMapping("/disable/{toolName}")
|
||||
public Map<String, Object> disableTool(@PathVariable String toolName) {
|
||||
mcpToolsConfig.disableTool(toolName);
|
||||
toolCallbackProvider.refreshTools();
|
||||
|
||||
Map<String, Object> result = new HashMap<>();
|
||||
result.put("toolName", toolName);
|
||||
result.put("enabled", false);
|
||||
result.put("message", "工具已禁用");
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* 批量设置工具状态
|
||||
* 请求体示例: {"basic": true, "terminal": false, "plantuml": true}
|
||||
*/
|
||||
@PostMapping("/batch")
|
||||
public Map<String, Object> batchSetTools(@RequestBody Map<String, Boolean> toolStates) {
|
||||
mcpToolsConfig.setToolsEnabled(toolStates);
|
||||
toolCallbackProvider.refreshTools();
|
||||
|
||||
Map<String, Object> result = new HashMap<>();
|
||||
result.put("updated", toolStates);
|
||||
result.put("enabled", toolCallbackProvider.getEnabledToolNames());
|
||||
result.put("message", "工具状态已更新");
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取所有工具的启用状态
|
||||
*/
|
||||
@GetMapping("/status")
|
||||
public Map<String, Object> getToolsStatus() {
|
||||
Map<String, Object> result = new HashMap<>();
|
||||
List<String> registered = toolCallbackProvider.getRegisteredToolNames();
|
||||
|
||||
Map<String, Boolean> status = new HashMap<>();
|
||||
for (String toolName : registered) {
|
||||
status.put(toolName, mcpToolsConfig.isToolEnabled(toolName));
|
||||
}
|
||||
|
||||
result.put("status", status);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
@@ -1,39 +0,0 @@
|
||||
package org.ruoyi.mcpserve.tools;
|
||||
|
||||
import org.springframework.ai.tool.annotation.Tool;
|
||||
import org.springframework.ai.tool.annotation.ToolParam;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.UUID;
|
||||
|
||||
/**
|
||||
* 基础工具类
|
||||
*
|
||||
* @author OpenX
|
||||
*/
|
||||
@Component
|
||||
public class BasicTools implements McpTool {
|
||||
|
||||
public static final String TOOL_NAME = "basic";
|
||||
|
||||
@Override
|
||||
public String getToolName() {
|
||||
return TOOL_NAME;
|
||||
}
|
||||
|
||||
@Tool(description = "获取一个指定前缀的随机数")
|
||||
public String add(@ToolParam(description = "字符前缀") String prefix) {
|
||||
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyMMdd");
|
||||
String format = LocalDate.now().format(formatter);
|
||||
String replace = prefix + UUID.randomUUID().toString().replace("-", "");
|
||||
return format + replace;
|
||||
}
|
||||
|
||||
@Tool(description = "获取当前时间")
|
||||
public LocalDateTime getCurrentTime() {
|
||||
return LocalDateTime.now();
|
||||
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user