commit 5238d739420f64e4bb275c1e0fab5febbc05d892 Author: Ismael Sampaio Date: Sun Mar 16 20:56:15 2025 -0300 Initial Commit diff --git a/.github/prompts/pdfExtractor.prompt.md b/.github/prompts/pdfExtractor.prompt.md new file mode 100644 index 0000000..2bbb23f --- /dev/null +++ b/.github/prompts/pdfExtractor.prompt.md @@ -0,0 +1,6 @@ +# Extrator de pdf + +O extrator é um agente de IA construído com o langchain em python, seguindo as seguintes instruções: + Dentro de um PDF deve extrair as seguintes informações: +- Os índices dos capítulos retornando as linhas de início e do fim +- Separar cada capítulo, criando um PDF diferente para cada um diff --git a/.github/prompts/projeto.prompt.md b/.github/prompts/projeto.prompt.md new file mode 100644 index 0000000..d24f579 --- /dev/null +++ b/.github/prompts/projeto.prompt.md @@ -0,0 +1,8 @@ +# Escopo do projeto: + +Vai consumir um modelo llm para realizar consultas em um banco de dados, possuindo as seguintes características: +- Agnóstico de banco de dados, podendo ser usado para qualquer tipo +- A resposta retorna a lógica do que foi feito junto com o resultado do seu sql exceto: + - Caso altere qualquer tipo de dado no banco, retorne apenas o sql para o próprio usuário executar + - Caso sela apenas consulta realize a consulta, retorne qual foi o sql e o resultado da consulta +- Usará llm local, com o llama cpp e suporte a vulkan \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..519a8e7 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,11 @@ +{ + "python.testing.unittestArgs": [ + "-v", + "-s", + "./test", + "-p", + "test_*.py" + ], + "python.testing.pytestEnabled": false, + "python.testing.unittestEnabled": true +} \ No newline at end of file diff --git a/agentsConfig.ini b/agentsConfig.ini new file mode 100644 index 0000000..67ec7c8 --- /dev/null +++ b/agentsConfig.ini @@ -0,0 +1,3 @@ +[PDFAgent] +model = qwen2.5:1.5b +modelProvider = ollama \ No newline at end of file diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..f50bf19 --- /dev/null +++ b/src/__init__.py @@ -0,0 +1 @@ +# Este arquivo transforma o diretório em um pacote Python diff --git a/src/interfaces/agents/agent.py b/src/interfaces/agents/agent.py new file mode 100644 index 0000000..138528c --- /dev/null +++ b/src/interfaces/agents/agent.py @@ -0,0 +1,23 @@ +from abc import ABC, abstractmethod +from typing import List, Dict, Any + +class Agent(ABC): + @abstractmethod + def get_agent_tools(self) -> List[Dict[str, Any]]: + """ + Get the tools available for the agent. + + Returns: + List[Dict[str, Any]]: A list of dictionaries representing the tools. + """ + pass + + @abstractmethod + def get_agent_details(self) -> Dict[str, Any]: + """ + Get the details of the agent. + + Returns: + Dict[str, Any]: A dictionary containing agent details. + """ + pass \ No newline at end of file diff --git a/src/interfaces/models/modelInference.py b/src/interfaces/models/modelInference.py new file mode 100644 index 0000000..f0fd74f --- /dev/null +++ b/src/interfaces/models/modelInference.py @@ -0,0 +1,16 @@ +from abc import ABC, abstractmethod +from ast import Dict +from typing import List, Dict, Any, Optional + + +class ModelManager(ABC): + + @abstractmethod + def get_available_models(self) -> List[Dict[str, Any]]: + pass + + @abstractmethod + def get_model_details(self, model_name: str) -> Optional[Dict[str, Any]]: + pass + + \ No newline at end of file diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..dee9893 --- /dev/null +++ b/src/main.py @@ -0,0 +1,9 @@ +from service.ollama.ollamaModelManager import OllamaModelManager + +# Utilizando nossa classe para obter os modelos +ollama_manager = OllamaModelManager() +models = ollama_manager.get_available_models() +# print(models) + +details = ollama_manager.get_model_details("qwen2.5:1.5b") +print(details) \ No newline at end of file diff --git a/src/modules/pdf/pdfAgent.py b/src/modules/pdf/pdfAgent.py new file mode 100644 index 0000000..8ca5738 --- /dev/null +++ b/src/modules/pdf/pdfAgent.py @@ -0,0 +1,33 @@ +from langchain.agents import AgentExecutor +from langchain_community.tools import BaseTool + +import fitz +import os + +from src.interfaces.agents.agent import Agent + + +class PDFAgent(Agent): + + def __init__(self): + """ + Initialize a PDF agent. + This initializes the PDF agent which handles operations related to PDF documents. + The agent attribute needs to be set with an appropriate agent implementation + during instantiation. + Attributes: + agent: The agent implementation for PDF operations. + """ + + + self.agent + + def get_agent_details(self): + return super().get_agent_details() + + def get_agent_tools(self): + return super().get_agent_tools() + + + + diff --git a/src/modules/pdf/tools/metadata.pdf.tool.py b/src/modules/pdf/tools/metadata.pdf.tool.py new file mode 100644 index 0000000..8d82110 --- /dev/null +++ b/src/modules/pdf/tools/metadata.pdf.tool.py @@ -0,0 +1,15 @@ +import fitz +from langchain_community.tools import BaseTool + +class PDFMetadataTool(BaseTool): + name="pdf_metadata" + description="Extract metadata from PDF file" + + def _run(self, path: str): + try: + with fitz.open(path) as doc: + metadata = { + "páginas": len(doc) + } + except Exception as e: + return f"Error: {e}" \ No newline at end of file diff --git a/src/modules/pdf/tools/textExtract.pdf.tool.py b/src/modules/pdf/tools/textExtract.pdf.tool.py new file mode 100644 index 0000000..13fa6fc --- /dev/null +++ b/src/modules/pdf/tools/textExtract.pdf.tool.py @@ -0,0 +1,10 @@ +from langchain_community.tools import BaseTool + + +class PDFextractTextTool(BaseTool): + name="pdf_text_extract" + description="Extract text from PDF file" + + def _run(self, path: str): + try: + \ No newline at end of file diff --git a/src/modules/pdf/utils.py b/src/modules/pdf/utils.py new file mode 100644 index 0000000..a761cc0 --- /dev/null +++ b/src/modules/pdf/utils.py @@ -0,0 +1,18 @@ +import fitz +import os + + +class PDFUtils: + @staticmethod + def get_total_lines(pdf_path: str): + pdf = fitz.open(pdf_path) + return len(pdf) + + +# Get the user's home directory and construct the path to the PDF +home_dir = os.path.expanduser("~") +pdf_path = os.path.join(home_dir, "Downloads", "Manuais.pdf") + +# Call the method with the full path +total_pages = PDFUtils.get_total_lines(pdf_path) +print(f"Total pages in PDF: {total_pages}") \ No newline at end of file diff --git a/src/service/ollama/ollamaModelManager.py b/src/service/ollama/ollamaModelManager.py new file mode 100644 index 0000000..e2c6781 --- /dev/null +++ b/src/service/ollama/ollamaModelManager.py @@ -0,0 +1,46 @@ +import requests + +from src.interfaces.models.modelInference import ModelManager + + +class OllamaModelManager(ModelManager): + def __init__(self, base_url = "http://localhost:11434"): + self.base_url = base_url + + def get_available_models(self): + try: + print(requests.__file__) + print(self.base_url) + response = requests.get(f"{self.base_url}/api/tags") + data = response.json() + return data.get('models', []) + except Exception as e: + print(f"Erro ao obter modelos: {e}") + return [] + + def get_model_details(self, model_name): + try: + + # Obter detalhes completos do modelo via API + response = requests.post( + f"{self.base_url}/api/show", + json={"name": model_name} + ) + model_details = response.json() + + # O tamanho do contexto geralmente está disponível em model_details['parameters']['context_length'] + # ou em outro campo similar dependendo do modelo + context_size = model_details.get('parameters', {}).get('context_length', 'Não disponível') + + # Podemos adicionar outros detalhes relevantes + details = { + "name": model_name, + "context_size": context_size, + "model_type": model_details.get('modelfile', {}).get('parameter', 'Não disponível'), + "license": model_details.get('license', 'Não disponível') + } + + return details + except Exception as e: + print(f"Error getting model details: {e}") + return {"name": model_name, "error": str(e)} diff --git a/src/utils/__init__.py b/src/utils/__init__.py new file mode 100644 index 0000000..f50bf19 --- /dev/null +++ b/src/utils/__init__.py @@ -0,0 +1 @@ +# Este arquivo transforma o diretório em um pacote Python diff --git a/src/utils/getModels.py b/src/utils/getModels.py new file mode 100644 index 0000000..2f7eeac --- /dev/null +++ b/src/utils/getModels.py @@ -0,0 +1,9 @@ + + +from service.ollama.ollamaModelManager import OllamaModelManager + + +ollama_service = OllamaModelManager() +models = ollama_service.get_available_models() + +print(models) diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/test_ollamaModelManager.py b/test/test_ollamaModelManager.py new file mode 100644 index 0000000..055ed35 --- /dev/null +++ b/test/test_ollamaModelManager.py @@ -0,0 +1,81 @@ +import unittest +from unittest.mock import patch, MagicMock + +from src.service.ollama.ollamaModelManager import OllamaModelManager + +class TestOllamaModelManager(unittest.TestCase): + + def setUp(self): + self.model_manager = OllamaModelManager() + self.model_manager.base_url = "http://test-url:11434" + + @patch('service.ollama.ollamaModelManager.requests.get') + def test_get_available_models_success(self, mock_get): + # Setup mock response + mock_response = MagicMock() + mock_response.json.return_value = { + 'models': [ + {'name': 'model1'}, + {'name': 'model2'} + ] + } + mock_get.return_value = mock_response + + # Call method + result = self.model_manager.get_available_models() + + # Assertions + mock_get.assert_called_once_with("http://test-url:11434/api/tags") + self.assertEqual(len(result), 2) + self.assertEqual(result, [{'name': 'model1'}, {'name': 'model2'}]) + + @patch('service.ollama.ollamaModelManager.requests.get') + def test_get_available_models_exception(self, mock_get): + # Setup mock to raise exception + mock_get.side_effect = Exception("Connection error") + + # Call method + result = self.model_manager.get_available_models() + + # Assertions + self.assertEqual(result, []) + + @patch('service.ollama.ollamaModelManager.requests.post') + def test_get_model_details_success(self, mock_post): + # Setup mock response + mock_response = MagicMock() + mock_response.json.return_value = { + 'parameters': {'context_length': 4096}, + 'modelfile': {'parameter': 'llama2'}, + 'license': 'Apache 2.0' + } + mock_post.return_value = mock_response + + # Call method + result = self.model_manager.get_model_details('llama2') + + # Assertions + mock_post.assert_called_once_with( + "http://test-url:11434/api/show", + json={"name": "llama2"} + ) + self.assertEqual(result['name'], 'llama2') + self.assertEqual(result['context_size'], 4096) + self.assertEqual(result['license'], 'Apache 2.0') + + @patch('service.ollama.ollamaModelManager.requests.post') + def test_get_model_details_exception(self, mock_post): + # Setup mock to raise exception + mock_post.side_effect = Exception("API error") + + # Call method + result = self.model_manager.get_model_details('unknown_model') + + # Assertions + self.assertEqual(result['name'], 'unknown_model') + self.assertTrue('error' in result) + self.assertEqual(result['error'], 'API error') + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file