Initial Commit

This commit is contained in:
2025-03-16 20:56:15 -03:00
commit 5238d73942
17 changed files with 290 additions and 0 deletions

View File

@@ -0,0 +1,6 @@
# Extrator de pdf
O extrator é um agente de IA construído com o langchain em python, seguindo as seguintes instruções:
Dentro de um PDF deve extrair as seguintes informações:
- Os índices dos capítulos, retornando as linhas de início e de fim
- Separar cada capítulo, criando um PDF diferente para cada um

8
.github/prompts/projeto.prompt.md vendored Normal file
View File

@@ -0,0 +1,8 @@
# Escopo do projeto:
Vai consumir um modelo llm para realizar consultas em um banco de dados, possuindo as seguintes características:
- Agnóstico de banco de dados, podendo ser usado para qualquer tipo
- A resposta retorna a lógica do que foi feito junto com o resultado do seu sql exceto:
- Caso altere qualquer tipo de dado no banco, retorne apenas o sql para o próprio usuário executar
- Caso seja apenas consulta, realize a consulta e retorne qual foi o sql e o resultado da consulta
- Usará llm local, com o llama cpp e suporte a vulkan

11
.vscode/settings.json vendored Normal file
View File

@@ -0,0 +1,11 @@
{
"python.testing.unittestArgs": [
"-v",
"-s",
"./test",
"-p",
"test_*.py"
],
"python.testing.pytestEnabled": false,
"python.testing.unittestEnabled": true
}

3
agentsConfig.ini Normal file
View File

@@ -0,0 +1,3 @@
[PDFAgent]
model = qwen2.5:1.5b
modelProvider = ollama

1
src/__init__.py Normal file
View File

@@ -0,0 +1 @@
# Este arquivo transforma o diretório em um pacote Python

View File

@@ -0,0 +1,23 @@
from abc import ABC, abstractmethod
from typing import List, Dict, Any
class Agent(ABC):
    """Abstract contract that every agent implementation must fulfil."""

    @abstractmethod
    def get_agent_tools(self) -> List[Dict[str, Any]]:
        """
        Report the tools this agent can use.

        Returns:
            List[Dict[str, Any]]: One dictionary per tool.
        """

    @abstractmethod
    def get_agent_details(self) -> Dict[str, Any]:
        """
        Report descriptive information about this agent.

        Returns:
            Dict[str, Any]: A dictionary holding the agent details.
        """

View File

@@ -0,0 +1,16 @@
from abc import ABC, abstractmethod
from ast import Dict
from typing import List, Dict, Any, Optional
class ModelManager(ABC):
    """Abstract interface for components that list and describe models."""

    @abstractmethod
    def get_available_models(self) -> List[Dict[str, Any]]:
        """
        List the models that are available.

        Returns:
            List[Dict[str, Any]]: One dictionary per model.
        """

    @abstractmethod
    def get_model_details(self, model_name: str) -> Optional[Dict[str, Any]]:
        """
        Describe a single model.

        Args:
            model_name: Name of the model to describe.

        Returns:
            Optional[Dict[str, Any]]: A dictionary of details; the annotation
            also permits ``None``.
        """

9
src/main.py Normal file
View File

@@ -0,0 +1,9 @@
from service.ollama.ollamaModelManager import OllamaModelManager
# Use our manager class to obtain the models.
ollama_manager = OllamaModelManager()
# The model list is fetched but not used below; the print is left disabled.
models = ollama_manager.get_available_models()
# print(models)
# Look up the details of one specific model and print them.
details = ollama_manager.get_model_details("qwen2.5:1.5b")
print(details)

View File

@@ -0,0 +1,33 @@
from langchain.agents import AgentExecutor
from langchain_community.tools import BaseTool
import fitz
import os
from src.interfaces.agents.agent import Agent
class PDFAgent(Agent):
    """Agent that handles operations related to PDF documents."""

    def __init__(self, agent=None):
        """
        Initialize a PDF agent.

        Args:
            agent: The agent implementation used for PDF operations. Defaults
                to ``None`` so the object can be created first and the
                implementation attached afterwards.

        Attributes:
            agent: The agent implementation for PDF operations, or ``None``
                when none has been attached yet.
        """
        # Always bind the attribute: merely reading ``self.agent`` here would
        # raise AttributeError because nothing has assigned it yet.
        self.agent = agent

    def get_agent_details(self):
        """
        Return the details of this agent.

        NOTE(review): placeholder — this only delegates to
        ``Agent.get_agent_details``, whose body is empty, so no details are
        produced until a real implementation is written here.
        """
        return super().get_agent_details()

    def get_agent_tools(self):
        """
        Return the tools available to this agent.

        NOTE(review): placeholder — this only delegates to
        ``Agent.get_agent_tools``, whose body is empty, so no tools are
        produced until a real implementation is written here.
        """
        return super().get_agent_tools()

View File

@@ -0,0 +1,15 @@
import fitz
from langchain_community.tools import BaseTool
class PDFMetadataTool(BaseTool):
    """Tool that reports basic metadata about a PDF file."""

    # Annotated overrides: BaseTool is a pydantic model, and pydantic rejects
    # field overrides that carry no type annotation.
    name: str = "pdf_metadata"
    description: str = "Extract metadata from PDF file"

    def _run(self, path: str):
        """
        Open the PDF at ``path`` and collect its metadata.

        Args:
            path: Location of the PDF file to inspect.

        Returns:
            A dict whose ``"páginas"`` entry is ``len(doc)`` for the opened
            document, or the string ``"Error: <message>"`` when the file
            cannot be opened or read.
        """
        try:
            with fitz.open(path) as doc:
                metadata = {
                    "páginas": len(doc),
                }
        except Exception as e:
            return f"Error: {e}"
        # Hand the collected data back to the caller; without this the tool
        # would yield None on success.
        return metadata

View File

@@ -0,0 +1,10 @@
from langchain_community.tools import BaseTool
class PDFextractTextTool(BaseTool):
name="pdf_text_extract"
description="Extract text from PDF file"
def _run(self, path: str):
try:

18
src/modules/pdf/utils.py Normal file
View File

@@ -0,0 +1,18 @@
import fitz
import os
class PDFUtils:
    """Helper routines for inspecting PDF files."""

    @staticmethod
    def get_total_lines(pdf_path: str) -> int:
        """
        Return ``len()`` of the PDF document stored at ``pdf_path``.

        NOTE(review): despite the method name, the value is the length of the
        document object, which the code using it reports as a page count.

        Args:
            pdf_path: Path of the PDF file to open.

        Returns:
            int: The length of the opened document.
        """
        # The context manager closes the document even if len() fails, so the
        # file handle is never leaked.
        with fitz.open(pdf_path) as pdf:
            return len(pdf)
# Manual smoke check. It runs only when this file is executed directly, so
# importing the module never touches the filesystem, prints, or fails because
# the sample PDF is missing.
if __name__ == "__main__":
    # Get the user's home directory and construct the path to the PDF
    home_dir = os.path.expanduser("~")
    pdf_path = os.path.join(home_dir, "Downloads", "Manuais.pdf")
    # Call the method with the full path
    total_pages = PDFUtils.get_total_lines(pdf_path)
    print(f"Total pages in PDF: {total_pages}")

View File

@@ -0,0 +1,46 @@
import requests
from src.interfaces.models.modelInference import ModelManager
class OllamaModelManager(ModelManager):
    """ModelManager implementation that talks to an Ollama HTTP server."""

    def __init__(self, base_url="http://localhost:11434"):
        """
        Args:
            base_url: Root URL of the Ollama server, without a trailing slash.
        """
        self.base_url = base_url

    @staticmethod
    def _lookup(container, key, default):
        """Return ``container[key]`` if ``container`` is a dict holding ``key``, else ``default``."""
        if isinstance(container, dict):
            return container.get(key, default)
        return default

    def get_available_models(self):
        """
        Fetch the model list from ``<base_url>/api/tags``.

        Returns:
            The value of the ``models`` entry of the JSON response, or an
            empty list when the entry is missing or the request fails. Any
            failure is printed and never raised.
        """
        # NOTE(review): no timeout is passed; the unit tests assert the exact
        # call arguments, so adding one means updating those tests as well.
        try:
            response = requests.get(f"{self.base_url}/api/tags")
            # Treat HTTP error statuses as failures instead of parsing them.
            response.raise_for_status()
            data = response.json()
            return self._lookup(data, 'models', []) or []
        except Exception as e:
            print(f"Erro ao obter modelos: {e}")
            return []

    def get_model_details(self, model_name):
        """
        Fetch details of one model from ``<base_url>/api/show``.

        Args:
            model_name: Name of the model to describe.

        Returns:
            A dict with ``name``, ``context_size``, ``model_type`` and
            ``license``; fields that cannot be found hold
            ``'Não disponível'``. On any failure the dict is
            ``{"name": model_name, "error": <message>}`` and the error is
            printed, never raised.
        """
        not_available = 'Não disponível'
        try:
            # Get the full model details through the API.
            response = requests.post(
                f"{self.base_url}/api/show",
                json={"name": model_name}
            )
            response.raise_for_status()
            model_details = response.json()

            # Preferred location: parameters['context_length']. The lookups
            # tolerate servers that send ``parameters`` / ``modelfile`` as
            # plain strings, which would otherwise break a chained ``.get``.
            context_size = self._lookup(
                self._lookup(model_details, 'parameters', None),
                'context_length',
                None,
            )
            if context_size is None:
                # Fallback: architecture-prefixed keys in ``model_info``,
                # e.g. "llama.context_length".
                model_info = self._lookup(model_details, 'model_info', None)
                if isinstance(model_info, dict):
                    for key, value in model_info.items():
                        if isinstance(key, str) and key.endswith('.context_length'):
                            context_size = value
                            break
            if context_size is None:
                context_size = not_available

            return {
                "name": model_name,
                "context_size": context_size,
                "model_type": self._lookup(
                    self._lookup(model_details, 'modelfile', None),
                    'parameter',
                    not_available,
                ),
                "license": self._lookup(model_details, 'license', not_available),
            }
        except Exception as e:
            print(f"Error getting model details: {e}")
            return {"name": model_name, "error": str(e)}

1
src/utils/__init__.py Normal file
View File

@@ -0,0 +1 @@
# Este arquivo transforma o diretório em um pacote Python

9
src/utils/getModels.py Normal file
View File

@@ -0,0 +1,9 @@
from service.ollama.ollamaModelManager import OllamaModelManager
# Build a manager using its default base URL.
ollama_service = OllamaModelManager()
# Fetch the available models and print them.
models = ollama_service.get_available_models()
print(models)

0
test/__init__.py Normal file
View File

View File

@@ -0,0 +1,81 @@
import unittest
from unittest.mock import patch, MagicMock
from src.service.ollama.ollamaModelManager import OllamaModelManager
# Dotted path of the module under test. Patch targets are built from it so
# they always match the import statement at the top of this file.
_MODULE_UNDER_TEST = "src.service.ollama.ollamaModelManager"


class TestOllamaModelManager(unittest.TestCase):
    """Unit tests for OllamaModelManager with the HTTP calls mocked out."""

    def setUp(self):
        """Create a manager that points at a fake server URL."""
        self.model_manager = OllamaModelManager(base_url="http://test-url:11434")

    @patch(f"{_MODULE_UNDER_TEST}.requests.get")
    def test_get_available_models_success(self, mock_get):
        """The 'models' entry of the JSON response is returned unchanged."""
        # Setup mock response
        mock_response = MagicMock()
        mock_response.json.return_value = {
            'models': [
                {'name': 'model1'},
                {'name': 'model2'}
            ]
        }
        mock_get.return_value = mock_response

        # Call method
        result = self.model_manager.get_available_models()

        # Assertions
        mock_get.assert_called_once_with("http://test-url:11434/api/tags")
        self.assertEqual(len(result), 2)
        self.assertEqual(result, [{'name': 'model1'}, {'name': 'model2'}])

    @patch(f"{_MODULE_UNDER_TEST}.requests.get")
    def test_get_available_models_exception(self, mock_get):
        """A failing request yields an empty list instead of raising."""
        # Setup mock to raise exception
        mock_get.side_effect = Exception("Connection error")

        # Call method
        result = self.model_manager.get_available_models()

        # Assertions
        self.assertEqual(result, [])

    @patch(f"{_MODULE_UNDER_TEST}.requests.post")
    def test_get_model_details_success(self, mock_post):
        """Fields of the /api/show response are mapped into the details dict."""
        # Setup mock response
        mock_response = MagicMock()
        mock_response.json.return_value = {
            'parameters': {'context_length': 4096},
            'modelfile': {'parameter': 'llama2'},
            'license': 'Apache 2.0'
        }
        mock_post.return_value = mock_response

        # Call method
        result = self.model_manager.get_model_details('llama2')

        # Assertions
        mock_post.assert_called_once_with(
            "http://test-url:11434/api/show",
            json={"name": "llama2"}
        )
        self.assertEqual(result['name'], 'llama2')
        self.assertEqual(result['context_size'], 4096)
        self.assertEqual(result['model_type'], 'llama2')
        self.assertEqual(result['license'], 'Apache 2.0')

    @patch(f"{_MODULE_UNDER_TEST}.requests.post")
    def test_get_model_details_exception(self, mock_post):
        """A failing request yields a dict carrying the error message."""
        # Setup mock to raise exception
        mock_post.side_effect = Exception("API error")

        # Call method
        result = self.model_manager.get_model_details('unknown_model')

        # Assertions
        self.assertEqual(result['name'], 'unknown_model')
        self.assertIn('error', result)
        self.assertEqual(result['error'], 'API error')


if __name__ == '__main__':
    unittest.main()