Spaces:
Running
Running
File size: 3,491 Bytes
42f5b98 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
"""Chunk entity model for semantic code units."""
from dataclasses import dataclass, field
from enum import Enum
from typing import Optional
from uuid import uuid4
class ChunkType(str, Enum):
    """Kind of unit that a chunk represents.

    The ``str`` mixin makes every member a string as well as an enum
    member, so a member compares equal to its raw value
    (``ChunkType.CLASS == "class"``) and ``ChunkType("class")`` returns the
    member for that value. An unknown value raises ``ValueError``.
    """

    FUNCTION = "function"
    CLASS = "class"
    METHOD = "method"
    MODULE = "module"
    TEXT = "text"
    DOCSTRING = "docstring"
    COMMENT = "comment"
@dataclass
class ChunkMetadata:
    """Location and descriptive metadata for a code chunk.

    Attributes:
        file_path: Path of the file the chunk was taken from.
        start_line: Line number where the chunk starts.
        end_line: Line number where the chunk ends.
        chunk_type: Kind of unit the chunk represents.
        language: Optional language identifier; ``None`` when not set.
        name: Optional name of the chunk; ``None`` when not set.
        signature: Optional signature text; ``None`` when not set.
        docstring: Optional docstring text; ``None`` when not set.
        parent_name: Optional name of an enclosing unit; ``None`` when not set.

    NOTE(review): this class does not validate its fields. Whether line
    numbers are 0- or 1-based, and whether ``end_line`` is inclusive, is
    decided by whoever constructs the metadata -- confirm against callers.
    """

    file_path: str
    start_line: int
    end_line: int
    chunk_type: ChunkType
    language: Optional[str] = None
    name: Optional[str] = None
    signature: Optional[str] = None
    docstring: Optional[str] = None
    parent_name: Optional[str] = None

    @property
    def line_range(self) -> str:
        """Return the line span formatted as ``"<start_line>-<end_line>"``."""
        return f"{self.start_line}-{self.end_line}"

    @property
    def citation(self) -> str:
        """Return a citation of the form ``"[<file_path>:<start>-<end>]"``."""
        # Reuse line_range so the span format is defined in exactly one place.
        return f"[{self.file_path}:{self.line_range}]"
@dataclass
class Chunk:
    """A semantic unit of code or documentation.

    Attributes:
        content: Text of the chunk.
        metadata: Location and descriptive metadata for the chunk.
        repo_id: Identifier of the repository the chunk belongs to.
        id: Unique identifier; defaults to a freshly generated UUID4 string.
        embedding: Optional embedding vector; ``None`` when not set.
    """

    content: str
    metadata: ChunkMetadata
    repo_id: str
    id: str = field(default_factory=lambda: str(uuid4()))
    embedding: Optional[list[float]] = None

    @property
    def file_path(self) -> str:
        """Shortcut for ``metadata.file_path``."""
        return self.metadata.file_path

    @property
    def start_line(self) -> int:
        """Shortcut for ``metadata.start_line``."""
        return self.metadata.start_line

    @property
    def end_line(self) -> int:
        """Shortcut for ``metadata.end_line``."""
        return self.metadata.end_line

    @property
    def chunk_type(self) -> ChunkType:
        """Shortcut for ``metadata.chunk_type``."""
        return self.metadata.chunk_type

    @property
    def name(self) -> Optional[str]:
        """Shortcut for ``metadata.name``."""
        return self.metadata.name

    @property
    def citation(self) -> str:
        """Shortcut for ``metadata.citation``."""
        return self.metadata.citation

    def to_dict(self) -> dict:
        """Flatten the chunk and its metadata into one dictionary for storage.

        The embedding is not included; ``from_dict`` accepts it as a separate
        argument.

        Returns:
            A dict holding ``id``, ``content``, ``repo_id`` and every metadata
            field, with ``chunk_type`` stored as its raw string value.

        Raises:
            ValueError: If ``metadata.chunk_type`` is not a valid ChunkType
                value.
        """
        meta = self.metadata
        return {
            "id": self.id,
            "content": self.content,
            "repo_id": self.repo_id,
            "file_path": meta.file_path,
            "start_line": meta.start_line,
            "end_line": meta.end_line,
            # The dataclass does not enforce field types, so metadata may have
            # been built with a raw string such as "function". Normalising
            # through ChunkType accepts both a member and its string value.
            "chunk_type": ChunkType(meta.chunk_type).value,
            "language": meta.language,
            "name": meta.name,
            "signature": meta.signature,
            "docstring": meta.docstring,
            "parent_name": meta.parent_name,
        }

    @classmethod
    def from_dict(cls, data: dict, embedding: Optional[list[float]] = None) -> "Chunk":
        """Rebuild a chunk from a dictionary shaped like ``to_dict`` output.

        Args:
            data: Mapping that must contain ``id``, ``content``, ``repo_id``,
                ``file_path``, ``start_line``, ``end_line`` and ``chunk_type``.
                The keys ``language``, ``name``, ``signature``, ``docstring``
                and ``parent_name`` are optional and default to ``None``.
            embedding: Optional embedding vector to attach to the chunk.

        Returns:
            A new ``Chunk`` with its metadata populated from ``data``.

        Raises:
            KeyError: If a required key is missing from ``data``.
            ValueError: If ``data["chunk_type"]`` is not a valid ChunkType
                value.
        """
        metadata = ChunkMetadata(
            file_path=data["file_path"],
            start_line=data["start_line"],
            end_line=data["end_line"],
            chunk_type=ChunkType(data["chunk_type"]),
            language=data.get("language"),
            name=data.get("name"),
            signature=data.get("signature"),
            docstring=data.get("docstring"),
            parent_name=data.get("parent_name"),
        )
        return cls(
            id=data["id"],
            content=data["content"],
            metadata=metadata,
            repo_id=data["repo_id"],
            embedding=embedding,
        )
|