cad-to-data

SKILL.md

CAD To Data

Overview

Based on DDC methodology (Chapter 2.4), this skill converts CAD and BIM files to structured data, extracting element properties, quantities, and relationships from Revit, IFC, DWG, and DGN files.

Book Reference: "Преобразование данных в структурированную форму" / "Data Transformation to Structured Form"

Quick Start

from dataclasses import dataclass, field
from enum import Enum
from typing import List, Dict, Optional, Any, Tuple, Generator
from datetime import datetime
import json

class CADFormat(Enum):
    """Supported CAD/BIM formats"""
    IFC = "ifc"
    RVT = "rvt"
    DWG = "dwg"
    DXF = "dxf"
    DGN = "dgn"
    NWD = "nwd"
    STEP = "step"

class ElementCategory(Enum):
    """BIM element categories"""
    WALL = "wall"
    FLOOR = "floor"
    ROOF = "roof"
    CEILING = "ceiling"
    DOOR = "door"
    WINDOW = "window"
    COLUMN = "column"
    BEAM = "beam"
    STAIR = "stair"
    RAMP = "ramp"
    FURNITURE = "furniture"
    EQUIPMENT = "equipment"
    PIPE = "pipe"
    DUCT = "duct"
    CABLE_TRAY = "cable_tray"
    SPACE = "space"
    GENERIC = "generic"

@dataclass
class Point3D:
    """3D point"""
    x: float
    y: float
    z: float

@dataclass
class BoundingBox3D:
    """3D bounding box"""
    min_point: Point3D
    max_point: Point3D

    @property
    def width(self) -> float:
        return abs(self.max_point.x - self.min_point.x)

    @property
    def depth(self) -> float:
        return abs(self.max_point.y - self.min_point.y)

    @property
    def height(self) -> float:
        return abs(self.max_point.z - self.min_point.z)

    @property
    def volume(self) -> float:
        return self.width * self.depth * self.height

@dataclass
class MaterialInfo:
    """Material information"""
    name: str
    category: str
    color: Optional[str] = None
    area: float = 0.0
    volume: float = 0.0
    properties: Dict[str, Any] = field(default_factory=dict)

@dataclass
class CADElement:
    """Extracted CAD/BIM element"""
    id: str
    guid: str
    name: str
    category: ElementCategory
    type_name: str
    level: Optional[str] = None
    bounding_box: Optional[BoundingBox3D] = None
    properties: Dict[str, Any] = field(default_factory=dict)
    quantities: Dict[str, float] = field(default_factory=dict)
    materials: List[MaterialInfo] = field(default_factory=list)
    relationships: Dict[str, List[str]] = field(default_factory=dict)

@dataclass
class CADLayer:
    """CAD layer information"""
    name: str
    color: Optional[str] = None
    line_type: Optional[str] = None
    visible: bool = True
    element_count: int = 0

@dataclass
class CADExtractionResult:
    """Result of CAD extraction"""
    file_path: str
    file_format: CADFormat
    elements: List[CADElement]
    layers: List[CADLayer]
    levels: List[str]
    total_elements: int
    categories: Dict[str, int]
    extraction_time: float
    metadata: Dict[str, Any] = field(default_factory=dict)


class IFCExtractor:
    """Extract data from IFC files"""

    def __init__(self):
        self.schema_version = "IFC4"
        self.element_mapping = self._build_element_mapping()

    def _build_element_mapping(self) -> Dict[str, ElementCategory]:
        """Map IFC types to categories"""
        return {
            "IfcWall": ElementCategory.WALL,
            "IfcWallStandardCase": ElementCategory.WALL,
            "IfcSlab": ElementCategory.FLOOR,
            "IfcRoof": ElementCategory.ROOF,
            "IfcCeiling": ElementCategory.CEILING,
            "IfcDoor": ElementCategory.DOOR,
            "IfcWindow": ElementCategory.WINDOW,
            "IfcColumn": ElementCategory.COLUMN,
            "IfcBeam": ElementCategory.BEAM,
            "IfcStair": ElementCategory.STAIR,
            "IfcRamp": ElementCategory.RAMP,
            "IfcFurnishingElement": ElementCategory.FURNITURE,
            "IfcPipeSegment": ElementCategory.PIPE,
            "IfcDuctSegment": ElementCategory.DUCT,
            "IfcCableCarrierSegment": ElementCategory.CABLE_TRAY,
            "IfcSpace": ElementCategory.SPACE,
        }

    def extract(
        self,
        file_path: str,
        categories: Optional[List[ElementCategory]] = None
    ) -> CADExtractionResult:
        """
        Extract data from IFC file.

        Args:
            file_path: Path to IFC file
            categories: Optional filter for categories

        Returns:
            Extraction result
        """
        start_time = datetime.now()

        # In production, use ifcopenshell:
        # import ifcopenshell
        # ifc_file = ifcopenshell.open(file_path)

        # Simulated extraction
        elements = self._simulate_ifc_elements()

        # Filter by category if specified
        if categories:
            elements = [e for e in elements if e.category in categories]

        # Build category counts
        category_counts = {}
        for element in elements:
            cat = element.category.value
            category_counts[cat] = category_counts.get(cat, 0) + 1

        # Extract levels
        levels = list(set(e.level for e in elements if e.level))

        extraction_time = (datetime.now() - start_time).total_seconds()

        return CADExtractionResult(
            file_path=file_path,
            file_format=CADFormat.IFC,
            elements=elements,
            layers=[],  # IFC doesn't use layers in traditional sense
            levels=levels,
            total_elements=len(elements),
            categories=category_counts,
            extraction_time=extraction_time,
            metadata={
                "schema": self.schema_version,
                "project_name": "Sample Project"
            }
        )

    def _simulate_ifc_elements(self) -> List[CADElement]:
        """Simulate IFC element extraction"""
        elements = []

        # Sample walls
        for i in range(10):
            elements.append(CADElement(
                id=f"wall_{i}",
                guid=f"1234567890ABCDEF{i:04d}",
                name=f"Basic Wall {i}",
                category=ElementCategory.WALL,
                type_name="Basic Wall:200mm Concrete",
                level="Level 1",
                bounding_box=BoundingBox3D(
                    min_point=Point3D(i * 5, 0, 0),
                    max_point=Point3D(i * 5 + 5, 0.2, 3)
                ),
                properties={
                    "IsExternal": True,
                    "FireRating": "1 HR",
                    "LoadBearing": True
                },
                quantities={
                    "Length": 5.0,
                    "Height": 3.0,
                    "Width": 0.2,
                    "Area": 15.0,
                    "Volume": 3.0
                },
                materials=[
                    MaterialInfo(
                        name="Concrete",
                        category="Concrete",
                        area=15.0,
                        volume=3.0
                    )
                ]
            ))

        # Sample doors
        for i in range(5):
            elements.append(CADElement(
                id=f"door_{i}",
                guid=f"DOOR0000000000{i:04d}",
                name=f"Single Door {i}",
                category=ElementCategory.DOOR,
                type_name="Single Flush:900x2100",
                level="Level 1",
                properties={
                    "FireRating": "None",
                    "IsExternal": False
                },
                quantities={
                    "Width": 0.9,
                    "Height": 2.1,
                    "Area": 1.89
                },
                relationships={
                    "host_wall": [f"wall_{i}"]
                }
            ))

        # Sample spaces
        for i in range(3):
            elements.append(CADElement(
                id=f"space_{i}",
                guid=f"SPACE000000000{i:04d}",
                name=f"Room {i+101}",
                category=ElementCategory.SPACE,
                type_name="Office",
                level="Level 1",
                quantities={
                    "Area": 25.0 + i * 5,
                    "Volume": 75.0 + i * 15,
                    "Perimeter": 20.0 + i * 2
                },
                properties={
                    "OccupancyType": "Office",
                    "DesignOccupancy": 4
                }
            ))

        return elements

    def get_quantities(
        self,
        elements: List[CADElement],
        quantity_type: str = "all"
    ) -> Dict[str, float]:
        """Aggregate quantities from elements"""
        totals = {}

        for element in elements:
            for qty_name, qty_value in element.quantities.items():
                if quantity_type == "all" or qty_name.lower() == quantity_type.lower():
                    key = f"{element.category.value}_{qty_name}"
                    totals[key] = totals.get(key, 0) + qty_value

        return totals


class DWGExtractor:
    """Extract data from DWG/DXF files"""

    def __init__(self):
        self.supported_entities = ["LINE", "POLYLINE", "CIRCLE", "ARC", "TEXT", "MTEXT", "INSERT", "HATCH"]

    def extract(
        self,
        file_path: str,
        layers: Optional[List[str]] = None
    ) -> CADExtractionResult:
        """Extract data from DWG file"""
        start_time = datetime.now()

        # In production, use ezdxf:
        # import ezdxf
        # doc = ezdxf.readfile(file_path)

        # Simulated extraction
        elements, cad_layers = self._simulate_dwg_extraction()

        # Filter by layers if specified
        if layers:
            elements = [e for e in elements if e.properties.get("layer") in layers]

        extraction_time = (datetime.now() - start_time).total_seconds()

        return CADExtractionResult(
            file_path=file_path,
            file_format=CADFormat.DWG,
            elements=elements,
            layers=cad_layers,
            levels=[],
            total_elements=len(elements),
            categories={"generic": len(elements)},
            extraction_time=extraction_time,
            metadata={"units": "millimeters"}
        )

    def _simulate_dwg_extraction(self) -> Tuple[List[CADElement], List[CADLayer]]:
        """Simulate DWG extraction"""
        elements = []
        layers = [
            CADLayer("Walls", "Red", "Continuous", True, 15),
            CADLayer("Doors", "Blue", "Continuous", True, 8),
            CADLayer("Windows", "Cyan", "Continuous", True, 12),
            CADLayer("Dimensions", "Green", "Continuous", True, 50),
            CADLayer("Text", "White", "Continuous", True, 25),
        ]

        # Simulate polylines (walls)
        for i in range(15):
            elements.append(CADElement(
                id=f"polyline_{i}",
                guid=f"PL{i:08d}",
                name=f"Polyline {i}",
                category=ElementCategory.GENERIC,
                type_name="POLYLINE",
                properties={
                    "layer": "Walls",
                    "color": "Red",
                    "closed": True
                },
                quantities={
                    "Length": 10.5 + i * 0.5
                }
            ))

        return elements, layers


class CADDataConverter:
    """
    Convert CAD/BIM files to structured data.
    Based on DDC methodology Chapter 2.4.
    """

    def __init__(self):
        self.ifc_extractor = IFCExtractor()
        self.dwg_extractor = DWGExtractor()

    def convert(
        self,
        file_path: str,
        output_format: str = "json"
    ) -> Dict[str, Any]:
        """
        Convert CAD file to structured data.

        Args:
            file_path: Path to CAD file
            output_format: Output format (json, csv, dataframe)

        Returns:
            Structured data
        """
        # Detect file format
        file_format = self._detect_format(file_path)

        # Extract based on format
        if file_format == CADFormat.IFC:
            result = self.ifc_extractor.extract(file_path)
        elif file_format in [CADFormat.DWG, CADFormat.DXF]:
            result = self.dwg_extractor.extract(file_path)
        else:
            raise ValueError(f"Unsupported format: {file_format}")

        # Convert to output format
        return self._format_output(result, output_format)

    def _detect_format(self, file_path: str) -> CADFormat:
        """Detect CAD file format"""
        extension = file_path.lower().split(".")[-1]

        format_map = {
            "ifc": CADFormat.IFC,
            "rvt": CADFormat.RVT,
            "dwg": CADFormat.DWG,
            "dxf": CADFormat.DXF,
            "dgn": CADFormat.DGN,
            "nwd": CADFormat.NWD,
        }

        return format_map.get(extension, CADFormat.IFC)

    def _format_output(
        self,
        result: CADExtractionResult,
        format: str
    ) -> Dict[str, Any]:
        """Format extraction result"""
        output = {
            "file": result.file_path,
            "format": result.file_format.value,
            "total_elements": result.total_elements,
            "categories": result.categories,
            "levels": result.levels,
            "extraction_time": result.extraction_time,
            "elements": []
        }

        for element in result.elements:
            output["elements"].append({
                "id": element.id,
                "guid": element.guid,
                "name": element.name,
                "category": element.category.value,
                "type": element.type_name,
                "level": element.level,
                "properties": element.properties,
                "quantities": element.quantities,
                "materials": [
                    {"name": m.name, "area": m.area, "volume": m.volume}
                    for m in element.materials
                ]
            })

        return output

    def extract_quantities(
        self,
        file_path: str,
        categories: Optional[List[ElementCategory]] = None
    ) -> Dict[str, Any]:
        """Extract quantity takeoff from CAD file"""
        file_format = self._detect_format(file_path)

        if file_format == CADFormat.IFC:
            result = self.ifc_extractor.extract(file_path, categories)
        else:
            result = self.dwg_extractor.extract(file_path)

        # Aggregate quantities by category
        quantities = {}
        for element in result.elements:
            cat = element.category.value
            if cat not in quantities:
                quantities[cat] = {
                    "count": 0,
                    "totals": {}
                }

            quantities[cat]["count"] += 1

            for qty_name, qty_value in element.quantities.items():
                if qty_name not in quantities[cat]["totals"]:
                    quantities[cat]["totals"][qty_name] = 0
                quantities[cat]["totals"][qty_name] += qty_value

        return {
            "file": file_path,
            "quantities": quantities,
            "summary": {
                "total_elements": result.total_elements,
                "categories": list(quantities.keys())
            }
        }

    def extract_schedule(
        self,
        file_path: str,
        category: ElementCategory,
        fields: List[str]
    ) -> List[Dict]:
        """Extract schedule data for specific category"""
        file_format = self._detect_format(file_path)

        if file_format == CADFormat.IFC:
            result = self.ifc_extractor.extract(file_path, [category])
        else:
            result = self.dwg_extractor.extract(file_path)

        schedule = []
        for element in result.elements:
            if element.category == category:
                row = {"id": element.id, "name": element.name, "type": element.type_name}

                for field in fields:
                    if field in element.properties:
                        row[field] = element.properties[field]
                    elif field in element.quantities:
                        row[field] = element.quantities[field]

                schedule.append(row)

        return schedule

    def export_to_json(
        self,
        result: CADExtractionResult,
        output_path: str
    ):
        """Export extraction result to JSON file"""
        output = self._format_output(result, "json")

        with open(output_path, 'w') as f:
            json.dump(output, f, indent=2)

    def generate_report(self, result: CADExtractionResult) -> str:
        """Generate extraction report"""
        report = f"""
# CAD Extraction Report

**File:** {result.file_path}
**Format:** {result.file_format.value}
**Total Elements:** {result.total_elements}
**Extraction Time:** {result.extraction_time:.2f}s

## Elements by Category
"""
        for cat, count in result.categories.items():
            report += f"- **{cat.title()}:** {count}\n"

        if result.levels:
            report += "\n## Levels\n"
            for level in result.levels:
                report += f"- {level}\n"

        if result.layers:
            report += "\n## Layers\n"
            for layer in result.layers:
                report += f"- {layer.name}: {layer.element_count} elements\n"

        return report

Common Use Cases

Extract IFC Data

converter = CADDataConverter()

# Convert IFC to structured data
data = converter.convert("building.ifc", output_format="json")

print(f"Total elements: {data['total_elements']}")
print(f"Categories: {data['categories']}")

# Access elements
for element in data['elements'][:5]:
    print(f"  {element['name']}: {element['type']}")

Extract Quantities

quantities = converter.extract_quantities(
    "building.ifc",
    categories=[ElementCategory.WALL, ElementCategory.FLOOR]
)

print(f"Wall count: {quantities['quantities']['wall']['count']}")
print(f"Total wall area: {quantities['quantities']['wall']['totals']['Area']}")

Generate Schedule

door_schedule = converter.extract_schedule(
    "building.ifc",
    category=ElementCategory.DOOR,
    fields=["Width", "Height", "FireRating", "IsExternal"]
)

for door in door_schedule:
    print(f"{door['name']}: {door.get('Width')}x{door.get('Height')}")

Generate Report

ifc_extractor = IFCExtractor()
result = ifc_extractor.extract("building.ifc")

report = converter.generate_report(result)
print(report)

Quick Reference

Component Purpose
CADDataConverter Main conversion engine
IFCExtractor IFC file extraction
DWGExtractor DWG/DXF extraction
CADElement Extracted element data
CADExtractionResult Complete extraction result
ElementCategory BIM element categories

Resources

Next Steps

Weekly Installs
16
GitHub Stars
55
First Seen
11 days ago
Installed on
gemini-cli16
github-copilot16
codex16
amp16
cline16
kimi-cli16