-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnace_builder.py
More file actions
63 lines (54 loc) · 2.26 KB
/
nace_builder.py
File metadata and controls
63 lines (54 loc) · 2.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import pandas as pd
from node import Node
import logging
# Module-level logger named after this module, so the application's logging
# configuration can route or filter these messages by module name.
logger = logging.getLogger(__name__)
class NACEBuilder:
    """Build the NACE classification tree from the Excel file that defines it.

    After :meth:`build_from_excel` has run, ``nodes`` maps every code (as a
    string) to its ``Node`` and ``root`` is the top of the tree.
    """

    def __init__(self):
        self.nodes: dict = {}
        self.root: Node = None

    @staticmethod
    def _normalize_code(value) -> str:
        """Return a spreadsheet code cell as a string key.

        pandas turns an integer column that contains blanks into floats, so a
        parent code typed as ``1`` can arrive as ``1.0`` while the matching
        code arrives as ``1``. Integer-valued floats are therefore rendered
        without the trailing ``.0`` so both columns produce the same key.
        """
        if isinstance(value, float) and value.is_integer():
            return str(int(value))
        return str(value)

    @staticmethod
    def _optional(row, column):
        """Return ``row[column]``, or ``None`` when the cell is empty (NaN/NA)."""
        value = row[column]
        return value if pd.notna(value) else None

    def build_from_excel(self, filename: str = None) -> "tuple[dict, Node]":
        """Build the NACE graph from an Excel file.

        The sheet must provide the columns ``CODE``, ``NAME``, ``LEVEL``,
        ``PARENT_CODE``, ``Includes``, ``IncludesAlso``, ``Excludes`` and
        ``Implementation_rule``.

        Args:
            filename: Path (or any object accepted by ``pandas.read_excel``)
                of the Excel file to load.

        Returns:
            A ``(nodes, root)`` tuple: the dict mapping each code to its node,
            and the root node. When the sheet does not contain exactly one
            node without a parent, an artificial root with code ``'NACE'`` is
            created, registered in ``nodes`` and made the parent of every
            parentless node.

        Raises:
            ValueError: If ``filename`` is ``None``.
        """
        if filename is None:
            raise ValueError("filename is required to build the NACE graph")

        # Start from a clean state so that calling this method again on the
        # same instance does not mix in nodes (or a stale artificial root)
        # left over from a previous build.
        self.nodes = {}
        self.root = None

        # Load Excel
        df = pd.read_excel(filename)
        logger.info("✅ Loaded %s entries", len(df))

        # Create all nodes
        for _, row in df.iterrows():
            parent_cell = self._optional(row, 'PARENT_CODE')
            node = Node(
                code=self._normalize_code(row['CODE']),
                name=row['NAME'],
                desc=row['NAME'],  # Using name as desc
                level=int(row['LEVEL']),
                # Normalised exactly like CODE so the lookup below can match.
                parent_code=(
                    None if parent_cell is None
                    else self._normalize_code(parent_cell)
                ),
                includes=self._optional(row, 'Includes'),
                includes_also=self._optional(row, 'IncludesAlso'),
                excludes=self._optional(row, 'Excludes'),
                implementation_rule=self._optional(row, 'Implementation_rule'),
            )
            self.nodes[node.code] = node

        # Build relationships
        orphans = []
        for node in self.nodes.values():
            if not node.parent_code:
                continue
            parent = self.nodes.get(node.parent_code)
            if parent is None:
                # The referenced parent is not in the sheet; the node stays
                # in ``nodes`` but cannot be attached to the tree.
                orphans.append(node.code)
                continue
            parent.add_child(node)
        if orphans:
            logger.warning(
                "%s node(s) reference a parent code that is not in the file "
                "and were left unattached (first few: %s)",
                len(orphans), orphans[:10],
            )

        # Find root
        roots = [n for n in self.nodes.values() if not n.parent_code]
        if len(roots) == 1:
            self.root = roots[0]
        else:
            # Create artificial root
            self.root = Node(
                code='NACE',
                name='NACE Rev. 2.1',
                desc='Statistical Classification of Economic Activities',
                level=0
            )
            self.nodes["NACE"] = self.root
            for root in roots:
                self.root.add_child(root)
                root.add_parent(self.root.code)

        logger.info("Built graph with %s nodes", len(self.nodes))
        return self.nodes, self.root