Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 21 additions & 3 deletions tonic_textual/classes/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def __init__(
id: str,
name: str,
files: List[Dict[str, Any]],
custom_pii_entity_ids: List[str],
generator_config: Optional[Dict[str, PiiState]] = None,
generator_metadata: Optional[Dict[str, BaseMetadata]] = None,
label_block_lists: Optional[Dict[str, List[str]]] = None,
Expand All @@ -66,13 +67,14 @@ def __init__(
docx_comment_policy_name: Optional[docx_comment_policy] = docx_comment_policy.remove,
docx_table_policy_name: Optional[docx_table_policy] = docx_table_policy.remove,
pdf_signature_policy_name: Optional[pdf_signature_policy] = pdf_signature_policy.redact,
pdf_synth_mode_policy: Optional[pdf_synth_mode_policy] = pdf_synth_mode_policy.V1
pdf_synth_mode_policy: Optional[pdf_synth_mode_policy] = pdf_synth_mode_policy.V1,
):
self.__initialize(
client,
id,
name,
files,
custom_pii_entity_ids,
generator_config,
generator_metadata,
label_block_lists,
Expand All @@ -90,6 +92,7 @@ def __initialize(
id: str,
name: str,
files: List[Dict[str, Any]],
custom_pii_entity_ids: List[str],
generator_config: Optional[Dict[str, PiiState]] = None,
generator_metadata: Optional[Dict[str, BaseMetadata]] = None,
label_block_lists: Optional[Dict[str, List[str]]] = None,
Expand All @@ -110,8 +113,20 @@ def __initialize(
self.datasetfile_service = DatasetFileService(self.client)
self.generator_config = generator_config
self.generator_metadata = generator_metadata
self.label_block_lists = label_block_lists
self.label_allow_lists = label_allow_lists

# Each list entry is indexed with the 'regexes' key below; only that list of
# patterns is kept on the instance. `label_block_lists` defaults to None in the
# signature, so a missing mapping is treated as empty instead of raising
# TypeError when it is iterated. The allow list gets the same treatment.
# NOTE(review): entries are assumed to be dict-like with a 'regexes' key; a
# missing key still raises KeyError, as before - confirm against the API payload.
allow_list: Dict[str, List[str]] = {
    label: entry['regexes']
    for label, entry in (label_allow_lists or {}).items()
}

block_list: Dict[str, List[str]] = {
    label: entry['regexes']
    for label, entry in (label_block_lists or {}).items()
}

self.label_block_lists = block_list
self.label_allow_lists = allow_list
self.docx_image_policy = docx_image_policy_name
self.docx_comment_policy = docx_comment_policy_name
self.docx_table_policy = docx_table_policy_name
Expand All @@ -136,6 +151,7 @@ def __initialize(
)
for f in files
]
self.custom_pii_entity_ids=custom_pii_entity_ids

if len(self.files) > 0:
self.num_columns = max([f.num_columns for f in self.files])
Expand Down Expand Up @@ -289,6 +305,7 @@ def edit(
new_dataset["id"],
new_dataset["name"],
new_dataset["files"],
new_dataset["customPiiEntityIds"],
convert_payload_to_generator_config(new_dataset["generatorSetup"]),
convert_payload_to_generator_metadata(new_dataset["generatorMetadata"]),
new_dataset["labelBlockLists"],
Expand Down Expand Up @@ -640,6 +657,7 @@ def __refetch_dataset(self):
updated_dataset["id"],
updated_dataset["name"],
updated_dataset["files"],
updated_dataset["customPiiEntityIds"],
convert_payload_to_generator_config(updated_dataset["generatorSetup"]),
convert_payload_to_generator_metadata(updated_dataset["generatorMetadata"]),
updated_dataset["labelBlockLists"],
Expand Down
6 changes: 6 additions & 0 deletions tonic_textual/enums/pii_type.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
from enum import Enum

class DeprecatedPiiType(str, Enum):
    """String-valued enum of entity type names grouped as "deprecated".

    Every member's value is identical to its name, and because the enum
    mixes in ``str`` each member compares equal to that plain string
    (``DeprecatedPiiType.PERSON == "PERSON"``).

    NOTE(review): the class name suggests these labels were retired from
    ``PiiType``; confirm the intended semantics with the API owners.
    """

    WORK_OF_ART = "WORK_OF_ART"
    PERSON = "PERSON"
    PROJECT_NAME = "PROJECT_NAME"
    USERNAME = "USERNAME"
    US_DRIVER_LICENSE = "US_DRIVER_LICENSE"

class PiiType(str, Enum):
NUMERIC_VALUE = "NUMERIC_VALUE"
Expand Down
1 change: 1 addition & 0 deletions tonic_textual/services/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def get_dataset(self, dataset_name):
dataset["id"],
dataset["name"],
dataset["files"],
dataset["customPiiEntityIds"],
convert_payload_to_generator_config(dataset.get("generatorSetup")),
convert_payload_to_generator_metadata(generator_metadata_raw),
dataset.get("labelBlockLists"),
Expand Down