Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -832,6 +832,67 @@ of create and modify timestamps for every AVU returned from the server:
datetime.datetime(2022, 9, 19, 15, 26, 7)
```

Disabling AVU reloads from the iRODS server
-------------------------------------------

With the default setting of `reload = True`, an iRODSMetaCollection will
proactively read all current AVUs back from the iRODS server after any
metadata write done by the client. This helps methods such as `items()`
to return an up-to-date result. Changing that default can, however, greatly
increase code efficiency if, for example, many AVUs must be added or deleted
at once without reading any of them back.

```
# Make a metadata view in which AVUs are not reloaded, for quick update:
non_current_metadata_view = obj.metadata(reload = False)
for i in range(10):
non_current_metadata_view.add("my_key", "my_value_"+str(i))

# Force reload of AVUs and display:
current_metadata = obj.metadata().items()
from pprint import pp
pp(current_metadata)
```

Subclassing iRODSMeta
---------------------
The keyword option `iRODSMeta_type` selects an iRODSMeta subclass to act
as the translator between the native iRODS metadata APIs and the form in
which the AVUs they convey are presented to the client.

An example is the `irods.meta.iRODSBinOrStringMeta` class which uses the
`base64` module to "hide" arbitrary bytestrings within the `value` and
`units` attributes of an iRODS metadata AVU:

```
from irods.meta import iRODSBinOrStringMeta as MyMeta
d = session.data_objects.get('/path/to/object')
unencodable_octets = '\u1000'.encode('utf8')[:-1]

# Use our custom client-metadata type to store arbitrary octet strings
meta_view = d.metadata(iRODSMeta_type = MyMeta)
meta_view.set(m1 := MyMeta('mybinary', unencodable_octets, b'\x02'))

# Show that traditional AVUs can exist alongside the custom kind.
irods.client_configuration.connections.xml_parser_default = 'QUASI_XML'
meta_view.set(m2 := MyMeta('mytext', '\1', '\2'))

try:
# These two lines are equivalent:
assert {m1,m2} <= (all_avus := set(meta_view.items()))
assert {tuple(m1),tuple(m2)} <= all_avus
finally:
del meta_view['mytext'], meta_view['mybinary']
```

Whereas the content of native iRODS AVUs must obey some valid text encoding as
determined by the resident ICAT DB, the above is a possible alternative - albeit
one semantically bound to the local application that defines the needed
translations. Still, this can be a valid usage for users who need a guarantee
that any given octet string they might generate can be placed into metadata without
violating standard text encodings.

Atomic operations on metadata
-----------------------------

Expand Down
54 changes: 36 additions & 18 deletions irods/manager/metadata_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,23 +29,44 @@ class InvalidAtomicAVURequest(Exception):

class MetadataManager(Manager):

def __init__(self, *args):
    """Install the default option table, then defer to the base initializer.

    All positional arguments are forwarded unchanged to the parent class.
    """
    # Bookkeeping options; __call__ copies this table and overrides entries
    # on the manager object it returns.
    self._opts = dict(admin=False, timestamps=False, iRODSMeta_type=iRODSMeta)
    super().__init__(*args)

@property
def use_timestamps(self):
    """Return the manager's current 'timestamps' option.

    The value lives in the ``_opts`` table, which is seeded in ``__init__``
    and overridden per copy by ``__call__``.
    """
    return self._opts['timestamps']

__kw : Dict[str, Any] = {} # default (empty) keywords


def _updated_keywords(self, opts):
    """Return a new dict of this manager's API keywords overlaid with `opts`.

    The stored keyword dict itself is left unmodified.
    """
    merged = dict(self.__kw)
    merged.update(opts)
    return merged

def __call__(self, admin=False, timestamps=False, **irods_kw_opt):
if admin:
irods_kw_opt.update([(kw.ADMIN_KW, "")])
def get_api_keywords(self):
    """Return a copy of the API keywords currently held by this manager."""
    return dict(self.__kw)

def __call__(self, **flags):
    """Return a shallow copy of this manager with the given options overridden.

    Options seeded in ``__init__`` are 'admin', 'timestamps' and
    'iRODSMeta_type'. A flag passed as None is ignored, so the copy inherits
    the current setting for it.
    """
    # Make a new shallow copy of the manager object, with its own option table.
    new_self = copy.copy(self)
    new_self._opts = copy.copy(self._opts)

    # Update the flags that do bookkeeping in the returned (new) manager object.
    new_self._opts.update(
        (key, val) for key, val in flags.items() if val is not None
    )

    # Give the copy its own keyword dict before touching ADMIN_KW. Writing
    # through self.__kw would alter the dict shared with the source manager
    # (and, initially, with the class), leaking the admin flag to other views.
    api_keywords = self.__kw.copy()
    if new_self._opts.get('admin'):
        api_keywords[kw.ADMIN_KW] = ""
    else:
        api_keywords.pop(kw.ADMIN_KW, None)
    new_self.__kw = api_keywords

    return new_self

@staticmethod
Expand All @@ -67,6 +88,9 @@ def _model_class_to_resource_description(model_cls):
}[model_cls]

def get(self, model_cls, path):
if not path:
# Short circuit. This should be of the same type as the object returned at the function's end.
return []
resource_type = self._model_class_to_resource_type(model_cls)
model = {
"d": DataObjectMeta,
Expand Down Expand Up @@ -96,9 +120,9 @@ def meta_opts(row):
return opts

return [
iRODSMeta(
row[model.name], row[model.value], row[model.units], **meta_opts(row)
)
self._opts['iRODSMeta_type'](None,None,None)._from_column_triple(
row[model.name], row[model.value], row[model.units],
**meta_opts(row))
for row in results
]

Expand All @@ -109,9 +133,7 @@ def add(self, model_cls, path, meta, **opts):
"add",
"-" + resource_type,
path,
meta.name,
meta.value,
meta.units,
*meta._to_column_triple(),
**self._updated_keywords(opts)
)
request = iRODSMessage(
Expand All @@ -128,9 +150,7 @@ def remove(self, model_cls, path, meta, **opts):
"rm",
"-" + resource_type,
path,
meta.name,
meta.value,
meta.units,
*meta._to_column_triple(),
**self._updated_keywords(opts)
)
request = iRODSMessage(
Expand Down Expand Up @@ -167,9 +187,7 @@ def set(self, model_cls, path, meta, **opts):
"set",
"-" + resource_type,
path,
meta.name,
meta.value,
meta.units,
*meta._to_column_triple(),
**self._updated_keywords(opts)
)
request = iRODSMessage(
Expand Down
71 changes: 58 additions & 13 deletions irods/meta.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,40 @@
import base64
import copy


class iRODSMeta:

def _to_column_triple(self):
    """Return the (name, value, units) tuple handed to the metadata API calls.

    The value, and the units when truthy, are passed through FX. Missing or
    empty units are emitted as '' without being transformed.
    """
    name = self.name
    value = self.FX(self.value)
    units = self.FX(self.units) if self.units else ''
    return (name, value, units)

def _from_column_triple(self, name, value, units, **kw):
    """Populate this instance from a raw (name, value, units) row and return it.

    The value, and the units when truthy, are passed through RX; falsy units
    become None. Remaining keyword arguments go to the low-level initializer.
    """
    decoded_value = self.RX(value)
    decoded_units = self.RX(units) if units else None
    self.__low_level_init(name, decoded_value, units=decoded_units, **kw)
    return self

# Field transforms: RX is applied in _from_column_triple and FX in
# _to_column_triple, to the value and (non-empty) units. Both are the
# identity function in this class.
RX = FX = staticmethod(lambda _:_)
# Names of the optional attributes that __init__ gathers and that
# __low_level_init assigns (None when not supplied).
INIT_KW_ARGS = 'units avu_id create_time modify_time'.split()

def __init__(
    self, name, value, /, units=None, *, avu_id=None, create_time=None, modify_time=None,
):
    """Initialize an AVU, or defer initialization when name and value are both falsy.

    Deferral lets the instance be filled in later via
    _from_column_triple(...), which is the pathway for user-defined
    encodings of the AVU components. A deferred instance has none of its
    attributes set until then.
    """
    if name or value:
        supplied = {
            'units': units,
            'avu_id': avu_id,
            'create_time': create_time,
            'modify_time': modify_time,
        }
        # Only forward the attributes named in INIT_KW_ARGS; names not
        # supplied here resolve to None.
        kw = {key: supplied.get(key) for key in self.INIT_KW_ARGS}
        self.__low_level_init(name, value, **kw)

def __low_level_init(self, name, value, **kw):
    """Assign name and value, then every attribute listed in INIT_KW_ARGS.

    An attribute missing from `kw` is set to None.
    """
    self.name = name
    self.value = value
    for attr in self.INIT_KW_ARGS:
        setattr(self, attr, kw.get(attr))

def __eq__(self, other):
    """Compare by the tuple obtained from iterating each operand.

    Any iterable (for example a plain tuple) can therefore compare equal to
    an AVU. A non-iterable operand yields NotImplemented, so that ``==``
    evaluates to False instead of raising TypeError.
    """
    try:
        other_fields = tuple(other)
    except TypeError:
        return NotImplemented
    return tuple(self) == other_fields
Expand All @@ -20,7 +46,20 @@ def __iter__(self):
yield self.units

def __repr__(self):
    """Return '<ClassName avu_id name value units>'.

    Attributes not yet assigned are shown as None, because __init__ may
    defer initialization and a repr should not raise on such an instance.
    """
    avu_id, name, value, units = (
        getattr(self, attr, None) for attr in ('avu_id', 'name', 'value', 'units')
    )
    return f"<{self.__class__.__name__} {avu_id} {name} {value} {units}>"

def __hash__(self):
    """Hash the tuple obtained by iterating this instance, matching __eq__."""
    fields = tuple(self)
    return hash(fields)

class iRODSBinOrStringMeta(iRODSMeta):
    """iRODSMeta variant whose value and units may be str or arbitrary bytes.

    Byte strings are carried as a backslash followed by their base64 encoding;
    text passes through untouched.
    NOTE(review): a plain-text field that itself begins with a backslash would
    be read back as base64 — confirm callers never store such text.
    """

    @staticmethod
    def RX(value):
        """Decode a field received from the server.

        A field starting with a backslash has the remainder base64-decoded to
        bytes. Anything else, including the empty string, is returned as-is.
        """
        # Slice instead of indexing so that an empty field does not raise.
        if value[:1] != '\\':
            return value
        return base64.decodebytes(value[1:].encode('utf8'))

    @staticmethod
    def FX(value):
        """Encode a field for sending: bytes-like input becomes backslash + base64."""
        if isinstance(value, (bytes, bytearray)):
            return b'\\' + base64.encodebytes(value).strip()
        return value


class BadAVUOperationKeyword(Exception):
Expand Down Expand Up @@ -84,14 +123,16 @@ def __init__(self, operation, avu, **kw):
setattr(self, atr, locals()[atr])


import copy


class iRODSMetaCollection:

def __call__(self, **opts):
    """Return a copy of this collection whose manager has **opts applied.

    Optional parameters in **opts are:

    admin (default: False): apply ADMIN_KW to future metadata operations.
    timestamps (default: False): attach (ctime,mtime) timestamp attributes to AVUs received from iRODS.
    reload (default: True): when False, _reset_metadata keeps the cached AVU list
        instead of re-reading it from the server.
    iRODSMeta_type (default: iRODSMeta): class used to build AVU objects from
        (name, value, units) arguments.
    """
    x = copy.copy(self)
    # Calling the manager yields a new manager carrying the merged options.
    x._manager = (x._manager)(**opts)
    x._reset_metadata()
    return x

Expand All @@ -102,7 +143,11 @@ def __init__(self, manager, model_cls, path):
self._reset_metadata()

def _reset_metadata(self):
    """Refresh the cached AVU list, unless the 'reload' option is switched off.

    On first use the cache is seeded from the manager's result for an empty
    path. A missing 'reload' option is recorded as True.
    """
    m = self._manager
    if not hasattr(self, "_meta"):
        # Seed the cache with a result of the same type that get() returns.
        self._meta = m.get(None, "")
    if m._opts.setdefault('reload', True):
        self._meta = m.get(self._model_cls, self._path)

def get_all(self, key):
"""
Expand All @@ -129,7 +174,7 @@ def get_one(self, key):
def _get_meta(self, *args):
    """Normalize arguments into a single AVU object.

    A lone argument is returned unchanged. Two or more arguments are passed
    to the manager's configured 'iRODSMeta_type' to build a new object.

    Raises:
        ValueError: if called with no arguments.
    """
    if not args:
        raise ValueError("Must specify an iRODSMeta object or (key, value, units)")
    if len(args) == 1:
        return args[0]
    return self._manager._opts['iRODSMeta_type'](*args)

def apply_atomic_operations(self, *avu_ops):
self._manager.apply_atomic_operations(self._model_cls, self._path, *avu_ops)
Expand Down
72 changes: 72 additions & 0 deletions irods/test/meta_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
AVUOperation,
BadAVUOperationValue,
BadAVUOperationKeyword,
iRODSBinOrStringMeta,
)
from irods.models import DataObject, Collection, Resource, CollectionMeta
import irods.test.helpers as helpers
Expand Down Expand Up @@ -798,6 +799,77 @@ def test_xml_mode_addresses_odd_metadata_characters__issue_582(self):
# in use, with the "odd" characters being present in the metadata value.
del obj.metadata[attr_str]

def test_binary_avu_fields__issue_707(self):
    """Byte-string AVU fields round-trip through an iRODSBinOrStringMeta view.

    Also checks that an ordinary text AVU can sit alongside the binary one
    under the same attribute name.
    """
    meta_coll = self.obj.metadata(iRODSMeta_type=iRODSBinOrStringMeta)
    # Truncating the 3-byte UTF-8 encoding leaves an undecodable sequence.
    illegal_unicode_sequence = '\u1000'.encode('utf8')[:2]
    avu_name = 'issue709'
    value = b'value_' + illegal_unicode_sequence
    units = b'units_' + illegal_unicode_sequence
    meta_coll.set(avu_name, value, units)

    self.assertEqual(
        meta_coll.get_one(avu_name),
        (avu_name, value, units)
    )

    new_avu = iRODSMeta(avu_name, '\u1000', '\u1001')
    meta_coll.add(*new_avu)
    all_relevant_avus = meta_coll.get_all(avu_name)
    self.assertIn(new_avu, all_relevant_avus)

    # TODO: these could be in a separate test - make an issue regarding
    # hash and sets of iRODSMeta.
    all_avus = meta_coll.items()
    self.assertLessEqual(
        set(all_relevant_avus),
        set(all_avus),
    )
    self.assertIn((avu_name, value, units), all_relevant_avus)

def test_cascading_changes_of_metadata_manager_options__issue_709(self):
    """Options set on one metadata view carry over to views derived from it."""

    def get_option(metacoll, key):
        return metacoll._manager._opts[key]

    d = None
    try:
        d = self.sess.data_objects.create(f'{self.coll.path}/issue_709_test_1')
        base_view = d.metadata
        self.assertEqual(get_option(base_view, 'admin'), False)

        # Turning on 'admin' leaves 'timestamps' at its default.
        admin_view = base_view(admin=True)
        self.assertEqual(get_option(admin_view, 'timestamps'), False)
        self.assertEqual(get_option(admin_view, 'admin'), True)

        # Adding 'timestamps' keeps the inherited 'admin' setting and keyword.
        admin_ts_view = admin_view(timestamps=True)
        self.assertEqual(get_option(admin_ts_view, 'timestamps'), True)
        self.assertEqual(get_option(admin_ts_view, 'admin'), True)
        self.assertEqual(admin_ts_view._manager.get_api_keywords().get(kw.ADMIN_KW), "")

        # Switching 'admin' back off removes the keyword again.
        plain_view = admin_ts_view(admin=False)
        self.assertEqual(get_option(plain_view, 'admin'), False)
        self.assertEqual(plain_view._manager.get_api_keywords().get(kw.ADMIN_KW), None)
    finally:
        if d:
            d.unlink(force=True)

def test_reload_can_be_deactivated__issue_768(self):
    """A view created with reload=False does not pick up AVUs it has just written."""
    # Set an initial AVU.
    metacoll = self.obj.metadata
    first_avu = iRODSMeta('aa', 'bb', 'cc')
    metacoll.set(first_avu)

    # Initial defaults will always reload the AVU list from the server, so the new AVU should be seen.
    self.assertIn(first_avu, metacoll.items())

    # With reload switched off, an AVU just set through that view should not be seen.
    no_reload_view = metacoll(reload=False)
    second_avu = iRODSMeta('xx', 'yy', 'zz')
    no_reload_view.set(second_avu)
    items = no_reload_view.items()
    self.assertIn(first_avu, items)
    self.assertNotIn(second_avu, items)

    # Restore the old setting. Check that both AVUs are seen as present.
    items_reloaded = no_reload_view(reload=True).items()
    self.assertIn(first_avu, items_reloaded)
    self.assertIn(second_avu, items_reloaded)

if __name__ == "__main__":
# let the tests find the parent irods lib
Expand Down
Loading