import typing as T
from enum import Enum
import jsonschema
class Direction(Enum):
    """Direction in which data is being canonicalized."""

    # Default direction used by ``Type.canonicalize``.
    GET = 0
    # Direction in which sys-defined values are replaced by their default.
    PUT = 1
class Type(object):
    """Base class for schema types.

    Stores the generic annotations shared by all types and provides the
    default schema generation, validation and canonicalization behaviour
    that subclasses extend.
    """

    def __init__(self, *args, title=None, description=None, required=False,
                 default=None, examples=None, format=None, read_only=None,
                 write_only=None, sys_defined=None, **kwargs):
        # Subclasses pass their leftover arguments up to this constructor;
        # anything still unconsumed at this point is rejected.
        if args or kwargs:
            raise ValueError()
        self.title = title
        self.description = description
        self.required = required
        # May be a plain value or a callable producing the value.
        self.default = default
        self.sys_defined = sys_defined
        self.examples = examples
        self.format = format
        self.read_only = read_only
        self.write_only = write_only

    @property
    def schema(self) -> dict:
        """Return the schema keywords for every annotation that is set.

        Attributes that are ``None`` are left out.  A callable default is
        invoked and its result is stored under ``'default'``.
        """
        annotations = (
            ('title', self.title),
            ('description', self.description),
            ('default', self.default),
            ('sysDefined', self.sys_defined),
            ('examples', self.examples),
            ('format', self.format),
            ('readOnly', self.read_only),
            ('writeOnly', self.write_only),
        )
        result = {
            keyword: value
            for keyword, value in annotations
            if value is not None
        }
        # Re-assigning an existing key keeps its position in the dict.
        if 'default' in result and callable(result['default']):
            result['default'] = result['default']()
        return result

    def full_text_search_representation(self, data) -> T.Optional[str]:
        """Return the searchable text for ``data``; the base type has none."""
        return None

    def validate(self, data):
        """Validate the data.

        :returns: the Type for which validation succeeded. See also
            :meth:`OneOf.validate`
        :rtype: Type
        """
        jsonschema.validate(data, self.schema)
        return self

    # noinspection PyMethodMayBeStatic
    def canonicalize(self, data, direction=Direction.GET):
        """Return ``data``, or the default when it is missing or overridden.

        The default (called first if it is callable) is returned when
        ``data`` is ``None``, or when this type is sys-defined and the
        direction is ``Direction.PUT``.
        """
        # NOTE(review): ``required`` defaults to False, so the second operand
        # is true unless ``required=None`` is passed explicitly -- confirm
        # that this is the intended condition.
        missing = data is None and self.required is not None
        # Currently for sys-defined values the default is used (can be
        # callable).
        overridden = self.sys_defined is True and direction is Direction.PUT
        if not (missing or overridden):
            return data
        fallback = self.default
        return fallback() if callable(fallback) else fallback
class List(Type):
    """Array type whose items all conform to a single item type."""

    def __init__(self, item_type: Type, *args, required=False, default=None,
                 allow_empty=True, unique_items=None, **kwargs):
        """
        :param item_type: the Type every list item must conform to
        :param required: forwarded to :class:`Type`
        :param default: forwarded to :class:`Type`; a required list that may
            be empty and has no explicit default defaults to ``[]``
        :param allow_empty: when false, the schema demands at least one item
        :param unique_items: when not ``None``, emitted as ``uniqueItems``
        """
        if default is None and required and allow_empty:
            default = []
        super().__init__(*args, required=required, default=default, **kwargs)
        self.item_type = item_type
        self.allow_empty = allow_empty
        self.unique_items = unique_items

    @property
    def schema(self) -> dict:
        """Return the array schema, including the item schema."""
        retval = dict(super().schema)
        retval.update({
            'type': 'array',
            'items': self.item_type.schema
        })
        if self.unique_items is not None:
            retval['uniqueItems'] = bool(self.unique_items)
        if not self.allow_empty:
            retval['minItems'] = 1
        return retval

    def canonicalize(self, data: T.Optional[list], **kwargs) -> T.Optional[list]:
        """Canonicalize every item, dropping items that canonicalize to None.

        :raises TypeError: if ``data`` is neither ``None`` nor a list
        """
        data = super().canonicalize(data, **kwargs)
        if data is None:
            return None
        if not isinstance(data, list):
            raise TypeError("{}: not a list".format(data))
        canonical_items = (
            self.item_type.canonicalize(datum, **kwargs) for datum in data
        )
        return [value for value in canonical_items if value is not None]

    def full_text_search_representation(self, data: T.Iterable) -> T.Optional[str]:
        """We must check whether the given data is really a list, jsonld may
        flatten lists.

        :returns: the item representations joined by blank lines, or ``None``
            when there is no text at all
        """
        if isinstance(data, list):
            representations = (
                self.item_type.full_text_search_representation(item)
                for item in data if item is not None
            )
            # Item types may return None (no searchable text); str.join would
            # raise TypeError on those, so they are skipped.
            retval = '\n\n'.join(
                text for text in representations if text is not None
            )
            return retval if len(retval) > 0 else None
        return self.item_type.full_text_search_representation(data)
class OneOf(Type):
    """Type whose data must conform to one of several alternative types."""

    def __init__(self, *types, **kwargs):
        """
        :param types: the alternative Types, in order of preference
        :param kwargs: forwarded to :class:`Type`
        """
        super().__init__(**kwargs)
        self.types = list(types)

    @property
    def schema(self) -> dict:
        """Return a schema with a ``oneOf`` list of the member schemas."""
        retval = dict(super().schema)
        retval['oneOf'] = [v.schema for v in self.types]
        return retval

    def validate(self, data) -> Type:
        """Return the first member type whose schema accepts ``data``.

        :raises jsonschema.ValidationError: if no member type accepts the data
        """
        for candidate in self.types:
            try:
                # Validate against the candidate's own schema; validating
                # against the combined ``oneOf`` schema cannot tell the
                # members apart.
                jsonschema.validate(data, candidate.schema)
            except jsonschema.ValidationError:
                continue
            return candidate
        raise jsonschema.ValidationError("Not valid for any type")

    def full_text_search_representation(self, data: T.Any):
        """Not supported for this type; always raises NotImplementedError."""
        raise NotImplementedError()

    def canonicalize(self, data: T.Any, **kwargs):
        """Canonicalize ``data`` with the member type that validates it."""
        return self.validate(data).canonicalize(data, **kwargs)
class Object(Type):
    """Object type with an ordered collection of named, typed properties."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # (name, type) pairs, kept in insertion order.
        self.properties: T.List[T.Tuple[str, Type]] = []

    @property
    def property_names(self):
        """Return the property names in their current order."""
        return [x[0] for x in self.properties]

    def __getitem__(self, item):
        """Return the Type registered under the property name ``item``.

        :raises KeyError: if no property has that name
        """
        for name, value in self.properties:
            if name == item:
                return value
        raise KeyError(item)

    def add(self, name, value, before=None):
        """Register property ``name`` with Type ``value``.

        :param before: name of an existing property to insert in front of;
            the new property is appended when omitted
        :returns: ``self``, so calls can be chained
        :raises ValueError: if ``name`` is already registered, or (raised by
            ``list.index``) if ``before`` is not an existing property
        """
        if name in self.property_names:
            raise ValueError("{}: property already defined".format(name))
        entry = (name, value)
        if before is None:
            self.properties.append(entry)
        else:
            insert_position = self.property_names.index(before)
            self.properties.insert(insert_position, entry)
        return self

    @property
    def schema(self) -> dict:
        """Return the object schema.

        ``x-order`` lists the property names in order; ``required`` is only
        present when at least one property is required.
        """
        retval = dict(super().schema)
        retval.update({
            'type': 'object',
            'properties': {
                name: value.schema
                for name, value in self.properties
            },
            'x-order': [name for name, value in self.properties]
        })
        required = [name for name, value in self.properties if value.required]
        if len(required) > 0:
            retval['required'] = required
        return retval

    def full_text_search_representation(self, data: dict):
        """Join the representations of all properties present in ``data``.

        :returns: the texts separated by blank lines, or ``None`` if empty
        """
        ftsr = (
            value.full_text_search_representation(data[key])
            for key, value in self.properties
            if key in data
        )
        retval = '\n\n'.join(v for v in ftsr if v is not None)
        return retval if len(retval) > 0 else None

    def canonicalize(self, data: dict, **kwargs):
        """Canonicalize every known property of ``data``.

        Keys without a registered property are dropped, as are properties
        whose canonical value is ``None``.

        :raises TypeError: if ``data`` is neither ``None`` nor a dict
        """
        data = super().canonicalize(data, **kwargs)
        if data is None:
            return None
        if not isinstance(data, dict):
            raise TypeError("{}: not a dict".format(data))
        retval = {}
        for key, type_ in self.properties:
            canonical_value = None
            if type_.sys_defined is True:
                # Sys-defined properties are canonicalized even when absent,
                # so that their type can supply a value.
                canonical_value = type_.canonicalize(data.get(key), **kwargs)
            elif key in data:
                canonical_value = type_.canonicalize(data[key], **kwargs)
            if canonical_value is not None:
                retval[key] = canonical_value
        return retval
class String(Type):
    """String type with optional pattern and length constraints."""

    def __init__(self, *args, pattern=None, max_length=None, allow_empty=False,
                 **kwargs):
        super().__init__(*args, **kwargs)
        self.pattern = pattern
        self.max_length = max_length
        self.allow_empty = allow_empty

    @property
    def schema(self) -> dict:
        """Return the string schema with the constraints that are set."""
        constraints = {'type': 'string'}
        if self.pattern is not None:
            constraints['pattern'] = self.pattern
        if self.max_length is not None:
            constraints['maxLength'] = self.max_length
        if not self.allow_empty:
            # Empty strings are rejected unless explicitly allowed.
            constraints['minLength'] = 1
        result = dict(super().schema)
        result.update(constraints)
        return result

    def full_text_search_representation(self, data: str):
        """A string is its own searchable text."""
        return data

    def canonicalize(self, data: str, **kwargs):
        """Strip surrounding whitespace and convert CRLF line ends to LF.

        Returns ``None`` when the result is empty and empty strings are not
        allowed.  Raises TypeError for data that is not a string.
        """
        data = super().canonicalize(data, **kwargs)
        if data is None:
            return None
        if not isinstance(data, str):
            raise TypeError("{}: not a string".format(data))
        cleaned = data.strip().replace('\r\n', '\n')
        if cleaned or self.allow_empty:
            return cleaned
        return None
class PlainTextLine(String):
    """String restricted to one line with at least one non-space character."""

    def __init__(self, *args, pattern=None, **kwargs):
        # The pattern is fixed by this type and may not be supplied.
        assert pattern is None
        # No CR or LF anywhere, and at least one non-whitespace character.
        single_line = r'^[^\n\r]*?\S[^\n\r]*$'
        super().__init__(*args, pattern=single_line, **kwargs)
class Date(String):
    """Date string; a date-time is accepted and truncated to its date."""

    def __init__(self, *args, format=None, pattern=None, **kwargs):
        # Format and pattern are fixed by this type and may not be supplied.
        assert format is None and pattern is None
        super().__init__(
            *args,
            format='date',
            pattern=(
                # YYYY-MM-DD
                r'^\d\d\d\d-[01]\d-[0-3]\d'
                # Optional time: Thh:mm:ss with optional fractional seconds.
                r'(?:T[012]\d:[0-5]\d:[0-5]\d(?:\.\d+)?)?'
                # Optional zone: "Z" or an hh[:mm] offset.  The sign is
                # optional so that signed offsets such as "+01:00" are
                # accepted alongside the unsigned form accepted previously.
                r'(?:Z|[+-]?[01]\d(?::[0-5]\d)?)?$'
            ),
            **kwargs
        )

    def canonicalize(self, data: str, **kwargs) -> T.Optional[str]:
        """Return the first ten characters (``YYYY-MM-DD``) of the value.

        :returns: the date part, or ``None`` when there is no value
        :raises TypeError: (from ``String.canonicalize``) for non-string data
        """
        data = super().canonicalize(data, **kwargs)
        if data is None:
            return None
        # String.canonicalize only returns None or a str, so no further type
        # check is needed before slicing.
        return data[:10]
class Language(String):
    """Language code string: ``lang1:`` plus two word characters, or
    ``lang2:`` plus three."""

    def __init__(self, *args, format=None, pattern=None, **kwargs):
        # Format and pattern are fixed by this type and may not be supplied.
        assert format is None and pattern is None
        language_code = r'^(?:lang1:\w\w|lang2:\w\w\w)$'
        super().__init__(*args, format='lang', pattern=language_code, **kwargs)
class Enum(String):
    """String restricted to a fixed set of (value, display name) pairs.

    NOTE: this class rebinds the module-level name ``Enum`` that was imported
    from :mod:`enum`; definitions placed after this class get this type.
    """

    def __init__(self, values, *args, allow_empty=None, **kwargs):
        # ``allow_empty`` may not be supplied for an enumeration.
        assert allow_empty is None
        super().__init__(*args, **kwargs)
        self.values = values
        # Lookup table from enum value to its display name.
        names_by_value = {}
        for enum_value, display_name in values:
            names_by_value[enum_value] = display_name
        self.dict = names_by_value

    @property
    def schema(self) -> dict:
        """Return the string schema extended with ``enum``/``enumNames``."""
        enum_values = []
        display_names = []
        for pair in self.values:
            enum_values.append(pair[0])
            display_names.append(pair[1])
        result = dict(super().schema)
        result['enum'] = enum_values
        result['enumNames'] = display_names
        return result

    def full_text_search_representation(self, data: str):
        """Return the display name registered for the enum value ``data``."""
        return self.dict[data]
class Integer(Type):
    """Integer type with optional range and multiple-of constraints."""

    def __init__(self, *args, multipleOf=None,
                 maximum=None, exclusiveMaximum=None,
                 minimum=None, exclusiveMinimum=None,
                 **kwargs):
        super().__init__(*args, **kwargs)
        self.multipleOf = multipleOf
        self.maximum = maximum
        self.exclusiveMaximum = exclusiveMaximum
        self.minimum = minimum
        self.exclusiveMinimum = exclusiveMinimum

    @property
    def schema(self) -> dict:
        """Return the schema with every constraint that is set."""
        retval = dict(super().schema)
        # NOTE(review): emitted as 'number' rather than 'integer' -- confirm
        # that this is intended.
        retval['type'] = 'number'
        # A tuple rather than a set, so that the keywords are always added
        # in the same order.
        constraint_names = (
            'multipleOf',
            'maximum', 'exclusiveMaximum',
            'minimum', 'exclusiveMinimum',
        )
        for k in constraint_names:
            v = getattr(self, k)
            if v is not None:
                assert isinstance(v, int)
                retval[k] = v
        return retval

    def full_text_search_representation(self, data: T.Any):
        """Return the decimal text of an int, or ``None`` for anything else."""
        return str(data) if isinstance(data, int) else None

    def canonicalize(self, data, **kwargs):
        """Return ``data`` as an int.

        Ints are returned unchanged (``bool`` is a subclass of ``int`` and is
        therefore returned unchanged as well).  Strings are parsed after
        stripping surrounding whitespace.

        :raises ValueError: if a string is not an integer literal, or if its
            stripped text is longer than the parsed number written out (for
            example a leading ``+`` or leading zeros)
        :raises TypeError: if ``data`` is neither ``None``, int nor str
        """
        data = super().canonicalize(data, **kwargs)
        if data is None:
            return None
        if isinstance(data, int):
            return data
        if isinstance(data, str):
            text = data.strip()
            retval = int(text)
            # Compare against the stripped text: comparing against the raw
            # input would reject every padded value despite the strip above.
            if len(str(retval)) != len(text):
                raise ValueError("{}: not an integer".format(data))
            return retval
        raise TypeError("{}: not an integer".format(data))
# Schema objects are built fluently: ``Object.add`` returns the object itself.

# NOTE(review): this object uses 'dc:identifier' whereas DATASET below uses
# 'dct:identifier' -- confirm that the prefix difference is intended.
DISTRIBUTION = (
    Object()
    .add('dct:title', String())
    .add('dct:description', String())
    .add('dct:issued', Date())
    .add('dct:modified', Date())
    .add('dc:identifier', PlainTextLine())
    .add('dct:license', String())
    .add('dct:rights', String())
    .add('dcat:accessURL', String(format='uri'))
    .add('dcat:downloadURL', String(format='uri'))
    .add('dcat:mediaType', String(pattern=r'^[-\w.]+/[-\w.]+$'))
    .add('dct:format', String())
    .add('dcat:byteSize', Integer(minimum=0))
)

VCARD = Object().add('vcard:fn', PlainTextLine(required=True))

FOAF_AGENT = Object().add('foaf:name', PlainTextLine(required=True))

# The nested objects defined above are reused as property types here.
DATASET = (
    Object()
    .add('dct:title', String())
    .add('dct:description', String())
    .add('dct:issued', Date())
    .add('dct:modified', Date())
    .add('dct:identifier', PlainTextLine())
    .add('dcat:keyword', List(PlainTextLine()))
    .add('dct:language', Language())
    .add('dcat:contactPoint', VCARD)
    .add('dct:Temporal', String())
    .add('dct:Spatial', String())
    .add('dct:accrualPeriodicity', String())
    .add('dcat:landingPage', String(format='uri'))
    .add('dcat:theme', String(format='uri'))
    .add('dct:publisher', FOAF_AGENT)
    .add('dcat:distribution', DISTRIBUTION)
)
# Debugging aid: uncomment the lines below to print the DATASET JSON schema.
# import json
# print(json.dumps(
#     DATASET.schema,
#     indent=' ', sort_keys=True
# ))