import os
import os.path
import sys
import time

import dataset
import annotations

NAME = "laydi-cmd"
VERSION = "0.1.0"

PROJECT_VERSION_STRING = "Laydi project version 1"

## Subdirectories that every project directory must contain.
_PROJECT_SUBDIRS = ("annotations", "data", "selections", "exports")


def is_project_directory(dirname):
    """Verify that a directory is a laydi project.

    A directory qualifies when it contains a VERSION file whose first line
    (ignoring surrounding whitespace) equals PROJECT_VERSION_STRING, and
    when the annotations, data, selections and exports subdirectories all
    exist.

    dirname: path of the directory to check.

    Returns True if every check passes, False otherwise.
    """
    if not os.path.isdir(dirname):
        return False

    ## Verify that the version is correct.
    version_fn = os.path.join(dirname, "VERSION")
    if not os.path.exists(version_fn):
        return False
    with open(version_fn) as fd:
        line = fd.readline()
    if line.strip() != PROJECT_VERSION_STRING:
        return False

    ## Require directories to be present.
    for subdir in _PROJECT_SUBDIRS:
        if not os.path.isdir(os.path.join(dirname, subdir)):
            return False

    ## If no tests failed, return True
    return True


def make_project_directory(dirname, force=False):
    """Create a project directory.

    Creates dirname, the annotations, selections, data and exports
    subdirectories, and a VERSION file containing PROJECT_VERSION_STRING.

    dirname: path of the project directory to create.
    force: ignore that directory exists and proceed anyway.

    Returns False, without touching the file system, if dirname exists and
    force is not set.  Otherwise the layout is created and nothing (None)
    is returned.
    """
    if os.path.exists(dirname) and not force:
        return False

    ## With force=True some or all of the layout may already be present,
    ## so only the missing directories are created.
    wanted = [dirname]
    wanted.extend(os.path.join(dirname, subdir) for subdir in _PROJECT_SUBDIRS)
    for path in wanted:
        if not os.path.isdir(path):
            os.makedirs(path)

    with open(os.path.join(dirname, "VERSION"), "w") as fd:
        fd.write(PROJECT_VERSION_STRING + "\n")


class Universe(object):
    """A Universe is a collection of all existing identifiers in a set of datasets

    Identifiers are reference counted per dimension name in self.refcount,
    a dict mapping dimension name -> {identifier: count}.
    """

    def __init__(self):
        self.refcount = {}

    def register_dim(self, dim):
        """Increase reference count for identifiers in Dimension object dim"""
        counts = self.refcount.setdefault(dim.name, {})
        for i in dim:
            counts[i] = counts.get(i, 0) + 1

    def register_ds(self, ds):
        """Increase reference count for identifiers in all Dimensions of dataset ds"""
        for dim in ds.dims:
            self.register_dim(dim)

    def unregister_dim(self, dim):
        """Update reference count for identifiers in Dimension object dim

        Update reference count for identifiers in Dimension object dim, and
        remove all identifiers with a reference count of 0, as they do not
        (by definition) exist any longer.

        Raises KeyError if the dimension name or one of the identifiers has
        not been registered.
        """
        ids = self.refcount[dim.name]
        for i in dim:
            if ids[i] == 1:
                ids.pop(i)
            else:
                ids[i] -= 1

        ## Drop the dimension itself once no identifiers remain.
        if not ids:
            self.refcount.pop(dim.name)

    def unregister_ds(self, ds):
        """Update reference count for identifiers along Dimensions in Dataset ds.

        Update reference count for identifiers along all Dimensions in
        Dataset ds, and remove all identifiers with a reference count of 0,
        as they do not (by definition) exist any longer.
        """
        ## Mirrors register_ds: same ds.dims iteration, opposite operation.
        for dim in ds.dims:
            self.unregister_dim(dim)

    def register(self, obj):
        """Register obj, which is either a dataset.Dataset or a Dimension."""
        if isinstance(obj, dataset.Dataset):
            self.register_ds(obj)
        else:
            self.register_dim(obj)

    def unregister(self, obj):
        """Unregister obj, which is either a dataset.Dataset or a Dimension."""
        if isinstance(obj, dataset.Dataset):
            self.unregister_ds(obj)
        else:
            self.unregister_dim(obj)

    def __getitem__(self, dimname):
        """Return the set of identifiers registered along dimension dimname.

        Raises KeyError if no identifiers are registered for dimname.
        """
        return set(self.refcount[dimname])

    def __iter__(self):
        """Iterate over the names of all registered dimensions."""
        return iter(self.refcount)


class Dimension(object):
    """A Dimension represents the set of identifiers an object has along an axis.
    """

    def __init__(self, name, ids=()):
        """Create a dimension called name holding the identifiers in ids.

        ids may be any iterable; its order is kept in self.idlist.

        Raises Exception if ids contains duplicates.
        """
        self.name = name
        ## Build the list first so that one-shot iterators are consumed
        ## only once; the set is then derived from the list.
        self.idlist = list(ids)
        self.idset = set(self.idlist)
        if len(self.idset) != len(self.idlist):
            raise Exception("Duplicate identifiers are not allowed")

    def __getitem__(self, element):
        return self.idlist[element]

    def __getslice__(self, start, end):
        ## Only consulted by Python 2; Python 3 routes slices to __getitem__.
        return self.idlist[start:end]

    def __contains__(self, element):
        return element in self.idset

    def __str__(self):
        return "%s: %s" % (self.name, str(self.idlist))

    def __len__(self):
        return len(self.idlist)

    def __iter__(self):
        return iter(self.idlist)

    def intersection(self, dim):
        """Return a new Dimension with the identifiers shared with dim.

        The identifiers keep the order they have in this dimension.
        Returns None if the two dimensions have different names.
        """
        if self.name != dim.name:
            return None
        shared = [i for i in self.idlist if i in dim.idset]
        return Dimension(self.name, shared)

    def as_tuple(self):
        """Return the dimension as a (name, list of identifiers) tuple."""
        return (self.name, self.idlist)

    def verify(self):
        """Raise Exception if any identifier contains a space or a tab."""
        for i in self.idlist:
            if " " in i or "\t" in i:
                raise Exception("Invalid identifier: %s" % i)


class Directory(object):
    """Watches one directory and reports changes through callback methods.

    Each call to update() compares the current directory listing with the
    listing seen at the previous call and invokes file_created,
    file_changed, file_removed, dir_created, dir_changed or dir_removed
    with the bare entry name.  Subclasses override the callbacks they need;
    the default implementations only print a message.
    """

    def __init__(self, path):
        self.path = path
        self.files = set()
        ## Names in self.files that were directories at the last update.
        ## Needed because a removed entry can no longer be inspected.
        self._dirs = set()
        self.timestamp = -1
        self.update()

    def update(self):
        """Rescan the directory and fire callbacks for every difference.

        An entry present in both scans counts as changed when its ctime
        (os.path.getctime) is at or after the time of the previous scan.
        """
        now = time.time()
        newfiles = set(os.listdir(self.path))
        newdirs = set(fn for fn in newfiles
                      if os.path.isdir(os.path.join(self.path, fn)))

        for fn in newfiles - self.files:
            if fn in newdirs:
                self.dir_created(fn)
            else:
                self.file_created(fn)

        ## A removed entry no longer exists on disk, so whether it was a
        ## directory has to come from what was recorded at the last scan.
        for fn in self.files - newfiles:
            if fn in self._dirs:
                self.dir_removed(fn)
            else:
                self.file_removed(fn)

        for fn in self.files.intersection(newfiles):
            filepath = os.path.join(self.path, fn)
            if os.path.getctime(filepath) >= self.timestamp:
                if fn in newdirs:
                    self.dir_changed(fn)
                else:
                    self.file_changed(fn)

        self.files = newfiles
        self._dirs = newdirs
        self.timestamp = now

    def file_created(self, fn):
        print("file created: %s" % fn)

    def file_changed(self, fn):
        print("file changed: %s" % fn)

    def file_removed(self, fn):
        print("file removed: %s" % fn)

    def dir_created(self, fn):
        print("directory created: %s" % fn)

    def dir_changed(self, fn):
        print("directory changed: %s" % fn)

    def dir_removed(self, fn):
        print("directory removed: %s" % fn)


class DataDirectory(Directory):
    """Keeps the datasets stored as .ftsv files in a directory loaded.

    self.datasets is the list of loaded datasets and self.dsfiles maps
    file name -> dataset.
    """

    def __init__(self, dirname, project):
        self.project = project
        self.datasets = []
        self.dsfiles = {}
        ## Directory.__init__ runs the first update(), so the containers
        ## above must exist before it is called.
        Directory.__init__(self, dirname)

    def file_created(self, fn):
        """Called from update() when new files are created.

        Load new datasets that have appeared since last update.
        """
        name, ext = os.path.splitext(fn)
        if ext != ".ftsv":
            return
        ds = dataset.read_ftsv(os.path.join(self.path, fn))
        self.datasets.append(ds)
        self.dsfiles[fn] = ds

    def file_changed(self, fn):
        """Called from update() when files are changed.

        Delete old dataset and load the new one when dataset files have
        been changed.
        """
        name, ext = os.path.splitext(fn)
        if ext != ".ftsv":
            return

        ## Load first: if reading fails the previous dataset stays in place.
        ds = dataset.read_ftsv(os.path.join(self.path, fn))
        oldds = self.dsfiles.pop(fn, None)
        if oldds is not None:
            self.datasets.remove(oldds)
        self.datasets.append(ds)
        self.dsfiles[fn] = ds

    def file_removed(self, fn):
        """Called from update() when a file is deleted

        Removes the associated dataset if a dataset file is removed.
        """
        name, ext = os.path.splitext(fn)
        if ext != ".ftsv":
            return
        ds = self.dsfiles.pop(fn, None)
        if ds is not None:
            self.datasets.remove(ds)

    def dir_created(self, fn):
        """Called from update() when a subdirectory is created.

        Subdirectories of the data directory are currently ignored.
        """


class SelectionParentDirectory(Directory):
    """Watches the selections directory, which holds one subdirectory per dimension.

    self.handlers maps dimension name (the subdirectory name) to the
    SelectionDirectory watching that subdirectory.
    """

    def __init__(self, dirname, project):
        self.project = project
        self.handlers = {}
        Directory.__init__(self, dirname)

    def dimensions(self):
        """Return the names of the dimensions that have a selection directory."""
        return self.handlers.keys()

    def __getitem__(self, key):
        return self.handlers[key]

    def file_created(self, fn):
        pass

    def file_changed(self, fn):
        pass

    def file_removed(self, fn):
        pass

    def dir_created(self, fn):
        """Called from update(); starts watching a new dimension subdirectory."""
        print("dir_created: %s" % fn)
        dimname = os.path.split(fn)[-1]
        self.handlers[dimname] = SelectionDirectory(
            os.path.join(self.path, fn), dimname, self.project)

    def dir_removed(self, fn):
        """Called from update(); forgets the handler of a removed subdirectory."""
        print("dir_removed: %s" % fn)
        dimname = os.path.split(fn)[-1]
        self.handlers.pop(dimname, None)

    def update(self):
        """Rescan this directory, then every dimension subdirectory."""
        Directory.update(self)
        for handler in self.handlers.values():
            handler.update()


class SelectionDirectory(Directory):
    """Keeps the selections (.sel files) of one dimension loaded.

    self.selections maps file name -> Dimension object holding the
    identifiers listed in that file.
    """

    def __init__(self, fn, dimname, project):
        self.project = project
        self.dimension = dimname
        self.selections = {}
        Directory.__init__(self, fn)

    def read_selection_file(self, fn):
        """Reads a selection file and returns the corresponding Dimension object.

        fn is the path of the file.  Each non-empty line that does not
        start with "#" is one identifier.  Warnings are printed to terminal
        on duplicated ids and invalid ids (ids containing a space or a
        tab); such ids are left out of the result.
        """
        print("read_selection_file(%s)" % (fn,))

        ids = []
        seen = set()
        with open(fn) as fd:
            for line in fd:
                e = line.strip()
                if e.startswith("#") or e == "":
                    continue
                if " " in e or "\t" in e:
                    print("warning: invalid identifier in %s: %s" % (fn, e))
                    continue
                if e in seen:
                    print("warning: duplicated identifier in %s: %s" % (fn, e))
                    continue
                seen.add(e)
                ids.append(e)
        return Dimension(self.dimension, ids)

    def file_created(self, fn):
        """Called from update() when new files are created.

        Load new selections that have appeared since last update.
        """
        print("loading selection: %s [%s]" % (fn, self.dimension))
        name, ext = os.path.splitext(fn)
        if ext == ".sel":
            filepath = os.path.join(self.path, fn)
            self.selections[fn] = self.read_selection_file(filepath)

    def file_changed(self, fn):
        """Called from update() when files are changed.

        Reload the selection when a selection file has been changed.
        """
        name, ext = os.path.splitext(fn)
        if ext == ".sel":
            filepath = os.path.join(self.path, fn)
            self.selections[fn] = self.read_selection_file(filepath)

    def file_removed(self, fn):
        """Called from update() when a file is deleted

        Removes the associated selection if a selection file is removed.
        """
        name, ext = os.path.splitext(fn)
        if ext == ".sel":
            self.selections.pop(fn, None)

    def dir_created(self, fn):
        """Called from update() when a subdirectory is created.

        Subdirectories of a selection directory are currently ignored.
        """


class AnnotationDirectory(Directory):
    """Feeds every file in the annotations directory to the annotations module."""

    def __init__(self, dirname, project):
        self.project = project
        self.dirname = dirname
        Directory.__init__(self, dirname)

    def file_created(self, fn):
        annotations.read_annotations_file(os.path.join(self.dirname, fn))

    def file_changed(self, fn):
        annotations.read_annotations_file(os.path.join(self.dirname, fn))

    def file_removed(self, fn):
        print("File removed: %s" % fn)


class Project(object):
    """An opened project directory and the watchers for its subdirectories."""

    def __init__(self, dirname):
        """Opens a project directory.

        The directory must exist and be a valid project."""
        ## Set path names.
        self.rootdir = dirname
        self.anndir = os.path.join(dirname, "annotations")
        self.seldir = os.path.join(dirname, "selections")
        self.datadir = os.path.join(dirname, "data")
        self.exportdir = os.path.join(dirname, "exports")

        self.universe = Universe()
        ## Each watcher scans its directory once on construction.
        self.data = DataDirectory(self.datadir, self)
        self.annotations = AnnotationDirectory(self.anndir, self)
        self.selections = SelectionParentDirectory(self.seldir, self)

    def update(self):
        """Rescan the data, annotations and selections directories."""
        print("updating project")
        self.data.update()
        self.annotations.update()
        self.selections.update()