Index > Knowledge base release Edit on GitHub

Knowledge base release

Table of Contents

Release workflow

For a development release run

./release.org --build

For a SCKAN release run

./release.org --build --sckan --no-blaze

If something breaks partway through the pipeline and you need to rerun, you can use the --resume option. For example:

./release.org --build --sckan --no-blaze --resume

To build the SciGraph portion of the release you need to have the pyontutils repo on your system and installed, then run

~/git/pyontutils/nifstd/scigraph/bin/run-load-graph-sparc-sckan

Building a release

Obtain this file, then run this block.

what should happen

  1. retrieve the latest version of this file
  2. retrieve blazegraph.jar
  3. retrieve blazegraph-runner.jar
  4. retrieve (and possibly first build) the latest version of curation-export.ttl and protcur.ttl
  5. retrieve (possibly from the build cache for the ttl files) the hypothesis annotations from group
  6. export the #lang protc/ur representation for inclusion; this might require a python setup to be fully reproducible, but we're going to skip that for now
  7. load the ttl files into blazegraph.jnl
  8. set the properties files for blazegraph
  9. put everything in a release hierarchy
  10. run test queries
  11. package everything together in a single archive including queries.org and maybe this file for prov
;; helpers

;; Buffer-local slot for the release directory; nil until a step binds it.
;; `step-release' let-binds this name to the directory it builds into.
(defvar-local release-dir nil)
;; Buffer-local slot for the prefixes file path; nil until a step binds it.
;; `step-add-prefixes-file' let-binds this name to "data/prefixes.conf".
(defvar-local prefixes-path nil)

;; Alist from CURIE prefix symbol to namespace IRI string.  This is the
;; table `expand-curie' falls back to when it is given no local table.
(defvar-local curies
    '((owl . "http://www.w3.org/2002/07/owl#")
      (ilxtr . "http://uri.interlex.org/tgbugs/uris/readable/")))

(defmacro unless-file-exists-p (file &rest body)
  "Evaluate BODY only when FILE does not exist on the file system.
Return the value of the last BODY form, or nil when FILE exists."
  (declare (indent 1))
  `(if (file-exists-p ,file)
       nil
     ,@body))

(defun url-copy-file-safe (url newname &optional ok-if-already-exists &rest _ignored)
  "Copy URL to NEWNAME, signaling an error first if URL does not exist.
`url-copy-file' gives no way to capture `url-http-response-status',
so existence is probed with `file-exists-p' inside
`with-url-handler-mode' before the copy is attempted.
OK-IF-ALREADY-EXISTS is handed on to `url-copy-file'; any further
arguments are accepted and ignored."
  (with-url-handler-mode
    (when (not (file-exists-p url))
      (error "Url does not exist! %s" url)))
  (url-copy-file url newname ok-if-already-exists))

(defun build-dir ()
  "Return the build directory name, always ending in a directory separator.
It is the entry named \"build\" under `temporary-file-directory'."
  ;; FIXME TODO a unique directory from `make-temp-file' would be nicer
  (let ((tmp-root (temporary-file-directory)))
    (file-name-as-directory (concat tmp-root "build"))))

(defun find-resume-dir (&optional build-dir)
  "Find the latest release directory in BUILD-DIR that has not been packaged.

BUILD-DIR defaults to the value returned by the function `build-dir'.
Return the full name of the newest matching release directory, or
nil when BUILD-DIR is not a directory, holds no matching release,
or the newest release already has a .zip archive next to it.

This uses the file system state to track incomplete builds. It
seems to be a better approach than trying to store some state
somewhere else that could get out of sync, just like we check
for individual step completion by checking if the file exists.
Yes this is make."
  (let* ((default-directory
          (file-name-as-directory (or build-dir (build-dir))))
         (latest-release-dir
          (and
           ;; Test the resolved directory rather than the raw argument:
           ;; BUILD-DIR is nil whenever the caller relies on the default,
           ;; and `file-directory-p' signals on a non-string argument.
           (file-directory-p default-directory)
           (car
            (sort
             (cl-remove-if-not
              #'file-directory-p
              (directory-files
               default-directory 'full
               ;; the release type is the suffix after the timestamp
               (concat "^release-.+" (if sckan-release "-sckan$" "Z$"))
               'nosort))
             ;; release names embed a sortable timestamp, so descending
             ;; string order puts the newest release first
             #'string>)))))
    ;; a sibling .zip means the release was already packaged
    (and latest-release-dir ; XXX and0 would be fun wouldn't it?
         (not (file-exists-p (concat latest-release-dir ".zip")))
         latest-release-dir)))

(defun py-x ()
  "Return the file name of the python interpreter to use, or nil.
pypy3 is preferred and python is the fallback; nil means that
`executable-find' located neither of them."
  (cl-some #'executable-find '("pypy3" "python")))

(defun make-prov-record (path)
  "Run the pyontutils.ontload prov command for blazegraph with PATH.
The interpreter is the one chosen by `py-x'; PATH is handed to the
command as its final argument."
  ;; reminder: the record id is always build:prov, so other processes
  ;; can attach further prov triples on their own without any need to
  ;; extend the core python code
  (let ((interpreter (py-x)))
    (run-command interpreter
                 "-m" "pyontutils.ontload"
                 "prov" "blazegraph" path)))

(defun get-ontology (iri merged-path)
  "Merge the ontology at IRI into MERGED-PATH, then run the ttl formatter on it.
The merge is done by robot.  The formatter module is
pyontutils.qnamefix when `sckan-release' is non-nil and
ttlser.ttlfmt otherwise."
  ;; FIXME not clear that this is the best approach here
  (run-command "robot" "merge" "--input-iri" iri "--output" merged-path)
  (let ((formatter (if sckan-release
                       "pyontutils.qnamefix"
                     "ttlser.ttlfmt")))
    (run-command (py-x) "-m" formatter merged-path)))

(defun reason-ontology (ontology-path reasoned-path)
  "Reason over ONTOLOGY-PATH with robot and write the result to REASONED-PATH.
The robot invocation chains a reason command (ELK reasoner) with a
reduce command.  Afterwards the ttl formatter module is run on
REASONED-PATH: pyontutils.qnamefix when `sckan-release' is non-nil,
ttlser.ttlfmt otherwise."
  (let ((formatter (if sckan-release
                       "pyontutils.qnamefix"
                     "ttlser.ttlfmt"))
        (robot-args
         (list "reason"
               ;; "--catalog" catalog-path ; use robot merge instead?
               "--reasoner" "ELK"
               "--create-new-ontology" "true"
               "--axiom-generators" "SubClass EquivalentClass DisjointClasses"
               "--exclude-duplicate-axioms" "true"
               "--exclude-tautologies" "structural"
               "--input" ontology-path
               "reduce"
               "--output" reasoned-path)))
    (apply #'run-command "robot" robot-args)
    (run-command (py-x) "-m" formatter reasoned-path)))

(defun read-ttl-file (path)
  "Convert path to ttl file into a list of triples. Extremely inefficient."
  ;; Approach: have ttlser.ttlfmt print PATH as N-Triples on stdout, then
  ;; rewrite that text with regexps until it is a single s-expression and
  ;; hand it to the Lisp reader.  The result is a list with one sub-list
  ;; per line of output.
  (let ((bstr
         ;; the infinite hang bug is back, so `ow-run-command' is bypassed
         ;; in favour of a direct `call-process'
         ;; (ow-run-command (py-x) "-m" "ttlser.ttlfmt" path "-t" "nt" "-o" "/dev/stdout")
         (with-temp-buffer
           ;; destination (BUFFER nil): stdout goes into the temp buffer,
           ;; stderr is discarded
           (call-process (py-x) nil (list (current-buffer) nil) nil
                         "-m" "ttlser.ttlfmt" path "-t" "nt" "-o" "/dev/stdout")
           (buffer-string))))
    ;; FIXME run-command has a buffer like this internally, a variant
    ;; that would allow us to return a buffer output instead of a string might be nice
    (with-temp-buffer
      (insert bstr)
      ;; NOTE(review): `mi' and `ma' are captured once, before any edit,
      ;; and are reused as bounds for every pass below even though the
      ;; buffer length changes -- confirm the later passes still cover
      ;; the whole text.
      ;; NOTE(review): `replace-regexp' is documented as an interactive
      ;; command; a `re-search-forward' + `replace-match' loop is the
      ;; form recommended for Lisp code.
      (let ((mi (point-min))
            (ma (point-max)))
        ;; drop any line that starts with "Process" (presumably process
        ;; status noise -- TODO confirm what emits it)
        (replace-regexp "^Process.+$" "" nil mi ma)
        ;; strip the angle brackets around IRIs
        (replace-regexp "<" "" nil mi ma)
        (replace-regexp ">" "" nil mi ma)
        ;; remove empty lines
        (replace-regexp "\n\n" "" nil mi ma)
        ;; remove a newline sitting just before the position `ma'
        (goto-char ma) (backward-char) (when (looking-at "\n") (delete-char 1))
        ;; open a list at the start of every line ...
        (replace-regexp "^" "(" nil mi ma) ; diverges vim behavior by replacing \n at eof :/
        ;; ... and turn the statement-terminating dot into its close paren
        (replace-regexp "\\.$" ")" nil mi ma)
        ;; backslash-escape every # before the text reaches the reader
        (replace-regexp "#" "\\\\#" nil mi ma) ; apparently sharps are special, I think I missed that in reader tests
        ;; wrap all the per-line lists in one outer list and read it
        (goto-char (point-min))
        (insert "(")
        (goto-char (point-max))
        (insert ")")
        (read (buffer-string))))))

(defun get-apinat-paths (triples)
  "Return the file names of the ApiNATOMY imports found in TRIPLES.
Import objects are collected with `select-predicate': only
owl:imports when `sckan-release' is non-nil, owl:imports plus
ilxtr:imports-dev otherwise.  Objects whose text contains
\"ApiNATOMY\" are kept and reduced to their last path component."
  (let* ((import-predicates
          (if sckan-release
              (list (intern (expand-curie 'owl:imports)))
            (list (intern (expand-curie 'owl:imports))
                  ;;(intern (expand-curie 'ilxtr:imports-big))
                  (intern (expand-curie 'ilxtr:imports-dev))
                  ;;(intern (expand-curie 'ilxtr:imports-rel))
                  )))
         (imported (apply #'select-predicate triples import-predicates))
         (apinat-uris
          (cl-remove-if-not
           (lambda (uri) (string-search "ApiNATOMY" uri)) ; FIXME hack
           imported)))
    (mapcar #'file-name-nondirectory apinat-uris)))

(defun sub (tr &optional match)
  "Return the subject (first element) of the triple TR.
When MATCH is non-nil, instead return t if the subject is `equal'
to MATCH and nil otherwise."
  ;; The former `(and (symbolp tr) (eq ...))' arm tested the triple
  ;; itself, which is a list, so it never decided the result; `equal'
  ;; already compares symbols by identity and gives the same answer.
  (if match
      (equal (car tr) match)
    (car tr)))

(defun pred (tr &optional match)
  "Return the predicate (second element) of the triple TR.
When MATCH is non-nil, instead return t if the predicate is
`equal' to MATCH and nil otherwise."
  ;; The former `(and (symbolp tr) (eq ...))' arm tested the triple
  ;; itself, which is a list, so it never decided the result; `equal'
  ;; already compares symbols by identity and gives the same answer.
  (if match
      (equal (cadr tr) match)
    (cadr tr)))

(defun obj (tr &optional match)
  "Return the object (third element) of the triple TR.
When MATCH is non-nil, instead return t if the object is `equal'
to MATCH and nil otherwise."
  ;; The former `(and (symbolp tr) (eq ...))' arm tested the triple
  ;; itself, which is a list, so it never decided the result; `equal'
  ;; already compares symbols by identity and gives the same answer.
  (if match
      (equal (caddr tr) match)
    (caddr tr)))

(defun ematch (triples select match matchf &rest matches)
  "Return the members of TRIPLES accepted by SELECT and, if given, MATCHF.
SELECT is called as (SELECT TRIPLE MATCH); `sub', `pred' and `obj'
fit that shape.  A triple is collected when that call returns
non-nil and either MATCHF is nil or calling MATCHF with the list
MATCHES returns non-nil."
  ;; Emacs Lisp keeps functions and variables in separate namespaces, so
  ;; a function received as an argument has to be called via `funcall';
  ;; writing (select tr match) looks up a global function named `select'.
  ;; NOTE(review): MATCHF receives MATCHES as one list argument, exactly
  ;; as the original call form was written -- confirm the intended
  ;; contract (it may have been meant to receive the triple as well).
  (cl-loop
   for tr in triples
   when (and (funcall select tr match)
             (or (not matchf) (funcall matchf matches)))
   collect tr))

(defun expand-curie (curie &optional local-curies)
  "Expand CURIE, a symbol or string of the form prefix:suffix, to a string.
The prefix is looked up as a symbol in the alist LOCAL-CURIES, or
in the variable `curies' when LOCAL-CURIES is nil, and the suffix
is appended to the namespace found there."
  (let* ((table (or local-curies curies))
         (text (if (symbolp curie) (symbol-name curie) curie))
         (parts (split-string text ":"))
         (namespace (alist-get (intern (car parts)) table)))
    (concat namespace (cadr parts))))

(defun select-predicate (triples &rest predicates)
  "Return the objects of the TRIPLES whose predicate is one of PREDICATES.
Each object is converted with `symbol-name', and the results keep
the order in which the triples appear."
  (let (objects)
    (dolist (tr triples)
      (when (cl-some (lambda (p) (pred tr p)) predicates)
        (push (symbol-name (obj tr)) objects)))
    (nreverse objects)))

;; 0 ensure commands
(defun step-ensure-commands ()
  "Signal an error unless every program the build needs can be found.
A python implementation (see `py-x') is checked first, then zip,
tar and rsync, in that order."
  (if (py-x)
      (dolist (command '("zip" "tar" "rsync"))
        (or (executable-find command)
            (error "Cannot continue. No executable found for %s" command)))
    (error "Cannot continue. No python implementation found.")))

;; 0.25 ensure python modules
(defun step-ensure-python-modules ()
  "Check that the python modules protcur and ttlser can be located.
For each module a one-line python program is run that asserts
importlib can find a spec for it."
  (let ((check-template
         "import importlib.util; assert importlib.util.find_spec('%s'), '%s'"))
    (mapc
     (lambda (module)
       (run-command (py-x) "-c" (format check-template module module)))
     '("protcur" "ttlser"))
    nil))

;; 0.5 ensure services
(defun step-ensure-services ()
  "Placeholder for checking that required services are reachable.
Nothing is run: the scigraph probe is kept only as quoted data,
which is also what this function returns."
  ;; protcur used to need working scigraph queries at this point; the
  ;; network calls were moved out of this step so the probe is disabled
  (quote (run-command "scig" "t" "brain")))

;; 1
;; a slight chicken and egg problem

;; 2
(defun step-fetch-blazegraph ()
  "Fetch the pinned blazegraph jar with `securl' and return its file name.
The release tag and the expected sha256 checksum are fixed here;
the jar is stored as blazegraph.jar."
  (let* ((jar-name "blazegraph.jar")
         (release-tag "BLAZEGRAPH_2_1_6_RC")
         (algorithm 'sha256)
         (digest "930c38b5bce7c0ae99701c1f6ef3057c52f3f385d938e1397a3e05561c7df5de")
         (source-url
          (format "https://github.com/blazegraph/database/releases/download/%s/blazegraph.jar"
                  release-tag)))
    (securl algorithm digest source-url jar-name)
    jar-name))

(defun step-arrange-blazegraph (build-dir path-jar)
  "Copy the jar PATH-JAR from BUILD-DIR into opt/ unless it is already there.
The target is opt/ plus the jar's file name, relative to
`default-directory'.  Return that target path."
  (let ((destination (concat "opt/" (file-name-nondirectory path-jar))))
    (when (not (file-exists-p destination))
      (copy-file (concat build-dir path-jar) destination))
    destination))

;; 3
(defun step-fetch-blazegraph-runner ()
  "Fetch the pinned blazegraph-runner tarball and unpack it if needed.
The tarball is fetched with `securl' against a fixed sha256
checksum and extracted with tar unless its directory already
exists.  Return that directory, under `default-directory', as a
directory name."
  (let* ((release-tag "v1.6")
         (dist-name "blazegraph-runner-1.6")
         (tarball (concat dist-name ".tgz"))
         (algorithm 'sha256)
         (digest "4f2c01d6d75093361f75d8671c6d426fea3273a04168bcd0beea880527111271")
         (source-url
          (format "https://github.com/balhoff/blazegraph-runner/releases/download/%s/%s.tgz"
                  release-tag dist-name)))
    (securl algorithm digest source-url tarball) ;; FIXME should this error on mismatch?
    ;; FIXME test untar from different folder will put it in that folder not the location of the tar
    (if (file-directory-p dist-name)
        nil
      (run-command "tar" "xvzf" tarball))
    (file-name-as-directory (concat default-directory dist-name))))

(defun step-annotations ()
  "Fetch the hypothesis annotations and render them in #lang protc/ur.

Works on two files relative to `default-directory':
data/annotations.json and data/protcur-sparc.rkt.  Nothing is done
when both already exist.  A missing annotations file is fetched
with rsync; when `sckan-release' is non-nil it is then passed
through the clean-annotations-group babel block and the file is
searched for the string group:sparc-curation, signaling an error
if the string is absent.  Finally protcur.cli convert is run on
the annotations to produce the protc/ur file."
  (let ((hypothesis-annos "data/annotations.json")
        (protcur-path "data/protcur-sparc.rkt"))
    (unless (and (file-exists-p hypothesis-annos)
                 (file-exists-p protcur-path))
      (unless-file-exists-p hypothesis-annos
        ;; 5
        (run-command "rsync" "--rsh" "ssh" "cassava-sparc:.cache/hyputils/annos-*.json" hypothesis-annos)
        (when sckan-release
          (ow-babel-eval "clean-annotations-group") ; FIXME org babel doesn't specify a way to pass an error!?
          ;; babel cannot report a failure, so the cleaned file itself is
          ;; inspected: `re-search-forward' signals if the string is missing
          (let* ((large-file-warning-threshold) ; nil: no large-file prompt
                 (anno-buffer (find-file-noselect hypothesis-annos)))
            ;; release the buffer even when the search signals
            (unwind-protect
                (with-current-buffer anno-buffer
                  (goto-char (point-min))
                  (re-search-forward "group:sparc-curation"))
              (kill-buffer anno-buffer)))
          (message
           "%S" ; TODO check if we can safely strip out the group at this stage
           '(FIXME TODO replace the group id with some garbage so that it doesn't leak))))
      ;; 6 FIXME TODO this requires scigraph to be running FIXME this is a very slow step
      (run-command (py-x) "-m" "protcur.cli" "convert" hypothesis-annos protcur-path))))

(defun step-load-store (path-br-bin &optional no-load)
  "download latest ttl files and load into blazegraph"
  ;; PATH-BR-BIN is prepended to `exec-path' for the duration of this
  ;; function, which is how the "blazegraph-runner" command below is
  ;; meant to be found.  A non-nil NO-LOAD fetches and reasons
  ;; everything but skips loading the journal.
  ;;
  ;; Every path is relative to `default-directory'.  Each fetch/reason
  ;; step is guarded by a check for its output file, so rerunning after
  ;; a failure only redoes the steps whose outputs are missing.
  (let ((exec-path (cons path-br-bin exec-path))
        (journal-path "data/blazegraph.jnl")
        (prov-path "data/prov-record.ttl") ; FIXME coordinated by convention with SciGraph load
        (p-path "data/protcur.ttl")
        (pj-path "data/protcur.json")
        ;; `and' yields nil for non-sckan releases and `concat' treats nil
        ;; as empty, giving curation-export.ttl vs curation-export-published.ttl
        (ce-path (concat "data/curation-export" (and sckan-release "-published") ".ttl"))
        (mis-path "data/sparc-methods.ttl")
        (mis-r-path "data/sparc-methods-reasoned.ttl")
        (sct-path "data/sparc-community-terms.ttl")
        (sml-path "data/sparc-missing-labels.ttl")

        (ch-path "data/chebi.ttl")
        (ub-path "data/uberon.ttl")
        (em-path "data/emapa.ttl")
        (ap-path "data/approach.ttl")
        (me-path "data/methods.ttl") ; FIXME ideally we want to pull in the near import closure for this
        (npo-path "data/npo.ttl")

        ;; outputs of `reason-ontology' for the files above
        (em-r-path "data/emapa-reasoned.ttl")
        (ub-r-path "data/uberon-reasoned.ttl")
        (me-r-path "data/methods-reasoned.ttl")
        (npo-r-path "data/npo-reasoned.ttl")

        ;; url pieces: raw github content, the NIF-Ontology git ref to
        ;; fetch from, and the two cassava download locations
        (rguc "https://raw.githubusercontent.com/")
        (ont-git-ref "dev")
        (cass-ont "https://cassava.ucsd.edu/sparc/ontologies/")
        (cass-px "https://cassava.ucsd.edu/sparc/preview/exports/")

        (apinat-base-url "https://cassava.ucsd.edu/ApiNATOMY/ontologies/")

        (sparc-data-path "data/sparc-data.ttl")
        (sparc-data-source "resources/scigraph/sparc-data.ttl")
        ;; one apinatomy file whose presence stands in for all of them
        (apinat-sentinel-path "data/keast-bladder.ttl")
        ;; filled in below with the apinatomy file names to download/load
        apinat-paths)
    ;; 4
    (unless-file-exists-p prov-path
      ;; note that load prov explicitly does not change on resume
      (make-prov-record prov-path))
    (unless-file-exists-p p-path
      ;; FIXME decouple this location
      (url-copy-file-safe (concat cass-ont "protcur.ttl")
                          p-path))
    (unless-file-exists-p pj-path
      ;; FIXME decouple this location
      (url-copy-file-safe (concat cass-ont "protcur.json")
                          pj-path))
    (unless-file-exists-p ce-path
      (url-copy-file-safe (concat cass-px (file-name-nondirectory ce-path))
                          ce-path))
    (unless-file-exists-p mis-path
      (url-copy-file-safe (concat rguc "SciCrunch/NIF-Ontology/" ont-git-ref "/ttl/sparc-methods.ttl")
                          mis-path))
    (unless-file-exists-p mis-r-path
      (reason-ontology mis-path mis-r-path))
    (unless-file-exists-p sct-path
      (url-copy-file-safe (concat rguc "SciCrunch/NIF-Ontology/" ont-git-ref "/ttl/sparc-community-terms.ttl")
                          sct-path))
    (unless-file-exists-p sml-path
      (url-copy-file-safe (concat rguc "SciCrunch/NIF-Ontology/" ont-git-ref "/ttl/sparc-missing-labels.ttl")
                          sml-path))
    ;; load apinatomy files
    (unless-file-exists-p sparc-data-path
      ;; FIXME timestamp the release, but coordinate with SciGraph
      ;; XXX REMINDER sparc-data.ttl is NOT used as an entry point for loading
      (url-copy-file-safe (concat rguc "SciCrunch/sparc-curation/master/" sparc-data-source)
                          sparc-data-path))

    ;; `apinat-paths' is only computed when the sentinel or the journal is
    ;; missing.  The load in step 7 only runs when the journal is missing,
    ;; so the list is always populated by the time it is spliced in there.
    (unless (and (file-exists-p apinat-sentinel-path)
                 (file-exists-p journal-path))
      (setq apinat-paths (get-apinat-paths (read-ttl-file sparc-data-path)))
      ;; `mapcar' is used for its side effect only; the result is dropped
      (mapcar
       (lambda (a-path)
         (let ((dapath (concat "data/" a-path)))
           (unless-file-exists-p dapath
             (url-copy-file-safe (concat apinat-base-url a-path)
                                 dapath))))
       apinat-paths))

    ;; retrieve, reason, and load various ontologies
    ;; FIXME when to patch
    ;; chebi
    (unless-file-exists-p ch-path ; doesn't need to be reasoned
      (url-copy-file-safe
       (concat rguc "SciCrunch/NIF-Ontology/" ont-git-ref "/ttl/generated/chebislim.ttl") ch-path))
    ;; uberon
    (unless-file-exists-p ub-path
      (get-ontology "http://purl.obolibrary.org/obo/uberon.owl" ub-path))
    (unless-file-exists-p ub-r-path
      (reason-ontology ub-path ub-r-path))
    ;; emapa
    (unless-file-exists-p em-path
      (get-ontology "http://purl.obolibrary.org/obo/emapa.owl" em-path))
    (unless-file-exists-p em-r-path
      (reason-ontology em-path em-r-path))
    ;; approach
    (unless-file-exists-p ap-path ; this doesn't need to be reasoned
      (url-copy-file-safe (concat rguc "SciCrunch/NIF-Ontology/" ont-git-ref "/ttl/approach.ttl") ap-path))
    ;; methods
    (unless-file-exists-p me-path
      ;; FIXME this pulls in a staggering amount of the nif ontology and is quite large
      ;; FIXME reasoner errors between methods-helper, ro, and pato prevent this
      ;;(get-ontology (concat rguc "SciCrunch/NIF-Ontology/dev/ttl/methods.ttl") me-path)
      (url-copy-file-safe (concat rguc "SciCrunch/NIF-Ontology/" ont-git-ref "/ttl/methods.ttl") me-path))
    (unless-file-exists-p me-r-path
      (reason-ontology me-path me-r-path))
    ;; npo
    (unless-file-exists-p npo-path
      (get-ontology (concat rguc "SciCrunch/NIF-Ontology/" ont-git-ref "/ttl/npo.ttl") npo-path))
    (unless-file-exists-p npo-r-path ; FIXME (npo-path npo-r-path) ?? npo newer than npo-r issues sigh make
      (reason-ontology npo-path npo-r-path))

    ;; 7
    ;; an existing journal counts as "already loaded", same as the file
    ;; checks above
    (unless (or no-load (file-exists-p journal-path))
      ;; FIXME if this ever runs as an update instead of load it might be tricky
      ;; FIXME remove the path if we fail on this step?
      ;; bound to nil for the duration of the load, which turns off the
      ;; backtrace that batch mode would otherwise print on error
      (let (backtrace-on-error-noninteractive)
        (apply
         #'run-command
         `("blazegraph-runner" ,(concat "--journal=" journal-path)
           "load" "--use-ontology-graph" ,p-path ,ce-path
           ;;"http://purl.obolibrary.org/obo/uberon.owl"
           ,ch-path
           ,em-path
           ,ub-path
           ,ap-path
           ,me-path
           ,npo-path
           ,mis-path
           ,sct-path

           ,em-r-path
           ,ub-r-path
           ,me-r-path
           ,npo-r-path
           ,mis-r-path

           ,@(mapcar (lambda (p) (concat "data/" p)) apinat-paths)

           ,prov-path))))))

(defun step-add-prefixes-file ()
  "Create data/prefixes.conf with the generate-prefixes babel block if missing.
The path is relative to `default-directory'; an existing file is
left untouched."
  ;; NOTE generation of the sparql-prefixes block from source ontologies
  ;; is done asynchronously in python and is a bit out of date
  ;; NOTE(review): the binding keeps the name `prefixes-path' on purpose;
  ;; the variable is declared with `defvar-local', so this `let' is a
  ;; dynamic binding that the babel block may read -- confirm before renaming.
  (let ((prefixes-path "data/prefixes.conf"))
    (if (file-exists-p prefixes-path)
        nil
      ;; TODO mark generate prefixes as safe as we do
      ;; for nonl in sparc-curation/docs/queries.org
      (ow-babel-eval "generate-prefixes"))))

(defun step-add-query-org (path-queries)
  "Write the HEAD revision of PATH-QUERIES into `default-directory'.
The copy keeps the file name of PATH-QUERIES and is only written
when no file of that name exists there yet.  The content comes
from `magit-find-file-noselect' at revision HEAD."
  (let ((destination (concat default-directory
                             (file-name-nondirectory path-queries))))
    (when (not (file-exists-p destination))
      (with-current-buffer (magit-find-file-noselect "HEAD" path-queries)
        (write-region (point-min) (point-max) destination)))))

;; 10

(defun step-test ()
  "Placeholder for running the release test queries.
Nothing is executed yet; the intended command is kept as quoted
data and returned as-is."
  ;; TODO
  (quote (run-command "emacs" "-Q" "--batch" TODO)))

(defun step-package (release-dir)
  "Zip RELEASE-DIR into an archive named after it, unless one exists.
Meant to be run with `default-directory' set to the build
directory: both the archive name and the directory handed to zip
are relative names derived from RELEASE-DIR."
  (let* ((dir-name (file-name-base (directory-file-name release-dir)))
         (archive (concat dir-name ".zip")))
    (when (not (file-exists-p archive))
      (run-command "zip" "-r" archive dir-name))))

;; N release

(defun step-release (build-dir &optional this-release-dir release-time no-blaze no-load no-annos)
  ;; Run the whole release pipeline inside BUILD-DIR.
  ;;
  ;; THIS-RELEASE-DIR, when non-nil, is used as the release directory
  ;; instead of creating a new timestamped one (e.g. the value found by
  ;; `find-resume-dir').  RELEASE-TIME replaces the current time in the
  ;; generated directory name.  NO-BLAZE skips arranging the blazegraph
  ;; jar under opt/ and copying queries.org.  NO-LOAD is passed on to
  ;; `step-load-store' and also skips the test and package steps.
  ;; NO-ANNOS skips `step-annotations'.
  (step-ensure-commands)
  (step-ensure-python-modules)
  (step-ensure-services)
  ;; FIXME something about the resume process is still broken
  (unless (file-directory-p build-dir)
    (mkdir build-dir))
  ;; path-queries is computed from the caller's `default-directory',
  ;; before that variable is rebound further down
  (let* ((path-queries (concat default-directory "queries.org"))
         (release-dir ; have to use this-release-dir so that lexical binding works
          (or (and this-release-dir (file-name-as-directory this-release-dir))
              (file-name-as-directory
               (concat build-dir
                       "release-"
                       ;; the final t asks for universal time, matching
                       ;; the literal Z in the format
                       (format-time-string "%Y-%m-%dT%H%M%SZ"
                                           (or release-time (current-time)) t)
                       ;; XXX we put the type of release after the date to preserve sort order
                       ;; a dirty hack but simpler than the alternative
                       (and sckan-release "-sckan"))))))
    (unless (file-directory-p release-dir)
      (mkdir release-dir))
    ;; the two fetch steps run with BUILD-DIR as the working directory,
    ;; so their downloads live there rather than in one release
    (let* ((default-directory build-dir)
           (path-jar (step-fetch-blazegraph))
           (path (step-fetch-blazegraph-runner)))
      ;; everything in this inner form works relative to the release dir
      (let ((default-directory release-dir))
        (unless (file-directory-p "data")
          (mkdir "data"))
        (unless no-blaze
          (unless (file-directory-p "opt")
            (mkdir "opt"))
          (step-arrange-blazegraph build-dir path-jar))
        (unless no-annos
          (step-annotations))
        ;; PATH is the unpacked blazegraph-runner directory; its bin/
        ;; subdirectory is handed over as the executable location
        (step-load-store (concat path "bin") no-load)
        (step-add-prefixes-file)
        (unless no-blaze
          (step-add-query-org path-queries))
        (unless no-load
          (step-test)))
      ;; packaging runs with `default-directory' back at BUILD-DIR
      (unless no-load
        (step-package release-dir)))))

Clean annotations

import json
from pathlib import Path

# `rel_path` is not defined in this block; it has to be supplied by
# whatever evaluates it (presumably an org-babel variable -- confirm).
# A falsy value means "use the current working directory".
release_root = Path(rel_path) if rel_path else Path.cwd()
annotations_file = release_root / 'data/annotations.json'

with open(annotations_file, 'rt') as source:
    payload = json.load(source)

# The first element of the document holds the annotation records; every
# record gets the same fixed group and read permission written over
# whatever it carried before.
for annotation in payload[0]:
    annotation['permissions']['read'] = ['group:sparc-curation']
    annotation['group'] = 'sparc-curation'

with open(annotations_file, 'wt') as sink:
    json.dump(payload, sink)

Generate sparql prefixes

import augpathlib as aug
from pyontutils.core import OntGraph, OntResPath
from pyontutils.namespaces import OntCuries
# `rel_path` and `prefixes_path` are not defined in this block; they
# have to be supplied by whatever evaluates it (presumably org-babel
# variables -- confirm).
g = OntGraph()
OntCuries.populate(g)
bpath = aug.LocalPath('/tmp/build/')  # FIXME abstract
# No explicit release: take the last directory under the build path, in
# sorted order, whose name ends in Z.
rel_path = (
    aug.LocalPath(rel_path)
    if rel_path
    else sorted(c for c in bpath.children
                if c.is_dir() and c.name[-1] == 'Z')[-1]
)
data_path = rel_path / 'data'
# Add the namespaces declared by every ttl file of the release to g.
for ttl_file in data_path.rglob('*.ttl'):
    OntResPath(ttl_file).metadata().graph.namespace_manager.populate(g)

# Prefixes starting with ns, default or local are left out.
skipped = ('ns', 'default', 'local')
prefix_lines = [f'PREFIX {k}: <{v}>'
                for k, v in dict(g.namespace_manager).items()
                if not k.startswith(skipped)]
# Order by the IRI first and by the "PREFIX name:" part second.
prefix_lines.sort(key=lambda s: s.rsplit("<", 1)[::-1])
prefixes = '\n'.join(prefix_lines)
prefixes_path = (
    rel_path / prefixes_path
    if prefixes_path
    else data_path / 'prefixes.conf'
)
# The file content is handed over as a generator that yields one bytes chunk.
prefixes_path.data = (chunk for chunk in (prefixes.encode(),))
str(prefixes_path)

Deploy journal to local server

# Deploy the journal of the newest SCKAN release to the local blazegraph
# service.  Release names embed a sortable timestamp, so the last entry
# in sorted order is the newest one.
_sckanl="$(ls -d /tmp/build/release-*-sckan | sort -u | tail -n 1)"
# Check everything that can be checked BEFORE stopping the service: with
# no release journal, or no way into the service directory, carrying on
# would move the live journal aside and restart the service without one.
if [ -f "${_sckanl}/data/blazegraph.jnl" ] && pushd /var/lib/blazegraph; then
    /etc/init.d/blazegraph stop
    # keep the previous journal as a timestamped backup
    mv blazegraph.jnl "blazegraph.jnl.$(date +%s)"
    cp -a "${_sckanl}/data/blazegraph.jnl" .
    chown blazegraph:blazegraph blazegraph.jnl
    /etc/init.d/blazegraph start
    popd
else
    echo "no SCKAN release journal under /tmp/build, or cannot enter /var/lib/blazegraph; nothing deployed" >&2
    false
fi

Date: 2022-12-08T12:32:24-08:00

Author: Tom Gillespie

Created: 2022-12-22 Thu 01:38

Validate