View source with raw comments or as raw
   1/*  Part of SWI-Prolog
   2
   3    Author:        Jan Wielemaker
   4    E-mail:        J.Wielemaker@vu.nl
   5    WWW:           http://www.swi-prolog.org
   6    Copyright (c)  2010-2014, University of Amsterdam
   7                              VU University Amsterdam
   8    All rights reserved.
   9
  10    Redistribution and use in source and binary forms, with or without
  11    modification, are permitted provided that the following conditions
  12    are met:
  13
  14    1. Redistributions of source code must retain the above copyright
  15       notice, this list of conditions and the following disclaimer.
  16
  17    2. Redistributions in binary form must reproduce the above copyright
  18       notice, this list of conditions and the following disclaimer in
  19       the documentation and/or other materials provided with the
  20       distribution.
  21
  22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33    POSSIBILITY OF SUCH DAMAGE.
  34*/
  35
  36:- module(rdf_library,
  37          [ rdf_attach_library/1,       % +Dir
  38            rdf_load_library/1,         % +Ontology
  39            rdf_load_library/2,         % +Ontology, +Options
  40            rdf_list_library/0,
  41            rdf_list_library/1,         % +Ontology
  42            rdf_list_library/2,         % +Ontology, +Options
  43            rdf_library_source/2,       % +Ontology, -SourceURL
  44            rdf_library_index/2,        % ?Id, ?Facet
  45            rdf_current_manifest/1      % -Manifest
  46          ]).
  47:- use_module(library(semweb/rdf_db)).
  48:- use_module(library(semweb/turtle)).
  49:- use_module(library(rdf)).
  50:- use_module(library(lists)).
  51:- use_module(library(option)).
  52:- use_module(library(debug)).
  53:- use_module(library(error)).
  54:- use_module(library(pairs)).
  55:- use_module(library(date)).
  56:- use_module(library(uri)).
  57:- use_module(library(http/http_open)).
  58:- use_module(library(thread)).
  59:- use_module(library(apply)).
  60:- use_module(library(solution_sequences)).
  61
  62:- predicate_options(rdf_list_library/2, 2,
  63                     [ indent(atom),
  64                       show_graph(boolean),
  65                       show_source(boolean),
  66                       show_virtual(boolean)
  67                     ]).
  68:- predicate_options(rdf_load_library/2, 2,
  69                     [ concurrent(positive_integer),
  70                       import(boolean),
  71                       load(boolean),
  72                       base_uri(atom),
  73                       claimed_source(atom),
  74                       not_found(oneof([error,warning,silent]))
  75                     ]).
  76
  77/** <module> RDF Library Manager
  78
  79This module manages an ontology library. Such   a  library consists of a
  80directory with manifest files named =|Manifest.rdf|= or =|Manifest.ttl|=
  81(Turtle). The manifest files define ontologies  appearing in the library
  82as well as namespace mnemonics and dependencies.
  83
  84The typical usage scenario is
  85
  86==
  87?- rdf_attach_library('/some/directory').
  88?- rdf_load_library(my_ontology).
  89==
  90
  91@tbd    Add caching info
  92@tbd    Allow using Manifests on HTTP servers
  93@author Jan Wielemaker
  94*/
  95
  96:- rdf_register_ns(lib,  'http://www.swi-prolog.org/rdf/library/').
  97:- rdf_register_ns(void, 'http://rdfs.org/ns/void#').
  98:- rdf_register_ns(vann, 'http://purl.org/vocab/vann/').
  99
 100:- dynamic
 101    manifest/2,                     % Path, Time
 102    library_db/3.                   % Name, URL, Facets
 103
 104%       Force compile-time namespace expansion
 105
 106:- rdf_meta
 107    edge(+, r,r,o).
 108
 109                 /*******************************
 110                 *            LOADING           *
 111                 *******************************/
 112
 113%!  rdf_load_library(+Id) is det.
 114%!  rdf_load_library(+Id, +Options) is det.
 115%
 116%   Load ontologies from the  library.  A   library  must  first  be
 117%   attached using rdf_attach_library/1.  Defined Options are:
 118%
 119%           * import(Bool)
 120%           If =true= (default), also load ontologies that are
 121%           explicitely imported.
 122%
 123%           * base_uri(URI)
 124%           BaseURI used for loading RDF.  Local definitions in
 125%           ontologies overrule this option.
 126%
 127%           * claimed_source(URL)
 128%           URL from which we claim to have loaded the data.
 129%
 130%           * not_found(+Level)
 131%           The system does a pre-check for the existence of
 132%           all references RDF databases.  If Level is =error=
 133%           it reports missing databases as an error and fails.
 134%           If =warning= it prints them, but continues.  If
 135%           =silent=, no checks are preformed.  Default is =error=.
 136%
 137%           * concurrent(Threads)
 138%           Perform the load concurrently using N threads.  If not
 139%           specified, the number is determined by
 140%           guess_concurrency/2.
 141%
 142%           * load(+Bool)
 143%           If =false=, to all the preparation, but do not execute
 144%           the actual loading.  See also rdf_list_library/2.
 145
 146rdf_load_library(Id) :-
 147    rdf_load_library(Id, []).
 148
 149rdf_load_library(Id, Options) :-
 150    cleaned_load_commands(Id, Cmds, Options),
 151    (   option(concurrent(Threads), Options)
 152    ->  true
 153    ;   guess_concurrency(Cmds, Threads)
 154    ),
 155    length(Cmds, NSources),
 156    print_message(informational, rdf(loading(NSources, Threads))),
 157    (   option(load(true), Options, true)
 158    ->  concurrent(Threads, Cmds, [])
 159    ;   true
 160    ).
 161
 162%!  rdf_library_source(+Id, -Source) is nondet.
 163%
 164%   True of Source is the URL that is  part of the given library Id.
 165%   This predicate finds all indirect   dependencies.  It does _not_
 166%   check whether the source exists or is valid.
 167%
 168%   @see uri_file_name/2 for converting file:// URLs to a filename.
 169
 170rdf_library_source(Id, Source) :-
 171    cleaned_load_commands(Id, Cmds,
 172                          [ import(true),
 173                            not_found(silent)
 174                          ]),
 175    member(rdf_load(Source, _), Cmds).
 176
 177
 178cleaned_load_commands(Id, Cmds, Options) :-
 179    load_commands(Id, Options, Pairs),
 180    pairs_values(Pairs, Commands),
 181    list_to_set(Commands, Cmds2),
 182    delete_virtual(Cmds2, Cmds3),
 183    find_conflicts(Cmds3),
 184    check_existence(Cmds3, Cmds, Options).
 185
 186delete_virtual([], []).
 187delete_virtual([virtual(_)|T0], T) :-
 188    !,
 189    delete_virtual(T0, T).
 190delete_virtual([H|T0], [H|T]) :-
 191    delete_virtual(T0, T).
 192
 193
 194%!  find_conflicts(+LoadCommands) is semidet.
 195%
 196%   Find possibly conflicting options for loading the same source
 197
 198find_conflicts(Commands) :-
 199    no_source_with_different_options(Commands),
 200    no_sources_in_same_graph(Commands).
 201
 202%!  no_source_with_different_options(+Commands) is semidet.
 203%
 204%   True if there are not multiple calls to load the same graph, but
 205%   with  different  load-options.  Prints  a    warning  and  fails
 206%   otherwise.
 207
 208no_source_with_different_options(Commands) :-
 209    sort(Commands, Cmds),
 210    conflicts(Cmds, Conflicts),
 211    report_conflicts(Conflicts),
 212    Conflicts == [].
 213
 214conflicts([], []).
 215conflicts([C1, C2|T0], [C1-C2|T]) :-
 216    conflict(C1, C2),
 217    !,
 218    conflicts([C2|T0], T).
 219conflicts([_|T0], T) :-
 220    conflicts(T0, T).
 221
 222conflict(rdf_load(Src, Options1), rdf_load(Src, Options2)) :-
 223    sort(Options1, S1),
 224    sort(Options2, S2),
 225    S1 \== S2.
 226
 227report_conflicts([]).
 228report_conflicts([C1-C2|T]) :-
 229    print_message(warning, rdf(load_conflict(C1,C2))),
 230    report_conflicts(T).
 231
 232%!  no_sources_in_same_graph(+Commands) is semidet.
 233%
 234%   True if there are not two load   commands  referring to the same
 235%   graph.
 236
 237no_sources_in_same_graph(Commands) :-
 238    map_list_to_pairs(command_graph, Commands, Keyed),
 239    keysort(Keyed, KeySorted),
 240    group_pairs_by_key(KeySorted, SourcesByGraph),
 241    (   member(Graph-Sources, SourcesByGraph),
 242        Sources = [_,_|_]
 243    ->  forall(( member(Graph-Sources, SourcesByGraph),
 244                 Sources = [_,_|_]
 245               ),
 246               print_message(error,
 247                             rdf(multiple_source_for_graph(Graph, Sources)))),
 248        fail
 249    ;   true
 250    ).
 251
 252command_graph(rdf_load(_, Options), Graph) :-
 253    option(graph(Graph), Options),
 254    !.
 255command_graph(rdf_load(URL, _), URL) :- !.
 256command_graph(_, _).                    % Other command.  Each variable it its own key
 257
 258
 259%!  check_existence(+CommandsIn, -Commands, +Options) is det.
 260%
 261%   Report existence errors. Fail if at   least  one source does not
 262%   exist. and the not_found level is not =silent=.
 263%
 264%   @error existence_error(urls, ListOfUrls)
 265
 266check_existence(CommandsIn, Commands, Options) :-
 267    option(not_found(Level), Options, error),
 268    must_be(oneof([error,warning,silent]), Level),
 269    (   Level == silent
 270    ->  Commands = CommandsIn
 271    ;   missing_urls(CommandsIn, Commands, Missing),
 272        (   Missing == []
 273        ->  true
 274        ;   Level == warning
 275        ->  report_missing(Missing, Level)
 276        ;   existence_error(urls, Missing)
 277        )
 278    ).
 279
 280
 281missing_urls([], [], []).
 282missing_urls([H|T0], Cmds, Missing) :-
 283    H = rdf_load(URL, _),
 284    (   catch(exists_url(URL, _Ext), error(existence_error(_,_), _), fail)
 285    ->  Cmds = [H|T],
 286        missing_urls(T0, T, Missing)
 287    ;   Missing = [URL|T],
 288        missing_urls(T0, Cmds, T)
 289    ).
 290
 291report_missing([], _).
 292report_missing([H|T], Level) :-
 293    print_message(Level, error(existence_error(url, H), _)),
 294    report_missing(T, Level).
 295
 296%!  guess_concurrency(+Commands, -Threads) is det.
 297%
 298%   How much concurrency to use? Set to   the  number of CPUs if all
 299%   input comes from  files  or  5   if  network  based  loading  is
 300%   demanded.
 301
 302guess_concurrency(Commands, Threads) :-
 303    count_uris(Commands, FileURLs, OtherURLs),
 304    (   FileURLs > 0
 305    ->  (   current_prolog_flag(cpu_count, CPUs)
 306        ->  true
 307        ;   CPUs = 1
 308        ),
 309        FileThreads is min(FileURLs, CPUs)
 310    ;   FileThreads = 0
 311    ),
 312    (   OtherURLs > 0
 313    ->  OtherThreads is min(5, OtherURLs)
 314    ;   OtherThreads = 0
 315    ),
 316    Threads is FileThreads + OtherThreads.
 317
 318count_uris([], 0, 0).
 319count_uris([rdf_load(URL, _)|T], F, NF) :-
 320    count_uris(T, F0, NF0),
 321    (   web_url(URL)
 322    ->  NF is NF0 + 1,
 323        F = F0
 324    ;   F is F0 + 1,
 325        NF = NF0
 326    ).
 327
 328
 329%!  load_commands(+Id, +Options, -Pairs:list(Level-Command)) is det.
 330%
 331%   Commands are the RDF commands to execute for rdf_load_library/2.
 332%   Splitting  in  command  collection  and   execution  allows  for
 333%   concurrent execution as well  as   forward  checking of possible
 334%   problems.
 335%
 336%   @tbd    Fix poor style; avoid assert/retract.
 337
 338:- thread_local
 339    command/2.
 340
 341load_commands(Id, Options, Commands) :-
 342    retractall(command(_,_)),
 343    rdf_update_library_index,
 344    dry_load(Id, 1, Options),
 345    findall(Level-Cmd, retract(command(Level, Cmd)), Commands).
 346
 347dry_load(Id, Level, Options) :-
 348    (   library(Id, File, Facets)
 349    ->  merge_base_uri(Facets, Options, Options1),
 350        merge_source(Facets, Options1, Options2),
 351        merge_blanks(Facets, Options2, Options3),
 352        merge_format(Facets, Options3, Options4),
 353        (   \+ memberchk(virtual, Facets)
 354        ->  load_options(Options4, File, RdfOptions),
 355            assert(command(Level, rdf_load(File, RdfOptions)))
 356        ;   assert(command(Level, virtual(File)))
 357        ),
 358        (   option(import(true), Options, true)
 359        ->  Level1 is Level + 1,
 360            forall(member(imports(Type, Import), Facets),
 361                   import(Import, Level1, [type(Type)|Options4]))
 362        ;   true
 363        )
 364    ;   existence_error(ontology, Id)
 365    ).
 366
 367merge_base_uri(Facets, Options0, Options) :-
 368    (   option(base_uri(Base), Facets)
 369    ->  exclude(name_option(base_uri), Options0, Options1),
 370        Options = [base_uri(Base)|Options1]
 371    ;   Options = Options0
 372    ).
 373
 374merge_source(Facets, Options0, Options) :-
 375    (   option(claimed_source(Base), Facets)
 376    ->  exclude(name_option(claimed_source), Options0, Options1),
 377        Options = [claimed_source(Base)|Options1]
 378    ;   Options = Options0
 379    ).
 380
 381merge_blanks(Facets, Options0, Options) :-
 382    (   option(blank_nodes(Share), Facets)
 383    ->  exclude(name_option(blank_nodes), Options0, Options1),
 384        Options = [blank_nodes(Share)|Options1]
 385    ;   Options = Options0
 386    ).
 387
 388merge_format(Facets, Options0, Options) :-
 389    (   option(format(Format), Facets)
 390    ->  exclude(name_option(format), Options0, Options1),
 391        Options = [format(Format)|Options1]
 392    ;   Options = Options0
 393    ).
 394
 395name_option(Name, Term) :-
 396    functor(Term, Name, 1).
 397
 398load_options(Options, File, RDFOptions) :-
 399    findall(O, load_option(Options, File, O), RDFOptions).
 400
 401load_option(Options, File, graph(Source)) :-
 402    option(claimed_source(Source0), Options),
 403    (   sub_atom(Source0, _, _, 0, /)
 404    ->  file_base_name(File, Base),
 405        atom_concat(Source0, Base, Source)
 406    ;   atom_concat(Source, #, Source0)
 407    ->  true
 408    ).
 409load_option(Options, File, base_uri(BaseURI)) :-
 410    option(base_uri(Base0), Options),
 411    sub_atom(/, _, _, 0, Base0),
 412    atom_concat(Base0, File, BaseURI).
 413load_option(Options, _File, blank_nodes(Share)) :-
 414    option(blank_nodes(Share), Options).
 415load_option(Options, _File, format(Format)) :-
 416    option(format(Format), Options).
 417
 418%!  import(+URL, +Level, +Options) is det.
 419
 420import(Path, Level, Options) :-
 421    option(type(data_dump), Options),
 422    !,
 423    load_options(Options, Path, RdfOptions),
 424    assert(command(Level, rdf_load(Path, RdfOptions))).
 425import(Path, Level, Options) :-
 426    (   (   library(Id, Path, _)
 427        ->  true
 428        ;   manifest_for_path(Path, Manifest),
 429            catch(exists_url(Manifest, _Ext), _, fail)
 430        ->  process_manifest(Manifest),
 431            library(Id, Path, _)
 432        )
 433    ->  dry_load(Id, Level, Options)
 434    ;   load_options(Options, Path, RdfOptions),
 435        assert(command(Level, rdf_load(Path, RdfOptions)))
 436    ).
 437
 438manifest_for_path(URL, Manifest) :-
 439    file_directory_name(URL, Parent),
 440    manifest_file(Base),
 441    rdf_extension(Ext),
 442    atomic_list_concat([Parent, /, Base, '.', Ext], Manifest).
 443
 444%!  rdf_list_library(+Id) is det.
 445%!  rdf_list_library(+Id, +Options) is det.
 446%
 447%   Print library dependency tree to the terminal.  Options include
 448%   options for rdf_load_library/2 and
 449%
 450%           * show_source(+Boolean)
 451%           If =true= (default), show location we are loading
 452%
 453%           * show_graph(+Boolean)
 454%           If =true= (default =false=), show name of graph
 455%
 456%           * show_virtual(+Boolean)
 457%           If =false= (default =true=), do not show virtual
 458%           repositories.
 459%
 460%           * indent(Atom)
 461%           Atom repeated for indentation levels
 462
 463rdf_list_library(Id) :-
 464    rdf_list_library(Id, []).
 465rdf_list_library(Id, Options) :-
 466    load_commands(Id, Options, Commands),
 467    maplist(print_load(Options), Commands).
 468
 469print_load(Options, _Level-virtual(_)) :-
 470    option(show_virtual(false), Options),
 471    !.
 472print_load(Options, Level-Command) :-
 473    option(indent(Indent), Options, '. '),
 474    forall(between(2, Level, _), format(Indent)),
 475    print_command(Command, Options),
 476    format('~N').
 477
 478print_command(virtual(URL), _Options) :-
 479    format('<~w>', [URL]).
 480print_command(rdf_load(URL), Options) :-
 481    print_command(rdf_load(URL, []), Options).
 482print_command(rdf_load(URL, RDFOptions), Options) :-
 483    (   option(show_source(true), Options, true)
 484    ->  format('~w', [URL]),
 485        (   option(blank_nodes(noshare), RDFOptions)
 486        ->  format(' <not shared>')
 487        ;   true
 488        ),
 489        (   exists_url(URL, Ext)
 490        ->  (   Ext == ''
 491            ->  true
 492            ;   format('[.~w]', [Ext])
 493            )
 494        ;   format(' [NOT FOUND]')
 495        )
 496    ;   true
 497    ),
 498    (   option(show_graph(true), Options, false),
 499        option(graph(Base), RDFOptions)
 500    ->  format('~N\tSource: ~w', [Base])
 501    ;   true
 502    ).
 503
 504exists_url(URL, Ext) :-
 505    uri_file_name(URL, Path),
 506    !,
 507    add_storage_extension(Path, Ext, PathEx),
 508    access_file(PathEx, read),
 509    !.
 510exists_url(URL, Ext) :-
 511    uri_components(URL, Components),
 512    uri_data(scheme, Components, Scheme),
 513    atom(Scheme),
 514    url_scheme(Scheme),
 515    add_storage_extension(URL, Ext, URLEx),
 516    catch(http_open(URLEx, Stream, [ method(head) ]), _, fail),
 517    !,
 518    close(Stream).
 519
 520:- multifile
 521    rdf_db:rdf_storage_encoding/2.
 522
 523add_storage_extension(File, '', File).
 524add_storage_extension(File, Ext, FileEx) :-
 525    rdf_db:rdf_storage_encoding(Ext, _Format),
 526    \+ file_name_extension(_, Ext, File),
 527    file_name_extension(File, Ext, FileEx).
 528
 529url_scheme(http).
 530url_scheme(https).
 531
 532
 533%!  rdf_list_library
 534%
 535%   Prints known RDF library identifiers to current output.
 536
 537rdf_list_library :-
 538    rdf_update_library_index,
 539    (   rdf_library_index(Id, title(TitleLiteral)),
 540        plain_string(TitleLiteral, Title),
 541        format('~w ~t~20|~w', [Id, Title]),
 542        (   rdf_library_index(Id, version(Version))
 543        ->  format(' (version ~w)', [Version])
 544        ;   true
 545        ),
 546        nl,
 547        fail
 548    ;   true
 549    ).
 550
 551plain_string(String, String) :-
 552    atomic(String),
 553    !.
 554plain_string(lang(en, String), String) :- !.
 555plain_string(lang(_, String), String) :- !.
 556plain_string(type(_, String), String) :- !.
 557
 558%!  rdf_library_index(?Id, ?Facet) is nondet.
 559%
 560%   Query the content of the library.  Defined facets are:
 561%
 562%           * source(URL)
 563%           Location from which to load the ontology
 564%
 565%           * title(Atom)
 566%           Title used for the ontology
 567%
 568%           * comment(Atom)
 569%           Additional comments for the ontology
 570%
 571%           * version(Atom)
 572%           Version information on the ontology
 573%
 574%           * imports(Type, URL)
 575%           URLs needed by this ontology. May succeed multiple
 576%           times.  Type is one of =ontology=, =schema= or =instances=.
 577%
 578%           * base_uri(BaseURI)
 579%           Base URI to use when loading documents. If BaseURI
 580%           ends in =|/|=, the actual filename is attached.
 581%
 582%           * claimed_source(Source)
 583%           URL from which we claim to have loaded the RDF. If
 584%           Source ends in =|/|=, the actual filename is
 585%           attached.
 586%
 587%           * blank_nodes(Share)
 588%           Defines how equivalent blank nodes are handled, where
 589%           Share is one of =share= or =noshare=.  Default is to
 590%           share.
 591%
 592%           * format(Format)
 593%           Format of the resource.  Can be used to overrule
 594%           if the format as derived from the HTTP content type
 595%           is wrong.
 596%
 597%           * provides_ns(URL)
 598%           Ontology provides definitions in the namespace URL.
 599%           The formal definition of this is troublesome, but in
 600%           practice it means the ontology has triples whose
 601%           subjects are in the given namespace.
 602%
 603%           * uses_ns(URL)
 604%           The ontology depends on the given namespace.  Normally
 605%           means it contains triples that have predicates or
 606%           objects in the given namespace.
 607%
 608%           * manifest(URL)
 609%           URL of the manifest in which this ontology is defined.
 610%
 611%           * virtual
 612%           Entry is virtual (cannot be loaded)
 613
 614rdf_library_index(Id, Facet) :-
 615    library(Id, Path, Facets),
 616    (   Facet = source(Path)
 617    ;   member(Facet, Facets)
 618    ).
 619
 620
 621                 /*******************************
 622                 *      MANIFEST PROCESSING     *
 623                 *******************************/
 624
 625%!  rdf_attach_library(+Source)
 626%
 627%   Attach manifest from Source.  Source is one of
 628%
 629%           * URL
 630%           Load single manifest from this URL
 631%           * File
 632%           Load single manifest from this file
 633%           * Directory
 634%           Scan all subdirectories and load all =|Manifest.ttl|= or
 635%           =|Manifest.rdf|= found.  If Directory is a path-alias
 636%           (e.g., ontology(.)), _all_ referenced directories are
 637%           scanned for manifest files.
 638%
 639%   Encountered namespaces are registered   using rdf_register_ns/2.
 640%   Encountered ontologies are added to the index. If a manifest was
 641%   already loaded it will be reloaded  if the modification time has
 642%   changed.
 643
 644rdf_attach_library(URL) :-
 645    atom(URL),
 646    uri_is_global(URL),
 647    \+ is_absolute_file_name(URL),   % avoid interpreting C: as a schema
 648    !,
 649    process_manifest(URL).
 650rdf_attach_library(File) :-
 651    absolute_file_name(File, Path,
 652                       [ extensions([rdf,ttl]),
 653                         access(read),
 654                         file_errors(fail)
 655                       ]),
 656    !,
 657    process_manifest(Path).
 658rdf_attach_library(Dir) :-
 659    forall(absolute_file_name(Dir, Path,
 660                              [ file_type(directory),
 661                                access(read),
 662                                solutions(all)
 663                              ]),
 664           attach_dir(Path, [])).
 665
 666
 667%!  rdf_update_library_index
 668%
 669%   Reload all Manifest files.
 670
 671rdf_update_library_index :-
 672    forall(manifest(Location, _Time),
 673           process_manifest(Location)).
 674
 675attach_dir(Path, Visited) :-
 676    memberchk(Path, Visited),
 677    !.
 678attach_dir(Path, Visited) :-
 679    atom_concat(Path, '/*', Pattern),
 680    expand_file_name(Pattern, Members),
 681    (   manifest_file(MBase),
 682        rdf_extension(Ext),
 683        atomic_list_concat([Path, /, MBase, '.', Ext], Manifest),
 684        exists_file(Manifest)
 685    ->  process_manifest(Manifest)
 686    ;   print_message(silent, rdf(no_manifest(Path)))
 687    ),
 688    (   member(Dir, Members),
 689        exists_directory(Dir),
 690        file_base_name(Dir, Base),
 691        \+ hidden_base(Base),
 692        attach_dir(Dir, [Path|Visited]),
 693        fail ; true
 694    ).
 695
 696hidden_base('CVS').
 697hidden_base('cvs').                     % Windows
 698
 699%!  process_manifest(+Location) is det.
 700%
 701%   Process a manifest file, registering  encountered namespaces and
 702%   creating clauses for library/3. No op if manifest was loaded and
 703%   not changed. Removes old data if the manifest was changed.
 704%
 705%   @param  Location is either a path name or a URL.
 706
 707process_manifest(Source) :-
 708    (   web_url(Source)
 709    ->  uri_normalized(Source, Manifest)
 710    ;   uri_file_name(Source, Manifest0)
 711    ->  absolute_file_name(Manifest0, ManifestFile),
 712        uri_file_name(Manifest, ManifestFile)
 713    ;   absolute_file_name(Source, ManifestFile),
 714        uri_file_name(Manifest, ManifestFile)
 715    ),                              % Manifest is a canonical URI
 716    source_time(Manifest, MT),
 717    (   manifest(Manifest, Time),
 718        (   MT =< Time
 719        ->  !
 720        ;   retractall(manifest(Manifest, Time)),
 721            library_db(Id, URL, Facets),
 722            memberchk(manifest(Manifest), Facets),
 723            retractall(library_db(Id, URL, Facets)),
 724            fail
 725        )
 726    ;   read_triples(Manifest, Triples),
 727        process_triples(Manifest, Triples),
 728        print_message(informational, rdf(manifest(loaded, Manifest))),
 729        assert(manifest(Manifest, MT))
 730    ).
 731
 732process_triples(Manifest, Triples) :-
 733    findall(ns(Mnemonic, NameSpace),
 734            extract_namespace(Triples, Mnemonic, NameSpace),
 735            NameSpaces),
 736    findall(Ontology,
 737            extract_ontology(Triples, Ontology),
 738            Ontologies),
 739    maplist(define_namespace, NameSpaces),
 740    maplist(assert_ontology(Manifest), Ontologies).
 741
 742%!  extract_namespace(+Triples, -Mnemonic, -NameSpace)
 743%
 744%   True if Mnemonic is an abbreviation of NameSpace.
 745
 746extract_namespace(Triples, Mnemonic, Namespace) :-
 747    edge(Triples, Decl, lib:mnemonic, literal(Mnemonic)),
 748    edge(Triples, Decl, lib:namespace, Namespace).
 749extract_namespace(Triples, Mnemonic, Namespace) :-
 750    edge(Triples, Decl, vann:preferredNamespacePrefix, literal(Mnemonic)),
 751    edge(Triples, Decl, vann:preferredNamespaceUri, literal(Namespace)).
 752
 753%!  extract_ontology(+Triples, -Ontology) is nondet.
 754%
 755%   Extract definition of an ontology
 756
 757extract_ontology(Triples, library(Name, URL, Options)) :-
 758    distinct(URL, ontology(Triples, URL)),
 759    file_base_name(URL, BaseName),
 760    file_name_extension(Name, _, BaseName),
 761    findall(Facet, facet(Triples, URL, Facet), Options0),
 762    sort(Options0, Options1),
 763    keep_specialized_facets(Options1, Options).
 764
 765ontology(Triples, URL) :-
 766    edge(Triples, URL, rdf:type, Type),
 767    ontology_type(Type).
 768
 769keep_specialized_facets(All, Special) :-
 770    exclude(more_general(All), All, Special).
 771
 772more_general(All, Facet) :-
 773    generalized(Facet, Special),
 774    memberchk(Special, All).
 775
 776generalized(imports(ontology, Path), imports(Other, Path)) :-
 777    dif(Other, ontology).
 778
 779ontology_type(X) :-
 780    (   rdf_equal(X, lib:'Ontology')
 781    ;   rdf_equal(X, lib:'Schema')
 782    ;   rdf_equal(X, lib:'Instances')
 783    ;   rdf_equal(X, void:'Dataset')
 784    ;   rdf_equal(X, void:'Linkset')
 785    ).
 786
 787%!  facet(+Triples, +File, -Facet) is nondet.
 788%
 789%   Enumerate facets about File from   Triples. Facets are described
 790%   with rdf_library_index/2.
 791
 792facet(Triples, File, title(Title)) :-
 793    edge(Triples, File, dcterms:title, literal(Title)).
 794facet(Triples, File, version(Version)) :-
 795    edge(Triples, File, owl:versionInfo, literal(Version)).
 796facet(Triples, File, comment(Comment)) :-
 797    edge(Triples, File, rdfs:comment, literal(Comment)).
 798facet(Triples, File, base_uri(BaseURI)) :-
 799    edge(Triples, File, lib:baseURI, BaseURI).
 800facet(Triples, File, claimed_source(Source)) :-
 801    edge(Triples, File, lib:source, Source).
 802facet(Triples, File, format(Format)) :-
 803    edge(Triples, File, lib:format, literal(Format)).
 804facet(Triples, File, blank_nodes(Mode)) :-
 805    edge(Triples, File, lib:blankNodes, literal(Mode)),
 806    must_be(oneof([share,noshare]), Mode).
 807facet(Triples, File, imports(ontology, Path)) :-
 808    edge(Triples, File, owl:imports, Path).
 809facet(Triples, File, imports(schema, Path)) :-
 810    edge(Triples, File, lib:schema, Path).
 811facet(Triples, File, imports(instances, Path)) :-
 812    edge(Triples, File, lib:instances, Path).
 813facet(Triples, File, imports(subset, Path)) :-
 814    edge(Triples, File, void:subset, Path).
 815facet(Triples, File, imports(data_dump, Path)) :-
 816    edge(Triples, File, void:dataDump, Path).
 817facet(Triples, File, provides_ns(NS)) :-
 818    edge(Triples, File, lib:providesNamespace, NSDecl),
 819    edge(Triples, NSDecl, lib:namespace, NS).
 820facet(Triples, File, uses_ns(NS)) :-
 821    edge(Triples, File, lib:usesNamespace, NSDecl),
 822    edge(Triples, NSDecl, lib:namespace, NS).
 823facet(Triples, File, virtual) :-
 824    (   edge(Triples, File, rdf:type, lib:'Virtual')
 825    ;   edge(Triples, File, rdf:type, void:'Dataset')
 826    ;   edge(Triples, File, rdf:type, void:'Linkset')
 827    ) -> true.
 828
 829%!  edge(+Triples, ?S, ?P, ?O) is nondet.
 830%
 831%   Like rdf_has/3 over a list of Triples.
 832
 833edge(Triples, S, P, O) :-
 834    nonvar(P),
 835    !,
 836    sub_p(SubP, P),
 837    member(rdf(S,SubP,O), Triples).
 838edge(Triples, S, P, O) :-
 839    member(rdf(S,SubP,O), Triples),
 840    sub_p(SubP, P).
 841
 842sub_p(P, P).
 843sub_p(Sub, P) :-
 844    (   nonvar(Sub)
 845    ->  sub_property_of(Sub, Sub1),
 846        sub_p(Sub1, P)
 847    ;   sub_property_of(Sub1, P),
 848        sub_p(Sub, Sub1)
 849    ).
 850
 851:- rdf_meta
 852    sub_property_of(r,r).
 853
 854sub_property_of(void:subset,         owl:imports).
 855sub_property_of(dcterms:description, rdfs:comment).
 856sub_property_of(void:dataDump,       owl:imports).
 857sub_property_of(dc:title,            dcterms:title).
 858
 859%!  source_time(+Source, -Modified) is semidet.
 860%
 861%   Modified is the last modification time of Source.
 862%
 863%   @error  existence_error(Type, Source).
 864
 865source_time(URL, Modified) :-
 866    web_url(URL),
 867    !,
 868    http_open(URL, Stream,
 869              [ header(last_modified, Date),
 870                method(head)
 871              ]),
 872    close(Stream),
 873    Date \== '',
 874    parse_time(Date, Modified).
 875source_time(URL, Modified) :-
 876    uri_file_name(URL, File),
 877    !,
 878    time_file(File, Modified).
 879source_time(File, Modified) :-
 880    time_file(File, Modified).
 881
 882web_url(URL) :-
 883    sub_atom(URL, 0, _, _, 'http://').
 884
 885
 886%!  read_triples(+URL, -Triples) is det.
 887%
 888%   Read RDF/XML or Turtle file into a list of triples.
 889
 890read_triples(FileURL, Triples) :-
 891    uri_file_name(FileURL, File),
 892    !,
 893    (   file_name_extension(_, rdf, File)
 894    ->  load_rdf(File, Triples)
 895    ;   rdf_load_turtle(File, Triples, [])
 896    ).
 897read_triples(HTTPURL, Triples) :-
 898    file_name_extension(_, Ext, HTTPURL),
 899    setup_call_cleanup(
 900        http_open(HTTPURL, In, []),
 901        stream_triples(In, Ext, Triples),
 902        close(In)).
 903
 904stream_triples(Stream, rdf, Triples) :-
 905    load_rdf(stream(Stream), Triples).
 906stream_triples(Stream, ttl, Triples) :-
 907    rdf_load_turtle(stream(Stream), Triples, []).
 908
 909
 910manifest_file('void').                  % make order optional?
 911manifest_file('Manifest').
 912manifest_file('manifest').
 913
 914rdf_extension(ttl).
 915rdf_extension(rdf).
 916
 917
 918%!  assert_ontology(+Manifest, +Term:library(Name, File, Facets)) is det.
 919%
 920%   Add ontology to our library.
 921%
 922%   @tbd    Proper behaviour of re-definition?
 923
 924assert_ontology(Manifest, Term) :-
 925    Term = library(Name, URL, Facets),
 926    (   library(Name, _URL2, Facets2)
 927    ->  memberchk(manifest(Manifest2), Facets2),
 928        print_message(warning, rdf(redefined(Manifest, Name, Manifest2)))
 929    ;   true
 930    ),
 931    assert(library_db(Name, URL,
 932                   [ manifest(Manifest)
 933                   | Facets
 934                   ])).
 935
 936
 937%!  library(?Id, ?URL, ?Facets)
 938%
 939%   Access DB for library information.
 940
 941library(Id, URL, Facets) :-
 942    nonvar(URL),
 943    normalize_url(URL, CanonicalURL),
 944    library_db(Id, CanonicalURL, Facets).
 945library(Id, URL, Facets) :-
 946    library_db(Id, URL, Facets).
 947
 948%!  normalize_url(+URL, -Normalized)
 949%
 950%   Like uri_normalized/2, but we  also   need  (platform dependent)
 951%   filename canonization.
 952
 953normalize_url(URL, CanonicalURL) :-
 954    uri_file_name(URL, File),
 955    !,
 956    absolute_file_name(File, CanFile),
 957    uri_file_name(CanonicalURL, CanFile).
 958normalize_url(URL, CanonicalURL) :-
 959    uri_normalized(URL, CanonicalURL).
 960
 961%!  define_namespace(NS:ns(Mnemonic, Namespace)) is det.
 962%
 963%   Add namespace declaration for Mnemonic.
 964
 965define_namespace(ns(Mnemonic, Namespace)) :-
 966    debug(rdf_library, 'Adding NS ~w = ~q', [Mnemonic, Namespace]),
 967    rdf_register_ns(Mnemonic, Namespace,
 968                    [
 969                        ]).
 970
 971%!  rdf_current_manifest(-URL) is nondet.
 972%
 973%   True if URL is the URL of a currently loaded manifest file.
 974
 975rdf_current_manifest(URL) :-
 976    manifest(URL, _Time).
 977
 978
 979
 980                 /*******************************
 981                 *            MESSAGES          *
 982                 *******************************/
 983
 984:- multifile
 985    prolog:message/3.
 986
 987prolog:message(rdf(no_manifest(Path))) -->
 988    [ 'Directory ~w has no Manifest.{ttl,rdf} file'-[Path] ].
 989prolog:message(rdf(redefined(Manifest, Name, Manifest2))) -->
 990    [ '~w: Ontology ~w already defined in ~w'-
 991      [Manifest, Name, Manifest2]
 992    ].
 993prolog:message(rdf(manifest(loaded, Manifest))) -->
 994    [ 'Loaded RDF manifest ~w'-[Manifest]
 995    ].
 996prolog:message(rdf(load_conflict(C1, C2))) -->
 997    [ 'Conflicting loads: ~p <-> ~p'-[C1, C2] ].
 998prolog:message(rdf(multiple_source_for_graph(Graph, Sources))) -->
 999    [ 'Multiple sources for graph ~p:'-[Graph] ],
1000    sources(Sources).
1001prolog:message(rdf(loading(Files, Threads))) -->
1002    [ 'Loading ~D files using ~D threads ...'-[Files, Threads] ].
1003
1004sources([]) --> [].
1005sources([rdf_load(From, _Options)|T]) -->
1006    [ nl, '\t~p'-[From] ],
1007    sources(T).