View source with raw comments or as raw
   1:- module(rdf_cache,
   2          [ rdf_set_cache_options/1,    % +Options
   3            rdf_cache_file/3            % +URL, +RW, -File
   4          ]).
   5:- use_module(library(error)).
   6:- use_module(library(filesex)).
   7
   8/** <module> Cache RDF triples
   9
  10The library library(semweb/rdf_cache) defines the   caching strategy for
  11triples sources. When using large RDF   sources, caching triples greatly
  12speedup loading RDF documents. The cache  library implements two caching
  13strategies that are controlled by rdf_set_cache_options/1.
  14
  15*|Local caching|* This approach  applies  to   files  only.  Triples are
  16cached in a sub-directory of  the   directory  holding  the source. This
  17directory is called =|.cache|= (=|_cache|=  on   Windows).  If the cache
  18option =create_local_directory= is =true=, a  cache directory is created
  19if posible.
  20
  21*|Global caching|* This approach applies  to   all  sources,  except for
  22unnamed streams. Triples are cached in   directory  defined by the cache
  23option =global_directory=.
  24
  25When loading an RDF file, the system   scans  the configured cache files
  26unless cache(false) is specified as option   to rdf_load/2 or caching is
  27disabled. If caching is enabled but no cache exists, the system will try
  28to create a cache file. First it will try to do this locally. On failure
  29it will try to configured global cache.
  30*/
  31
  32:- dynamic
  33    cache_option/1.
  34
  35set_setfault_options :-
  36    assert(cache_option(enabled(true))),
  37    (   current_prolog_flag(windows, true)
  38    ->  assert(cache_option(local_directory('_cache')))
  39    ;   assert(cache_option(local_directory('.cache')))
  40    ).
  41
  42:- set_setfault_options.                % _only_ when loading!
  43
  44%!  rdf_set_cache_options(+Options)
  45%
  46%   Change the cache policy.  Provided options are:
  47%
  48%     * enabled(Boolean)
  49%     If =true=, caching is enabled.
  50%
  51%     * local_directory(Name).
  52%     Plain name of local directory.  Default =|.cache|=
  53%     (=|_cache|= on Windows).
  54%
  55%     * create_local_directory(Bool)
  56%     If =true=, try to create local cache directories
  57%
  58%     * global_directory(Dir)
  59%     Writeable directory for storing cached parsed files.
  60%
  61%     * create_global_directory(Bool)
  62%     If =true=, try to create the global cache directory.
  63
  64rdf_set_cache_options([]) :- !.
  65rdf_set_cache_options([H|T]) :-
  66    !,
  67    rdf_set_cache_options(H),
  68    rdf_set_cache_options(T).
  69rdf_set_cache_options(Opt) :-
  70    functor(Opt, Name, Arity),
  71    arg(1, Opt, Value),
  72    (   cache_option(Name, Type)
  73    ->  must_be(Type, Value)
  74    ;   domain_error(cache_option, Opt)
  75    ),
  76    functor(Gen, Name, Arity),
  77    retractall(cache_option(Gen)),
  78    expand_option(Opt, EOpt),
  79    assert(cache_option(EOpt)).
  80
  81cache_option(enabled,                 boolean).
  82cache_option(local_directory,         atom).
  83cache_option(create_local_directory,  boolean).
  84cache_option(global_directory,        atom).
  85cache_option(create_global_directory, boolean).
  86
  87expand_option(global_directory(Local), global_directory(Global)) :-
  88    !,
  89    absolute_file_name(Local, Global).
  90expand_option(Opt, Opt).
  91
  92
  93%!  rdf_cache_file(+URL, +ReadWrite, -File) is semidet.
  94%
  95%   File is the cache file  for  URL.   If  ReadWrite  is =read=, it
  96%   returns the name of an existing file.  If =write= it returns
  97%   where a new cache file can be overwritten or created.
  98
  99rdf_cache_file(_URL, _, _File) :-
 100    cache_option(enabled(false)),
 101    !,
 102    fail.
 103rdf_cache_file(URL, read, File) :-
 104    !,
 105    (   atom_concat('file://', Path, URL),
 106        cache_option(local_directory(Local)),
 107        file_directory_name(Path, Dir),
 108        local_cache_file(URL, LocalFile),
 109        atomic_list_concat([Dir, Local, LocalFile], /, File)
 110    ;   cache_option(global_directory(Dir)),
 111        url_cache_file(URL, Dir, trp, read, File)
 112    ),
 113    access_file(File, read),
 114    !.
 115rdf_cache_file(URL, write, File) :-
 116    !,
 117    (   atom_concat('file://', Path, URL),
 118        cache_option(local_directory(Local)),
 119        file_directory_name(Path, Dir),
 120        (   cache_option(create_local_directory(true))
 121        ->  RWDir = write
 122        ;   RWDir = read
 123        ),
 124        ensure_dir(Dir, Local, RWDir, CacheDir),
 125        local_cache_file(URL, LocalFile),
 126        atomic_list_concat([CacheDir, LocalFile], /, File)
 127    ;   cache_option(global_directory(Dir)),
 128        ensure_global_cache(Dir),
 129        url_cache_file(URL, Dir, trp, write, File)
 130    ),
 131    access_file(File, write),
 132    !.
 133
 134
 135ensure_global_cache(Dir) :-
 136    exists_directory(Dir),
 137    !.
 138ensure_global_cache(Dir) :-
 139    cache_option(create_global_directory(true)),
 140    make_directory_path(Dir),
 141    print_message(informational, rdf(cache_created(Dir))).
 142
 143
 144                 /*******************************
 145                 *         LOCAL CACHE          *
 146                 *******************************/
 147
 148%!  local_cache_file(+FileURL, -File) is det.
 149%
 150%   Return the name of the cache file   for FileURL. The name is the
 151%   plain filename with the .trp extension.  As   the  URL is a file
 152%   URL, it is guaranteed  to  be   a  valid  filename.  Assumes the
 153%   hosting OS can handle  multiple   exensions  (=|.x.y|=)  though.
 154%   These days thats even true on Windows.
 155
 156local_cache_file(URL, File) :-
 157    file_base_name(URL, Name),
 158    file_name_extension(Name, trp, File).
 159
 160
 161                 /*******************************
 162                 *         GLOBAL CACHE         *
 163                 *******************************/
 164
 165%!  url_cache_file(+URL, +Dir, +Ext, +RW, -Path) is semidet.
 166%
 167%   Determine location of cache-file for the   given  URL in Dir. If
 168%   Ext is provided, the  returned  Path   is  ensured  to  have the
 169%   specified extension.
 170%
 171%   @param RW       If =read=, no directories are created and the call
 172%                   fails if URL is not in the cache.
 173
 174url_cache_file(URL, Dir, Ext, RW, Path) :-
 175    term_hash(URL, Hash0),
 176    Hash is Hash0 + 100000,         % make sure > 4 characters
 177    format(string(Hex), '~16r', [Hash]),
 178    sub_atom(Hex, _, 2, 0, L1),
 179    ensure_dir(Dir, L1, RW, Dir1),
 180    sub_atom(Hex, _, 2, 2, L2),
 181    ensure_dir(Dir1, L2, RW, Dir2),
 182    url_to_file(URL, File),
 183    ensure_ext(File, Ext, FileExt),
 184    atomic_list_concat([Dir2, /, FileExt], Path).
 185
 186ensure_dir(D0, Sub, RW, Dir) :-
 187    atomic_list_concat([D0, /, Sub], Dir),
 188    (   exists_directory(Dir)
 189    ->  true
 190    ;   RW == write
 191    ->  catch(make_directory(Dir), _, fail)
 192    ).
 193
 194ensure_ext(File, '', File) :- !.
 195ensure_ext(File, Ext, File) :-
 196    file_name_extension(_, Ext, File),
 197    !.
 198ensure_ext(File, Ext, FileExt) :-
 199    file_name_extension(File, Ext, FileExt).
 200
 201%!  url_to_file(+URL, -File)
 202%
 203%   Convert a URL in something that fits  in a file, i.e. avoiding /
 204%   and :. We  simply  replace  these  by   -.  We  could  also  use
 205%   www_form_encode/2, but confusion when to replace  as well as the
 206%   fact that we loose the '.' (extension)   makes this a less ideal
 207%   choice.  We could also consider base64 encoding of the name.
 208
 209url_to_file(URL, File) :-
 210    atom_codes(URL, Codes),
 211    phrase(safe_file_name(Codes), FileCodes),
 212    atom_codes(File, FileCodes).
 213
 214safe_file_name([]) -->
 215    [].
 216safe_file_name([H|T]) -->
 217    replace(H),
 218    !,
 219    safe_file_name(T).
 220safe_file_name([H|T]) -->
 221    [H],
 222    safe_file_name(T).
 223
 224%!  replace(+Code)//
 225%
 226%   Replace a character  code  that  cannot   safely  be  put  in  a
 227%   filename. Should we use %XX?
 228
 229replace(0'/)  --> "-".                  % directory separator
 230replace(0'\\) --> "-".                  % not allowed in Windows filename
 231replace(0':)  --> "-".                  % idem
 232replace(0'?)  --> "-".                  % idem
 233replace(0'*)  --> "-".                  % idem
 234
 235
 236                 /*******************************
 237                 *             MESSAGES         *
 238                 *******************************/
 239
 240:- multifile prolog:message/3.
 241
 242prolog:message(rdf(cache_created(Dir))) -->
 243    [ 'Created RDF cache directory ~w'-[Dir] ].