1
0
mirror of https://github.com/samba-team/samba.git synced 2025-02-23 09:57:40 +03:00

s3:mdssvc: add Elasticsearch backend

Signed-off-by: Ralph Boehme <slow@samba.org>
Reviewed-by: Noel Power <noel.power@suse.com>
This commit is contained in:
Ralph Boehme 2019-08-05 16:25:01 +02:00
parent c338bdf5a4
commit f5510d7db3
21 changed files with 2351 additions and 2 deletions

View File

@ -0,0 +1,14 @@
<samba:parameter name="elasticsearch:address"
context="S"
type="string"
xmlns:samba="http://www.samba.org/samba/DTD/samba-doc">
<description>
<para>
Specifies the name of the Elasticsearch server to use for Spotlight
queries when using the Elasticsearch backend.
</para>
</description>
<value type="default">localhost</value>
<value type="example">needle.haystack.samba.org</value>
</samba:parameter>

View File

@ -0,0 +1,16 @@
<samba:parameter name="elasticsearch:index"
context="S"
type="string"
xmlns:samba="http://www.samba.org/samba/DTD/samba-doc">
<description>
<para>
Specifies the name of the Elasticsearch index to use for Spotlight queries
when using the Elasticsearch backend. The default value of "_all" is a
special Elasticsearch value that performs the search operation on all
indices.
</para>
</description>
<value type="default">_all</value>
<value type="example">spotlight</value>
</samba:parameter>

View File

@ -0,0 +1,14 @@
<samba:parameter name="elasticsearch:mappings"
context="G"
type="string"
xmlns:samba="http://www.samba.org/samba/DTD/samba-doc">
<description>
<para>
Path to a file specifying metadata attribute mappings in JSON format. Use
by the Elasticsearch backend of the Spotlight RPC service.
</para>
</description>
<value type="default">&pathconfig.SAMBA_DATADIR;/elasticsearch_mappings.json</value>
<value type="example">/usr/share/foo/mymappings.json</value>
</samba:parameter>

View File

@ -0,0 +1,15 @@
<samba:parameter name="elasticsearch:max results"
context="S"
type="integer"
xmlns:samba="http://www.samba.org/samba/DTD/samba-doc">
<description>
<para>
Path to a file specifying metadata attribute mappings in JSON format. Used
by the Elasticsearch backend of the Spotlight RPC service. A value of 0
means no limit.
</para>
</description>
<value type="default">100</value>
<value type="example">10</value>
</samba:parameter>

View File

@ -0,0 +1,14 @@
<samba:parameter name="elasticsearch:port"
context="S"
type="integer"
xmlns:samba="http://www.samba.org/samba/DTD/samba-doc">
<description>
<para>
Specifies the TCP port of the Elasticsearch server to use for Spotlight
queries when using the Elasticsearch backend.
</para>
</description>
<value type="default">9200</value>
<value type="example">9201</value>
</samba:parameter>

View File

@ -0,0 +1,14 @@
<samba:parameter name="elasticsearch:use tls"
context="S"
type="boolean"
xmlns:samba="http://www.samba.org/samba/DTD/samba-doc">
<description>
<para>
Specifies whether to use HTTPS when talking to the Elasticsearch server
used for Spotlight queries when using the Elasticsearch backend.
</para>
</description>
<value type="default">no</value>
<value type="example">yes</value>
</samba:parameter>

View File

@ -19,6 +19,10 @@
Gnome Tracker.
</para></listitem>
<listitem><para><constant>elasticsearch</constant> -
a backend that uses JSON and REST over HTTP(s) to query an
Elasticsearch server.
</para></listitem>
</itemizedlist>
</para>
</description>

View File

@ -252,6 +252,7 @@ enum mangled_names_options {MANGLED_NAMES_NO, MANGLED_NAMES_YES, MANGLED_NAMES_I
enum spotlight_backend_options {
SPOTLIGHT_BACKEND_NOINDEX,
SPOTLIGHT_BACKEND_TRACKER,
SPOTLIGHT_BACKEND_ES,
};
/*

View File

@ -354,6 +354,7 @@ static const struct enum_list enum_ntlm_auth[] = {
static const struct enum_list enum_spotlight_backend[] = {
{SPOTLIGHT_BACKEND_NOINDEX, "noindex"},
{SPOTLIGHT_BACKEND_TRACKER, "tracker"},
{SPOTLIGHT_BACKEND_ES, "elasticsearch"},
{-1, NULL}
};

View File

@ -0,0 +1,142 @@
{
"attribute_mappings": {
"*": {
"type": "fts",
"attribute": ""
},
"kMDItemTextContent": {
"type": "str",
"attribute": "content"
},
"_kMDItemGroupId": {
"type": "type",
"attribute": "file.content_type"
},
"kMDItemContentType": {
"type": "type",
"attribute": "file.content_type"
},
"kMDItemContentTypeTree": {
"type": "type",
"attribute": "file.content_type"
},
"kMDItemFSContentChangeDate": {
"type": "date",
"attribute": "file.last_modified"
},
"kMDItemFSCreationDate": {
"type": "date",
"attribute": "file.created"
},
"kMDItemFSName": {
"type": "str",
"attribute": "file.filename"
},
"kMDItemFSOwnerGroupID": {
"type": "str",
"attribute": "attributes.owner"
},
"kMDItemFSOwnerUserID": {
"type": "str",
"attribute": "attributes.group"
},
"kMDItemFSSize": {
"type": "num",
"attribute": "file.filesize"
},
"kMDItemPath": {
"type": "str",
"attribute": "path.real"
},
"kMDItemAttributeChangeDate": {
"type": "date",
"attribute": "file.last_modified"
},
"kMDItemAuthors": {
"type": "str",
"attribute": "meta.author"
},
"kMDItemContentCreationDate": {
"type": "date",
"attribute": "file.created"
},
"kMDItemContentModificationDate": {
"type": "date",
"attribute": "file.last_modified"
},
"kMDItemCreator": {
"type": "str",
"attribute": "meta.raw.creator"
},
"kMDItemDescription": {
"type": "str",
"attribute": "meta.raw.description"
},
"kMDItemDisplayName": {
"type": "str",
"attribute": "file.filename"
},
"kMDItemDurationSeconds": {
"type": "num",
"attribute": "meta.raw.xmpDM:duration"
},
"kMDItemNumberOfPages": {
"type": "num",
"attribute": "meta.raw.xmpTPg:NPages"
},
"kMDItemTitle": {
"type": "str",
"attribute": "meta.title"
},
"kMDItemAlbum": {
"type": "str",
"attribute": "meta.raw.xmpDM:album"
},
"kMDItemBitsPerSample": {
"type": "num",
"attribute": "meta.raw.tiff:BitsPerSample"
},
"kMDItemPixelHeight": {
"type": "num",
"attribute": "meta.raw.Image Height"
},
"kMDItemPixelWidth": {
"type": "num",
"attribute": "meta.raw.Image Width"
},
"kMDItemResolutionHeightDPI": {
"type": "num",
"attribute": "meta.raw.Y Resolution"
},
"kMDItemResolutionWidthDPI": {
"type": "num",
"attribute": "meta.raw.X Resolution"
}
},
"mime_mappings": {
"1": "message/rfc822",
"2": "text/x-vcard",
"6": "text/x-vcard",
"7": "video/*",
"8": "application/octet-stream",
"9": "text/directory",
"10": "audio/*",
"11": "application/pdf",
"12": "application/vnd.oasis.opendocument.presentation",
"13": "image/*",
"public.content": "message/rfc822 application/pdf application/vnd.oasis.opendocument.presentation image/* text/*",
"public.jpeg": "image/jpeg",
"public.tiff": "image/tiff",
"com.compuserve.gif": "image/gif",
"public.png": "image/png",
"com.microsoft.bmp": "image/bmp",
"public.mp3": "audio/mpeg",
"public.mpeg-4-audio": "audio/x-aac",
"public.text": "text/*",
"public.plain-text": "text/plain",
"public.rtf": "text/rtf",
"public.html": "text/html",
"public.xml": "text/xml",
"public.archive": "application/zip application/x-bzip application/x-bzip2 application/x-tar application/x-7z-compressed"
}
}

View File

@ -0,0 +1,92 @@
/*
Unix SMB/CIFS implementation.
Main metadata server / Spotlight routines / Elasticsearch backend
Copyright (C) Ralph Boehme 2019
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
%{
#include "includes.h"
#include "rpc_server/mdssvc/es_parser.tab.h"
#define YY_NO_INPUT
#define mdsyylalloc SMB_MALLOC
#define mdsyylrealloc SMB_REALLOC
static char *strip_quote(const char *phrase);
%}
%option nounput noyyalloc noyyrealloc prefix="mdsyyl"
ASC [a-zA-Z0-9_\*\:\-\.]
U [\x80-\xbf]
U2 [\xc2-\xdf]
U3 [\xe0-\xef]
U4 [\xf0-\xf4]
SPECIAL [\!\#\$\%\&\'\(\)\+\,\.\/\;\<\=\>\?\@\[\]\^\`\{\}\|\~\\]
ESCHAR [\"\*]
BLANK [ \t\n]
UANY {ASC}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
UPHRASE {UANY}|{SPECIAL}|{BLANK}|\\{ESCHAR}
%%
InRange return FUNC_INRANGE;
\$time\.iso return DATE_ISO;
false {mdsyyllval.bval = false; return BOOLEAN;}
true {mdsyyllval.bval = true; return BOOLEAN;}
\" return QUOTE;
\( return OBRACE;
\) return CBRACE;
\&\& return AND;
\|\| return OR;
\=\= return EQUAL;
\!\= return UNEQUAL;
\= return EQUAL;
\< return LT;
\> return GT;
\, return COMMA;
{UANY}+ {mdsyyllval.sval = talloc_strdup(talloc_tos(), yytext); return WORD;}
\"{UPHRASE}+\" {mdsyyllval.sval = strip_quote(yytext); return PHRASE;}
{BLANK} /* ignore */
%%
static char *strip_quote(const char *phrase)
{
size_t phrase_len = 0;
char *stripped_phrase = NULL;
if (phrase == NULL) {
return NULL;
}
phrase_len = strlen(phrase);
if (phrase_len < 2 ||
phrase[0] != '\"' ||
phrase[phrase_len - 1] != '\"')
{
return talloc_strdup(talloc_tos(), phrase);
}
phrase++;
stripped_phrase = talloc_strndup(talloc_tos(), phrase, phrase_len - 2);
if (stripped_phrase == NULL) {
return NULL;
}
return stripped_phrase;
}

View File

@ -0,0 +1,241 @@
/*
Unix SMB/CIFS implementation.
Main metadata server / Spotlight routines / Elasticsearch backend
Copyright (C) Ralph Boehme 2019
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "es_mapping.h"
/*
* Escaping of special characters in Lucene query syntax across HTTP and JSON
* ==========================================================================
*
* These characters in Lucene queries need escaping [1]:
*
* + - & | ! ( ) { } [ ] ^ " ~ * ? : \ /
*
* Additionally JSON requires escaping of:
*
* " \
*
* Characters already escaped by the mdssvc client:
*
* * " \
*
* The following table contains the resulting escaped strings, beginning with the
* search term, the corresponding Spotlight query and the final string that gets
* sent to the target Elasticsearch server.
*
* string | mdfind | http
* -------+--------+------
* x!x x!x x\\!x
* x&x x&x x\\&x
* x+x x+x x\\+x
* x-x x-x x\\-x
* x.x x.x x\\.x
* x<x x<x x\\<x
* x>x x>x x\\>x
* x=x x=x x\\=x
* x?x x?x x\\?x
* x[x x[x x\\[x
* x]x x]x x\\]x
* x^x x^x x\\^x
* x{x x{x x\\{x
* x}x x}x x\\}x
* x|x x|x x\\|x
* x x x x x\\ x
* x*x x\*x x\\*x
* x\x x\\x x\\\\x
* x"x x\"x x\\\"x
*
* Special cases:
* x y It's not possible to search for terms including spaces, Spotlight
* will search for x OR y.
* x(x Search for terms including ( and ) doesn not work with Spotlight.
*
* [1] <http://lucene.apache.org/core/8_2_0/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#Escaping_Special_Characters>
*/
static char *escape_str(TALLOC_CTX *mem_ctx,
const char *in,
const char *escape_list,
const char *escape_exceptions)
{
char *out = NULL;
size_t in_len;
size_t new_len;
size_t in_pos;
size_t out_pos = 0;
if (in == NULL) {
return NULL;
}
in_len = strlen(in);
if (escape_list == NULL) {
escape_list = "";
}
if (escape_exceptions == NULL) {
escape_exceptions = "";
}
/*
* Allocate enough space for the worst case: every char needs to be
* escaped and requires an additional char.
*/
new_len = (in_len * 2) + 1;
if (new_len <= in_len) {
return NULL;
}
out = talloc_zero_array(mem_ctx, char, new_len);
if (out == NULL) {
return NULL;
}
for (in_pos = 0, out_pos = 0; in_pos < in_len; in_pos++, out_pos++) {
if (strchr(escape_list, in[in_pos]) != NULL &&
strchr(escape_exceptions, in[in_pos]) == NULL)
{
out[out_pos++] = '\\';
}
out[out_pos] = in[in_pos];
}
return out;
}
char *es_escape_str(TALLOC_CTX *mem_ctx,
const char *in,
const char *exceptions)
{
const char *lucene_escape_list = "+-&|!(){}[]^\"~*?:\\/ ";
const char *json_escape_list = "\\\"";
char *lucene_escaped = NULL;
char *full_escaped = NULL;
lucene_escaped = escape_str(mem_ctx,
in,
lucene_escape_list,
exceptions);
if (lucene_escaped == NULL) {
return NULL;
}
full_escaped = escape_str(mem_ctx,
lucene_escaped,
json_escape_list,
NULL);
TALLOC_FREE(lucene_escaped);
return full_escaped;
}
struct es_attr_map *es_map_sl_attr(TALLOC_CTX *mem_ctx,
json_t *kmd_map,
const char *sl_attr)
{
struct es_attr_map *es_map = NULL;
const char *typestr = NULL;
enum ssm_type type;
char *es_attr = NULL;
size_t i;
int cmp;
int ret;
static struct {
const char *typestr;
enum ssm_type typeval;
} ssmt_type_map[] = {
{"bool", ssmt_bool},
{"num", ssmt_num},
{"str", ssmt_str},
{"fts", ssmt_fts},
{"date", ssmt_date},
{"type", ssmt_type},
};
if (sl_attr == NULL) {
return NULL;
}
ret = json_unpack(kmd_map,
"{s: {s: s}}",
sl_attr,
"type",
&typestr);
if (ret != 0) {
DBG_ERR("No JSON type mapping for [%s]\n", sl_attr);
return NULL;
}
ret = json_unpack(kmd_map,
"{s: {s: s}}",
sl_attr,
"attribute",
&es_attr);
if (ret != 0) {
DBG_ERR("No JSON attribute mapping for [%s]\n", sl_attr);
return NULL;
}
for (i = 0; i < ARRAY_SIZE(ssmt_type_map); i++) {
cmp = strcmp(typestr, ssmt_type_map[i].typestr);
if (cmp == 0) {
type = ssmt_type_map[i].typeval;
break;
}
}
if (i == ARRAY_SIZE(ssmt_type_map)) {
return NULL;
}
es_map = talloc_zero(mem_ctx, struct es_attr_map);
if (es_map == NULL) {
return NULL;
}
es_map->type = type;
es_map->name = es_escape_str(es_map, es_attr, NULL);
if (es_map->name == NULL) {
TALLOC_FREE(es_map);
return false;
}
return es_map;
}
const char *es_map_sl_type(json_t *mime_map,
const char *sl_type)
{
const char *mime_type = NULL;
int ret;
if (sl_type == NULL) {
return NULL;
}
ret = json_unpack(mime_map,
"{s: s}",
sl_type,
&mime_type);
if (ret != 0) {
return NULL;
}
return mime_type;
}

View File

@ -0,0 +1,49 @@
/*
Unix SMB/CIFS implementation.
Main metadata server / Spotlight routines / Elasticsearch backend
Copyright (c) Ralph Boehme 2019
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _ES_MAPPING_H_
#define _ES_MAPPING_H_
#include <jansson.h>
enum ssm_type {
ssmt_bool, /* a boolean value */
ssmt_num, /* a numeric value */
ssmt_str, /* a string value */
ssmt_fts, /* a string value */
ssmt_date, /* date values */
ssmt_type /* kMDItemContentType, requires special mapping */
};
struct es_attr_map {
enum ssm_type type;
const char *name;
};
char *es_escape_str(TALLOC_CTX *mem_ctx,
const char *in,
const char *exceptions);
struct es_attr_map *es_map_sl_attr(TALLOC_CTX *mem_ctx,
json_t *kmd_map,
const char *sl_attr);
const char *es_map_sl_type(json_t *mime_map,
const char *sl_type);
#endif

View File

@ -0,0 +1,625 @@
/*
Unix SMB/CIFS implementation.
Main metadata server / Spotlight routines / Elasticsearch backend
Copyright (C) Ralph Boehme 2019
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
%{
#include "includes.h"
#include "rpc_server/mdssvc/mdssvc.h"
#include "rpc_server/mdssvc/mdssvc_es.h"
#include "rpc_server/mdssvc/es_parser.tab.h"
#include "rpc_server/mdssvc/es_mapping.h"
#include <jansson.h>
/*
* allow building with -O3 -Wp,-D_FORTIFY_SOURCE=2
*
* /tmp/samba-testbase/.../mdssvc/es_parser.y: In function
* mdsyylparse:
* es_parser.tab.c:1124:6: error: assuming pointer wraparound
* does not occur when comparing P +- C1 with P +- C2
* [-Werror=strict-overflow]
*
* The generated code in es_parser.tab.c looks like this:
*
* if (yyss + yystacksize - 1 <= yyssp)
*/
#pragma GCC diagnostic ignored "-Wstrict-overflow"
#define YYMALLOC SMB_MALLOC
#define YYREALLOC SMB_REALLOC
struct yy_buffer_state;
typedef struct yy_buffer_state *YY_BUFFER_STATE;
int mdsyyllex(void);
void mdsyylerror(char const *);
void *mdsyylterminate(void);
YY_BUFFER_STATE mdsyyl_scan_string(const char *str);
void mdsyyl_delete_buffer(YY_BUFFER_STATE buffer);
/* forward declarations */
static char *isodate_to_sldate(const char *s);
static char *map_expr(const struct es_attr_map *attr,
char op,
const char *val1,
const char *val2);
/* global vars, eg needed by the lexer */
struct es_parser_state {
TALLOC_CTX *frame;
json_t *kmd_map;
json_t *mime_map;
YY_BUFFER_STATE s;
const char *result;
} *global_es_parser_state;
%}
%code provides {
#include <stdbool.h>
#include <jansson.h>
#include "rpc_server/mdssvc/mdssvc.h"
/* 2001-01-01T00:00:00Z - Unix Epoch = SP_RAW_TIME_OFFSET */
#define SP_RAW_TIME_OFFSET 978307200
int mdsyylwrap(void);
bool map_spotlight_to_es_query(TALLOC_CTX *mem_ctx,
json_t *mappings,
const char *path_scope,
const char *query_string,
char **_es_query);
}
%union {
bool bval;
const char *sval;
struct es_attr_map *attr_map;
}
%name-prefix "mdsyyl"
%expect 1
%error-verbose
%type <sval> match expr line function value isodate
%type <attr_map> attribute
%token <sval> WORD PHRASE
%token <bval> BOOLEAN
%token FUNC_INRANGE
%token DATE_ISO
%token OBRACE CBRACE EQUAL UNEQUAL GT LT COMMA QUOTE
%left OR
%left AND
%%
input:
/* empty */
| input line
;
line:
expr {
global_es_parser_state->result = $1;
}
;
expr:
OBRACE expr CBRACE {
if ($2 == NULL) YYABORT;
$$ = talloc_asprintf(talloc_tos(), "(%s)", $2);
if ($$ == NULL) YYABORT;
}
| expr AND expr {
$$ = talloc_asprintf(talloc_tos(), "(%s) AND (%s)", $1, $3);
if ($$ == NULL) YYABORT;
}
| expr OR expr {
$$ = talloc_asprintf(talloc_tos(), "%s OR %s", $1, $3);
if ($$ == NULL) YYABORT;
}
| match {
$$ = $1;
}
| BOOLEAN {
/*
* We can't properly handle these in expressions, fortunately this
* is probably only ever used by OS X as sole element in an
* expression ie "False" (when Finder window selected our share
* but no search string entered yet). Packet traces showed that OS
* X Spotlight server then returns a failure (ie -1) which is what
* we do here too by calling YYABORT.
*/
YYABORT;
};
match:
attribute EQUAL value {
$$ = map_expr($1, '=', $3, NULL);
if ($$ == NULL) YYABORT;
}
| attribute UNEQUAL value {
$$ = map_expr($1, '!', $3, NULL);
if ($$ == NULL) YYABORT;
}
| attribute LT value {
$$ = map_expr($1, '<', $3, NULL);
if ($$ == NULL) YYABORT;
}
| attribute GT value {
$$ = map_expr($1, '>', $3, NULL);
if ($$ == NULL) YYABORT;
}
| function {
$$ = $1;
}
| match WORD {
$$ = $1;
};
function:
FUNC_INRANGE OBRACE attribute COMMA WORD COMMA WORD CBRACE {
$$ = map_expr($3, '~', $5, $7);
if ($$ == NULL) YYABORT;
};
attribute:
WORD {
$$ = es_map_sl_attr(global_es_parser_state->frame,
global_es_parser_state->kmd_map,
$1);
if ($$ == NULL) YYABORT;
};
value:
PHRASE {
$$ = $1;
}
| isodate {
$$ = $1;
};
isodate:
DATE_ISO OBRACE WORD CBRACE {
$$ = isodate_to_sldate($3);
if ($$ == NULL) YYABORT;
};
%%
/*
* Spotlight has two date formats:
* - seconds since 2001-01-01 00:00:00Z
* - as string "$time.iso(%Y-%m-%dT%H:%M:%SZ)"
* This function converts the latter to the former as string, so the parser
* can work on a uniform format.
*/
static char *isodate_to_sldate(const char *isodate)
{
struct es_parser_state *s = global_es_parser_state;
struct tm tm;
const char *p = NULL;
char *tstr = NULL;
time_t t;
p = strptime(isodate, "%Y-%m-%dT%H:%M:%SZ", &tm);
if (p == NULL) {
DBG_ERR("strptime [%s] failed\n", isodate);
return NULL;
}
t = timegm(&tm);
t -= SP_RAW_TIME_OFFSET;
tstr = talloc_asprintf(s->frame, "%jd", (intmax_t)t);
if (tstr == NULL) {
return NULL;
}
return tstr;
}
static char *map_type(const struct es_attr_map *attr,
char op,
const char *val)
{
struct es_parser_state *s = global_es_parser_state;
const char *mime_type_list = NULL;
char *esc_mime_type_list = NULL;
const char *not = NULL;
const char *end = NULL;
char *es = NULL;
mime_type_list = es_map_sl_type(s->mime_map, val);
if (mime_type_list == NULL) {
DBG_ERR("Mapping type [%s] failed\n", val);
return NULL;
}
esc_mime_type_list = es_escape_str(s->frame,
mime_type_list,
"* ");
if (esc_mime_type_list == NULL) {
return NULL;
}
switch (op) {
case '=':
not = "";
end = "";
break;
case '!':
not = "(NOT ";
end = ")";
break;
default:
DBG_ERR("Mapping type [%s] unexpected op [%c]\n", val, op);
return NULL;
}
es = talloc_asprintf(s->frame,
"%s%s:(%s)%s",
not,
attr->name,
esc_mime_type_list,
end);
if (es == NULL) {
return NULL;
}
return es;
}
static char *map_num(const struct es_attr_map *attr,
char op,
const char *val1,
const char *val2)
{
struct es_parser_state *s = global_es_parser_state;
char *es = NULL;
switch (op) {
case '>':
es = talloc_asprintf(s->frame,
"%s:{%s TO *}",
attr->name,
val1);
break;
case '<':
es = talloc_asprintf(s->frame,
"%s:{* TO %s}",
attr->name,
val1);
break;
case '~':
es = talloc_asprintf(s->frame,
"%s:[%s TO %s]",
attr->name,
val1,
val2);
break;
case '=':
es = talloc_asprintf(s->frame,
"%s:%s",
attr->name,
val1);
break;
case '!':
es = talloc_asprintf(s->frame,
"(NOT %s:%s)",
attr->name,
val1);
break;
default:
DBG_ERR("Mapping num unexpected op [%c]\n", op);
return NULL;
}
if (es == NULL) {
return NULL;
}
return es;
}
static char *map_fts(const struct es_attr_map *attr,
char op,
const char *val)
{
struct es_parser_state *s = global_es_parser_state;
const char *not = NULL;
const char *end = NULL;
char *esval = NULL;
char *es = NULL;
esval = es_escape_str(s->frame, val, "*\\\"");
if (esval == NULL) {
yyerror("es_escape_str failed");
return NULL;
}
switch (op) {
case '=':
not = "";
end = "";
break;
case '!':
not = "(NOT ";
end = ")";
break;
default:
DBG_ERR("Mapping fts [%s] unexpected op [%c]\n", val, op);
return NULL;
}
es = talloc_asprintf(s->frame,
"%s%s%s",
not,
esval,
end);
if (es == NULL) {
return NULL;
}
return es;
}
static char *map_str(const struct es_attr_map *attr,
char op,
const char *val)
{
struct es_parser_state *s = global_es_parser_state;
char *esval = NULL;
char *es = NULL;
const char *not = NULL;
const char *end = NULL;
esval = es_escape_str(s->frame, val, "*\\\"");
if (esval == NULL) {
yyerror("es_escape_str failed");
return NULL;
}
switch (op) {
case '=':
not = "";
end = "";
break;
case '!':
not = "(NOT ";
end = ")";
break;
default:
DBG_ERR("Mapping string [%s] unexpected op [%c]\n", val, op);
return NULL;
}
es = talloc_asprintf(s->frame,
"%s%s:%s%s",
not,
attr->name,
esval,
end);
if (es == NULL) {
return NULL;
}
return es;
}
/*
* Convert Spotlight date seconds since 2001-01-01 00:00:00Z
* to a date string in the format %Y-%m-%dT%H:%M:%SZ.
*/
static char *map_sldate_to_esdate(TALLOC_CTX *mem_ctx,
const char *sldate)
{
struct tm *tm = NULL;
char *esdate = NULL;
char buf[21];
size_t len;
time_t t;
int error;
t = (time_t)smb_strtoull(sldate, NULL, 10, &error, SMB_STR_STANDARD);
if (error != 0) {
DBG_ERR("smb_strtoull [%s] failed\n", sldate);
return NULL;
}
t += SP_RAW_TIME_OFFSET;
tm = gmtime(&t);
if (tm == NULL) {
DBG_ERR("localtime [%s] failed\n", sldate);
return NULL;
}
len = strftime(buf, sizeof(buf),
"%Y-%m-%dT%H:%M:%SZ", tm);
if (len != 20) {
DBG_ERR("strftime [%s] failed\n", sldate);
return NULL;
}
esdate = es_escape_str(mem_ctx, buf, NULL);
if (esdate == NULL) {
yyerror("es_escape_str failed");
return NULL;
}
return esdate;
}
static char *map_date(const struct es_attr_map *attr,
char op,
const char *sldate1,
const char *sldate2)
{
struct es_parser_state *s = global_es_parser_state;
char *esdate1 = NULL;
char *esdate2 = NULL;
char *es = NULL;
if (op == '~' && sldate2 == NULL) {
DBG_ERR("Date range query, but second date is NULL\n");
return NULL;
}
esdate1 = map_sldate_to_esdate(s->frame, sldate1);
if (esdate1 == NULL) {
DBG_ERR("map_sldate_to_esdate [%s] failed\n", sldate1);
return NULL;
}
if (sldate2 != NULL) {
esdate2 = map_sldate_to_esdate(s->frame, sldate2);
if (esdate2 == NULL) {
DBG_ERR("map_sldate_to_esdate [%s] failed\n", sldate2);
return NULL;
}
}
switch (op) {
case '>':
es = talloc_asprintf(s->frame,
"%s:{%s TO *}",
attr->name,
esdate1);
break;
case '<':
es = talloc_asprintf(s->frame,
"%s:{* TO %s}",
attr->name,
esdate1);
break;
case '~':
es = talloc_asprintf(s->frame,
"%s:[%s TO %s]",
attr->name,
esdate1,
esdate2);
break;
case '=':
es = talloc_asprintf(s->frame,
"%s:%s",
attr->name,
esdate1);
break;
case '!':
es = talloc_asprintf(s->frame,
"(NOT %s:%s)",
attr->name,
esdate1);
break;
}
if (es == NULL) {
return NULL;
}
return es;
}
static char *map_expr(const struct es_attr_map *attr,
char op,
const char *val1,
const char *val2)
{
char *es = NULL;
switch (attr->type) {
case ssmt_type:
es = map_type(attr, op, val1);
break;
case ssmt_num:
es = map_num(attr, op, val1, val2);
break;
case ssmt_fts:
es = map_fts(attr, op, val1);
break;
case ssmt_str:
es = map_str(attr, op, val1);
break;
case ssmt_date:
es = map_date(attr, op, val1, val2);
break;
default:
break;
}
if (es == NULL) {
DBG_ERR("Mapping [%s %c %s (%s)] failed\n",
attr->name, op, val1, val2 ? val2 : "");
return NULL;
}
return es;
}
void mdsyylerror(const char *str)
{
DBG_ERR("Parser failed: %s\n", str);
}
int mdsyylwrap(void)
{
return 1;
}
/**
* Map a Spotlight RAW query string to a ES query string
**/
bool map_spotlight_to_es_query(TALLOC_CTX *mem_ctx,
json_t *mappings,
const char *path_scope,
const char *query_string,
char **_es_query)
{
struct es_parser_state s = {
.frame = talloc_stackframe(),
};
int result;
char *es_query = NULL;
s.kmd_map = json_object_get(mappings, "attribute_mappings");
if (s.kmd_map == NULL) {
DBG_ERR("Failed to load attribute_mappings from JSON\n");
return false;
}
s.mime_map = json_object_get(mappings, "mime_mappings");
if (s.mime_map == NULL) {
DBG_ERR("Failed to load mime_mappings from JSON\n");
return false;
}
s.s = mdsyyl_scan_string(query_string);
if (s.s == NULL) {
DBG_WARNING("Failed to parse [%s]\n", query_string);
TALLOC_FREE(s.frame);
return false;
}
global_es_parser_state = &s;
result = mdsyylparse();
global_es_parser_state = NULL;
mdsyyl_delete_buffer(s.s);
if (result != 0) {
TALLOC_FREE(s.frame);
return false;
}
es_query = talloc_asprintf(mem_ctx,
"(%s) AND path.real.fulltext:\\\"%s\\\"",
s.result, path_scope);
TALLOC_FREE(s.frame);
if (es_query == NULL) {
return false;
}
*_es_query = es_query;
return true;
}

View File

@ -0,0 +1,99 @@
/*
Unix SMB/CIFS implementation.
Main metadata server / Spotlight routines / ES backend
Copyright (C) Ralph Boehme 2019
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "rpc_server/mdssvc/mdssvc.h"
#include "rpc_server/mdssvc/mdssvc_es.h"
#include "rpc_server/mdssvc/es_parser.tab.h"
#include "rpc_server/mdssvc/es_mapping.h"
/*
* Examples:
*
* $ ./spotlight2es '_kMDItemGroupId=="11"'
* ...
* $ ./spotlight2es '*=="test*"||kMDItemTextContent=="test*"'
* ...
*/
int main(int argc, char **argv)
{
TALLOC_CTX *mem_ctx = NULL;
json_t *mappings = NULL;
json_error_t json_error;
char *default_path = NULL;
char *path = NULL;
const char *query_string = NULL;
const char *path_scope = NULL;
char *es_query = NULL;
bool ok;
if (argc != 2) {
printf("usage: %s QUERY\n", argv[0]);
return 1;
}
query_string = argv[1];
path_scope = "/foo/bar";
lp_load_global(get_dyn_CONFIGFILE());
mem_ctx = talloc_init("es_parser_test");
if (mem_ctx == NULL) {
return 1;
}
default_path = talloc_asprintf(mem_ctx,
"%s/mdssvc/elasticsearch_mappings.json",
get_dyn_SAMBA_DATADIR());
if (default_path == NULL) {
TALLOC_FREE(mem_ctx);
return 1;
}
path = lp_parm_talloc_string(mem_ctx,
GLOBAL_SECTION_SNUM,
"elasticsearch",
"mappings",
default_path);
TALLOC_FREE(default_path);
if (path == NULL) {
TALLOC_FREE(mem_ctx);
return 1;
}
mappings = json_load_file(path, 0, &json_error);
if (mappings == NULL) {
DBG_ERR("Opening mapping file [%s] failed: %s\n",
path, strerror(errno));
TALLOC_FREE(mem_ctx);
return 1;
}
ok = map_spotlight_to_es_query(mem_ctx,
mappings,
path_scope,
query_string,
&es_query);
printf("%s\n", ok ? es_query : "*mapping failed*");
json_decref(mappings);
talloc_free(mem_ctx);
return ok ? 0 : 1;
}

View File

@ -31,6 +31,9 @@
#ifdef HAVE_SPOTLIGHT_BACKEND_TRACKER
#include "mdssvc_tracker.h"
#endif
#ifdef HAVE_SPOTLIGHT_BACKEND_ES
#include "mdssvc_es.h"
#endif
#undef DBGC_CLASS
#define DBGC_CLASS DBGC_RPC_SRV
@ -1422,6 +1425,15 @@ static struct mdssvc_ctx *mdssvc_init(struct tevent_context *ev)
return NULL;
}
#ifdef HAVE_SPOTLIGHT_BACKEND_ES
ok = mdsscv_backend_es.init(mdssvc_ctx);
if (!ok) {
DBG_ERR("backend init failed\n");
TALLOC_FREE(mdssvc_ctx);
return NULL;
}
#endif
#ifdef HAVE_SPOTLIGHT_BACKEND_TRACKER
ok = mdsscv_backend_tracker.init(mdssvc_ctx);
if (!ok) {
@ -1457,6 +1469,14 @@ bool mds_shutdown(void)
if (!ok) {
goto fail;
}
#ifdef HAVE_SPOTLIGHT_BACKEND_ES
ok = mdsscv_backend_es.shutdown(mdssvc_ctx);
if (!ok) {
goto fail;
}
#endif
#ifdef HAVE_SPOTLIGHT_BACKEND_TRACKER
ok = mdsscv_backend_tracker.shutdown(mdssvc_ctx);
if (!ok) {
@ -1528,6 +1548,13 @@ struct mds_ctx *mds_init_ctx(TALLOC_CTX *mem_ctx,
case SPOTLIGHT_BACKEND_NOINDEX:
mds_ctx->backend = &mdsscv_backend_noindex;
break;
#ifdef HAVE_SPOTLIGHT_BACKEND_ES
case SPOTLIGHT_BACKEND_ES:
mds_ctx->backend = &mdsscv_backend_es;
break;
#endif
#ifdef HAVE_SPOTLIGHT_BACKEND_TRACKER
case SPOTLIGHT_BACKEND_TRACKER:
mds_ctx->backend = &mdsscv_backend_tracker;

View File

@ -0,0 +1,835 @@
/*
Unix SMB/CIFS implementation.
Main metadata server / Spotlight routines / ES backend
Copyright (C) Ralph Boehme 2019
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "system/filesys.h"
#include "lib/util/time_basic.h"
#include "lib/tls/tls.h"
#include "lib/util/tevent_ntstatus.h"
#include "libcli/http/http.h"
#include "lib/util/tevent_unix.h"
#include "credentials.h"
#include "mdssvc.h"
#include "mdssvc_es.h"
#include "rpc_server/mdssvc/es_parser.tab.h"
#include <jansson.h>
#undef DBGC_CLASS
#define DBGC_CLASS DBGC_RPC_SRV
#define MDSSVC_ELASTIC_QUERY_TEMPLATE \
"{" \
" \"from\": %zu," \
" \"size\": %zu," \
" \"_source\": [%s]," \
" \"query\": {" \
" \"query_string\": {" \
" \"query\": \"%s\"" \
" }" \
" }" \
"}"
#define MDSSVC_ELASTIC_SOURCES \
"\"path.real\""
static bool mdssvc_es_init(struct mdssvc_ctx *mdssvc_ctx)
{
struct mdssvc_es_ctx *mdssvc_es_ctx = NULL;
json_error_t json_error;
char *default_path = NULL;
char *path = NULL;
mdssvc_es_ctx = talloc_zero(mdssvc_ctx, struct mdssvc_es_ctx);
if (mdssvc_es_ctx == NULL) {
return false;
}
mdssvc_es_ctx->mdssvc_ctx = mdssvc_ctx;
mdssvc_es_ctx->creds = cli_credentials_init_anon(mdssvc_es_ctx);
if (mdssvc_es_ctx->creds == NULL) {
TALLOC_FREE(mdssvc_es_ctx);
return false;
}
default_path = talloc_asprintf(
mdssvc_es_ctx,
"%s/mdssvc/elasticsearch_mappings.json",
get_dyn_SAMBA_DATADIR());
if (default_path == NULL) {
TALLOC_FREE(mdssvc_es_ctx);
return false;
}
path = lp_parm_talloc_string(mdssvc_es_ctx,
GLOBAL_SECTION_SNUM,
"elasticsearch",
"mappings",
default_path);
TALLOC_FREE(default_path);
if (path == NULL) {
TALLOC_FREE(mdssvc_es_ctx);
return false;
}
mdssvc_es_ctx->mappings = json_load_file(path, 0, &json_error);
if (mdssvc_es_ctx->mappings == NULL) {
DBG_ERR("Opening mapping file [%s] failed: %s\n",
path, json_error.text);
TALLOC_FREE(path);
TALLOC_FREE(mdssvc_es_ctx);
return false;
}
TALLOC_FREE(path);
mdssvc_ctx->backend_private = mdssvc_es_ctx;
return true;
}
static bool mdssvc_es_shutdown(struct mdssvc_ctx *mdssvc_ctx)
{
return true;
}
static struct tevent_req *mds_es_connect_send(
TALLOC_CTX *mem_ctx,
struct tevent_context *ev,
struct mds_es_ctx *mds_es_ctx);
static int mds_es_connect_recv(struct tevent_req *req);
static void mds_es_connected(struct tevent_req *subreq);
static bool mds_es_next_search_trigger(struct mds_es_ctx *mds_es_ctx);
static bool mds_es_connect(struct mds_ctx *mds_ctx)
{
struct mdssvc_es_ctx *mdssvc_es_ctx = talloc_get_type_abort(
mds_ctx->mdssvc_ctx->backend_private, struct mdssvc_es_ctx);
struct mds_es_ctx *mds_es_ctx = NULL;
struct tevent_req *subreq = NULL;
mds_es_ctx = talloc_zero(mds_ctx, struct mds_es_ctx);
if (mds_es_ctx == NULL) {
return false;
}
*mds_es_ctx = (struct mds_es_ctx) {
.mdssvc_es_ctx = mdssvc_es_ctx,
.mds_ctx = mds_ctx,
};
mds_ctx->backend_private = mds_es_ctx;
subreq = mds_es_connect_send(
mds_es_ctx,
mdssvc_es_ctx->mdssvc_ctx->ev_ctx,
mds_es_ctx);
if (subreq == NULL) {
TALLOC_FREE(mds_es_ctx);
return false;
}
tevent_req_set_callback(subreq, mds_es_connected, mds_es_ctx);
return true;
}
static void mds_es_connected(struct tevent_req *subreq)
{
struct mds_es_ctx *mds_es_ctx = tevent_req_callback_data(
subreq, struct mds_es_ctx);
int ret;
bool ok;
ret = mds_es_connect_recv(subreq);
TALLOC_FREE(subreq);
if (ret != 0) {
DBG_ERR("HTTP connect failed\n");
return;
}
ok = mds_es_next_search_trigger(mds_es_ctx);
if (!ok) {
DBG_ERR("mds_es_next_search_trigger failed\n");
}
return;
}
struct mds_es_connect_state {
struct tevent_context *ev;
struct mds_es_ctx *mds_es_ctx;
struct tevent_queue_entry *qe;
const char *server_addr;
uint16_t server_port;
struct tstream_tls_params *tls_params;
};
static void mds_es_http_connect_done(struct tevent_req *subreq);
static void mds_es_http_waited(struct tevent_req *subreq);
static struct tevent_req *mds_es_connect_send(
TALLOC_CTX *mem_ctx,
struct tevent_context *ev,
struct mds_es_ctx *mds_es_ctx)
{
struct tevent_req *req = NULL;
struct tevent_req *subreq = NULL;
struct mds_es_connect_state *state = NULL;
bool use_tls;
NTSTATUS status;
req = tevent_req_create(mem_ctx, &state, struct mds_es_connect_state);
if (req == NULL) {
return NULL;
}
*state = (struct mds_es_connect_state) {
.ev = ev,
.mds_es_ctx = mds_es_ctx,
};
state->server_addr = lp_parm_talloc_string(
state,
mds_es_ctx->mds_ctx->snum,
"elasticsearch",
"address",
"localhost");
state->server_port = lp_parm_int(
mds_es_ctx->mds_ctx->snum,
"elasticsearch",
"port",
9200);
use_tls = lp_parm_bool(
mds_es_ctx->mds_ctx->snum,
"elasticsearch",
"use tls",
false);
DBG_DEBUG("Connecting to HTTP%s [%s] port [%"PRIu16"]\n",
use_tls ? "S" : "", state->server_addr, state->server_port);
if (use_tls) {
const char *ca_file = lp__tls_cafile();
const char *crl_file = lp__tls_crlfile();
const char *tls_priority = lp_tls_priority();
enum tls_verify_peer_state verify_peer = lp_tls_verify_peer();
status = tstream_tls_params_client(state,
ca_file,
crl_file,
tls_priority,
verify_peer,
state->server_addr,
&state->tls_params);
if (!NT_STATUS_IS_OK(status)) {
DBG_ERR("Failed tstream_tls_params_client - %s\n",
nt_errstr(status));
tevent_req_nterror(req, status);
return tevent_req_post(req, ev);
}
}
subreq = http_connect_send(state,
state->ev,
state->server_addr,
state->server_port,
mds_es_ctx->mdssvc_es_ctx->creds,
state->tls_params);
if (tevent_req_nomem(subreq, req)) {
return tevent_req_post(req, ev);
}
tevent_req_set_callback(subreq, mds_es_http_connect_done, req);
return req;
}
static void mds_es_http_connect_done(struct tevent_req *subreq)
{
struct tevent_req *req = tevent_req_callback_data(
subreq, struct tevent_req);
struct mds_es_connect_state *state = tevent_req_data(
req, struct mds_es_connect_state);
int error;
error = http_connect_recv(subreq,
state->mds_es_ctx,
&state->mds_es_ctx->http_conn);
TALLOC_FREE(subreq);
if (error != 0) {
DBG_ERR("HTTP connect failed, retrying...\n");
subreq = tevent_wakeup_send(
state->mds_es_ctx,
state->mds_es_ctx->mdssvc_es_ctx->mdssvc_ctx->ev_ctx,
tevent_timeval_current_ofs(10, 0));
if (tevent_req_nomem(subreq, req)) {
return;
}
tevent_req_set_callback(subreq,
mds_es_http_waited,
req);
return;
}
DBG_DEBUG("Connected to HTTP%s [%s] port [%"PRIu16"]\n",
state->tls_params ? "S" : "",
state->server_addr, state->server_port);
tevent_req_done(req);
return;
}
static void mds_es_http_waited(struct tevent_req *subreq)
{
struct tevent_req *req = tevent_req_callback_data(
subreq, struct tevent_req);
struct mds_es_connect_state *state = tevent_req_data(
req, struct mds_es_connect_state);
bool ok;
ok = tevent_wakeup_recv(subreq);
TALLOC_FREE(subreq);
if (!ok) {
tevent_req_error(req, ETIMEDOUT);
return;
}
subreq = mds_es_connect_send(
state->mds_es_ctx,
state->mds_es_ctx->mdssvc_es_ctx->mdssvc_ctx->ev_ctx,
state->mds_es_ctx);
if (tevent_req_nomem(subreq, req)) {
return;
}
tevent_req_set_callback(subreq, mds_es_connected, state->mds_es_ctx);
}
static int mds_es_connect_recv(struct tevent_req *req)
{
return tevent_req_simple_recv_unix(req);
}
static void mds_es_reconnect_on_error(struct sl_es_search *s)
{
struct mds_es_ctx *mds_es_ctx = s->mds_es_ctx;
struct tevent_req *subreq = NULL;
if (s->slq != NULL) {
s->slq->state = SLQ_STATE_ERROR;
}
DBG_WARNING("Reconnecting HTTP...\n");
TALLOC_FREE(mds_es_ctx->http_conn);
subreq = mds_es_connect_send(
mds_es_ctx,
mds_es_ctx->mdssvc_es_ctx->mdssvc_ctx->ev_ctx,
mds_es_ctx);
if (subreq == NULL) {
DBG_ERR("mds_es_connect_send failed\n");
return;
}
tevent_req_set_callback(subreq, mds_es_connected, mds_es_ctx);
}
static int search_destructor(struct sl_es_search *s)
{
DLIST_REMOVE(s->mds_es_ctx->searches, s);
return 0;
}
static struct tevent_req *mds_es_search_send(TALLOC_CTX *mem_ctx,
struct tevent_context *ev,
struct sl_es_search *s);
static int mds_es_search_recv(struct tevent_req *req);
static void mds_es_search_done(struct tevent_req *subreq);
static bool mds_es_search(struct sl_query *slq)
{
struct mds_es_ctx *mds_es_ctx = talloc_get_type_abort(
slq->mds_ctx->backend_private, struct mds_es_ctx);
struct sl_es_search *s = NULL;
bool ok;
s = talloc_zero(slq, struct sl_es_search);
if (s == NULL) {
return false;
}
*s = (struct sl_es_search) {
.ev = mds_es_ctx->mdssvc_es_ctx->mdssvc_ctx->ev_ctx,
.mds_es_ctx = mds_es_ctx,
.slq = slq,
.size = MAX_SL_RESULTS,
};
/* 0 would mean no limit */
s->max = lp_parm_ulonglong(s->slq->mds_ctx->snum,
"elasticsearch",
"max results",
MAX_SL_RESULTS);
DBG_DEBUG("Spotlight query: '%s'\n", slq->query_string);
ok = map_spotlight_to_es_query(
s,
mds_es_ctx->mdssvc_es_ctx->mappings,
slq->path_scope,
slq->query_string,
&s->es_query);
if (!ok) {
TALLOC_FREE(s);
return false;
}
DBG_DEBUG("Elasticsearch query: '%s'\n", s->es_query);
slq->backend_private = s;
slq->state = SLQ_STATE_RUNNING;
DLIST_ADD_END(mds_es_ctx->searches, s);
talloc_set_destructor(s, search_destructor);
return mds_es_next_search_trigger(mds_es_ctx);
}
static bool mds_es_next_search_trigger(struct mds_es_ctx *mds_es_ctx)
{
struct tevent_req *subreq = NULL;
struct sl_es_search *s = mds_es_ctx->searches;
if (mds_es_ctx->http_conn == NULL) {
DBG_DEBUG("Waiting for HTTP connection...\n");
return true;
}
if (s == NULL) {
DBG_DEBUG("No pending searches, idling...\n");
return true;
}
if (s->pending) {
DBG_DEBUG("Search pending [%p]\n", s);
return true;
}
subreq = mds_es_search_send(s, s->ev, s);
if (subreq == NULL) {
return false;
}
tevent_req_set_callback(subreq, mds_es_search_done, s);
return true;
}
static void mds_es_search_done(struct tevent_req *subreq)
{
struct sl_es_search *s = tevent_req_callback_data(
subreq, struct sl_es_search);
struct mds_es_ctx *mds_es_ctx = s->mds_es_ctx;
struct sl_query *slq = s->slq;
int ret;
bool ok;
DBG_DEBUG("Search done for search [%p]\n", s);
DLIST_REMOVE(mds_es_ctx->searches, s);
ret = mds_es_search_recv(subreq);
TALLOC_FREE(subreq);
if (ret != 0) {
mds_es_reconnect_on_error(s);
return;
}
if (slq == NULL) {
/*
* Closed by the user. This is the only place where we free "s"
* explicitly because the talloc parent slq is already gone.
* Everywhere else we rely on the destructor of slq to free s"."
*/
TALLOC_FREE(s);
goto trigger;
}
SLQ_DEBUG(10, slq, "search done");
if (s->total == 0 || s->from >= s->max) {
slq->state = SLQ_STATE_DONE;
goto trigger;
}
if (slq->query_results->num_results >= MAX_SL_RESULTS) {
slq->state = SLQ_STATE_FULL;
goto trigger;
}
/*
* Reschedule this query as there are more results waiting in the
* Elasticsearch server and the client result queue has room as
* well. But put it at the end of the list of active queries as a simple
* heuristic that should ensure all client queries are dispatched to the
* server.
*/
DLIST_ADD_END(mds_es_ctx->searches, s);
trigger:
ok = mds_es_next_search_trigger(mds_es_ctx);
if (!ok) {
DBG_ERR("mds_es_next_search_trigger failed\n");
}
}
static void mds_es_search_http_send_done(struct tevent_req *subreq);
static void mds_es_search_http_read_done(struct tevent_req *subreq);
struct mds_es_search_state {
struct tevent_context *ev;
struct sl_es_search *s;
struct tevent_queue_entry *qe;
struct http_request http_request;
struct http_request *http_response;
};
static int mds_es_search_pending_destructor(struct sl_es_search *s)
{
/*
* s is a child of slq which may get freed when a user closes a
* query. To maintain the HTTP request/response sequence on the HTTP
* channel, we keep processing pending requests and free s when we
* receive the HTTP response for pending requests.
*/
DBG_DEBUG("Preserving pending search [%p]\n", s);
s->slq = NULL;
return -1;
}
static void mds_es_search_set_pending(struct sl_es_search *s)
{
DBG_DEBUG("Set pending [%p]\n", s);
SLQ_DEBUG(10, s->slq, "pending");
s->pending = true;
talloc_set_destructor(s, mds_es_search_pending_destructor);
}
static void mds_es_search_unset_pending(struct sl_es_search *s)
{
DBG_DEBUG("Unset pending [%p]\n", s);
if (s->slq != NULL) {
SLQ_DEBUG(10, s->slq, "unset pending");
}
s->pending = false;
talloc_set_destructor(s, NULL);
}
static struct tevent_req *mds_es_search_send(TALLOC_CTX *mem_ctx,
struct tevent_context *ev,
struct sl_es_search *s)
{
struct tevent_req *req = NULL;
struct tevent_req *subreq = NULL;
struct mds_es_search_state *state = NULL;
const char *index = NULL;
char *elastic_query = NULL;
char *uri = NULL;
size_t elastic_query_len;
char *elastic_query_len_str = NULL;
char *hostname = NULL;
bool pretty = false;
req = tevent_req_create(mem_ctx, &state, struct mds_es_search_state);
if (req == NULL) {
return NULL;
}
*state = (struct mds_es_search_state) {
.ev = ev,
.s = s,
};
if (!tevent_req_set_endtime(req, ev, timeval_current_ofs(60, 0))) {
return tevent_req_post(req, s->ev);
}
index = lp_parm_const_string(s->slq->mds_ctx->snum,
"elasticsearch",
"index",
"_all");
if (tevent_req_nomem(index, req)) {
return tevent_req_post(req, ev);
}
if (DEBUGLVL(10)) {
pretty = true;
}
uri = talloc_asprintf(state,
"/%s/_search%s",
index,
pretty ? "?pretty" : "");
if (tevent_req_nomem(uri, req)) {
return tevent_req_post(req, ev);
}
elastic_query = talloc_asprintf(state,
MDSSVC_ELASTIC_QUERY_TEMPLATE,
s->from,
s->size,
MDSSVC_ELASTIC_SOURCES,
s->es_query);
if (tevent_req_nomem(elastic_query, req)) {
return tevent_req_post(req, ev);
}
DBG_DEBUG("Elastic query: '%s'\n", elastic_query);
elastic_query_len = strlen(elastic_query);
state->http_request = (struct http_request) {
.type = HTTP_REQ_POST,
.uri = uri,
.body = data_blob_const(elastic_query, elastic_query_len),
.major = '1',
.minor = '1',
};
elastic_query_len_str = talloc_asprintf(state, "%zu", elastic_query_len);
if (tevent_req_nomem(elastic_query_len_str, req)) {
return tevent_req_post(req, ev);
}
hostname = get_myname(state);
if (tevent_req_nomem(hostname, req)) {
return tevent_req_post(req, ev);
}
http_add_header(state, &state->http_request.headers,
"Content-Type", "application/json");
http_add_header(state, &state->http_request.headers,
"Accept", "application/json");
http_add_header(state, &state->http_request.headers,
"User-Agent", "Samba/mdssvc");
http_add_header(state, &state->http_request.headers,
"Host", hostname);
http_add_header(state, &state->http_request.headers,
"Content-Length", elastic_query_len_str);
subreq = http_send_request_send(state,
ev,
s->mds_es_ctx->http_conn,
&state->http_request);
if (tevent_req_nomem(subreq, req)) {
return tevent_req_post(req, ev);
}
mds_es_search_set_pending(s);
tevent_req_set_callback(subreq, mds_es_search_http_send_done, req);
return req;
}
static void mds_es_search_http_send_done(struct tevent_req *subreq)
{
struct tevent_req *req = tevent_req_callback_data(
subreq, struct tevent_req);
struct mds_es_search_state *state = tevent_req_data(
req, struct mds_es_search_state);
NTSTATUS status;
DBG_DEBUG("Sent out search [%p]\n", state->s);
status = http_send_request_recv(subreq);
TALLOC_FREE(subreq);
if (!NT_STATUS_IS_OK(status)) {
tevent_req_error(req, map_errno_from_nt_status(status));
return;
}
if (state->s->mds_es_ctx->mds_ctx == NULL) {
mds_es_search_unset_pending(state->s);
tevent_req_error(req, ECANCELED);
return;
}
subreq = http_read_response_send(state,
state->ev,
state->s->mds_es_ctx->http_conn,
MAX_SL_RESULTS * 8192);
if (tevent_req_nomem(subreq, req)) {
return;
}
tevent_req_set_callback(subreq, mds_es_search_http_read_done, req);
}
static void mds_es_search_http_read_done(struct tevent_req *subreq)
{
struct tevent_req *req = tevent_req_callback_data(
subreq, struct tevent_req);
struct mds_es_search_state *state = tevent_req_data(
req, struct mds_es_search_state);
struct sl_es_search *s = state->s;
struct sl_query *slq = s->slq;
json_t *root = NULL;
json_t *matches = NULL;
json_t *match = NULL;
size_t i;
json_error_t error;
int hits;
NTSTATUS status;
int ret;
bool ok;
DBG_DEBUG("Got response for search [%p]\n", s);
mds_es_search_unset_pending(s);
status = http_read_response_recv(subreq, state, &state->http_response);
TALLOC_FREE(subreq);
if (!NT_STATUS_IS_OK(status)) {
DBG_DEBUG("HTTP response failed: %s\n", nt_errstr(status));
tevent_req_error(req, map_errno_from_nt_status(status));
return;
}
if (slq == NULL) {
tevent_req_done(req);
return;
}
if (s->mds_es_ctx->mds_ctx == NULL) {
tevent_req_error(req, ECANCELED);
return;
}
switch (state->http_response->response_code) {
case 200:
break;
default:
DBG_ERR("HTTP server response: %u\n",
state->http_response->response_code);
goto fail;
}
DBG_DEBUG("JSON response:\n%s\n",
talloc_strndup(talloc_tos(),
(char *)state->http_response->body.data,
state->http_response->body.length));
root = json_loadb((char *)state->http_response->body.data,
state->http_response->body.length,
0,
&error);
if (root == NULL) {
DBG_ERR("json_loadb failed\n");
goto fail;
}
if (s->total == 0) {
/*
* Get the total number of results the first time, format
* used by Elasticsearch 7.0 or newer
*/
ret = json_unpack(root, "{s: {s: {s: i}}}",
"hits", "total", "value", &s->total);
if (ret != 0) {
/* Format used before 7.0 */
ret = json_unpack(root, "{s: {s: i}}",
"hits", "total", &s->total);
if (ret != 0) {
DBG_ERR("json_unpack failed\n");
goto fail;
}
}
DBG_DEBUG("Total: %zu\n", s->total);
if (s->total == 0) {
json_decref(root);
tevent_req_done(req);
return;
}
}
if (s->max == 0 || s->max > s->total) {
s->max = s->total;
}
ret = json_unpack(root, "{s: {s:o}}",
"hits", "hits", &matches);
if (ret != 0 || matches == NULL) {
DBG_ERR("json_unpack hits failed\n");
goto fail;
}
hits = json_array_size(matches);
if (hits == 0) {
DBG_ERR("Hu?! No results?\n");
goto fail;
}
DBG_DEBUG("Hits: %d\n", hits);
for (i = 0; i < hits; i++) {
const char *path = NULL;
match = json_array_get(matches, i);
if (match == NULL) {
DBG_ERR("Hu?! No value for index %zu\n", i);
goto fail;
}
ret = json_unpack(match,
"{s: {s: {s: s}}}",
"_source",
"path",
"real",
&path);
if (ret != 0) {
DBG_ERR("Missing path.real in JSON result\n");
goto fail;
}
ok = mds_add_result(slq, path);
if (!ok) {
DBG_ERR("error adding result for path: %s\n", path);
goto fail;
}
}
json_decref(root);
s->from += hits;
slq->state = SLQ_STATE_RESULTS;
tevent_req_done(req);
return;
fail:
if (root != NULL) {
json_decref(root);
}
slq->state = SLQ_STATE_ERROR;
tevent_req_error(req, EINVAL);
return;
}
static int mds_es_search_recv(struct tevent_req *req)
{
return tevent_req_simple_recv_unix(req);
}
static bool mds_es_search_cont(struct sl_query *slq)
{
struct sl_es_search *s = talloc_get_type_abort(
slq->backend_private, struct sl_es_search);
SLQ_DEBUG(10, slq, "continue");
DLIST_ADD_END(s->mds_es_ctx->searches, s);
return mds_es_next_search_trigger(s->mds_es_ctx);
}
struct mdssvc_backend mdsscv_backend_es = {
.init = mdssvc_es_init,
.shutdown = mdssvc_es_shutdown,
.connect = mds_es_connect,
.search_start = mds_es_search,
.search_cont = mds_es_search_cont,
};

View File

@ -0,0 +1,108 @@
/*
Unix SMB/CIFS implementation.
Main metadata server / Spotlight routines / HTTP/ES/JSON backend
Copyright (C) Ralph Boehme 2019
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _MDSSVC_ES_H_
#define _MDSSVC_ES_H_
#include <jansson.h>
/*
* Some global state
*/
struct mdssvc_es_ctx {
struct mdssvc_ctx *mdssvc_ctx;
struct cli_credentials *creds;
json_t *mappings;
};
/*
* Per mdssvc RPC bind state
*/
struct mds_es_ctx {
/*
* Pointer to higher level mds_ctx
*/
struct mds_ctx *mds_ctx;
/*
* Pointer to our global context
*/
struct mdssvc_es_ctx *mdssvc_es_ctx;
/*
* The HTTP connection handle to the ES server
*/
struct http_conn *http_conn;
/*
* List of pending searches
*/
struct sl_es_search *searches;
};
/* Per search request */
struct sl_es_search {
/*
* List pointers
*/
struct sl_es_search *prev, *next;
/*
* Search is being executed. Only the list head can be pending.
*/
bool pending;
/*
* Shorthand to our tevent context
*/
struct tevent_context *ev;
/*
* Pointer to the RPC connection ctx the request is using
*/
struct mds_es_ctx *mds_es_ctx;
/*
* The upper mdssvc.c level query context
*/
struct sl_query *slq;
/*
* Maximum number of results we process and total number of
* results of a query.
*/
size_t total;
size_t max;
/*
* For paging results
*/
size_t from;
size_t size;
/*
* The translated Es query
*/
char *es_query;
};
extern struct mdssvc_backend mdsscv_backend_es;
#endif /* _MDSSVC_ES_H_ */

View File

@ -161,6 +161,18 @@ if bld.env.spotlight_backend_tracker:
'''
rpc_mdssvc_deps += 'tevent-glib-glue ' + bld.env['libtracker']
if bld.env.spotlight_backend_es:
rpc_mdssvc_sources += '''
mdssvc/mdssvc_es.c
mdssvc/es_mapping.c
mdssvc/es_parser.y
mdssvc/es_lexer.l
'''
rpc_mdssvc_deps += ' http jansson'
if bld.SAMBA3_IS_ENABLED_MODULE('rpc_mdssvc_module'):
bld.INSTALL_FILES(bld.env.SAMBA_DATADIR,
'mdssvc/elasticsearch_mappings.json')
bld.SAMBA3_MODULE('rpc_mdssvc_module',
subsystem='rpc',
allow_undefined_symbols=True,

View File

@ -1789,6 +1789,13 @@ main() {
and conf.CONFIG_GET('HAVE_UTF8_NORMALISATION')
)
with_spotlight_es_backend = (
conf.CONFIG_SET('HAVE_JSON_OBJECT')
and conf.env['BISON']
and conf.env['FLEX']
and conf.CONFIG_GET('HAVE_UTF8_NORMALISATION')
)
conf.env.with_spotlight = False
if Options.options.with_spotlight is not False:
backends = ['noindex']
@ -1804,14 +1811,23 @@ main() {
Logs.warn('Missing libtracker-sparql development files for Spotlight backend "tracker"')
if not conf.CONFIG_SET('HAVE_GLIB'):
Logs.warn('Missing glib-2.0 development files for Spotlight backend "tracker"')
if not conf.CONFIG_GET('HAVE_JSON_OBJECT'):
Logs.warn('Missing libjansson development files for Spotlight backend "elasticsearch"')
if with_spotlight_tracker_backend:
conf.env.spotlight_backend_tracker = True
backends.append('tracker')
conf.DEFINE('HAVE_SPOTLIGHT_BACKEND_TRACKER', '1')
if Options.options.with_spotlight is True and not conf.env.spotlight_backend_tracker:
conf.fatal("Unmet dependencies for Spotlight backend")
if with_spotlight_es_backend:
conf.env.spotlight_backend_es = True
backends.append('elasticsearch')
conf.DEFINE('HAVE_SPOTLIGHT_BACKEND_ES', '1')
if (Options.options.with_spotlight is True
and not conf.env.spotlight_backend_tracker
and not conf.env.spotlight_backend_es):
conf.fatal("Unmet dependencies for Spotlight backends")
Logs.info("Building with Spotlight support, available backends: %s" % ', '.join(backends))
default_static_modules.extend(TO_LIST('rpc_mdssvc_module'))

View File

@ -1335,6 +1335,16 @@ bld.SAMBA3_BINARY('spotlight2sparql',
enabled=bld.env.spotlight_backend_tracker,
install=False)
bld.SAMBA3_BINARY('spotlight2es',
source='''
rpc_server/mdssvc/es_parser_test.c
rpc_server/mdssvc/es_parser.y
rpc_server/mdssvc/es_lexer.l
rpc_server/mdssvc/es_mapping.c''',
deps='samba3-util talloc jansson smbconf',
enabled=bld.env.spotlight_backend_es,
install=False)
bld.SAMBA3_BINARY('tevent_glib_glue_test',
source='lib/tevent_glib_glue_tests.c',
deps='''