2013-12-28 06:03:50 +04:00
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
/***
  This file is part of systemd.

  Copyright 2013 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
# include <string.h>
# include "util.h"
# include "xml.h"
/* Tokenizer states. xml_tokenize() stores the current state as an integer in
 * the caller-provided opaque state pointer between invocations. */
enum {
        STATE_NULL,       /* Initial state: xml_tokenize() resets the line counter to 1 and continues as STATE_TEXT */
        STATE_TEXT,       /* Outside of any tag: plain text, comments, processing instructions, DTDs, or the start of a tag */
        STATE_TAG,        /* Inside a tag, after the tag name or after an attribute value */
        STATE_ATTRIBUTE,  /* Directly after an attribute name, where an optional "=value" may follow */
};
2014-06-06 21:41:24 +04:00
/* Adds the number of newline characters found in the first n bytes of s to
 * *line. Does nothing if line is NULL, so callers can pass their optional
 * line counter through unconditionally. */
static void inc_lines(unsigned *line, const char *s, size_t n) {
        const char *p = s;

        if (!line)
                return;

        for (;;) {
                const char *f;

                f = memchr(p, '\n', n);
                if (!f)
                        return;

                /* Continue right after the newline just found, shrinking the
                 * remaining window by the bytes skipped (newline included). */
                n -= (size_t) (f - p) + 1;
                p = f + 1;
                (*line)++;
        }
}
2013-12-28 06:03:50 +04:00
/* We don't actually do real XML here. We only read a simplistic
 * subset, that is a bit less strict than XML and lacks all the more
 * complex features, like entities, or namespaces. However, we do
 * support some HTML5-like simplifications */
2014-06-06 21:41:24 +04:00
int xml_tokenize ( const char * * p , char * * name , void * * state , unsigned * line ) {
2013-12-28 06:03:50 +04:00
const char * c , * e , * b ;
char * ret ;
int t ;
assert ( p ) ;
assert ( * p ) ;
assert ( name ) ;
assert ( state ) ;
t = PTR_TO_INT ( * state ) ;
c = * p ;
2014-06-06 21:41:24 +04:00
if ( t = = STATE_NULL ) {
if ( line )
* line = 1 ;
t = STATE_TEXT ;
}
2013-12-28 06:03:50 +04:00
for ( ; ; ) {
if ( * c = = 0 )
return XML_END ;
switch ( t ) {
case STATE_TEXT : {
int x ;
e = strchrnul ( c , ' < ' ) ;
if ( e > c ) {
/* More text... */
ret = strndup ( c , e - c ) ;
if ( ! ret )
return - ENOMEM ;
2014-06-06 21:41:24 +04:00
inc_lines ( line , c , e - c ) ;
2013-12-28 06:03:50 +04:00
* name = ret ;
* p = e ;
* state = INT_TO_PTR ( STATE_TEXT ) ;
return XML_TEXT ;
}
assert ( * e = = ' < ' ) ;
b = c + 1 ;
if ( startswith ( b , " !-- " ) ) {
/* A comment */
e = strstr ( b + 3 , " --> " ) ;
if ( ! e )
return - EINVAL ;
2014-06-06 21:41:24 +04:00
inc_lines ( line , b , e + 3 - b ) ;
2013-12-28 06:03:50 +04:00
c = e + 3 ;
continue ;
}
if ( * b = = ' ? ' ) {
/* Processing instruction */
e = strstr ( b + 1 , " ?> " ) ;
if ( ! e )
return - EINVAL ;
2014-06-06 21:41:24 +04:00
inc_lines ( line , b , e + 2 - b ) ;
2013-12-28 06:03:50 +04:00
c = e + 2 ;
continue ;
}
if ( * b = = ' ! ' ) {
/* DTD */
e = strchr ( b + 1 , ' > ' ) ;
if ( ! e )
return - EINVAL ;
2014-06-06 21:41:24 +04:00
inc_lines ( line , b , e + 1 - b ) ;
2013-12-28 06:03:50 +04:00
c = e + 1 ;
continue ;
}
if ( * b = = ' / ' ) {
/* A closing tag */
x = XML_TAG_CLOSE ;
b + + ;
} else
x = XML_TAG_OPEN ;
e = strpbrk ( b , WHITESPACE " /> " ) ;
if ( ! e )
return - EINVAL ;
ret = strndup ( b , e - b ) ;
if ( ! ret )
return - ENOMEM ;
* name = ret ;
* p = e ;
* state = INT_TO_PTR ( STATE_TAG ) ;
return x ;
}
case STATE_TAG :
b = c + strspn ( c , WHITESPACE ) ;
if ( * b = = 0 )
return - EINVAL ;
2014-06-06 21:41:24 +04:00
inc_lines ( line , c , b - c ) ;
2013-12-28 06:03:50 +04:00
e = b + strcspn ( b , WHITESPACE " =/> " ) ;
if ( e > b ) {
/* An attribute */
ret = strndup ( b , e - b ) ;
if ( ! ret )
return - ENOMEM ;
* name = ret ;
* p = e ;
* state = INT_TO_PTR ( STATE_ATTRIBUTE ) ;
return XML_ATTRIBUTE_NAME ;
}
if ( startswith ( b , " /> " ) ) {
/* An empty tag */
* name = NULL ; /* For empty tags we return a NULL name, the caller must be prepared for that */
* p = b + 2 ;
* state = INT_TO_PTR ( STATE_TEXT ) ;
return XML_TAG_CLOSE_EMPTY ;
}
if ( * b ! = ' > ' )
return - EINVAL ;
c = b + 1 ;
t = STATE_TEXT ;
continue ;
case STATE_ATTRIBUTE :
if ( * c = = ' = ' ) {
c + + ;
if ( * c = = ' \' ' | | * c = = ' \" ' ) {
/* Tag with a quoted value */
e = strchr ( c + 1 , * c ) ;
if ( ! e )
return - EINVAL ;
2014-06-06 21:41:24 +04:00
inc_lines ( line , c , e - c ) ;
2013-12-28 06:03:50 +04:00
ret = strndup ( c + 1 , e - c - 1 ) ;
if ( ! ret )
return - ENOMEM ;
* name = ret ;
* p = e + 1 ;
* state = INT_TO_PTR ( STATE_TAG ) ;
return XML_ATTRIBUTE_VALUE ;
}
/* Tag with a value without quotes */
b = strpbrk ( c , WHITESPACE " > " ) ;
if ( ! b )
b = c ;
ret = strndup ( c , b - c ) ;
if ( ! ret )
return - ENOMEM ;
* name = ret ;
* p = b ;
* state = INT_TO_PTR ( STATE_TAG ) ;
return XML_ATTRIBUTE_VALUE ;
}
t = STATE_TAG ;
continue ;
}
}
assert_not_reached ( " Bad state " ) ;
}