1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2024-10-26 20:25:14 +03:00

Debugging of strange results and tuning, Daniel

This commit is contained in:
Daniel Veillard 2002-10-07 13:17:22 +00:00
parent 9b00613d05
commit a6287a463c
2 changed files with 27 additions and 19 deletions

View File

@ -54,22 +54,22 @@ libxml2.registerErrorHandler(callback, None)
#
TABLES={
"symbols" : """CREATE TABLE symbols (
name varchar(255) NOT NULL,
module varchar(255) NOT NULL,
name varchar(255) BINARY NOT NULL,
module varchar(255) BINARY NOT NULL,
type varchar(25) NOT NULL,
descr varchar(255),
UNIQUE KEY name (name),
KEY module (module))""",
"words" : """CREATE TABLE words (
name varchar(50) NOT NULL,
symbol varchar(255) NOT NULL,
name varchar(50) BINARY NOT NULL,
symbol varchar(255) BINARY NOT NULL,
relevance int,
KEY name (name),
KEY symbol (symbol),
UNIQUE KEY ID (name, symbol))""",
"wordsHTML" : """CREATE TABLE wordsHTML (
name varchar(50) NOT NULL,
resource varchar(255) NOT NULL,
name varchar(50) BINARY NOT NULL,
resource varchar(255) BINARY NOT NULL,
section varchar(255),
id varchar(50),
relevance int,
@ -77,8 +77,8 @@ TABLES={
KEY resource (resource),
UNIQUE KEY ref (name, resource))""",
"pages" : """CREATE TABLE pages (
resource varchar(255) NOT NULL,
title varchar(255) NOT NULL,
resource varchar(255) BINARY NOT NULL,
title varchar(255) BINARY NOT NULL,
UNIQUE KEY name (resource))""",
"Queries" : """CREATE TABLE Queries (
ID int(11) NOT NULL auto_increment,
@ -403,9 +403,7 @@ def addWordHTML(word, resource, id, section, relevance):
if wordsDictHTML.has_key(word):
d = wordsDictHTML[word]
if d == None:
return 0
if len(d) > 15:
wordsDictHTML[word] = None
print "skipped %s" % (word)
return 0
try:
(r,i,s) = d[resource]
@ -418,7 +416,8 @@ def addWordHTML(word, resource, id, section, relevance):
pass
else:
wordsDictHTML[word] = {}
wordsDictHTML[word][resource] = (relevance, id, section)
d = wordsDictHTML[word];
d[resource] = (relevance, id, section)
return relevance
def addStringHTML(str, resource, id, section, relevance):
@ -440,6 +439,8 @@ def addStringHTML(str, resource, id, section, relevance):
str = string.replace(str, "/", " ")
str = string.replace(str, "*", " ")
str = string.replace(str, ":", " ")
str = string.replace(str, "#", " ")
str = string.replace(str, "!", " ")
str = string.replace(str, "\n", " ")
str = string.replace(str, "\r", " ")
str = string.replace(str, "\xc2", " ")
@ -447,7 +448,14 @@ def addStringHTML(str, resource, id, section, relevance):
l = string.split(str)
for word in l:
if len(word) > 2:
ret = ret + addWordHTML(word, resource, id, section, relevance)
try:
r = addWordHTML(word, resource, id, section, relevance)
if r <= 0:
print "addWordHTML failed: %s %s" % (word, resource)
ret = ret + r
except:
print "addWordHTML failed: %s %s %d" % (word, resource, relevance)
print sys.exc_type, sys.exc_value
return ret
@ -776,7 +784,7 @@ def analyzeHTML(doc, resource):
elif item.type == 'text':
analyzeHTMLText(doc, resource, item, section, id)
para = para + 1
elif item.name == 'text':
elif item.name == 'p':
analyzeHTMLPara(doc, resource, item, section, id)
para = para + 1
elif item.name == 'pre':

View File

@ -125,7 +125,7 @@ simply provide a set of keywords:
$result = NULL;
$j = 0;
if ($word) {
$result = mysql_query ("SELECT words.relevance, symbols.name, symbols.type, symbols.module, symbols.descr FROM words, symbols WHERE words.name='$word' and words.symbol = symbols.name ORDER BY words.relevance DESC");
$result = mysql_query ("SELECT words.relevance, symbols.name, symbols.type, symbols.module, symbols.descr FROM words, symbols WHERE LCASE(words.name) LIKE LCASE('$word') and words.symbol = symbols.name ORDER BY words.relevance DESC");
if ($result) {
$j = mysql_num_rows($result);
if ($j == 0)
@ -139,7 +139,7 @@ simply provide a set of keywords:
$result = NULL;
$j = 0;
if ($word) {
$result = mysql_query ("SELECT relevance, name, id, resource, section FROM wordsHTML WHERE name='$word' ORDER BY relevance DESC");
$result = mysql_query ("SELECT relevance, name, id, resource, section FROM wordsHTML WHERE LCASE(name) LIKE LCASE('$word') ORDER BY relevance DESC");
if ($result) {
$j = mysql_num_rows($result);
if ($j == 0)
@ -177,7 +177,7 @@ simply provide a set of keywords:
$desc = mysql_result($result, $i, 4);
if (array_key_exists($name, $results)) {
list($r,$t,$m,$d,$w,$u) = $results[$name];
$results[$name] = array($r + $relevance + 40,
$results[$name] = array(($r + $relevance) * 2,
$t,$m,$d,$w,$u);
} else {
$id = strtoupper($name);
@ -201,9 +201,9 @@ simply provide a set of keywords:
if ($id != "") {
$url = $url + "#$id";
}
$results[$name + "_html_" + $number+ "_" + $i ] =
$results[$name + "_html_" + $number+ "_" + $i] =
array($relevance, "documentation",
$module, $desc, $word, $url);
$module, $desc, $name, $url);
}
mysql_free_result($result);
}