mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2024-10-26 20:25:14 +03:00
Debugging of strange results and tuning, Daniel
This commit is contained in:
parent
9b00613d05
commit
a6287a463c
36
doc/index.py
36
doc/index.py
@ -54,22 +54,22 @@ libxml2.registerErrorHandler(callback, None)
|
||||
#
|
||||
TABLES={
|
||||
"symbols" : """CREATE TABLE symbols (
|
||||
name varchar(255) NOT NULL,
|
||||
module varchar(255) NOT NULL,
|
||||
name varchar(255) BINARY NOT NULL,
|
||||
module varchar(255) BINARY NOT NULL,
|
||||
type varchar(25) NOT NULL,
|
||||
descr varchar(255),
|
||||
UNIQUE KEY name (name),
|
||||
KEY module (module))""",
|
||||
"words" : """CREATE TABLE words (
|
||||
name varchar(50) NOT NULL,
|
||||
symbol varchar(255) NOT NULL,
|
||||
name varchar(50) BINARY NOT NULL,
|
||||
symbol varchar(255) BINARY NOT NULL,
|
||||
relevance int,
|
||||
KEY name (name),
|
||||
KEY symbol (symbol),
|
||||
UNIQUE KEY ID (name, symbol))""",
|
||||
"wordsHTML" : """CREATE TABLE wordsHTML (
|
||||
name varchar(50) NOT NULL,
|
||||
resource varchar(255) NOT NULL,
|
||||
name varchar(50) BINARY NOT NULL,
|
||||
resource varchar(255) BINARY NOT NULL,
|
||||
section varchar(255),
|
||||
id varchar(50),
|
||||
relevance int,
|
||||
@ -77,8 +77,8 @@ TABLES={
|
||||
KEY resource (resource),
|
||||
UNIQUE KEY ref (name, resource))""",
|
||||
"pages" : """CREATE TABLE pages (
|
||||
resource varchar(255) NOT NULL,
|
||||
title varchar(255) NOT NULL,
|
||||
resource varchar(255) BINARY NOT NULL,
|
||||
title varchar(255) BINARY NOT NULL,
|
||||
UNIQUE KEY name (resource))""",
|
||||
"Queries" : """CREATE TABLE Queries (
|
||||
ID int(11) NOT NULL auto_increment,
|
||||
@ -403,9 +403,7 @@ def addWordHTML(word, resource, id, section, relevance):
|
||||
if wordsDictHTML.has_key(word):
|
||||
d = wordsDictHTML[word]
|
||||
if d == None:
|
||||
return 0
|
||||
if len(d) > 15:
|
||||
wordsDictHTML[word] = None
|
||||
print "skipped %s" % (word)
|
||||
return 0
|
||||
try:
|
||||
(r,i,s) = d[resource]
|
||||
@ -418,7 +416,8 @@ def addWordHTML(word, resource, id, section, relevance):
|
||||
pass
|
||||
else:
|
||||
wordsDictHTML[word] = {}
|
||||
wordsDictHTML[word][resource] = (relevance, id, section)
|
||||
d = wordsDictHTML[word];
|
||||
d[resource] = (relevance, id, section)
|
||||
return relevance
|
||||
|
||||
def addStringHTML(str, resource, id, section, relevance):
|
||||
@ -440,6 +439,8 @@ def addStringHTML(str, resource, id, section, relevance):
|
||||
str = string.replace(str, "/", " ")
|
||||
str = string.replace(str, "*", " ")
|
||||
str = string.replace(str, ":", " ")
|
||||
str = string.replace(str, "#", " ")
|
||||
str = string.replace(str, "!", " ")
|
||||
str = string.replace(str, "\n", " ")
|
||||
str = string.replace(str, "\r", " ")
|
||||
str = string.replace(str, "\xc2", " ")
|
||||
@ -447,7 +448,14 @@ def addStringHTML(str, resource, id, section, relevance):
|
||||
l = string.split(str)
|
||||
for word in l:
|
||||
if len(word) > 2:
|
||||
ret = ret + addWordHTML(word, resource, id, section, relevance)
|
||||
try:
|
||||
r = addWordHTML(word, resource, id, section, relevance)
|
||||
if r <= 0:
|
||||
print "addWordHTML failed: %s %s" % (word, resource)
|
||||
ret = ret + r
|
||||
except:
|
||||
print "addWordHTML failed: %s %s %d" % (word, resource, relevance)
|
||||
print sys.exc_type, sys.exc_value
|
||||
|
||||
return ret
|
||||
|
||||
@ -776,7 +784,7 @@ def analyzeHTML(doc, resource):
|
||||
elif item.type == 'text':
|
||||
analyzeHTMLText(doc, resource, item, section, id)
|
||||
para = para + 1
|
||||
elif item.name == 'text':
|
||||
elif item.name == 'p':
|
||||
analyzeHTMLPara(doc, resource, item, section, id)
|
||||
para = para + 1
|
||||
elif item.name == 'pre':
|
||||
|
@ -125,7 +125,7 @@ simply provide a set of keywords:
|
||||
$result = NULL;
|
||||
$j = 0;
|
||||
if ($word) {
|
||||
$result = mysql_query ("SELECT words.relevance, symbols.name, symbols.type, symbols.module, symbols.descr FROM words, symbols WHERE words.name='$word' and words.symbol = symbols.name ORDER BY words.relevance DESC");
|
||||
$result = mysql_query ("SELECT words.relevance, symbols.name, symbols.type, symbols.module, symbols.descr FROM words, symbols WHERE LCASE(words.name) LIKE LCASE('$word') and words.symbol = symbols.name ORDER BY words.relevance DESC");
|
||||
if ($result) {
|
||||
$j = mysql_num_rows($result);
|
||||
if ($j == 0)
|
||||
@ -139,7 +139,7 @@ simply provide a set of keywords:
|
||||
$result = NULL;
|
||||
$j = 0;
|
||||
if ($word) {
|
||||
$result = mysql_query ("SELECT relevance, name, id, resource, section FROM wordsHTML WHERE name='$word' ORDER BY relevance DESC");
|
||||
$result = mysql_query ("SELECT relevance, name, id, resource, section FROM wordsHTML WHERE LCASE(name) LIKE LCASE('$word') ORDER BY relevance DESC");
|
||||
if ($result) {
|
||||
$j = mysql_num_rows($result);
|
||||
if ($j == 0)
|
||||
@ -177,7 +177,7 @@ simply provide a set of keywords:
|
||||
$desc = mysql_result($result, $i, 4);
|
||||
if (array_key_exists($name, $results)) {
|
||||
list($r,$t,$m,$d,$w,$u) = $results[$name];
|
||||
$results[$name] = array($r + $relevance + 40,
|
||||
$results[$name] = array(($r + $relevance) * 2,
|
||||
$t,$m,$d,$w,$u);
|
||||
} else {
|
||||
$id = strtoupper($name);
|
||||
@ -201,9 +201,9 @@ simply provide a set of keywords:
|
||||
if ($id != "") {
|
||||
$url = $url + "#$id";
|
||||
}
|
||||
$results[$name + "_html_" + $number+ "_" + $i ] =
|
||||
$results[$name + "_html_" + $number+ "_" + $i] =
|
||||
array($relevance, "documentation",
|
||||
$module, $desc, $word, $url);
|
||||
$module, $desc, $name, $url);
|
||||
}
|
||||
mysql_free_result($result);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user