Hi,
I have a problem with indexing a simple structure.
Indexing a single element works, but doing it in slightly more complex
code fails.
require ‘ferret’
include Ferret # 0.11.5 on win32
require ‘engtagger’ # http://engtagger.rubyforge.org/
# A single tagged word taken from a corpus sentence, together with the data
# needed to store it as one index document.
class CorpusIndexedElement
  attr_accessor :sentence, :pos, :offset, :word

  # Stores the four values unchanged.
  #
  # sentence - the sentence the word belongs to
  # offset   - the position value computed by the caller for this word
  # word     - the word itself
  # pos      - the tag attached to the word
  def initialize(sentence, offset, word, pos)
    @sentence, @offset, @word, @pos = sentence, offset, word, pos
  end

  # Builds a Hash with the keys :sentence, :offset, :word and :pos holding
  # the current attribute values; this is the object appended to the index.
  def to_ferret_index_hash
    document = {}
    document[:sentence] = sentence
    document[:offset] = offset
    document[:word] = word
    document[:pos] = pos
    document
  end
end
# Tags +sentence+ with +tagger+ and returns one CorpusIndexedElement per
# tagged token.
#
# tagger   - any object responding to add_tags(text); the result is expected
#            to be whitespace-separated tokens of the form <tag>word</tag>
# sentence - the raw sentence text
#
# Returns an Array of CorpusIndexedElement (empty when the tagger returns nil
# or nothing that looks like a tagged token).
def index_corpus_sentence(tagger, sentence)
  tagged = tagger.add_tags(sentence)
  return [] if tagged.nil?

  result = []
  offset = 0
  tagged.split.each do |element|
    # Capture the whole tag name and the whole word. The previous pattern
    # captured a single character for the word, so multi-letter tokens did
    # not match and the nil match raised NoMethodError.
    m = %r{<(\D*?)>(.*)</\D*?>}.match(element)
    next unless m # skip anything that is not a <tag>word</tag> token

    result << CorpusIndexedElement.new(sentence, offset, m[2], m[1])
    # NOTE(review): assumes each token is followed by exactly one character
    # (a space) in the sentence; punctuation split off by the tagger will
    # shift later offsets - confirm whether exact offsets are required.
    offset += m[2].size + 1
  end
  result
end
# Sample corpus. Each sentence is assembled from adjacent string literals so
# that no line break ends up inside the text.
sentences = [
  "Chocolate comprises a number of raw and processed foods that are " \
  "produced from the seed of the tropical cacao tree.",
  "Pure, unsweetened chocolate contains primarily cocoa solids and cocoa " \
  "butter in varying proportions."
]

tagger = EngTagger.new

# Ferret's default analyzer filters English stop words such as "and", so they
# are never indexed and can never be found. Passing an empty stop-word list
# keeps every word searchable.
index = Ferret::I.new(:analyzer => Ferret::Analysis::StandardAnalyzer.new([]))

# Store every tagged word of every sentence as its own document.
sentences.each do |sentence|
  index_corpus_sentence(tagger, sentence).each do |element|
    index << element.to_ferret_index_hash
  end
end
This gives no results:
# Print the hits for the word "and" in the :word field.
# NOTE: "and" is an English stop word; an index built with Ferret's default
# analyzer drops it, so this query finds nothing unless the index was created
# with an analyzer that keeps stop words.
puts index.search('word: "and"')
but this works:
#index << CorpusIndexedElement.new("a test sentence", 0, "test", "xy").to_ferret_index_hash
#puts index.search('word: "test"')
I have run out of ideas.
Cheers,
Tomasz