Module NKS::Acts::SodaSearch::ClassMethods
In: lib/acts_as_soda_search.rb

Methods

Included Modules

NKS::Acts::SodaSearch::InstanceMethods

Public Instance methods

Call this method in an ActiveRecord class to make it SodaSearchable.

Requires two arguments: the first is the ActiveRecord Class that will hold the indices. The second is the AR class that will hold the terms. (Maybe we can auto-generate these in v2.)

These classes/tables MUST implement the spec outlined in the SodaSearch README file. A RuntimeError is raised if they don’t have the proper column names.

[Source]

    # File lib/acts_as_soda_search.rb, line 55
# Makes the calling ActiveRecord class SodaSearchable.
#
# indices_class - ActiveRecord::Base subclass that holds the indices. Its table
#                 must have the columns term_id, position, user_id, indexee_id
#                 and public.
# terms_class   - ActiveRecord::Base subclass that holds the terms. Its table
#                 must have the column term.
#
# Raises RuntimeError if either argument is not an ActiveRecord::Base subclass,
# or if one of the required columns is missing.
def acts_as_soda_search(indices_class, terms_class)
  logger.info("* acts_as_soda_search initializing for #{self.name}. indices_class: #{indices_class.inspect}. terms_class: #{terms_class.inspect}")

  # Make sure both arguments are ActiveRecord model classes (nil fails is_a?(Class)).
  unless [indices_class, terms_class].all? { |klass| klass.is_a?(Class) && klass.ancestors.include?(ActiveRecord::Base) }
    raise RuntimeError, "ERROR: acts_as_soda_search requires two arguments: indices_class and terms_class, both of which must be classes."
  end

  # Make sure both tables carry the columns required by the README spec.
  # terms_class is only inspected when indices_class passes.
  lacks_column = lambda { |klass, required|
    present = klass.columns_hash.keys
    required.any? { |column| !present.include?(column) }
  }
  if lacks_column.call(indices_class, %w(term_id position user_id indexee_id public)) ||
     lacks_column.call(terms_class, %w(term))
    raise RuntimeError, "acts_as_soda_search ERROR: you didn't implement all required attributes in your indices_class and/or in your terms_class (see README)"
  end

  # we save a symbol rather than the Class object so that the class can be reloaded while we run.
  self.const_set :Soda_indices_class, indices_class.name.to_sym
  self.const_set :Soda_terms_class, terms_class.name.to_sym

  include NKS::Acts::SodaSearch::InstanceMethods
  extend NKS::Acts::SodaSearch::SingletonMethods
end

[Source]

    # File lib/acts_as_soda_search.rb, line 95
# Counts the rows of the indices class, logs the figure together with the
# table name, and returns the count.
def count_index_entries
  indices = soda_indices_class
  total = indices.count
  info("counted #{total} indices in #{indices.table_name}")
  total
end

Utility methods: statistics

[Source]

    # File lib/acts_as_soda_search.rb, line 88
# Counts the rows of the terms class, logs the figure together with the
# table name, and returns the count.
def count_stored_terms
  terms = soda_terms_class
  total = terms.count
  info("counted #{total} terms in #{terms.table_name}")
  total
end

[Source]

     # File lib/acts_as_soda_search.rb, line 202
# Counts the terms whose id is not referenced by any term_id in the indices
# table, logs the figure, and returns it.
def count_unused_terms
  terms_table = soda_terms_class.table_name
  indices_table = soda_indices_class.table_name

  # The table names must be interpolated for real; an escaped "\#{...}" would
  # send the literal text "#{...}" to the database.
  count = soda_terms_class.count_by_sql(
    "SELECT COUNT(#{terms_table}.id) FROM #{terms_table} WHERE id NOT IN\n" \
    " (SELECT DISTINCT term_id from #{indices_table})\n"
  )
  info("counted #{count} unused terms.")
  count
end

This clears out the index and terms tables.

[Source]

     # File lib/acts_as_soda_search.rb, line 222
# Deletes every row of the indices class and of the terms class inside one
# ActiveRecord transaction, logging what each delete_all reported and the
# elapsed whole seconds.
def purge_all
  started_at = Time.now.to_i
  ActiveRecord::Base.transaction do
    removed_indices = soda_indices_class.delete_all
    removed_terms = soda_terms_class.delete_all
    info("purge_all - deleted #{removed_terms} terms and #{removed_indices} indices")
  end
  info("purge_all done in #{Time.now.to_i - started_at} seconds.")
end

[Source]

     # File lib/acts_as_soda_search.rb, line 213
# Deletes the terms whose id does not appear as a term_id in the indices
# table. Logs and returns the value reported by delete_all.
def purge_unused_terms
  started_at = Time.now.to_i
  terms = soda_terms_class
  unused = "#{terms.table_name}.id NOT IN (SELECT DISTINCT term_id from #{soda_indices_class.table_name})"
  purged = terms.delete_all(unused)
  info("purged #{purged} unused search terms.\npurge_unused_terms done in #{Time.now.to_i - started_at} seconds.")
  purged
end

Reindex instances of this class. The type argument selects which records are reindexed.

Types:


:all - Erase the index and terms tables for this class. Reindex all records.

       This can take a long time.

:unindexed - Reindex only those instances of self which are not referenced in the index table at all.

[Source]

     # File lib/acts_as_soda_search.rb, line 156
# Reindexes instances of this class by calling autoindex on each of them.
#
# type - :all       deletes every row of the indices and terms classes, then
#                   reindexes every record of this class.
#        :unindexed reindexes only the records whose id does not appear as an
#                   indexee_id in the indices table.
#
# Logs how many objects were reindexed and how long it took.
# Raises ArgumentError for any other type.
def reindex(type)
  info(" ** #{self.name}.reindex(#{type.to_s}) called. This may take a while..")
  started_at = Time.now
  reindexed = 0

  case type
  when :all
    soda_indices_class.delete_all
    soda_terms_class.delete_all

    self.find(:all).each { |item|
      item.autoindex
      reindexed += 1
    }

  when :unindexed
    indices_table = self.soda_indices_class.table_name
    self.find(:all,
              :joins => "LEFT OUTER JOIN #{indices_table} ON #{indices_table}.indexee_id = #{self.table_name}.id",
              :conditions => "#{self.table_name}.id not in (select distinct indexee_id from #{indices_table})",
              :select => "#{self.table_name}.id"
              ).each { |item|
      # Calling item.autoindex directly doesn't work: it doesn't like loading
      # the whole object with :joins, for some reason. Reload by id instead.
      self.find(item.id).autoindex
      reindexed += 1
    }

  else
    error(" * Unknown type argument #{type.to_s}. Aborting..")
    raise ArgumentError, "didn't specify a valid argument for reindex()"
  end

  # Float seconds: with whole-second integers a run shorter than one second
  # divided by zero when computing the objects-per-second rate.
  elapsed = Time.now - started_at
  elapsed_text = '%.2f' % elapsed

  if reindexed == 0
    info("reindex() done in #{elapsed_text} seconds. No items found to reindex.")
  else
    per_object = '%.4f' % (elapsed / reindexed)
    per_second = elapsed > 0 ? '%.2f' % (reindexed / elapsed) : 'n/a'
    info("reindex(#{type.to_s}): reindexed #{reindexed} objects in #{elapsed_text} seconds.\n" +
         "   #{per_object} seconds per object, #{per_second} objects per second.")
  end
end

Do a basic search and get the ids of the matching instances of this class, in descending order of relevance. Right now, we AND all the terms together and provide no other options.

The search algorithm can have all kinds of tweaks and enhancements done to it later.

The result is an Array of Arrays, sorted in descending order of relevance. Each subarray is of the form [ database_id, "numeric relevance"]. The optional user_id will filter the results on the user_id column of the indices table.

N.B. user_id is not sanitized in any way before insertion into the database query.

[Source]

     # File lib/acts_as_soda_search.rb, line 120
# Basic AND search over the index table.
#
# query       - String of whitespace-separated terms. nil or empty returns [].
# user_id     - Optional value to filter on the user_id column of the indices
#               table. It is quoted by the database connection before being
#               placed in the query.
# do_stemming - When true, each term is downcased and stemmed (String#stem)
#               before it is looked up in the terms table.
#
# Terms missing from the terms table are ignored. Returns [] when no term is
# known, otherwise the `result` of the executed query: rows of
# [indexee_id, relevance] ordered by descending relevance.
def search(query, user_id = nil, do_stemming=true)
  return [] if query.nil? || query.empty?

  # Look each term up in the terms table. uniq matters: a repeated term would
  # otherwise inflate term_ids.size and the HAVING clause could never match.
  term_ids = query.split.map { |term|
    term = term.downcase.stem if do_stemming
    found = soda_terms_class.find(:first, :conditions => ["term = ?", term])
    found && found.id
  }.compact.uniq

  return [] if term_ids.empty? # don't bother the database

  indices = soda_indices_class
  connection = indices.connection
  user_filter = user_id.nil? ? '' : "AND user_id = #{connection.quote(user_id)} "

  # Count term_ids in the index table, one inner row per (indexee, term).
  # The HAVING implements the AND by excluding rows with not all the terms.
  sql = "SELECT indexee_id, sum(count) FROM (SELECT indexee_id, COUNT(indexee_id), term_id " \
        "FROM #{indices.table_name} where term_id in\n" \
        "(#{term_ids.join(',')}) #{user_filter}\n" \
        "GROUP BY indexee_id , term_id) as foo group by foo.indexee_id\n" \
        "HAVING COUNT(term_id) = #{term_ids.size}\n" \
        "ORDER BY sum DESC;\n"

  connection.execute(sql).result
end

See notes on acts_as_soda_search method.

[Source]

     # File lib/acts_as_soda_search.rb, line 102
# Returns the indices class, looked up afresh from the name stored in the
# Soda_indices_class constant so that a reloaded class is picked up.
# Resolved with const_get instead of eval, so the stored name is never run as code.
def soda_indices_class
  self.const_get(:Soda_indices_class).to_s.split('::').inject(Object) { |scope, part| scope.const_get(part) }
end

See notes on acts_as_soda_search method.

[Source]

     # File lib/acts_as_soda_search.rb, line 106
# Returns the terms class, looked up afresh from the name stored in the
# Soda_terms_class constant so that a reloaded class is picked up.
# Resolved with const_get instead of eval, so the stored name is never run as code.
def soda_terms_class
  self.const_get(:Soda_terms_class).to_s.split('::').inject(Object) { |scope, part| scope.const_get(part) }
end

Protected Instance methods

[Source]

     # File lib/acts_as_soda_search.rb, line 236
# Sends msg to the logger at debug level, prefixed with the plugin name and
# the name of the receiving class.
def debug(msg)
  line = "* acts_as_soda_search (for #{name}): #{msg}"
  logger.debug(line)
end

[Source]

     # File lib/acts_as_soda_search.rb, line 239
# Sends msg to the logger at error level, prefixed with an ERROR marker, the
# plugin name and the name of the receiving class.
def error(msg)
  line = "* ERROR: acts_as_soda_search (for #{name}): #{msg}"
  logger.error(line)
end

[Source]

     # File lib/acts_as_soda_search.rb, line 233
# Sends msg to the logger at info level, prefixed with the plugin name and
# the name of the receiving class.
def info(msg)
  line = "* acts_as_soda_search (for #{name}): #{msg}"
  logger.info(line)
end

[Validate]