OpenTREP Logo  0.07.7
C++ Open Travel Request Parsing Library
RequestInterpreter.cpp
Go to the documentation of this file.
1 // //////////////////////////////////////////////////////////////////////
2 // Import section
3 // //////////////////////////////////////////////////////////////////////
4 // STL
5 #include <cassert>
6 #include <sstream>
7 #include <string>
8 #include <vector>
9 #include <exception>
10 // Boost
11 #include <boost/filesystem.hpp>
12 #include <boost/regex.hpp>
13 // SOCI
14 #include <soci/soci.h>
15 // OpenTrep
16 #include <opentrep/DBType.hpp>
18 #include <opentrep/bom/Filter.hpp>
20 #include <opentrep/bom/Place.hpp>
23 #include <opentrep/bom/Result.hpp>
35 
36 namespace OPENTREP {
37 
49  // //////////////////////////////////////////////////////////////////////
50  void addUnmatchedWord (const TravelQuery_T& iQueryString,
51  WordList_T& ioWordList, WordSet_T& ioWordSet) {
52  // Token-ise the given string
53  WordList_T lQueryStringWordList;
55  lQueryStringWordList);
56  if (lQueryStringWordList.size() == 1) {
57  // Add the unmatched/unknown word, only when that latter has not
58  // already been stored, and when it is not black-listed.
59  const bool shouldBeKept = Filter::shouldKeep ("", iQueryString);
60  //const bool shouldBeKept = true;
61 
62  WordSet_T::const_iterator itWord = ioWordSet.find (iQueryString);
63  if (shouldBeKept == true && itWord == ioWordSet.end()) {
64  ioWordSet.insert (iQueryString);
65  ioWordList.push_back (iQueryString);
66  }
67  }
68  }
69 
70  // //////////////////////////////////////////////////////////////////////
71  void createPlaces (const ResultCombination& iResultCombination,
72  PlaceHolder& ioPlaceHolder) {
73 
74  // Retrieve the best matching ResultHolder object.
75  const ResultHolder& lResultHolder =
76  iResultCombination.getBestMatchingResultHolder();
77 
78  // Browse the list of result objects
79  const ResultList_T& lResultList = lResultHolder.getResultList();
80  for (ResultList_T::const_iterator itResult = lResultList.begin();
81  itResult != lResultList.end(); ++itResult) {
82  // Retrieve the result object
83  const Result* lResult_ptr = *itResult;
84  assert (lResult_ptr != NULL);
85 
90  const bool hasFullTextMatched = lResult_ptr->hasFullTextMatched();
91  if (hasFullTextMatched == false) {
92  continue;
93  }
94  assert (hasFullTextMatched == true);
95 
96  // Retrieve the Xapian document data (string)
97  const std::string& lDocDataStr = lResult_ptr->getBestDocData();
98  const RawDataString_T& lDocData = RawDataString_T (lDocDataStr);
99 
100  // Parse the POR details and create the corresponding Location structure
101  const Location& lLocation = Result::retrieveLocation (lDocData);
102 
103  // Instanciate an empty place object, which will be filled from the
104  // rows retrieved from the database.
105  Place& lPlace = FacPlace::instance().create (lLocation);
106 
107  // Insert the Place object within the PlaceHolder object
108  FacPlaceHolder::initLinkWithPlace (ioPlaceHolder, lPlace);
109 
110  // Fill the place with the remaining of the Result details.
111  lResult_ptr->fillPlace (lPlace);
112 
113  // DEBUG
114  OPENTREP_LOG_DEBUG ("Retrieved Document: " << lPlace.toString());
115  }
116  }
117 
130  // //////////////////////////////////////////////////////////////////////
131  void searchString (const StringPartition& iStringPartition,
132  const Xapian::Database& iDatabase,
133  ResultCombination& ioResultCombination,
134  WordList_T& ioWordList) {
135 
136  // Catch any thrown Xapian::Error exceptions
137  try {
138 
139  // Set of unknown words (just to eliminate the duplicates)
140  WordSet_T lWordSet;
141 
142  // Browse the partitions
143  for (StringPartition::StringPartition_T::const_iterator itSet =
144  iStringPartition._partition.begin();
145  itSet != iStringPartition._partition.end(); ++itSet) {
146  const StringSet& lStringSet = *itSet;
147 
148  // DEBUG
149  OPENTREP_LOG_DEBUG (" ==========");
150  OPENTREP_LOG_DEBUG (" String set: " << lStringSet);
151 
152  // Create a ResultHolder object.
153  ResultHolder& lResultHolder =
154  FacResultHolder::instance().create (lStringSet.describe(), iDatabase);
155 
156  // Add the ResultHolder object to the dedicated list.
158  lResultHolder);
159 
160  // Browse through all the word combinations of the partition
161  for (StringSet::StringSet_T::const_iterator itString =
162  lStringSet._set.begin();
163  itString != lStringSet._set.end(); ++itString) {
164  //
165  const std::string lQueryString (*itString);
166 
167  // DEBUG
168  OPENTREP_LOG_DEBUG (" --------");
169  OPENTREP_LOG_DEBUG (" Query string: '" << lQueryString << "'");
170 
171  // Create an empty Result object
172  Result& lResult = FacResult::instance().create (lQueryString,
173  iDatabase);
174 
175  // Add the Result object to the dedicated list.
176  FacResultHolder::initLinkWithResult (lResultHolder, lResult);
177 
178  // Perform the Xapian-based full-text match: the set of
179  // matching documents is filled.
180  const std::string& lMatchedString =
181  lResult.fullTextMatch (iDatabase, lQueryString);
182 
183  // When a single-word string is unmatched/unknown by/from Xapian,
184  // add it to the dedicated list (i.e., ioWordList).
185  if (lMatchedString.empty() == true) {
186  OPENTREP::addUnmatchedWord (lQueryString, ioWordList, lWordSet);
187  }
188  }
189 
190  // DEBUG
191  OPENTREP_LOG_DEBUG (std::endl
192  << "========================================="
193  << std::endl << "Result holder: "
194  << lResultHolder.toString() << std::endl
195  << "========================================="
196  << std::endl << std::endl);
197  }
198 
199  // DEBUG
200  OPENTREP_LOG_DEBUG ("*********************");
201 
202  } catch (const Xapian::Error& error) {
203  // Error
204  OPENTREP_LOG_ERROR ("Exception: " << error.get_msg());
205  throw XapianException (error.get_msg());
206  }
207  }
208 
225  // //////////////////////////////////////////////////////////////////////
226  void chooseBestMatchingResultHolder (ResultCombination& ioResultCombination) {
227 
228  // Calculate the weights for the full-text matches
229  const bool doesBestMatchingResultHolderExist =
230  ioResultCombination.chooseBestMatchingResultHolder();
231 
232  if (doesBestMatchingResultHolderExist == true) {
233  const ResultHolder& lBestMatchingResultHolder =
234  ioResultCombination.getBestMatchingResultHolder();
235 
236  // DEBUG
237  const StringSet& lCorrectedStringSet =
238  ioResultCombination.getCorrectedStringSet();
239  OPENTREP_LOG_DEBUG ("The best matching string partition for '"
240  << ioResultCombination.describeShortKey() << "' is "
241  << lBestMatchingResultHolder.describeShortKey()
242  << ", and has got a weight of "
243  << ioResultCombination.getBestMatchingWeight()
244  << "%. The corrected string set is: "
245  << lCorrectedStringSet);
246 
247  } else {
248  // DEBUG
249  OPENTREP_LOG_DEBUG ("There is no match for '"
250  << ioResultCombination.describeShortKey() << "'");
251  }
252  }
253 
254  // //////////////////////////////////////////////////////////////////////
255  bool RequestInterpreter::areAllCodeOrGeoID (const TravelQuery_T& iQueryString,
256  WordList_T& ioWordList) {
257  bool areAllWordsCodes = true;
258 
259  // Token-ise the given string
260  WordHolder::tokeniseStringIntoWordList (iQueryString, ioWordList);
261  for (WordList_T::const_iterator itWord = ioWordList.begin();
262  itWord != ioWordList.end(); ++itWord) {
263  const std::string& lWord = *itWord;
264 
265  // IATA code: alpha{3}
266  const boost::regex lIATACodeExp ("^[[:alpha:]]{3}$");
267  const bool lMatchesWithIATACode = regex_match (lWord, lIATACodeExp);
268 
269  // ICAO code: (alpha|digit){4}
270  const boost::regex lICAOCodeExp ("^([[:alpha:]]|[[:digit:]]){4}$");
271  const bool lMatchesWithICAOCode = regex_match (lWord, lICAOCodeExp);
272 
273  // UN/LOCODE code: alpha{2}(alpha|digit){3}
274  const boost::regex
275  lUNLOCodeExp ("^[[:alpha:]]{2}([[:alpha:]]|[[:digit:]]){3}$");
276  const bool lMatchesWithUNLOCode = regex_match (lWord, lUNLOCodeExp);
277 
278  // Geonames ID: digit{1,12}
279  const boost::regex lGeoIDCodeExp ("^[[:digit:]]{1,12}$");
280  const bool lMatchesWithGeoID = regex_match (lWord, lGeoIDCodeExp);
281 
282  // If the word is neither a IATA/ICAO code or a Geonames ID,
283  // there is nothing more to be done at that stage. The query string
284  // will have to be fully analysed.
285  // Otherwise, we go on analysing the other words.
286  if (lMatchesWithIATACode == false && lMatchesWithICAOCode == false
287  && lMatchesWithUNLOCode == false && lMatchesWithGeoID == false) {
288  areAllWordsCodes = false;
289  break;
290  }
291  }
292 
293  return areAllWordsCodes;
294  }
295 
302  // //////////////////////////////////////////////////////////////////////
304  Location oLocation;
305  PageRank_T lMaxPageRank = 0.0;
306 
307  for (LocationList_T::const_iterator itLocation = iLocationList.begin();
308  itLocation != iLocationList.end(); ++itLocation) {
309  const Location& lLocation = *itLocation;
310 
311  // Get the PageRank value
312  const PageRank_T& lPageRank = lLocation.getPageRank();
313  if (lPageRank > lMaxPageRank) {
314  lMaxPageRank = lPageRank;
315  oLocation = lLocation;
316  }
317  }
318 
319  return oLocation;
320  }
321 
335  // //////////////////////////////////////////////////////////////////////
337  const SQLDBConnectionString_T& iSQLDBConnStr,
338  const WordList_T& iCodeList,
339  LocationList_T& ioLocationList,
340  WordList_T& ioWordList) {
341  NbOfMatches_T oNbOfMatches = 0;
342 
343  // Connect to the SQL database/file
344  soci::session* lSociSession_ptr =
345  DBManager::initSQLDBSession (iSQLDBType, iSQLDBConnStr);
346  if (lSociSession_ptr == NULL) {
347  std::ostringstream oStr;
348  oStr << "The " << iSQLDBType.describe()
349  << " database is not accessible. Connection string: "
350  << iSQLDBConnStr << std::endl
351  << "Hint: launch the 'opentrep-dbmgr' program and "
352  << "see the 'tutorial' command.";
353  OPENTREP_LOG_ERROR (oStr.str());
354  throw SQLDatabaseImpossibleConnectionException (oStr.str());
355  }
356  assert (lSociSession_ptr != NULL);
357 
358  // Browse the list of words/items
359  for (WordList_T::const_iterator itWord = iCodeList.begin();
360  itWord != iCodeList.end(); ++itWord) {
361  const std::string& lWord = *itWord;
362 
363  // Check for IATA code: alpha{3}
364  const boost::regex lIATACodeExp ("^[[:alpha:]]{3}$");
365  const bool lMatchesWithIATACode = regex_match (lWord, lIATACodeExp);
366  if (lMatchesWithIATACode == true) {
367  // Perform the select statement on the underlying SQL database
368  const IATACode_T lIATACode (lWord);
369  const bool lUniqueEntry = true;
370  const NbOfDBEntries_T& lNbOfEntries =
371  DBManager::getPORByIATACode (*lSociSession_ptr, lIATACode,
372  ioLocationList, lUniqueEntry);
373  oNbOfMatches += lNbOfEntries;
374  continue;
375  }
376 
377  // Check for ICAO code: (alpha|digit){4}
378  const boost::regex lICAOCodeExp ("^([[:alpha:]]|[[:digit:]]){4}$");
379  const bool lMatchesWithICAOCode = regex_match (lWord, lICAOCodeExp);
380  if (lMatchesWithICAOCode == true) {
381  // Perform the select statement on the underlying SQL database
382  const ICAOCode_T lICAOCode (lWord);
383  const NbOfDBEntries_T& lNbOfEntries =
384  DBManager::getPORByICAOCode (*lSociSession_ptr, lICAOCode,
385  ioLocationList);
386  oNbOfMatches += lNbOfEntries;
387  continue;
388  }
389 
390  // Check for UN/LOCODE code: alpha{2}(alpha|digit){3}
391  const boost::regex
392  lUNLOCodeExp ("^[[:alpha:]]{2}([[:alpha:]]|[[:digit:]]){3}$");
393  const bool lMatchesWithUNLOCode = regex_match (lWord, lUNLOCodeExp);
394  if (lMatchesWithUNLOCode == true) {
395  // Perform the select statement on the underlying SQL database
396  const UNLOCode_T lUNLOCode (lWord);
397  const bool lUniqueEntry = true;
398  const NbOfDBEntries_T& lNbOfEntries =
399  DBManager::getPORByUNLOCode (*lSociSession_ptr, lUNLOCode,
400  ioLocationList, lUniqueEntry);
401  oNbOfMatches += lNbOfEntries;
402  continue;
403  }
404 
405  // Check for Geonames ID: digit{1,12}
406  const boost::regex lGeoIDCodeExp ("^[[:digit:]]{1,12}$");
407  const bool lMatchesWithGeoID = regex_match (lWord, lGeoIDCodeExp);
408  if (lMatchesWithGeoID == true) {
409  try {
410  // Convert the character string into a number
411  const GeonamesID_T lGeonamesID =
412  boost::lexical_cast<GeonamesID_T> (lWord);
413 
414  // Perform the select statement on the underlying SQL database
415  const NbOfDBEntries_T& lNbOfEntries =
416  DBManager::getPORByGeonameID (*lSociSession_ptr, lGeonamesID,
417  ioLocationList);
418  oNbOfMatches += lNbOfEntries;
419 
420  } catch (boost::bad_lexical_cast& eCast) {
421  OPENTREP_LOG_ERROR ("The Geoname ID ('" << lWord
422  << "') cannot be understood.");
423  }
424  }
425  }
426 
427  return oNbOfMatches;
428  }
429 
430  // //////////////////////////////////////////////////////////////////////
431  NbOfMatches_T RequestInterpreter::
432  interpretTravelRequest (const TravelDBFilePath_T& iTravelDBFilePath,
433  const DBType& iSQLDBType,
434  const SQLDBConnectionString_T& iSQLDBConnStr,
435  const TravelQuery_T& iTravelQuery,
436  LocationList_T& ioLocationList,
437  WordList_T& ioWordList,
438  const OTransliterator& iTransliterator) {
439  NbOfMatches_T oNbOfMatches = 0;
440 
441  // Sanity check
442  assert (iTravelQuery.empty() == false);
443 
444  // Check whether the file-path to the Xapian database/index exists
445  // and is a directory.
446  boost::filesystem::path lTravelDBFilePath (iTravelDBFilePath.begin(),
447  iTravelDBFilePath.end());
448  if (!(boost::filesystem::exists (lTravelDBFilePath)
449  && boost::filesystem::is_directory (lTravelDBFilePath))) {
450  std::ostringstream oStr;
451  oStr << "The file-path to the Xapian database/index ('"
452  << iTravelDBFilePath << "') does not exist or is not a directory. ";
453  oStr << "That usually means that the OpenTREP indexer (opentrep-indexer) "
454  << "has not been launched yet, or that it has operated "
455  << "on a different Xapian database/index file-path.";
456  OPENTREP_LOG_ERROR (oStr.str());
457  throw FileNotFoundException (oStr.str());
458  }
459 
460  // Open the Xapian database
461  Xapian::Database lXapianDatabase (iTravelDBFilePath);
462 
463  // DEBUG
464  OPENTREP_LOG_DEBUG (std::endl
465  << "=========================================");
466 
467  // First, cut the travel query in slices and calculate all the partitions
468  // for each of those query slices
469  QuerySlices lQuerySlices (lXapianDatabase, iTravelQuery, iTransliterator);
470 
471  // DEBUG
472  OPENTREP_LOG_DEBUG ("+=+=+=+=+=+=+=+=+=+=+=+=+=+=+");
473  OPENTREP_LOG_DEBUG ("Travel query: `" << iTravelQuery << "'");
474  const TravelQuery_T& lNormalisedQueryString = lQuerySlices.getQueryString();
475  if (!(iTravelQuery == lNormalisedQueryString)) {
476  OPENTREP_LOG_DEBUG ("Normalised travel query: `" << lNormalisedQueryString
477  << "'");
478  }
479  OPENTREP_LOG_DEBUG ("Query slices: `" << lQuerySlices << "'");
480 
481  // Browse the travel query slices
482  const StringPartitionList_T& lStringPartitionList =
483  lQuerySlices.getStringPartitionList();
484  for (StringPartitionList_T::const_iterator itSlice =
485  lStringPartitionList.begin();
486  itSlice != lStringPartitionList.end(); ++itSlice) {
487  StringPartition lStringPartition = *itSlice;
488  const std::string& lTravelQuerySlice = lStringPartition.getInitialString();
489 
495  ResultCombination& lResultCombination =
496  FacResultCombination::instance().create (lTravelQuerySlice);
497 
498  // DEBUG
499  OPENTREP_LOG_DEBUG ("+++++++++++++++++++++");
500  OPENTREP_LOG_DEBUG ("Travel query slice: `" << lTravelQuerySlice << "'");
501  OPENTREP_LOG_DEBUG ("Partitions: " << lStringPartition);
502 
503 
508  WordList_T lCodeList;
509  const bool areAllWordsCodes =
510  areAllCodeOrGeoID (lTravelQuerySlice, lCodeList);
511 
512  NbOfMatches_T lNbOfMatches = 0;
513  if (areAllWordsCodes == true && !(iSQLDBType == DBType::NODB)) {
520  // DEBUG
521  OPENTREP_LOG_DEBUG ("The travel query string (" << lTravelQuerySlice
522  << ") is made only of IATA/ICAO/UNLOCODE codes "
523  << "or Geonames ID. The " << iSQLDBType.describe()
524  << " SQL database (" << iSQLDBConnStr
525  << ") will be used. "
526  << "The Xapian database/index will not be used");
527 
528  lNbOfMatches = getLocationList (iSQLDBType, iSQLDBConnStr, lCodeList,
529  ioLocationList, ioWordList);
530  }
531 
532  if (lNbOfMatches == 0) {
543  // DEBUG
544  if (iSQLDBType == DBType::NODB) {
545  OPENTREP_LOG_DEBUG ("No SQL database may be used. "
546  << "The Xapian database will be used instead");
547  } else {
548  OPENTREP_LOG_DEBUG ("The travel query string (" << lTravelQuerySlice
549  << ") has got items/words, which are neither "
550  << "IATA/ICAO codes nor Geonames ID. "
551  << "The Xapian database/index will be used");
552  }
553 
558  OPENTREP::searchString (lTravelQuerySlice, lXapianDatabase,
559  lResultCombination, ioWordList);
560 
564  lResultCombination.calculateAllWeights();
565 
569  OPENTREP::chooseBestMatchingResultHolder (lResultCombination);
570 
576  // Create a PlaceHolder object, to collect the matching Place objects
577  PlaceHolder& lPlaceHolder = FacPlaceHolder::instance().create();
578  createPlaces (lResultCombination, lPlaceHolder);
579 
580  // DEBUG
581  OPENTREP_LOG_DEBUG (std::endl
582  << "========================================="
583  << std::endl << "Summary:" << std::endl
584  << lPlaceHolder.toShortString() << std::endl
585  << "========================================="
586  << std::endl);
587 
592  lPlaceHolder.createLocations (ioLocationList);
593  }
594  }
595 
596  oNbOfMatches = ioLocationList.size();
597  return oNbOfMatches;
598  }
599 
600 }
ResultHolder & create(const TravelQuery_T &iQueryString, const Xapian::Database &iDatabase)
#define OPENTREP_LOG_ERROR(iToBeLogged)
Definition: Logger.hpp:24
#define OPENTREP_LOG_DEBUG(iToBeLogged)
Definition: Logger.hpp:33
static void initLinkWithResult(ResultHolder &, Result &)
void addUnmatchedWord(const TravelQuery_T &iQueryString, WordList_T &ioWordList, WordSet_T &ioWordSet)
unsigned int GeonamesID_T
unsigned short NbOfMatches_T
StringSet_T _set
Definition: StringSet.hpp:118
static FacPlace & instance()
Definition: FacPlace.cpp:29
NbOfMatches_T getLocationList(const DBType &iSQLDBType, const SQLDBConnectionString_T &iSQLDBConnStr, const WordList_T &iCodeList, LocationList_T &ioLocationList, WordList_T &ioWordList)
Structure modelling a (geographical) location.
Definition: Location.hpp:25
double PageRank_T
const RawDataString_T & getBestDocData() const
Definition: Result.hpp:132
std::string toString() const
Definition: Place.cpp:85
static FacResultCombination & instance()
static Location retrieveLocation(const Xapian::Document &)
Definition: Result.cpp:272
const Percentage_T & getBestMatchingWeight() const
std::string fullTextMatch(const Xapian::Database &, const TravelQuery_T &)
Definition: Result.cpp:521
static void initLinkWithResultHolder(ResultCombination &, ResultHolder &)
unsigned int NbOfDBEntries_T
std::list< StringPartition > StringPartitionList_T
Result & create(const TravelQuery_T &, const Xapian::Database &)
Definition: FacResult.cpp:41
std::vector< std::string > WordList_T
std::list< Word_T > WordList_T
std::string describe() const
Definition: StringSet.cpp:88
StringSet getCorrectedStringSet() const
bool hasFullTextMatched() const
Definition: Result.hpp:71
const PageRank_T & getPageRank() const
Definition: Location.hpp:354
static FacResult & instance()
Definition: FacResult.cpp:29
Enumeration of database types.
Definition: DBType.hpp:17
static void tokeniseStringIntoWordList(const TravelQuery_T &, WordList_T &)
Definition: WordHolder.cpp:37
static NbOfDBEntries_T getPORByIATACode(soci::session &, const IATACode_T &, LocationList_T &, const bool iUniqueEntry)
Definition: DBManager.cpp:1172
std::set< std::string > WordSet_T
const ResultList_T & getResultList() const
Class wrapping functions on a list of Result objects.
const ResultHolder & getBestMatchingResultHolder() const
std::list< Result * > ResultList_T
Definition: ResultList.hpp:13
const std::string describe() const
Definition: DBType.cpp:131
Class modelling a place/POR (point of reference).
Definition: Place.hpp:29
static bool shouldKeep(const std::string &iPhrase, const std::string &iWord)
Definition: Filter.cpp:144
std::list< Location > LocationList_T
void searchString(const StringPartition &iStringPartition, const Xapian::Database &iDatabase, ResultCombination &ioResultCombination, WordList_T &ioWordList)
ResultCombination & create(const TravelQuery_T &iQueryString)
static NbOfDBEntries_T getPORByUNLOCode(soci::session &, const UNLOCode_T &, LocationList_T &, const bool iUniqueEntry)
Definition: DBManager.cpp:1418
std::string describeShortKey() const
Class wrapping functions on a list of ResultHolder objects.
Location getBestMatchingLocation(const LocationList_T &iLocationList)
static FacResultHolder & instance()
void fillPlace(Place &) const
Definition: Result.cpp:211
static FacPlaceHolder & instance()
std::string toString() const
StringPartition_T _partition
static NbOfDBEntries_T getPORByGeonameID(soci::session &, const GeonamesID_T &, LocationList_T &)
Definition: DBManager.cpp:1588
static void initLinkWithPlace(PlaceHolder &, Place &)
Class holding a set of strings, e.g., {"rio", "de", "janeiro"}.
Definition: StringSet.hpp:19
std::string TravelQuery_T
Place & create()
Definition: FacPlace.cpp:41
void createPlaces(const ResultCombination &iResultCombination, PlaceHolder &ioPlaceHolder)
void chooseBestMatchingResultHolder(ResultCombination &ioResultCombination)
static NbOfDBEntries_T getPORByICAOCode(soci::session &, const ICAOCode_T &, LocationList_T &)
Definition: DBManager.cpp:1286
std::string describeShortKey() const
static soci::session * initSQLDBSession(const DBType &, const SQLDBConnectionString_T &)
Definition: DBManager.cpp:318
Class wrapping a set of Xapian documents having matched a given query string.
Definition: Result.hpp:48