System documentation of the GNU Image-Finding Tool

CAcIFFileSystem.h
1 /* -*- mode: c++ -*-
2 */
3 /*
4 
5  GIFT, a flexible content based image retrieval system.
6  Copyright (C) 1998, 1999, 2000, 2001, 2002, CUI University of Geneva
7 
8  Copyright (C) 2003, 2004 Bayreuth University
9  2005 Bamberg University
10  This program is free software; you can redistribute it and/or modify
11  it under the terms of the GNU General Public License as published by
12  the Free Software Foundation; either version 2 of the License, or
13  (at your option) any later version.
14 
15  This program is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  GNU General Public License for more details.
19 
20  You should have received a copy of the GNU General Public License
21  along with this program; if not, write to the Free Software
22  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 
24 */
25 // -*- mode: c++ -*-
26 
27 
28 class CXMLElement;
29 
30 /*
31 *
32 *
33 * This class manages the access to the inverted file as well
34 * as its generation
35 *
36 *
37 *
38 * modification history:
39 *
40 * WM 1099 changed documentation format
41 * completed documentation
42 * HM 090399 created the documentation
43 * WM 1098 created the file
44 *
45 *
46 *
47 * compiler defines used:
48 *
49 *
50 */
51 
52 #ifndef _CACIFFILESYSTEM
53 #define _CACIFFILESYSTEM
54 #include "libGIFTAcInvertedFile/include/uses-declarations.h"
55 #include <string>
56 #include "libMRML/include/TID.h"
57 #include "libMRML/include/CSelfDestroyPointer.h"
58 #include "libMRML/include/CArraySelfDestroyPointer.h"
59 #include "libGIFTAcInvertedFile/include/CDocumentFrequencyList.h"
60 #include "libMRML/include/CMutex.h" // multi threading
61 //#include "CCollectionFrequencyList.h"
62 #include "libGIFTAcInvertedFile/include/CADIHash.h"
63 #include "libGIFTAcURL2FTS/include/CAcURL2FTS.h"
64 #include "libGIFTAcInvertedFile/include/CAcInvertedFile.h"
65 #include <iostream>
66 #include <fstream>
67 #include <map>
68 #include <vector>
69 #ifdef HAS_HASH_MAP
70 #include <hash_map>
71 #define HASH_MAP hash_map
72 #else
73 #define HASH_MAP map
74 #endif
75 #include <functional>
76 #include <algorithm>
77 
78 #include "libMRML/include/CMagic.h"
79 
80 
81 typedef TID TFeatureID ;
82 
94 
95 protected:
108 #ifndef V295
110 #else
112 #endif
113 
118 
120  mutable ifstream mOffsetFile;
121 
124 
127 
130 
133 
135  typedef HASH_MAP<TID,streampos> CIDToOffset;//new hash
137  CIDToOffset mIDToOffset;
138 
140  mutable HASH_MAP<TID,double> mFeatureToCollectionFrequency;//new hash
141 
145  HASH_MAP<TID,unsigned int> mFeatureDescription;//new hash_
146 
152 
155  void writeOffsetFileElement(TID inFeatureID,
156  streampos inPosition,
157  ostream& inOpenOffsetFile);
159  CDocumentFrequencyList* getFeatureFile(string inFileName)const;
160 public:
162  bool operator()()const;
163 
190  CAcIFFileSystem(const CXMLElement& inCollectionElement);
192  bool init(bool);
193 
196 
198  string IDToURL(TID inID)const;
199 
203  CDocumentFrequencyList* FeatureToList(TFeatureID)const;
204 
206  CDocumentFrequencyList* URLToFeatureList(string inURL)const;
207 
209  CDocumentFrequencyList* DIDToFeatureList(TID inDID)const;
210 
212 
213 
217  double FeatureToCollectionFrequency(TFeatureID)const;
218 
220  unsigned int getFeatureDescription(TID inFeatureID)const;
222 
226  double DIDToMaxDocumentFrequency(TID)const;
227 
229  double DIDToDFSquareSum(TID)const;
230 
232  double DIDToSquareDFLogICFSum(TID)const;
234 
235  /*@name Inverted File Generation and Consistency Checking*/
237 
245  bool generateInvertedFile();
246 
255 
258  bool checkConsistency();
259 
266  bool findWithinStream(TID inFeatureID,
267  TID inDocumentID,
268  double inDocumentFrequency)const;
269 
271 
277  virtual pair<bool,TID> URLToID(const string& inURL)const;
278 
280  void getAllIDs(list<TID>&)const;
283  void getAllAccessorElements(list<CAccessorElement>&)const;
288  void getRandomIDs(list<TID>&,
289  list<TID>::size_type)const;
298  void getRandomAccessorElements(list<CAccessorElement>& outResult,
299  list<CAccessorElement>::size_type inSize)const;
301  int size()const;
303 
304  TID getMaximumFeatureID()const;
312  list<TID>* getAllFeatureIDs()const;
318  virtual pair<bool,CAccessorElement> IDToAccessorElement(TID inID)const;
320  operator bool()const;
321 
322 };
323 
324 #endif
An accessor to an inverted file.
Definition: CAcIFFileSystem.h:93
CDocumentFrequencyList * DIDToFeatureList(TID inDID) const
List of features contained by a document with ID inDID.
list< TID > * getAllFeatureIDs() const
Getting a list of all features contained in this.
string IDToURL(TID inID) const
Translate a DocumentID to a URL (for output)
CSelfDestroyPointer< istream > mInvertedFile
The inverted file.
Definition: CAcIFFileSystem.h:117
bool operator()() const
for testing if the inverted file is correctly constructed
void getAllAccessorElements(list< CAccessorElement > &) const
List of triplets (ID,imageURL,thumbnailURL) of all the documents present in the inverted file...
void getRandomIDs(list< TID > &, list< TID >::size_type) const
Get a given number of random document IDs.
This class captures the structure of an XML element.
Definition: CXMLElement.h:51
A list of Document Frequency Elements (the main part of an inverted file)
Definition: CDocumentFrequencyList.h:58
double FeatureToCollectionFrequency(TFeatureID) const
Collection frequency for a given feature.
CAcIFFileSystem(const CXMLElement &inCollectionElement)
This opens an existing inverted file, and then initializes this structure.
string mOffsetFileName
Name of the Offset file.
Definition: CAcIFFileSystem.h:129
double DIDToDFSquareSum(TID) const
Returns the document-frequency square sum for a given document ID.
HASH_MAP< TID, unsigned int > mFeatureDescription
map from the feature ID to the feature description
Definition: CAcIFFileSystem.h:145
CDocumentFrequencyList * FeatureToList(TFeatureID) const
List of documents containing the feature.
void getRandomAccessorElements(list< CAccessorElement > &outResult, list< CAccessorElement >::size_type inSize) const
For drawing random sets.
string mInvertedFileBuffer
A buffer, if the inverted file is to be held in ram.
Definition: CAcIFFileSystem.h:109
CADIHash.
Definition: CADIHash.h:53
TID getMaximumFeatureID() const
This is interesting for browsing.
An accessor to an inverted file.
Definition: CAcInvertedFile.h:83
virtual pair< bool, CAccessorElement > IDToAccessorElement(TID inID) const
Translate a DocumentID to an accessor Element.
bool checkConsistency()
Check the consistency of the inverted file system accessed by this accessor.
~CAcIFFileSystem()
Destructor.
CDocumentFrequencyList * getFeatureFile(string inFileName) const
loads a *.fts file.
CIDToOffset mIDToOffset
map from feature id to the offset for this feature
Definition: CAcIFFileSystem.h:137
string mInvertedFileName
Name of the inverted file.
Definition: CAcIFFileSystem.h:126
CSelfDestroyPointer< CAcURL2FTS > mURL2FTS
In order to have just one parent, I have to limit myself to single inheritance.
Definition: CAcIFFileSystem.h:103
double DIDToMaxDocumentFrequency(TID) const
returns the maximum document frequency for one document ID
int size() const
The number of images in this accessor.
unsigned int getFeatureDescription(TID inFeatureID) const
What kind of feature is the feature with ID inFeatureID?
double DIDToSquareDFLogICFSum(TID) const
Returns this function for a given document ID.
This class offers an abstraction from the locking method used.
Definition: CMutex.h:40
CMutex mMutex
the mutex for multi threading
Definition: CAcIFFileSystem.h:97
bool init(bool)
called by constructors
void writeOffsetFileElement(TID inFeatureID, streampos inPosition, ostream &inOpenOffsetFile)
add a pair of FeatureID,Offset to the open offset file (helper function for inverted file constructio...
bool generateInvertedFile()
Generating an inverted File, if there is none.
HASH_MAP< TID, streampos > CIDToOffset
map from feature id to the offset for this feature
Definition: CAcIFFileSystem.h:135
bool findWithinStream(TID inFeatureID, TID inDocumentID, double inDocumentFrequency) const
Is the Document with inDocumentID contained in the document frequency list of the feature inFeatureID...
HASH_MAP< TID, double > mFeatureToCollectionFrequency
map from feature to the collection frequency
Definition: CAcIFFileSystem.h:140
void getAllIDs(list< TID > &) const
List of the IDs of all documents present in the inverted file.
string mFeatureDescriptionFileName
Name for the file with the feature description.
Definition: CAcIFFileSystem.h:132
CADIHash mDocumentInformation
additional information about the document like, e.g.
Definition: CAcIFFileSystem.h:150
virtual pair< bool, TID > URLToID(const string &inURL) const
Translate a URL to its document ID.
bool newGenerateInvertedFile()
Generating an inverted File, if there is none.
ifstream mOffsetFile
Feature -> Offset in inverted file.
Definition: CAcIFFileSystem.h:120
CDocumentFrequencyList * URLToFeatureList(string inURL) const
List of features contained by a document.
ifstream mFeatureDescriptionFile
File of feature descriptions.
Definition: CAcIFFileSystem.h:123
TID mMaximumFeatureID
the maximum feature ID arising in this file
Definition: CAcIFFileSystem.h:105
string mTemporaryIndexingFileBase
Some place for putting temporary indexing data.
Definition: CAcIFFileSystem.h:115

Need for discussion? Want to contribute? Contact
help-gift@gnu.org Generated using Doxygen