System documentation of the GNU Image-Finding Tool

merge_sort_streams.h
1 /* -*- mode: c++ -*-
2  */
3 /*
4 
5  GIFT, a flexible content based image retrieval system.
6  Copyright (C) 1998, 1999, 2000, 2001, 2002, CUI University of Geneva, University of Bayreuth
7 
8  Copyright (C) 2003, 2004 Bayreuth University
9  2005 Bamberg University
10  This program is free software; you can redistribute it and/or modify
11  it under the terms of the GNU General Public License as published by
12  the Free Software Foundation; either version 2 of the License, or
13  (at your option) any later version.
14 
15  This program is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  GNU General Public License for more details.
19 
20  You should have received a copy of the GNU General Public License
21  along with this program; if not, write to the Free Software
22  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 
24 */
25 #ifndef _MERGE_SORT
26 #define _MERGE_SORT
27 
28 //#warning FIXME: this needs to be added to the GIFT distro. It speeds up the generation
29 //#warning of inverted files considerably
30 //(done)
31 
32 #include <memory> // auto_ptr
33 #include <algorithm> // using swap
34 #include <fstream> // file access
35 #include <cassert>
36 
37 using namespace std;
38 
39 //#define STREAMPOS_T fstream::pos_type
40 
43 template<typename T>
44 class CStreamPos{
46  T mContent;
47 public:
48  CStreamPos(T inContent):mContent(inContent){
49 
50  }
51 
52  CStreamPos(long int inContent):mContent(inContent){
53 
54  }
55 
56  operator T()const{
57  return mContent;
58  }
59  operator long int()const{
60  return mContent;
61  }
62 
63  CStreamPos<T>& operator++ (){
64  mContent=mContent+static_cast<T>(1);
65  return *this;
66  }
67  CStreamPos<T> operator++ (int){
68  CStreamPos<T> lReturnValue=*this;
69  mContent=mContent+static_cast<T>(1);
70  return lReturnValue;
71  }
72 
73  CStreamPos<T> operator% (int inMod)const{
74  return mContent % inMod;
75  }
76  CStreamPos<T> operator/ (int inMod)const{
77  return mContent / inMod;
78  }
79  bool operator< (CStreamPos<T>& inThan)const{
80  return this->mContent < inThan.mContent;
81  }
82  bool operator! ()const{
83  return !(this->mContent);
84  }
85  CStreamPos<T> operator+ (CStreamPos<T>& inSummand){
86  return CStreamPos<T>(mContent + inSummand.mContent);
87  }
88 };
89 
90 #if __GNUC__==2
91 #define STREAMPOS_T long long int
92 #else
93 #define STREAMPOS_T CStreamPos<fstream::pos_type>
94 #endif
95 // ex long long int
96 
107 template<class T>
108 void merge_streams(istream& in1, const STREAMPOS_T inCount1,
109  istream& in2, const STREAMPOS_T inCount2,
110  ostream& out,
111  int inNumberOfPageElements=1){
112 
113  {
114 // cout << "Merging: "
115 // << inCount1
116 // << ","
117 // << inCount2
118 // << endl;
119  const STREAMPOS_T lPageSize(sizeof(T)*inNumberOfPageElements);
120 
121 
122 
123  if(!(inCount1)){// if there is nothing to merge
124  for(STREAMPOS_T i=0;//...copy
125  i<inCount2;
126  i++){
127  T l2;
128  in2.read((char*)&l2,
129  sizeof(l2));
130  assert(in2);
131 
132  out.write((char*)&l2,
133  sizeof(l2));
134  assert(out);
135  }
136  return;//and return
137  }
138  if(!inCount2){//if there is nothing to merge
139  for(STREAMPOS_T i=0;//...copy
140  i<inCount1;
141  i++){
142  T l1;
143  in1.read((char*)&l1,
144  sizeof(l1));
145  assert(in1);
146  out.write((char*)&l1,
147  sizeof(l1));
148  assert(out);
149  }
150  return;//and return
151  }
152 
153  STREAMPOS_T lI1(0);
154  STREAMPOS_T lI2(0);
155 
156  //read the first record from both files
157  T l1;
158  in1.read((char*)&l1,
159  sizeof(l1));
160  assert(in1);
161  T l2;
162  in2.read((char*)&l2,
163  sizeof(l2));
164  assert(in2);
165 
166  while((lI1<inCount1)
167  &&(lI2<inCount2)){
168  if(l1<l2){
169  //
170  out.write((char*)&l1,
171  sizeof(l1));
172  assert(out);
173 
174  //
175  lI1++;
176  if(lI1<inCount1){
177  in1.read((char*)&l1,
178  sizeof(l1));
179  assert(in1);
180  }
181  }else{
182  //
183  out.write((char*)&l2,
184  sizeof(l2));
185  //
186  lI2++;
187  if(lI2<inCount2){
188  in2.read((char*)&l2,
189  sizeof(l2));
190  assert(in2);
191  }
192  }
193  }
194  while(lI1<inCount1){
195  //
196  out.write((char*)&l1,
197  sizeof(l1));
198  //
199  lI1++;
200  if(lI1<inCount1){
201  in1.read((char*)&l1,
202  sizeof(l1));
203  assert(in1);
204  }
205  }
206  while(lI2<inCount2){
207  //
208  out.write((char*)&l2,
209  sizeof(l2));
210  assert(out);
211  //
212  lI2++;
213  if(lI2<inCount2){
214  in2.read((char*)&l2,
215  sizeof(l2));
216  assert(in2);
217  }
218  }
219  }
220 }
221 template<typename T>
222 void first_level_quicksort(int inNumberOfPageElements,
223  const char* inFileToBeSortedName,
224  const char* inTemporaryName){
225 
226  cout << "Starting quicksort: "
227  << inNumberOfPageElements
228  << " elements per page." << endl
229  << "Sorting files " << inFileToBeSortedName << endl
230  << "to " << inTemporaryName << endl;
231  cout << "NOW ALLOCATING A PAGE" << inNumberOfPageElements << endl;
232  auto_ptr<T> lPage(new T[inNumberOfPageElements]);
233 
234  cout << "H" << flush;
235 
236  const STREAMPOS_T lPageSize(sizeof(T)*inNumberOfPageElements);
237 
238  cout << "I" << flush;
239 
240  STREAMPOS_T lFileSize(0);
241  ifstream lToBeSorted1(inFileToBeSortedName);
242  assert(lToBeSorted1);
243  lToBeSorted1.seekg(0,
244  ios::end);
245  lFileSize=lToBeSorted1.tellg();
246  lToBeSorted1.clear();
247  lToBeSorted1.seekg(0,
248  ios::beg);
249  cout << "E" << flush;
250 
251  ofstream lTemporary(inTemporaryName);
252  assert(lTemporary);
253  cout << "R" << flush;
254 
255  STREAMPOS_T lSum(0);
256 
257  T* lBegin(lPage.get());
258  T* lEnd(lPage.get());
259 
260  cout << "FIRSTLEVELQUICK" << lFileSize << ";" << lSum<< endl;
261  while((lSum<lFileSize)
262  && lToBeSorted1){
263 
264  cout << "." << flush;
265 
266  int lRead(0);
267  if(lSum+lPageSize < lFileSize){
268  lToBeSorted1.read((char*)lPage.get(),
269  lPageSize);
270  lRead=lPageSize;
271  }else{
272  lToBeSorted1.read((char*)lPage.get(),
273  lFileSize-lSum);
274  lRead=lFileSize-lSum;
275  }
276  if(lRead){
277  lEnd=lBegin+(lRead)/sizeof(T);
278  sort(lBegin,lEnd);
279  lTemporary.write((char*)lPage.get(),lRead);
280  assert(lTemporary);
281  }
282  }
283  cout << "." << endl;
284 
285 }
286 
297 template<class T>
298 char* merge_sort_streams(const char* inFileToBeSortedName,
299  const char* inTemporaryName,
300  int inNumberOfPageElements=(1 << 20)
301  ){
302 
303  const char* lFileToBeSortedName(inFileToBeSortedName);
304  const char* lTemporaryName(inTemporaryName);
305 
306  STREAMPOS_T lFileSize(0);
307  ifstream lToBeSorted1(inFileToBeSortedName);
308  lToBeSorted1.seekg(0,
309  ios::end);
310  lFileSize=lToBeSorted1.tellg();
311  lToBeSorted1.close();
312 
313  ofstream lTemporary;
314  ifstream lToBeSorted2;
315 
316 #ifdef first_level_quick
317  first_level_quicksort<T>(inNumberOfPageElements,
318  inFileToBeSortedName,
319  inTemporaryName);
320  swap(lFileToBeSortedName,
321  lTemporaryName);
322 #else
323  cout << "STARTING mit MERGESIZE1" << endl;
324  inNumberOfPageElements=1;
325 #endif
326  STREAMPOS_T lCount(0);
327  for(STREAMPOS_T iMergeSize(sizeof(T)*inNumberOfPageElements);
328  (iMergeSize < lFileSize)
329  || !(lCount%2)
330  // ||(lCount%2) makes sure that we will get
331  // the result in inFileToBeSorted
332  // the ! is, because we have have already
333  // the quicksort sorting pass behind us
334  ;
335  (iMergeSize = iMergeSize << 1),
336  (lCount=lCount+static_cast<STREAMPOS_T>(1))){
337 
338  cout << "MERGESORT MergeSize "
339  << iMergeSize
340  << endl;
341 
342  lToBeSorted1.open(lFileToBeSortedName);
343  lToBeSorted1.clear();
344 
345  lToBeSorted2.open(lFileToBeSortedName);
346  lToBeSorted2.clear();
347 
348  lTemporary.open(lTemporaryName);
349  lTemporary.clear();
350 
351 
352  for(STREAMPOS_T i(0);
353  i<lFileSize;
354  i = i + iMergeSize*2){
355  lToBeSorted1.seekg(i);
356 
357  if(!lToBeSorted1){
358  cerr << __FILE__ << ":" << __LINE__ << " lToBeSorted false, after seekg("
359  << static_cast<long int>(i)
360  << ")"
361  << endl;
362  }
363 
364  assert(lToBeSorted1);
365 
366  if(i+iMergeSize<lFileSize){
367  lToBeSorted2.seekg(i+iMergeSize);
368  assert(lToBeSorted2);
369  }
370 
371  STREAMPOS_T lMergeSize1=lFileSize-i;
372  if(lFileSize < i){// underflow
373  lMergeSize1=0;//correct underflow
374  }
375  STREAMPOS_T lMergeSize2=lFileSize-i-iMergeSize;
376  if(lFileSize < i + iMergeSize){// underflow of lMergeSize2
377  lMergeSize2=0;//correct underflow
378  }
379 
380  // lToBeSorted1.clear();
381  // lToBeSorted2.clear();
382 
383  if(lMergeSize1>iMergeSize){
384  lMergeSize1=iMergeSize;
385  }
386  if(lMergeSize2>iMergeSize){
387  lMergeSize2=iMergeSize;
388  }
389 
390 #if __GNUC__==2
391  merge_streams<T>(lToBeSorted1,
392  lMergeSize1/sizeof(T),
393  lToBeSorted2,
394  lMergeSize2/sizeof(T),
395  lTemporary,
396  inNumberOfPageElements
397  );
398 #else
399  merge_streams<T>(lToBeSorted1,
400  lMergeSize1.operator/(sizeof(T)),
401  lToBeSorted2,
402  lMergeSize2.operator/(sizeof(T)),
403  lTemporary,
404  inNumberOfPageElements
405  );
406 #endif
407  }
408 
409  lTemporary.close();
410  lToBeSorted1.close();
411  lToBeSorted2.close();
412  swap(lFileToBeSortedName,
413  lTemporaryName);
414  cout << "endmerge" << endl;
415  }
416  return strdup(lFileToBeSortedName);
417 }
418 #endif
a wrapper for streampos, so we can do increments on it
Definition: merge_sort_streams.h:44

Need for discussion? Want to contribute? Contact
help-gift@gnu.org Generated using Doxygen