/*
Copyright (c) 2003, Cornell University
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Cornell University nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*/
/////////////////////////////////////////////////////////////////////
//
// Mafia.cpp
//
/////////////////////////////////////////////////////////////////////
//#define DEBUG
#include <iostream>
#include <fstream>
#include <time.h>
#include <stdio.h>
#include <list>
#include <vector>
#include <algorithm rel='nofollow' onclick='return false;'>
#include <string>
#include <assert.h rel='nofollow' onclick='return false;'>
#include <cmath>
#include <map>
#include "Bitmap.h"
#include "TreeNode.h"
#include "Transaction.h"
#include "ItemsetOutput.h"
using namespace std;
/// @defgroup GlobalVariables Global Variables
/// Global variables
/** @{ */
typedef vector<TreeNode *> NodeList;
typedef vector<TreeNode *> BranchList;
typedef vector<Bitmap *> BitmapList;
typedef vector<BaseBitmap *> BaseBitmapList;
typedef vector<int> ItemSet;
typedef map<long, ItemSet *> HashTable;
/// Simple class for storing subtree size estimates
class SubtreeEstimate {
public:
int Count; ///< Count of actual subtrees counted
int Sum; ///< Sum of subtree sizes
SubtreeEstimate () {
Count = 0;
Sum = 0;
}
};
/// Simple class for storing tail elements of each node of the tree
class TailElement {
public:
int Count; ///< Support of the 1-extension
int Item; ///< Item-id for this1 -extension
TailElement () {
Count = 0;
Item = 0;
}
bool operator < (const TailElement& rhs) const {
return this->Count < rhs.Count;
};
};
/// @defgroup CommandLineParameters Command Line Parameters/Variables
/// Commmand line parameters from user or inferred
/** @{ */
string method; ///< either -mfi or -fci or -fi
char* outFilename; ///< filename for output
ItemsetOutput *outFile; ///< file for ouput
bool outputMFI = false; ///< true if MFI should be saved to a file
bool MethodIsFI = false; ///< true if the method is -fi
bool MethodIsFCI = false; ///< true if the method is -fci
int ItemCount; ///< # of items in the file
int TransCount; ///< # of transactions in the file
double MSF; ///< user-defined min sup as percentage
int MS; ///< min sup as a transaction count
int VerScale = 1; ///< Scaling factor for transactions
int HorScale = 1; ///< Scaling factor for items
bool GoFHUT = true; ///< FHUT flag
bool HUTMFI = true; ///< HUTMFI flag
bool PEPrune = true; ///< PEPrune flag -- parent equivalent pruning
bool Reorder = true; ///< Reorder flag
/** @} */
/// @defgroup CounterVariables Counter Variables
/// Counter variables for information gathering
/** @{ */
int CountFHUT = 0; ///< # of times FHUT was successful
int CountNodes = 0; ///< # of frequent nodes in the tree
int CountCounts = 0; ///< # of Counts or all nodes in the tree
int CountAnds = 0; ///< # of ANDs of normal bitmaps
int CountSmallAnds = 0; ///< # of compressed bitmap ANDs
int CountPEPrunes = 0; ///< # of PEPruning
int CountCheckPosition = 0; ///< # of CheckPosition calls
int CountHUTMFI = 0; ///< # of HUTMFI attempts
int CountHUTMFISuccess = 0; ///< # of HUTMFI successes
int CountRebuilds; ///< # of Rebuilds
/** @} */
/// @defgroup ProgramVariables Program Parameters/Variables
/// Useful program parameters/counters
/** @{ */
int maxtail = 0;
int MFISize = 0; ///< MFI size before pruning
int MFIDepth = 0; ///< The aggregated depth of the all MFI elements
int F1size = 0; ///< # of frequent 1-itemsets after merging repeats
int FullF1size = 0; ///< # of frequent 1-itemsets
int k = 50; ///< # of items checked for a MFI lookup
int MAX_compID = 1; ///< max compression ID
int projectDepth = -1; ///< depth of the bitmap you're projecting from
int EstimateSize; ///< size of subtree estimation buffer
int EstimateDiv = 5; ///< bucket size by frequent tail length
int maxItemsetSize = 0; ///< max size of a frequent itemset
/** @} */
/// @defgroup DataVariables Data variables
/// Complex data structure variables
/** @{ */
NodeList F1; ///< List of frequent 1-itemsets
BitmapList TransBuffy; ///< Buffer of transaction bitmaps
BaseBitmapList NameBuffy; ///< Buffer of name bitmaps
NodeList NodeBuffy; ///< Buffer of tree nodess
TreeNode *Root; ///< The root (the nullset)
TailElement *gTail; ///< global tail pointer
TailElement *TailBuffy; ///< Common Buffer for tail elements
Bitmap *NullTrans; ///< A transaction bitmap filled with ones
int *ItemMap; ///< For renaming items after sorting by support
int *ReverseItemMap; ///< For remembering the renaming...
BaseBitmapList MFI; ///< List of Maximally Frequent Itemsets
HashTable HT; ///< Hash table of transaction supports
vector <int> SupportCountList; ///< List that stores support count
BaseBitmap* TempName; ///< Temporary buffer for one name bitmap
SubtreeEstimate* EstimateBuffy; ///< Buffer of subtree estimates
int *MFIBySizes; ///< Buffer for counting MFI by itemset size
int *ItemsetBuffy; ///< Buffer for writing itemsets to file output
/** @} */
/// @defgroup TimingVariables Timing Variables
/// Variables for timing (and instrumenting the code)
/** @{ */
time_t total_start, total_finish;
double total_time;
time_t read_start, read_finish;
double read_time;
time_t algorithm_start, algorithm_finish;
double algorithm_time;
time_t print_start, print_finish;
double print_time;
/** @} */
/** @} */
/*********************************************************************
Reading the data in and building the item bitmaps
*********************************************************************/
/// @addtogroup InputOutput Input/Output Functions
/// Reading the data in and building the item bitmaps.
/// Outputting the frequent itemsets.
/** @{ */
/////////////////////////////////////////////////////////////////////
/// Insert pointer to node in order of increasing support
///
/// @param newNode item node to add to F1
/////////////////////////////////////////////////////////////////////
void AddToF1(TreeNode *newNode) {
if (F1.empty())
F1.push_back(newNode);
else {
// use insertion sort for increasing support ordering
NodeList::iterator nol