#pragma once

#define _CRT_SECURE_NO_WARNINGS

#include <iostream>
#include <fstream>
#include <thread>
#include <mutex>
#include <vector>
#include <algorithm>
#include <unordered_map>
#include <unordered_set>
#include <string>
#include <iomanip>
#include <memory.h>
#include <time.h>
#include <stdlib.h>
#include "Core.h"

// NOTE(review): a using-directive in a header leaks namespace std into every
// file that includes it. Declarations further down in this header rely on it
// for unqualified names such as `ifstream` and `pair`, so it cannot simply be
// removed without qualifying those uses first.
using namespace std;

// Platform-dependent shell fragments used to assemble command lines.
//   MKDIR / RM / RN : shell command names, each with a trailing space, wrapped
//                     in STRING so they can be concatenated with `+`.
//   SLASH           : path separator as a string literal.
//   XMX             : size string ("10G" on Windows, "200G" elsewhere).
//                     NOTE(review): looks like a JVM -Xmx heap size; confirm
//                     against the call sites.
//   NULDEV          : name of the platform's null device, used by CMD().
// Both WIN32 and _WIN32 are tested: _WIN32 is predefined by Windows compilers,
// while WIN32 is only supplied by SDK headers or project settings.
#if defined(WIN32) || defined(_WIN32)
#define MKDIR STRING("md ")
#define SLASH "\\"
#define RM STRING("del ")
#define RN STRING("ren ")
#define XMX "10G"
#define NULDEV "nul"
#else
#define MKDIR STRING("mkdir ")
#define SLASH "/"
#define RM STRING("rm ")
#define RN STRING("mv ")
#define XMX "200G"
#define NULDEV "/dev/null"
#endif

// Runs the command line X through system() with its standard output discarded.
// X must be an expression for which `X + "literal"` yields an object with
// c_str() (e.g. a STRING). The result of system() is the value of the macro.
// The redirect targets the platform null device; redirecting to "nul" on a
// POSIX shell would create a regular file called "nul" in the working directory.
#define CMD(X) system(((X) + " > " NULDEV).c_str())
// Smaller / larger of two values. Every use of an argument is parenthesised so
// that operands containing low-precedence operators (e.g. `a & b`, `c ? x : y`)
// are compared as a whole. Each argument may still be evaluated twice, so do
// not pass expressions with side effects.
#define MIN(A,B) ((A) < (B) ? (A) : (B))
#define MAX(A,B) ((A) > (B) ? (A) : (B))
// Logarithm of A in base B, computed as log(A)/log(B).
#define LOG(A,B) (log(A)/log(B))

// Takes no arguments and returns nothing; the definition is not part of this
// header. NOTE(review): presumably a developer-only debugging hook; confirm.
void Debug();

//Utility.cpp	==============================================================================================================
// Declarations only; per the section header the definitions live in Utility.cpp.
// Remarks marked "presumably" are inferred from names and signatures and must
// be confirmed against the definitions.

// Presumably returns a buffer holding the contents of file `strFile`.
// NOTE(review): the signature carries no length and no ownership information;
// confirm how callers learn the size and who releases the buffer.
INT8* ReadAllBytes(STRING strFile);
// Presumably writes LENGTH bytes starting at `buf` to file `strFile`.
VOID WriteAllBytes(STRING strFile, INT8* buf, INT32 LENGTH);
// Presumably the size of file `strFile`; unit and error value not visible here.
INT64 FileLength(STRING strFile);
// Parses `str`; the parsed number is delivered through the reference `value`.
// NOTE(review): meaning of the INT32 return (status? characters consumed?) is
// not visible here; confirm.
INT32 ParseNumber(STRING str, FLOAT &value);
// Presumably decodes the UTF-8 text `utf8` into one INT32 code point per element.
VECTOR<INT32> ToUTF32(STRING utf8);
// Presumably formats a time value as text. This function is declared a second
// time in the Log.cpp section of this header with the parameter named
// `timeInSeconds`; the two declarations have the same type.
STRING PrintTime(INT64 clock);
// Buffer readers: `ptr` is taken by reference, so the callee is able to move
// the caller's cursor. Presumably each reads one value at `ptr` and advances
// it past that value; confirm.
INT32 ReadINT32(INT8* &ptr);
INT64 ReadINT64(INT8* &ptr);
FLOAT ReadFLOAT(INT8* &ptr);
DOUBLE ReadDOUBLE(INT8* &ptr);
STRING ReadSTRING(INT8* &ptr);
// Stream readers: same names as above, overloaded on an input file stream pointer.
INT32 ReadINT32(ifstream* fi);
INT64 ReadINT64(ifstream* fi);
STRING ReadSTRING(ifstream* fi);
// Presumably splits `line` into fields using `sep`.
// NOTE(review): `line` is non-const, so the callee may modify it in place; confirm.
VECTOR<STRING> ReadSeparatedLine(INT8* line, const INT8* sep);
// Set operations over two INT32 vectors, returned as a SET<INT32>. Both inputs
// are taken by non-const reference.
// NOTE(review): whether the inputs are modified or must be sorted is not visible here.
SET<INT32> Intersect(VECTOR<INT32> &A, VECTOR<INT32> &B);
SET<INT32> Union(VECTOR<INT32> &A, VECTOR<INT32> &B);

// Binary read helpers in macro form. No byte-order conversion is performed:
// bytes are copied verbatim into the object representation of VARIABLE.
//   StrRead*(FILESTREAM, VARIABLE): calls FILESTREAM.read() for sizeof(type)
//       bytes into VARIABLE. The stream state is not checked here.
//   BufRead*(BUF, VARIABLE): memcpy's sizeof(type) bytes from BUF into
//       VARIABLE, then advances BUF by that many bytes (BUF must be a
//       modifiable pointer lvalue).
// Arguments are parenthesised so that expressions such as `*pStream` or
// `*pCursor` can be passed. Each macro expands to a braced block; a trailing
// `;` after the invocation therefore forms an extra empty statement, which
// matters directly before an `else`.
#define StrReadINT32(FILESTREAM, VARIABLE) {(FILESTREAM).read((INT8*)&(VARIABLE), sizeof(INT32));}
#define StrReadINT64(FILESTREAM, VARIABLE) {(FILESTREAM).read((INT8*)&(VARIABLE), sizeof(INT64));}
#define BufReadINT32(BUF, VARIABLE) {memcpy(&(VARIABLE), (BUF), sizeof(INT32)); (BUF) += sizeof(INT32);}
#define BufReadINT64(BUF, VARIABLE) {memcpy(&(VARIABLE), (BUF), sizeof(INT64)); (BUF) += sizeof(INT64);}
#define BufReadFLOAT(BUF, VARIABLE) {memcpy(&(VARIABLE), (BUF), sizeof(FLOAT)); (BUF) += sizeof(FLOAT);}
#define BufReadDOUBLE(BUF, VARIABLE) {memcpy(&(VARIABLE), (BUF), sizeof(DOUBLE)); (BUF) += sizeof(DOUBLE);}

//Log.cpp  =============================================================================================================
// Log file stream and the mutex taken by the *SAFE macros below. Only declared
// here; the definitions live outside this header.
extern std::ofstream LOGSTREAM;
extern std::mutex LOGMT;
// Logging macros. MESSAGE is pasted between `<<` operators, so it may itself
// be an insertion chain such as `"x=" << x << "\n"`; for that reason it must
// NOT be wrapped in parentheses. Every write is followed by a flush.
//   LOGALL(MESSAGE)            : writes to cout and to LOGSTREAM. MESSAGE is
//                                expanded, and therefore evaluated, twice.
//   LOGSTR(STREAM, MESSAGE)    : writes to STREAM only.
//   LOGALLSAFE / LOGSTRSAFE    : same as above while holding LOGMT. The lock is
//                                held by a std::lock_guard so it is released
//                                even if evaluating MESSAGE throws. LOGMT is a
//                                plain mutex: MESSAGE must not itself invoke a
//                                *SAFE macro on the same thread.
#define LOGALL(MESSAGE) {std::cout << MESSAGE << std::flush; LOGSTREAM << MESSAGE << std::flush;}
#define LOGSTR(STREAM, MESSAGE) { STREAM << MESSAGE << std::flush; }
#define LOGALLSAFE(MESSAGE) {std::lock_guard<std::mutex> logMacroGuard_(LOGMT); std::cout << MESSAGE << std::flush; LOGSTREAM << MESSAGE << std::flush;}
#define LOGSTRSAFE(STREAM, MESSAGE) {std::lock_guard<std::mutex> logMacroGuard_(LOGMT); STREAM << MESSAGE << std::flush;}

// Timing / progress helpers; declarations only.
// NOTE(review): unit and reference point of the returned tick value are not
// visible in this header; confirm in the definition.
INT64 TickCount();
// Presumably the current date/time rendered as text; format not visible here.
STRING Now();
// Second declaration of PrintTime in this header: the Utility.cpp section
// declares the same function with the parameter named `clock`. The types are
// identical, so this is a harmless redeclaration, but the differing parameter
// names leave the expected unit ambiguous; confirm against the definition.
STRING PrintTime(INT64 timeInSeconds);
// `state` defaults to 1 when omitted.
// NOTE(review): the meaning of `state` and of the INT32 result is not visible here.
INT32 Loop(INT64 state = 1);

//Similarity.cpp =============================================================================================================
// Five functions sharing one signature: two DataSource pointers, two SPOEntry
// pointers, two INT32 values, returning a FLOAT.
// NOTE(review): presumably each scores the similarity of `src` (from dSrc)
// against `des` (from dDes), with pSrc/pDes selecting the property to compare;
// value range and orientation (higher = more similar?) are not visible here.
FLOAT ExactMatching(DataSource *dSrc, DataSource *dDes, SPOEntry *src, SPOEntry *des, INT32 pSrc, INT32 pDes);
FLOAT ReverseDifference(DataSource *dSrc, DataSource *dDes, SPOEntry *src, SPOEntry *des, INT32 pSrc, INT32 pDes);
FLOAT Levenshtein(DataSource *dSrc, DataSource *dDes, SPOEntry *src, SPOEntry *des, INT32 pSrc, INT32 pDes);
FLOAT TFIDFCosine(DataSource *dSrc, DataSource *dDes, SPOEntry *src, SPOEntry *des, INT32 pSrc, INT32 pDes);
FLOAT DiffBM25Jaccard(DataSource *dSrc, DataSource *dDes, SPOEntry *src, SPOEntry *des, INT32 pSrc, INT32 pDes);

//Alignment.cpp
// Returns VOID and takes `alg` and `sharedTokens` as pointers, so results are
// presumably written through them (output parameters); confirm.
VOID Align(DataSource *dSrc, DataSource *dDes, ASLParameter params, VECTOR<Alignment> *alg, VECTOR<VECTOR<INT32>> *sharedTokens);
// Presumably persists `alg` to file `strFile`. `alg` is passed by value, so
// the vector is copied at every call.
VOID Save(STRING strFile, VECTOR<Alignment> alg);
// Presumably the counterpart of Save: reads alignments back from `strFile`.
VECTOR<Alignment> LoadAlignment(STRING strFile);
// Builds a list of SimilarityFunction from `alg`, which is taken by non-const
// reference. NOTE(review): whether `alg` is modified is not visible here.
VECTOR<SimilarityFunction> GenerateSimilarityFunction(VECTOR<Alignment> &alg);

//Blocking.cpp =============================================================================================================
// Takes the same `alg` / `sharedTokens` pointer types that Align() receives,
// plus an output file path. NOTE(review): presumably generates candidate
// blocks between dSrc and dDes and writes them to `strOutputFile`; confirm.
VOID Block(DataSource* dSrc, DataSource* dDes, VECTOR<Alignment> *alg, VECTOR<VECTOR<INT32>> *sharedTokens, ASLParameter params, STRING strOutputFile);

//Matching.cpp =============================================================================================================
// Three overloads of Match sharing the leading parameters
// (dSrc, dDes, pBlock, config):
//   1) returns the score entries directly as a VECTOR<ScoreEntryEx>;
//   2) returns VOID and takes an output file path;
//   3) as (2) but additionally takes `refLinks` (placed before `params`).
// NOTE(review): presumably (2) and (3) write their results to the given file
// and `refLinks` holds reference links; confirm in Matching.cpp.
VECTOR<ScoreEntryEx> Match(DataSource *dSrc, DataSource *dDes, BlockSpliter *pBlock, Configuration *config, ASLParameter params);
VOID Match(DataSource *dSrc, DataSource *dDes, BlockSpliter *pBlock, Configuration *config, ASLParameter params, STRING strOutputFile);
VOID Match(DataSource *dSrc, DataSource *dDes, BlockSpliter *pBlock, Configuration *config, SET<INT64> *refLinks, ASLParameter params, STRING strOutput);

//Aggregator.cpp
// Produces a single FLOAT from one score entry.
// NOTE(review): presumably a linear aggregation of the entry's per-function
// scores, where `sFOI` selects functions, `sFOIThreshold` holds their
// thresholds, the three `use*` INT32 values act as on/off switches and `K` is
// an additional tuning value; none of this is visible here, so confirm.
FLOAT Linear(ScoreEntryEx *entry, VECTOR<INT32> *sFOI, VECTOR<FLOAT> *sFOIThreshold, INT32 useBoolean, INT32 useAverage, INT32 useWeighting, INT32 K);

//Filter.cpp =============================================================================================================
// Two overloads differing only in where the scores come from: a score file
// path, or an in-memory vector of ScoreEntry. Both return (INT64, FLOAT) pairs.
// NOTE(review): presumably the INT64 is a link identifier and the FLOAT its
// score; the roles of `filteringFactor` / `filteringThreshold` are not
// visible here.
VECTOR<pair<INT64, FLOAT>> StableFiltering(STRING strScoreFile, FLOAT filteringFactor, FLOAT filteringThreshold);
VECTOR<pair<INT64, FLOAT>> StableFiltering(VECTOR<ScoreEntry> *ens, FLOAT filteringFactor, FLOAT filteringThreshold);

//Evaluator.cpp
// Presumably loads the reference link set from `strFile`.
SET<INT64> LoadReference(STRING strFile);
// Two overloads returning an RPF: one takes detected (INT64, FLOAT) pairs, the
// other a block file path; both take the reference set.
// NOTE(review): RPF presumably bundles recall / precision / F-measure; confirm
// against its declaration.
RPF Evaluate(VECTOR<pair<INT64, FLOAT>> *detected, SET<INT64> *reference);
RPF Evaluate(STRING strBlockFile, SET<INT64> *reference);
// Results are delivered through the FLOAT references `m` and `e`; `v` is
// passed by value. NOTE(review): per the name, presumably mean and standard
// deviation of `v`; confirm.
// Only the template declaration is visible here; the two `extern template`
// lines are explicit instantiation declarations, i.e. the FLOAT and INT64
// instantiations are expected to be provided by another translation unit.
template <typename T> VOID MeanStDev(VECTOR<T> v, FLOAT &m, FLOAT &e);
extern template VOID MeanStDev(VECTOR<FLOAT> v, FLOAT &m, FLOAT &e);
extern template VOID MeanStDev(VECTOR<INT64> v, FLOAT &m, FLOAT &e);

//ASL.cpp
// Function-pointer type returning a Configuration. Its parameter type list is
// the same as that of the learner declarations in the "Learning algorithms"
// section of this header, so any of those functions can be stored in a Learner.
typedef Configuration(*Learner)(DataSource*, DataSource*, VECTOR<ScoreEntryEx>*, VECTOR<SimilarityFunction>, SET<INT64>*, SET<INT64>*, ASLParameter, BlockSpliter*, STRING);
// Pipeline entry points; each takes the run parameters and an output
// directory (GenerateSplit additionally takes `nRepeat`).
// NOTE(review): what each step reads and writes is not visible in this
// header; confirm in ASL.cpp.
VOID cLink(ASLParameter params, STRING strOutputDir);
VOID GenerateAlignment(ASLParameter params, STRING strOutDir);
VOID GenerateBlockingFile(ASLParameter params, STRING strOutDir);
VOID MatchWithDefaultConfiguration(ASLParameter params, STRING strOutDir);
VOID GenerateSplit(ASLParameter params, INT32 nRepeat, STRING strOutDir);

//cLink.cpp
// Declarations only. Purposes below are inferred from names and signatures
// and are marked as such; confirm in cLink.cpp.

// Returns a vector of Configuration derived from the score entries, a
// configuration and the reference links.
VECTOR<Configuration> SortSimilarityFunctions(VECTOR<ScoreEntryEx> *ens, Configuration *config, SET<INT64> *refLinks);
// Returns VOID and takes `config` by pointer; presumably the threshold found
// is stored into *config.
VOID FindThreshold(VECTOR<ScoreEntryEx> *ens, Configuration *config, SET<INT64> *refLinks);
// NOTE(review): `map` is a pointer and presumably an output; the meaning of
// `topSim` and of the INT32 result is not visible here.
INT32 MapSimilarityFunctions(VECTOR<Configuration>* config, VECTOR<INT32>* map, INT32 topSim);
// `config` is passed together with `configCount`, so it presumably points to
// an array of that many configurations, one of which is returned.
Configuration Validate(DataSource *dSrc, DataSource* dDes, BlockSpliter *bBlock, SET<INT64> *refValidation, Configuration* config, INT32 configCount, ASLParameter params, STRING strOutputDir);
// Returns a vector of INT32. NOTE(review): presumably indices into *ens
// forming a balanced sample; confirm.
VECTOR<INT32> SelectBalanceDataset(VECTOR<ScoreEntryEx> *ens, Configuration *def, SET<INT64> *refLinks);
// NOTE(review): presumably writes training/test files for an external tool;
// whether strTraining/strTest are inputs or output paths is not visible here.
VOID GenerateExternalFile(ASLParameter params, STRING strTraining, STRING strTest);

//Learning algorithms
// Six functions with an identical signature, each returning a Configuration.
// The parameter types match the Learner function-pointer typedef declared in
// the ASL.cpp section of this header.
// NOTE(review): presumably each learns a configuration from the score entries
// `ens` using `refTrain`, with `refValidation` for validation and
// `strOutputDir` for any files produced; the strategy implied by each name is
// not visible here. `simFunctions` is passed by value (copied per call).
Configuration Naive(DataSource *dSrc, DataSource *dDes, VECTOR<ScoreEntryEx> *ens, VECTOR<SimilarityFunction> simFunctions, SET<INT64> *refTrain, SET<INT64> *refValidation, ASLParameter params, BlockSpliter *bBlock, STRING strOutputDir);
Configuration Heuristic(DataSource *dSrc, DataSource *dDes, VECTOR<ScoreEntryEx> *ens, VECTOR<SimilarityFunction> simFunctions, SET<INT64> *refTrain, SET<INT64> *refValidation, ASLParameter params, BlockSpliter *bBlock, STRING strOutputDir);
Configuration Exhaustive(DataSource *dSrc, DataSource *dDes, VECTOR<ScoreEntryEx> *ens, VECTOR<SimilarityFunction> simFunctions, SET<INT64> *refTrain, SET<INT64> *refValidation, ASLParameter params, BlockSpliter *bBlock, STRING strOutputDir);
Configuration InfoGain(DataSource *dSrc, DataSource *dDes, VECTOR<ScoreEntryEx> *ens, VECTOR<SimilarityFunction> simFunctions, SET<INT64> *refTrain, SET<INT64> *refValidation, ASLParameter params, BlockSpliter *bBlock, STRING strOutputDir);
Configuration Genetic(DataSource *dSrc, DataSource *dDes, VECTOR<ScoreEntryEx> *ens, VECTOR<SimilarityFunction> simFunctions, SET<INT64> *refTrain, SET<INT64> *refValidation, ASLParameter params, BlockSpliter *bBlock, STRING strOutputDir);
Configuration AcceptAll(DataSource *dSrc, DataSource *dDes, VECTOR<ScoreEntryEx> *ens, VECTOR<SimilarityFunction> simFunctions, SET<INT64> *refTrain, SET<INT64> *refValidation, ASLParameter params, BlockSpliter *bBlock, STRING strOutputDir);

