
// All diagnostic output in this file goes through `logger`.
var logger = console;

/**
 * String comparison helper exposing a `compareTwoStrings(a, b)` scorer.
 *
 * The score is binary: 1 when the two strings are equal ignoring case,
 * 0 otherwise. Callers in this file test the score against `> 0.9`, which
 * with this implementation means "case-insensitive exact match".
 */
var stringSimilarity = {
  compareTwoStrings: function(a, b) {
    return a.toUpperCase() === b.toUpperCase() ? 1 : 0;
  }
};

/**
 * Finds configured legend fields in a flat list of words and resolves a value
 * for every legend that is found.
 *
 * Steps:
 *   1. Sort the words top-to-bottom, then left-to-right.
 *   2. Split words that embed a ":" (e.g. "Name:John") into separate words
 *      that share the original box.
 *   3. Group the words into rows and turn every row into a line of merged
 *      words via getSeqObjectForLine / convertSeqToWord.
 *   4. For every line and every field, look for a matching legend and resolve
 *      its value with the helper that fits the field's legend/value type.
 *
 * @param {Array<{d: string, b: Array<{x: number, y: number}>}>} data
 *   Words to search. `d` is the text and `b` holds four corner points;
 *   presumably ordered top-left, top-right, bottom-right, bottom-left —
 *   confirm against the producer. NOTE: this array is sorted in place.
 * @param {Array<{field: Array<Object>}>} fields
 *   Field definitions; each entry carries a list of alternative legend
 *   descriptions in `field`. Unless `config.getAll` is set, an entry is
 *   removed from this array (in place) once one of its alternatives matched.
 * @param {{getAll?: boolean}} [config]
 *   When `getAll` is truthy every match is collected and `fields` is left
 *   untouched. A missing config behaves like `{ getAll: false }`.
 * @returns {{fields: Array, resultObj: Array<Object>}}
 *   `fields` is the (possibly shortened) input array, `resultObj` the list of
 *   matches, each with `legend`, `field`, `value` and, for "combi"/"table"
 *   legends, `parentLegend`.
 */
var findValues = function(data, fields, config) {
  var resultObj = [];
  if (!data || data.length === 0) {
    // Nothing to scan: every field stays unmatched.
    return { fields, resultObj };
  }
  var getAll = Boolean(config && config.getAll);

  // Maximum horizontal gap for two words of a row to be merged into one word.
  var WORD_GAP = 0.02;

  // Splits every word containing ":" (other than a lone ":") into its text
  // pieces and ":" separators; all pieces share the source word's box.
  var splitColonWords = function(words) {
    var pieces = [];
    for (const word of words) {
      var text = word.d;
      if (text.trim().length === 1 || text.indexOf(":") === -1) {
        pieces.push(word);
        continue;
      }
      // The capture group keeps each ":" as its own element.
      for (const piece of text.split(/(:)/)) {
        if (piece.trim() !== "") {
          pieces.push({ b: word.b, d: piece });
        }
      }
    }
    return pieces;
  };

  // Groups words into rows: a word joins the row of the current anchor when
  // the anchor's vertical midpoint lies strictly inside the word's box.
  // Each row is sorted left-to-right and converted into a line of words.
  var groupWordsIntoLines = function(words) {
    var lines = [];
    var remaining = words;
    var cursor = 0;
    while (cursor < remaining.length) {
      var anchor = remaining[cursor];
      var lowerY = Math.max(anchor.b[3].y, anchor.b[2].y);
      var upperY = Math.max(anchor.b[0].y, anchor.b[1].y);
      var midY = (lowerY + upperY) / 2;

      var row = [];
      var rest = [];
      for (const word of remaining) {
        if (word.b[0].y < midY && word.b[3].y > midY) {
          row.push(word);
        } else {
          rest.push(word);
        }
      }

      if (row.length === 0) {
        // The anchor does not even contain its own midpoint (e.g. a
        // zero-height box); it cannot start a row, so move on.
        cursor++;
        continue;
      }

      // Words were removed, so the same cursor now points at a new word.
      remaining = rest;
      row.sort(function(a, b) {
        return a.b[0].x - b.b[0].x;
      });
      lines.push(convertSeqToWord(getSeqObjectForLine(row, WORD_GAP)));
    }
    return lines;
  };

  // Tests one word against one legend description. Returns the found-field
  // record when the word is that legend (and a value could be resolved),
  // otherwise null.
  var matchLegend = function(aField, aWord, lineIndex, wordIndex, lines) {
    var similarity;
    var matchedChildWord;

    switch (aField.legendType) {
      case "single":
        similarity = stringSimilarity.compareTwoStrings(
          aWord.d.toUpperCase().trim(),
          aField.singleWord.toUpperCase().trim()
        );
        if (!(similarity > 0.9)) {
          return null;
        }
        logger.log("similarity", similarity);
        if (aField.fieldValueType == "word") {
          return {
            legend: aWord,
            field: aField,
            value: findValueForField(lineIndex, wordIndex, lines, aField)
          };
        }
        if (aField.fieldValueType == "paragraph") {
          return {
            legend: aWord,
            field: aField,
            value: getParagraph(lineIndex, wordIndex, lines)
          };
        }
        return null;

      case "line":
        if (
          aWord.d
            .toUpperCase()
            .trim()
            .indexOf(aField.singleWord.toUpperCase().trim()) === -1 ||
          aField.fieldValueType != "line"
        ) {
          return null;
        }
        return {
          legend: aWord,
          field: aField,
          value: findLineForField(lineIndex, wordIndex, lines, aField)
        };

      case "combi":
      case "table":
        similarity = stringSimilarity.compareTwoStrings(
          aWord.d.toUpperCase().trim(),
          aField.parentWord.toUpperCase().trim()
        );
        if (!(similarity > 0.9)) {
          return null;
        }
        matchedChildWord = findNearestChildWord(
          lineIndex,
          wordIndex,
          lines,
          aField
        );
        if (!matchedChildWord) {
          return null;
        }
        if (aField.legendType == "table") {
          // Record the parent legend's horizontal extent on the field so a
          // "row" value lookup can pick the word nearest to that column.
          aWord.meta = getBoxData(aWord);
          aField.column_max_x = aWord.meta.max_x;
          aField.column_min_x = aWord.meta.min_x;
          aField.column_mid_x = aWord.meta.mid_x;
        }
        return {
          parentLegend: aWord,
          legend: matchedChildWord,
          field: aField,
          value: findValueForField(
            matchedChildWord.meta.lineIndex,
            matchedChildWord.meta.wordIndex,
            lines,
            aField
          )
        };

      default:
        logger.log("Invalid Field Type");
        return null;
    }
  };

  // Top-to-bottom, ties broken left-to-right. Sorts the caller's array.
  data.sort(function(a, b) {
    return a.b[0].y - b.b[0].y || a.b[0].x - b.b[0].x;
  });

  var arrWordObjArr = groupWordsIntoLines(splitColonWords(data));

  for (var i = 0; i < arrWordObjArr.length; i++) {
    var aLine = arrWordObjArr[i];
    for (var j = 0; j < fields.length; j++) {
      var multiField = fields[j].field;
      var consumed = false;

      for (let o = 0; o < multiField.length && !consumed; o++) {
        const aField = multiField[o];
        for (var k = 0; k < aLine.length; k++) {
          var foundField = matchLegend(aField, aLine[k], i, k, arrWordObjArr);
          if (!foundField) {
            continue;
          }
          resultObj.push(foundField);
          if (!getAll) {
            // First hit wins: stop scanning this field's other words AND
            // its other alternatives, so the field is removed exactly once.
            consumed = true;
            break;
          }
        }
      }

      if (consumed) {
        fields.splice(j, 1);
        j--;
      }
    }
  }

  return { fields, resultObj };
};
/**
 * Collects the paragraph that follows a legend word.
 *
 * The paragraph starts with the words to the right of the legend on its own
 * line (if any) and continues with each following line for as long as the
 * `line_mid` of a line's first word stays within `lineDiff` of the previous
 * line's. An empty line also ends the paragraph.
 *
 * Side effect: stores the legend's box data on `meta` of the legend word.
 *
 * @param {number} lineIndex - index of the legend's line in `arrWordObjArr`.
 * @param {number} wordIndex - index of the legend inside that line.
 * @param {Array<Array<Object>>} arrWordObjArr - all lines, each a list of words.
 * @param {number} [lineDiff=0.02] - largest allowed `line_mid` step between
 *   consecutive lines of one paragraph.
 * @returns {Array<Array<Object>>} the paragraph's lines. The first entry (the
 *   remainder of the legend line) is a new array; following entries are the
 *   line arrays from `arrWordObjArr` themselves.
 */
var getParagraph = function(lineIndex, wordIndex, arrWordObjArr, lineDiff = 0.02) {
  var mainWord = arrWordObjArr[lineIndex][wordIndex];
  mainWord.meta = getBoxData(mainWord);

  var lineArr = [];

  var restOfLegendLine = arrWordObjArr[lineIndex].slice(wordIndex + 1);
  if (restOfLegendLine.length > 0) {
    lineArr.push(restOfLegendLine);
  }

  for (var i = lineIndex + 1; i < arrWordObjArr.length; i++) {
    var aLine = arrWordObjArr[i];
    var previousLine = arrWordObjArr[i - 1];
    if (aLine.length === 0 || previousLine.length === 0) {
      // No word to read `line_mid` from: treat as the end of the paragraph.
      break;
    }
    if (Math.abs(aLine[0].line_mid - previousLine[0].line_mid) < lineDiff) {
      lineArr.push(aLine);
    } else {
      break;
    }
  }

  return lineArr;
};

/**
 * Finds the word matching `field.childWord` that lies closest to a parent
 * legend, looking only at the parent's own line and the lines after it.
 *
 * Distance is measured between the boxes' `mid_x` horizontally and `min_y`
 * vertically. Every matching word gets `meta` replaced with its box data plus
 * `distance`, `wordIndex` and `lineIndex`; the parent word's `meta` is
 * (re)computed as well.
 *
 * @param {number} lineIndex - line of the parent legend.
 * @param {number} wordIndex - position of the parent legend in that line.
 * @param {Array<Array<Object>>} arrWordObjArr - all lines of words.
 * @param {{childWord: string}} field - field description naming the child legend.
 * @returns {Object|string} the nearest matching word, or "" when none matches.
 */
var findNearestChildWord = function(
  lineIndex,
  wordIndex,
  arrWordObjArr,
  field
) {
  const parent = arrWordObjArr[lineIndex][wordIndex];
  parent.meta = getBoxData(parent);

  const candidates = [];

  for (let row = 0; row < arrWordObjArr.length; row++) {
    if (row < lineIndex) {
      continue; // lines above the parent are never considered
    }
    const words = arrWordObjArr[row];
    for (let col = 0; col < words.length; col++) {
      const candidate = words[col];
      const score = stringSimilarity.compareTwoStrings(
        candidate.d.toUpperCase().trim(),
        field.childWord.toUpperCase().trim()
      );
      if (!(score > 0.9)) {
        continue;
      }

      candidate.meta = getBoxData(candidate);
      const dx = Math.abs(parent.meta.mid_x - candidate.meta.mid_x);
      const dy = Math.abs(parent.meta.min_y - candidate.meta.min_y);
      candidate.meta.distance = Math.sqrt(dx * dx + dy * dy);
      candidate.meta.wordIndex = col;
      candidate.meta.lineIndex = row;
      candidates.push(candidate);
    }
  }

  if (candidates.length === 0) {
    return "";
  }

  candidates.sort((first, second) => first.meta.distance - second.meta.distance);
  return candidates[0];
};

/**
 * Builds the text of one or more whole lines, starting at the legend's line.
 *
 * Only fields with `fieldValuePosition == "next"` produce text. A
 * `fieldValueQuantity` of "one" reads the legend's line; any other value is
 * used as the number of consecutive lines to read (starting with the legend's
 * line). Lines that do not exist contribute nothing. Every word with a truthy
 * `d` is appended followed by a single space, so the result keeps a trailing
 * space. The final text is also logged.
 *
 * @param {number} lineIndex - line that holds the legend.
 * @param {number} wordIndex - position of the legend in the line (not used).
 * @param {Array<Array<{d: string}>>} arrWordObjArr - all lines of words.
 * @param {{fieldValuePosition: string, fieldValueQuantity: (string|number)}} field
 * @returns {{d: string}} object carrying the collected text in `d`.
 */
var findLineForField = function(lineIndex, wordIndex, arrWordObjArr, field) {
  const finalValue = { d: "" };

  // Appends the text of every word of `line`; a missing line adds nothing.
  const appendLineText = (line) => {
    for (const word of line || []) {
      if (word.d) {
        finalValue.d += `${word.d} `;
      }
    }
  };

  if (field.fieldValuePosition === "next") {
    if (field.fieldValueQuantity === "one") {
      appendLineText(arrWordObjArr[lineIndex]);
    } else {
      for (let offset = 0; offset < field.fieldValueQuantity; offset++) {
        appendLineText(arrWordObjArr[lineIndex + offset]);
      }
    }
  }

  logger.log("finalValue", finalValue.d);
  return finalValue;
};
/**
 * Resolves the value that belongs to a legend word.
 *
 * Behaviour by `field.fieldValuePosition`:
 *  - "next": take the word(s) right after the legend on the same line.
 *      - quantity "one": the next word; when the legend is the last word of
 *        its line, fall back to findNearestValue.
 *      - quantity "two": the next two words combined into
 *        `{ d: "<first> <second>", words: [...] }`. Missing words are simply
 *        left out; when neither exists the findNearestValue fallback is used.
 *      - any other quantity: not supported yet, returns null.
 *  - "row": among the words after the legend on its line, the one whose
 *    horizontal midpoint is closest to `field.column_min_x`.
 *  - anything else: whatever findNearestValue returns.
 *
 * @param {number} lineIndex - line that holds the legend.
 * @param {number} wordIndex - position of the legend in that line.
 * @param {Array<Array<Object>>} arrWordObjArr - all lines of words.
 * @param {Object} field - field description (`fieldValuePosition`,
 *   `fieldValueQuantity`, and `column_min_x` for "row" lookups).
 * @returns {Object|null} the value word (or combined value object), or null
 *   when no value could be determined.
 */
var findValueForField = function(lineIndex, wordIndex, arrWordObjArr, field) {
  var line = arrWordObjArr[lineIndex];

  if (field.fieldValuePosition == "next") {
    if (field.fieldValueQuantity == "one") {
      // If there is no next word on the line, use the nearest word instead.
      return (
        line[wordIndex + 1] ||
        findNearestValue(lineIndex, wordIndex, arrWordObjArr, field)
      );
    }

    if (field.fieldValueQuantity == "two") {
      var picked = [line[wordIndex + 1], line[wordIndex + 2]].filter(Boolean);
      if (picked.length === 0) {
        var nearest = findNearestValue(
          lineIndex,
          wordIndex,
          arrWordObjArr,
          field
        );
        if (nearest) {
          picked.push(nearest);
        }
      }
      if (picked.length === 0) {
        return null;
      }
      // Join the words' text; concatenating the word objects themselves
      // would only produce "[object Object]".
      return {
        d: picked
          .map(function(word) {
            return word.d;
          })
          .join(" "),
        words: picked
      };
    }

    // TODO: other quantities are not supported yet.
    return null;
  }

  if (field.fieldValuePosition == "row") {
    var best = null;
    var bestDistance = 0;
    for (var i = wordIndex + 1; i < line.length; i++) {
      var distance = Math.abs(
        getBoxData(line[i]).mid_x - field.column_min_x
      );
      // Strict "<" keeps the left-most word when distances tie.
      if (best === null || distance < bestDistance) {
        best = line[i];
        bestDistance = distance;
      }
    }
    if (best) {
      logger.log("Word Found : ", best.d);
    }
    return best;
  }

  return findNearestValue(lineIndex, wordIndex, arrWordObjArr);
};

/**
 * Finds the word closest to a legend on the neighbouring line.
 *
 * The line below the legend is searched when it exists, otherwise the line
 * above. Distance is measured between the boxes' `mid_x` horizontally and
 * `min_y` vertically.
 *
 * Side effects: the legend and every word of the searched line get `meta`
 * replaced with their box data; searched words additionally receive
 * `meta.distance` and `meta.orginalIndex` (their position in the line). The
 * order of the words in `arrWordObjArr` is left untouched, so indices taken
 * before the call stay valid.
 *
 * @param {number} lineIndex - line that holds the legend.
 * @param {number} wordIndex - position of the legend in that line.
 * @param {Array<Array<Object>>} arrWordObjArr - all lines of words.
 * @returns {Object|null} the nearest word (the left-most one on ties), or
 *   null when there is no neighbouring line or it holds no words.
 */
var findNearestValue = function(lineIndex, wordIndex, arrWordObjArr) {
  var mainWord = arrWordObjArr[lineIndex][wordIndex];
  mainWord.meta = getBoxData(mainWord);

  var toFindInLine =
    arrWordObjArr[lineIndex + 1] || arrWordObjArr[lineIndex - 1];
  if (!toFindInLine || toFindInLine.length === 0) {
    return null;
  }

  var nearest = null;
  for (var i = 0; i < toFindInLine.length; i++) {
    var aWord = toFindInLine[i];
    aWord.meta = getBoxData(aWord);
    var a = Math.abs(mainWord.meta.mid_x - aWord.meta.mid_x);
    var b = Math.abs(mainWord.meta.min_y - aWord.meta.min_y);
    aWord.meta.distance = Math.sqrt(a * a + b * b);
    aWord.meta.orginalIndex = i;

    // Linear scan instead of sorting: sorting would reorder the shared line.
    if (nearest === null || aWord.meta.distance < nearest.meta.distance) {
      nearest = aWord;
    }
  }
  return nearest;
};

/**
 * Summarises the corner points of a word as an axis-aligned box.
 *
 * @param {{b: Array<{x: number, y: number}>}} aWord - word whose `b` lists
 *   its corner points.
 * @returns {{mid_x: number, mid_y: number, min_x: number, max_x: number,
 *   min_y: number, max_y: number, div_width: number, div_height: number}}
 *   `mid_*` is the mean of the corner coordinates, `min_*`/`max_*` are the
 *   extremes, and `div_width`/`div_height` the box extents.
 */
var getBoxData = function(aWord) {
  const corners = aWord.b;
  const ascending = (left, right) => left - right;
  const total = (values) => values.reduce((sum, value) => sum + value, 0);

  const xs = corners.map((corner) => corner.x);
  const ys = corners.map((corner) => corner.y);

  // Means are taken before sorting so the summation order matches the
  // order of the corners.
  const mid_x = total(xs) / corners.length;
  const mid_y = total(ys) / corners.length;

  xs.sort(ascending);
  ys.sort(ascending);

  const min_x = xs[0];
  const max_x = xs[xs.length - 1];
  const min_y = ys[0];
  const max_y = ys[ys.length - 1];

  return {
    mid_x,
    mid_y,
    min_x,
    max_x,
    min_y,
    max_y,
    div_width: max_x - min_x,
    div_height: max_y - min_y
  };
};

/**
 * Turns one row of words (sorted left-to-right) into a sequence that
 * alternates between blank gaps and groups of adjacent words.
 *
 * Sequence items are either
 *   - a gap: `{ start, end, blank: true }`, or
 *   - a group: an array of `{ start, end, blank: false, data: word }`.
 *
 * Rules:
 *   - The sequence opens with a gap from 0 to the first word's left edge and
 *     closes with a gap from the last word's right edge to 1 (presumably
 *     coordinates are normalised to 0..1 — confirm with the producer).
 *   - A word closer than `minSeperation` to the previous word's right edge
 *     joins the current group; otherwise a gap is emitted and a new group
 *     starts.
 *   - A ":" word that is neither first nor last becomes a gap spanning its
 *     own box, so it separates a legend from its value.
 *   - When the last word follows such a ":" gap, an empty group is emitted
 *     before the word is placed; if the word is not close to the ":" that
 *     empty group stays in the sequence.
 *
 * @param {Array<{d: string, b: Array<{x: number}>}>} aLine - words of one
 *   row; `b[0].x` is read as the left edge and `b[1].x` as the right edge.
 * @param {number} minSeperation - gap below which two words are grouped.
 * @returns {Array<Object|Array<Object>>} the gap/group sequence; empty for an
 *   empty row.
 */
var getSeqObjectForLine = function(aLine, minSeperation) {
  const makeGap = (start, end) => ({ start, end, blank: true });
  const makeEntry = (word) => ({
    start: word.b[0].x,
    end: word.b[1].x,
    blank: false,
    data: word
  });

  const sequence = [];
  const lastIndex = aLine.length - 1;
  const tailIsGroup = () => Array.isArray(sequence[sequence.length - 1]);

  for (let idx = 0; idx <= lastIndex; idx++) {
    const word = aLine[idx];

    if (idx === 0) {
      sequence.push(makeGap(0, word.b[0].x), [makeEntry(word)]);
      if (lastIndex === 0) {
        // A single-word row is closed right away.
        sequence.push(makeGap(word.b[1].x, 1));
      }
      continue;
    }

    const isLast = idx === lastIndex;

    if (!isLast && word.d === ":") {
      sequence.push(makeGap(word.b[0].x, word.b[1].x));
      continue;
    }

    const previousRight = aLine[idx - 1].b[1].x;
    const isClose = word.b[0].x - previousRight < minSeperation;

    // The last word always makes sure a group precedes it; an inner word
    // only needs one when it is about to join it.
    if ((isLast || isClose) && !tailIsGroup()) {
      sequence.push([]);
    }

    if (isClose) {
      sequence[sequence.length - 1].push(makeEntry(word));
    } else {
      sequence.push(makeGap(previousRight, word.b[0].x), [makeEntry(word)]);
    }

    if (isLast) {
      sequence.push(makeGap(word.b[1].x, 1));
    }
  }

  return sequence;
};

/**
 * Collapses a gap/group sequence (see getSeqObjectForLine) into the list of
 * words of one line.
 *
 * - Gap items are skipped.
 * - A group with exactly one entry contributes its word object unchanged.
 * - A larger group is merged into one new word:
 *     `d`     texts joined with a single space,
 *     `b`     the axis-aligned box around all corners (top-left, top-right,
 *             bottom-right, bottom-left),
 *     `c`     mean of the parts' `c` values,
 *     `l`     concatenation of the parts' `l` values (parts without `l` are
 *             skipped),
 *     `w_o_p` the parts' `orignalIndex` values.
 *   Merged words whose text is blank are dropped.
 *
 * Every returned word — including the unchanged single words, which are
 * mutated — gets `line_mid`, the mean y of all corner points of all words on
 * the line.
 *
 * @param {Array<Object|Array<{data: Object}>>} seqObj - gap/group sequence.
 * @returns {Array<Object>} the line's words, left-to-right as in `seqObj`.
 */
var convertSeqToWord = function(seqObj) {
  var wordArr = [];
  var ySum = 0;
  var cornerCount = 0;

  // Adds a word's corners to the running mean used for `line_mid`.
  var accumulateCorners = function(word) {
    for (const corner of word.b) {
      ySum += corner.y;
      cornerCount++;
    }
  };

  var ascending = function(a, b) {
    return a - b;
  };

  for (const group of seqObj) {
    if (!Array.isArray(group)) {
      continue; // blank gap
    }

    if (group.length === 1) {
      // Single words count towards the line's mid-height too; otherwise a
      // line made only of single words would end up with a NaN `line_mid`.
      accumulateCorners(group[0].data);
      wordArr.push(group[0].data);
      continue;
    }

    var all_x = [];
    var all_y = [];
    var all_d = [];
    var all_l = [];
    var w_o_p = [];
    var avg_c = 0;

    for (const entry of group) {
      var part = entry.data;
      w_o_p.push(part.orignalIndex);
      for (const corner of part.b) {
        all_x.push(corner.x);
        all_y.push(corner.y);
      }
      accumulateCorners(part);
      all_d.push(part.d);
      if (part.l != null) {
        // concat returns a new array, so the result has to be kept.
        all_l = all_l.concat(part.l);
      }
      avg_c += part.c;
    }
    avg_c = avg_c / group.length;

    all_x.sort(ascending);
    all_y.sort(ascending);

    var left = all_x[0];
    var right = all_x[all_x.length - 1];
    var top = all_y[0];
    var bottom = all_y[all_y.length - 1];

    var merged = {
      l: all_l,
      w_o_p: w_o_p,
      d: all_d.join(" "),
      b: [
        { x: left, y: top },
        { x: right, y: top },
        { x: right, y: bottom },
        { x: left, y: bottom }
      ],
      c: avg_c
    };
    if (merged.d.trim() !== "") {
      wordArr.push(merged);
    }
  }

  var lineMid = ySum / cornerCount;
  for (const word of wordArr) {
    word.line_mid = lineMid;
  }

  return wordArr;
};


