package org.tip.flatdb4geonames.model.index;

import fr.devinsy.util.StringSet;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.Locale;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.fop.render.java2d.Java2DFontMetrics;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.tip.flatdb4geonames.model.GeoNamesLine;
import org.tip.flatdb4geonames.util.Chronometer;
import org.tip.flatdb4geonames.util.Shrinker;
import org.tip.flatdb4geonames.util.StringFileSorter;

/* loaded from: input_file:org/tip/flatdb4geonames/model/index/IndexOfWordSeeksBuilder.class */
/**
 * Builds the word index files of a GeoNames dump.
 *
 * <p>The build runs in two stages:
 * <ol>
 * <li>{@link #buildIndex0} writes one {@code word<TAB>seek} line per selected word of the
 * GeoNames file into {@value #INDEX0_FILENAME}, then hands that file to
 * {@link StringFileSorter} and {@link Shrinker};</li>
 * <li>{@link #buildIndex12} splits each line of that file into {@value #INDEX1_FILENAME}
 * (second column) and {@value #INDEX2_FILENAME} (first column plus the byte offset of the
 * matching line in {@value #INDEX1_FILENAME}, space padded).</li>
 * </ol>
 *
 * <p>Every byte offset is computed on the UTF-8 encoding of the text, because all files are
 * read and written as UTF-8.
 */
public class IndexOfWordSeeksBuilder {
    private static final Logger logger = LoggerFactory.getLogger(IndexOfWordSeeksBuilder.class);

    public static final String DEFAULT_CHARSET_NAME = "UTF-8";
    public static final String INDEX0_FILENAME = "word_seeks.index";
    public static final String INDEX1_FILENAME = "seeks.index";
    public static final String INDEX2_FILENAME = "word_seek.index";
    public static final int DEFAULT_SPLIT_LINE = 2000000;
    public static final int WORD_MAX_LENGTH = 30;
    public static final int DEFAULT_PADDING_LENGTH = 40;
    public static final String SEPARATOR_CHARACTERS = " ・+*-_()?'’‘`”|\\[\\]!«»%{}\\:;/\\.#@&\"";

    /**
     * Compiled once instead of once per name.
     *
     * <p>NOTE(review): inside the character class, {@code *-_} is parsed as a range
     * (U+002A..U+005F), so ASCII digits, commas and {@code < = > ^} are separators too.
     * This is kept as is because changing the tokenization changes the content of the
     * generated index; confirm the intent (and the query-side tokenizer) before escaping
     * the dash.
     */
    private static final Pattern SEPARATOR_PATTERN = Pattern.compile("[" + SEPARATOR_CHARACTERS + "]");

    /** Smallest accepted split value. */
    private static final int MIN_SPLIT_LINE = 100000;

    /**
     * Number of lines between two progress log entries.
     *
     * <p>NOTE(review): borrowing this value from an unrelated FOP constant looks like a
     * decompiler substitution of a numeric literal; replace it with a local literal once
     * the value is confirmed.
     */
    private static final int PROGRESS_LOG_INTERVAL = Java2DFontMetrics.FONT_FACTOR;

    /** Width used by {@link #wordSeeksToWordSeekSeeks} when padding a record. */
    private static final int WORD_SEEK_SEEKS_WIDTH = 41;

    /**
     * Line terminator of the files whose byte offsets are recorded. It is written explicitly
     * because the seek arithmetic counts exactly one byte per line end, whereas
     * {@code println} emits the platform separator.
     */
    private static final char LINE_TERMINATOR = '\n';

    /**
     * Builds the index files with {@link #DEFAULT_SPLIT_LINE} and
     * {@link #DEFAULT_PADDING_LENGTH}.
     *
     * @param source the GeoNames file to index
     * @param targetDirectory the existing directory receiving the index files
     * @throws IllegalArgumentException if a parameter is null or the target is not a directory
     * @throws IOException if reading or writing fails
     */
    public static void buildIndex(File source, File targetDirectory) throws IOException {
        buildIndex(source, targetDirectory, DEFAULT_SPLIT_LINE, DEFAULT_PADDING_LENGTH);
    }

    /**
     * Builds the index files, then removes the intermediate {@value #INDEX0_FILENAME} file.
     *
     * @param source the GeoNames file to index
     * @param targetDirectory the existing directory receiving the index files
     * @param splitLine the split value given to the file sorter, at least 100000
     * @param paddingLength the byte width records of {@value #INDEX2_FILENAME} are padded to
     * @throws IllegalArgumentException if a file parameter is null, the target is not a
     *         directory or the split value is too small
     * @throws IOException if reading or writing fails
     */
    public static void buildIndex(File source, File targetDirectory, int splitLine, int paddingLength) throws IOException {
        checkBuildArguments(source, targetDirectory, splitLine);

        logger.debug("build index start...");
        logger.debug("currentDirectory={}", new File(".").getAbsolutePath());
        logger.debug("Max   memory= {} Mo", Long.valueOf(toMegabytes(Runtime.getRuntime().maxMemory())));
        logger.debug("Total memory= {} Mo", Long.valueOf(toMegabytes(Runtime.getRuntime().totalMemory())));

        File index0 = buildIndex0(source, targetDirectory, splitLine);
        buildIndex12(index0, targetDirectory, splitLine, paddingLength);

        // The intermediate file is only an input of buildIndex12; report a failed clean-up
        // instead of ignoring it.
        if (!index0.delete()) {
            logger.warn("Unable to delete intermediate file [{}]", index0.getAbsolutePath());
        }
        logger.debug("build index done.");
    }

    /**
     * Builds the {@value #INDEX0_FILENAME} file: extracts the word seeks from the GeoNames
     * file, then sorts and shrinks the resulting file in place.
     *
     * @param source the GeoNames file to index
     * @param targetDirectory the existing directory receiving the file
     * @param splitLine the split value given to the file sorter, at least 100000
     * @return the generated file
     * @throws IllegalArgumentException if a file parameter is null, the target is not a
     *         directory or the split value is too small
     * @throws IOException if reading or writing fails
     */
    public static File buildIndex0(File source, File targetDirectory, int splitLine) throws IOException {
        checkBuildArguments(source, targetDirectory, splitLine);

        logger.debug("build index0 file start...");
        Chronometer totalChrono = new Chronometer();

        logger.debug("geonames file to word seek file start...");
        File index0 = new File(targetDirectory.getAbsoluteFile(), INDEX0_FILENAME);
        Chronometer stepChrono = new Chronometer();
        geonamesToWordSeek(source, index0);
        logger.debug("geonames file to word seek file done. {}", Long.valueOf(stepChrono.stop().interval()));
        logMemoryAfterGc();

        logger.debug("sort big string file start...");
        stepChrono.reset();
        StringFileSorter.sortBigStringFile(index0, splitLine);
        logger.debug("sort big string file done. {}", Long.valueOf(stepChrono.stop().interval()));
        logMemoryAfterGc();

        logger.debug("shrink string file start...");
        stepChrono.reset();
        Shrinker.shrinkStringFile(index0);
        // Report the duration of this step; the overall chronometer was logged here before.
        logger.debug("shrink string file done. {}", Long.valueOf(stepChrono.stop().interval()));
        logMemoryAfterGc();

        logger.debug("build index0 file done. {}", Long.valueOf(totalChrono.stop().interval()));
        return index0;
    }

    /**
     * Splits the index0 file into {@value #INDEX1_FILENAME} and {@value #INDEX2_FILENAME}.
     *
     * <p>For each {@code word<TAB>seeks} input line, the seeks column becomes one line of
     * {@value #INDEX1_FILENAME}, and {@code word<TAB>offset} becomes one record of
     * {@value #INDEX2_FILENAME}, where offset is the byte position of that seeks line. Records
     * are right padded with spaces up to {@code paddingLength} bytes; a longer record is
     * written unpadded and reported with a warning.
     *
     * @param index0 the sorted and shrunk index0 file
     * @param targetDirectory the directory receiving both files
     * @param splitLine not used by this method
     * @param paddingLength the byte width records are padded to
     * @throws IOException if reading or writing fails, or if an input line has no second column
     */
    public static void buildIndex12(File index0, File targetDirectory, int splitLine, int paddingLength) throws IOException {
        logger.debug("build index12 files start...");
        Chronometer chrono = new Chronometer();
        File index1 = new File(targetDirectory.getAbsoluteFile(), INDEX1_FILENAME);
        File index2 = new File(targetDirectory.getAbsoluteFile(), INDEX2_FILENAME);
        long seek = 0;

        try (BufferedReader in = openUtf8Reader(index0);
                PrintWriter seeksOut = new PrintWriter(index1, DEFAULT_CHARSET_NAME);
                PrintWriter wordSeekOut = new PrintWriter(index2, DEFAULT_CHARSET_NAME)) {
            int lineCount = 0;
            while (true) {
                if (lineCount % PROGRESS_LOG_INTERVAL == 0) {
                    logProgress(lineCount, seek, chrono);
                }
                String line = in.readLine();
                if (line == null) {
                    break;
                }
                lineCount++;

                String[] columns = line.split("\t");
                if (columns.length < 2) {
                    throw new IOException("Malformed line " + lineCount + " in [" + index0.getAbsolutePath() + "]");
                }
                String word = columns[0];
                String seeks = columns[1];

                seeksOut.print(seeks);
                seeksOut.print(LINE_TERMINATOR);

                String wordSeek = word + "\t" + seek;
                int padding = paddingLength - wordSeek.getBytes(StandardCharsets.UTF_8).length;
                if (padding < 0) {
                    logger.warn("Record [{}] is longer than the padding length {}", wordSeek, Integer.valueOf(paddingLength));
                }
                wordSeekOut.print(wordSeek);
                // StringUtils.repeat returns an empty string for a negative count.
                wordSeekOut.print(StringUtils.repeat(" ", padding));
                wordSeekOut.print(LINE_TERMINATOR);

                // Advance by what was just written to the seeks file: its bytes plus the terminator.
                seek += seeks.getBytes(StandardCharsets.UTF_8).length + 1;
            }
            failOnWriteError(seeksOut, index1);
            failOnWriteError(wordSeekOut, index2);
        }
        logger.debug("build index12 files done.");
    }

    /**
     * Extracts the raw words of a GeoNames line.
     *
     * @param str a tab separated GeoNames line
     * @return the words of its names, without any filtering
     */
    public static StringSet geoNameLineToRawWords(String str) {
        return namesToRawWords(lineToNames(str));
    }

    /**
     * Extracts and triages the words of a GeoNames line.
     *
     * @param str a tab separated GeoNames line
     * @return the selected and rejected words of its names
     */
    public static WordTriage geoNameLineToWords(String str) {
        return namesToWords(lineToNames(str));
    }

    /**
     * Writes one {@code word<TAB>seek} line per selected word of each GeoNames line, where
     * seek is the byte offset of that GeoNames line in the source file. Rejected words are
     * collected and written to a sibling file suffixed with {@code .rejection}.
     *
     * <p>Offsets count one byte per line end, so the source is assumed to use single-byte
     * line terminators.
     *
     * @param source the GeoNames file
     * @param target the file to write
     * @throws IOException if reading or writing fails
     */
    public static void geonamesToWordSeek(File source, File target) throws IOException {
        Chronometer chrono = new Chronometer();
        StringSet rejectedWords = new StringSet();
        long seek = 0;

        try (BufferedReader in = openUtf8Reader(source);
                PrintWriter out = new PrintWriter(target, DEFAULT_CHARSET_NAME)) {
            int lineCount = 0;
            while (true) {
                if (lineCount % PROGRESS_LOG_INTERVAL == 0) {
                    logProgress(lineCount, seek, chrono);
                }
                String line = in.readLine();
                if (line == null) {
                    break;
                }
                lineCount++;

                WordTriage triage = geoNameLineToWords(line);
                for (String word : triage.getSelection()) {
                    out.println(word + "\t" + seek);
                }
                rejectedWords.addAll(triage.getRejection());
                seek += line.getBytes(StandardCharsets.UTF_8).length + 1;
            }
            failOnWriteError(out, target);
        }

        // Same charset as every other file of the build, not the platform default.
        FileUtils.writeLines(new File(target.getAbsolutePath() + ".rejection"), DEFAULT_CHARSET_NAME, rejectedWords);
    }

    /**
     * Collects the non blank names of a GeoNames entry: name, ASCII name and alternate names.
     *
     * @param geoNamesLine the entry to read
     * @return the names found
     */
    public static StringSet lineToNames(GeoNamesLine geoNamesLine) {
        StringSet names = new StringSet();
        addIfNotBlank(names, geoNamesLine.getName());
        addIfNotBlank(names, geoNamesLine.getAsciiName());
        for (String alternateName : geoNamesLine.getAlternateNames()) {
            addIfNotBlank(names, alternateName);
        }
        return names;
    }

    /**
     * Collects the non blank names of a tab separated GeoNames line: second column, third
     * column and the comma separated values of the fourth column. Missing columns are
     * ignored, so a short or null line yields fewer (possibly no) names instead of failing.
     *
     * @param str the line to read, may be null
     * @return the names found, never null
     */
    public static StringSet lineToNames(String str) {
        StringSet names = new StringSet();
        if (str == null) {
            return names;
        }
        String[] columns = str.split("\t");
        if (columns.length > 1) {
            addIfNotBlank(names, columns[1]);
        }
        if (columns.length > 2) {
            addIfNotBlank(names, columns[2]);
        }
        if (columns.length > 3) {
            for (String alternateName : columns[3].split(",")) {
                addIfNotBlank(names, alternateName);
            }
        }
        return names;
    }

    /**
     * Gathers the raw words of every given name.
     *
     * @param stringSet the names to split
     * @return the union of their raw words
     */
    public static StringSet namesToRawWords(StringSet stringSet) {
        StringSet words = new StringSet();
        for (String name : stringSet) {
            words.addAll(nameToRawWords(name));
        }
        return words;
    }

    /**
     * Gathers the triaged words of every given name.
     *
     * @param stringSet the names to split
     * @return the merged triage of their words
     */
    public static WordTriage namesToWords(StringSet stringSet) {
        WordTriage triage = new WordTriage();
        for (String name : stringSet) {
            triage.addAll(nameToWords(name));
        }
        return triage;
    }

    /**
     * Lower-cases a name and splits it on the separator characters, keeping non blank tokens.
     *
     * @param str the name to split, may be null
     * @return the tokens found, never null
     */
    public static StringSet nameToRawWords(String str) {
        StringSet words = new StringSet();
        if (str != null) {
            for (String token : SEPARATOR_PATTERN.split(str.toLowerCase(Locale.ROOT))) {
                addIfNotBlank(words, token);
            }
        }
        return words;
    }

    /**
     * Lower-cases a name, splits it on the separator characters and triages the tokens.
     *
     * <p>A token is selected when it is not blank, has at least two characters, does not
     * start with a digit and takes at most {@link #WORD_MAX_LENGTH} bytes in UTF-8. Every
     * other token, blank ones included, goes to the rejection.
     *
     * @param str the name to split, may be null
     * @return the triage of its tokens, never null
     */
    public static WordTriage nameToWords(String str) {
        WordTriage triage = new WordTriage();
        if (str != null) {
            for (String token : SEPARATOR_PATTERN.split(str.toLowerCase(Locale.ROOT))) {
                boolean selected = StringUtils.isNotBlank(token)
                        && token.length() >= 2
                        && !startsWithDigit(token)
                        && token.getBytes(StandardCharsets.UTF_8).length <= WORD_MAX_LENGTH;
                if (selected) {
                    triage.getSelection().add(token);
                } else {
                    triage.getRejection().add(token);
                }
            }
        }
        return triage;
    }

    /**
     * Tells whether the first character of a string is a digit.
     *
     * @param str the string to test, may be null
     * @return true if the string is not empty and starts with a digit
     */
    public static boolean startsWithDigit(String str) {
        return str != null && !str.isEmpty() && Character.isDigit(str.charAt(0));
    }

    /**
     * Writes, for each line of the source, its first column followed by a tab and the byte
     * offset of that line in the source, right padded with spaces.
     *
     * <p>NOTE(review): the padding counts characters while {@link #buildIndex12} pads by
     * bytes, so records holding non ASCII words differ in byte width; confirm what the
     * reader of this file expects.
     *
     * @param source the file to read
     * @param target the file to write
     * @throws IOException if reading or writing fails
     */
    public static void wordSeeksToWordSeekSeeks(File source, File target) throws IOException {
        Chronometer chrono = new Chronometer();
        long seek = 0;

        try (BufferedReader in = openUtf8Reader(source);
                PrintWriter out = new PrintWriter(target, DEFAULT_CHARSET_NAME)) {
            int lineCount = 0;
            while (true) {
                if (lineCount % PROGRESS_LOG_INTERVAL == 0) {
                    logProgress(lineCount, seek, chrono);
                }
                String line = in.readLine();
                if (line == null) {
                    break;
                }
                lineCount++;

                // Text before the first tab; unlike split("\t")[0], this cannot fail on a
                // line made only of tabs.
                int tabIndex = line.indexOf('\t');
                String word = tabIndex < 0 ? line : line.substring(0, tabIndex);
                out.println(StringUtils.rightPad(word + "\t" + seek, WORD_SEEK_SEEKS_WIDTH));
                seek += line.getBytes(StandardCharsets.UTF_8).length + 1;
            }
            failOnWriteError(out, target);
        }
    }

    /** Validates the parameters shared by the build entry points. */
    private static void checkBuildArguments(File source, File targetDirectory, int splitLine) {
        if (source == null || targetDirectory == null) {
            throw new IllegalArgumentException("Null parameter.");
        }
        if (!targetDirectory.isDirectory()) {
            throw new IllegalArgumentException("Output directory is not a directory.");
        }
        if (splitLine < MIN_SPLIT_LINE) {
            throw new IllegalArgumentException("Invalid split value [" + splitLine + "]");
        }
    }

    /** Opens a buffered UTF-8 reader on a file. */
    private static BufferedReader openUtf8Reader(File file) throws IOException {
        return new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8));
    }

    /** Turns the error flag of a PrintWriter, which never throws, into an IOException. */
    private static void failOnWriteError(PrintWriter out, File target) throws IOException {
        if (out.checkError()) {
            throw new IOException("Error while writing [" + target.getAbsolutePath() + "]");
        }
    }

    /** Adds a value to a set unless it is blank. */
    private static void addIfNotBlank(StringSet target, String value) {
        if (StringUtils.isNotBlank(value)) {
            target.add(value);
        }
    }

    /** Converts a byte count to whole mebibytes. */
    private static long toMegabytes(long bytes) {
        return bytes / 1024 / 1024;
    }

    /** Requests a garbage collection, then logs the total memory of the JVM. */
    private static void logMemoryAfterGc() {
        System.gc();
        logger.debug(" memory={} Mo", Long.valueOf(toMegabytes(Runtime.getRuntime().totalMemory())));
    }

    /** Logs the progress of a line loop and restarts its chronometer. */
    private static void logProgress(int lineCount, long seek, Chronometer chrono) {
        System.gc();
        long memory = toMegabytes(Runtime.getRuntime().totalMemory());
        int seconds = (int) (chrono.stop().interval() / 1000);
        // Explicit array: compiles against both the Object[] and the varargs SLF4J signatures.
        logger.debug("\tlineCount={}\tcurrentSeekValue={} \tmemory={} Mo \t{} s",
                new Object[] { Integer.valueOf(lineCount), Long.valueOf(seek), Long.valueOf(memory), Integer.valueOf(seconds) });
        chrono.reset();
    }
}
