Hello,
I implemented TF-IDF and ran it on a simple example, but now I want to analyze a text file with TF-IDF (i.e., pass a .txt file to this method) and I don't know what is necessary to do that. Here is my TF-IDF code; please tell me what I should do.
package ibtissem;
import java.util.Arrays;
import java.util.List;
/**
 * Small TF-IDF utility: computes term frequency, inverse document frequency
 * and their product over documents represented as lists of word tokens.
 * Term matching is case-insensitive.
 */
public class exer {

    /**
     * Computes the term frequency: the number of tokens in {@code doc} that
     * match {@code term} (ignoring case) divided by the document length.
     *
     * @param doc list of strings (the tokens of one document)
     * @param term String represents a term
     * @return term frequency of term in document, or 0 for an empty document
     */
    public static double tf(List<String> doc, String term) {
        // Guard against 0/0, which would otherwise yield NaN.
        if (doc.isEmpty()) {
            return 0;
        }
        double result = 0;
        for (String word : doc) {
            if (term.equalsIgnoreCase(word)) {
                result++;
            }
        }
        return result / doc.size();
    }

    /**
     * Computes the inverse document frequency:
     * {@code ln(number of documents / number of documents containing term)}.
     *
     * @param docs list of list of strings represents the dataset
     * @param term String represents a term
     * @return the inverse document frequency of term in documents, or 0 when
     *         no document contains the term (including an empty dataset)
     */
    public static double idf(List<List<String>> docs, String term) {
        double n = 0;
        for (List<String> doc : docs) {
            for (String word : doc) {
                if (term.equalsIgnoreCase(word)) {
                    n++;
                    // One match is enough: each document is counted at most once.
                    break;
                }
            }
        }
        // Without this guard the division below produces Infinity (or NaN for
        // an empty dataset), and tf * idf then becomes NaN.
        if (n == 0) {
            return 0;
        }
        return Math.log(docs.size() / n);
    }

    /**
     * Computes the TF-IDF weight of a term for one document of a dataset.
     *
     * @param doc a text document
     * @param docs all documents
     * @param term term
     * @return the TF-IDF of term, i.e. {@code tf(doc, term) * idf(docs, term)}
     */
    public static double tfIdf(List<String> doc, List<List<String>> docs, String term) {
        return tf(doc, term) * idf(docs, term);
    }

    /**
     * Demo entry point: builds three small documents and prints the TF-IDF
     * of "lorem" in the first one.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        List<String> doc1 = Arrays.asList("Lorem", "ipsum", "dolor", "ipsum", "sit", "ipsum");
        List<String> doc2 = Arrays.asList("Vituperata", "incorrupte", "at", "ipsum", "pro", "quo");
        List<String> doc3 = Arrays.asList("Has", "persius", "disputationi", "id", "simul", "lorem");
        List<List<String>> documents = Arrays.asList(doc1, doc2, doc3);
        double tfidf = tfIdf(doc1, documents, "lorem");
        System.out.println("TF-IDF (lorem) = " + tfidf);
    }
}
See more