Difference between revisions of "CSV importer"
From OpenKM Documentation
Line 3: | Line 3: | ||
{{Warning|Article under construction}} | {{Warning|Article under construction}} | ||
− | + | '''CSV File''' | |
CSV file has two colum, first column contains document path and second column has metadata value. You can download from here [[File:Metadata.csv.zip]]. | CSV file has two colum, first column contains document path and second column has metadata value. You can download from here [[File:Metadata.csv.zip]]. | ||
[[File:User_guide_535.png]] | [[File:User_guide_535.png]] | ||
+ | |||
+ | '''Property Group definition''' | ||
+ | <source lang="xml"> | ||
+ | <?xml version="1.0" encoding="UTF-8"?> | ||
+ | <!DOCTYPE property-groups PUBLIC "-//OpenKM//DTD Property Groups 2.1//EN" | ||
+ | "http://www.openkm.com/dtd/property-groups-2.1.dtd"> | ||
+ | <property-groups> | ||
+ | <property-group label="Metadata" name="okg:metadata"> | ||
+ | <input label="Document ID" name="okp:metadata.value" type="text"/> | ||
+ | </property-group> | ||
+ | </property-groups> | ||
+ | </source> | ||
<source lang="java"> | <source lang="java"> |
Revision as of 16:46, 31 December 2013
The script can be executed from the administration scripting view (it can also be used as a crontab script) to import metadata values into OpenKM.
Article under construction |
CSV File: The CSV file has two columns: the first column contains the document path and the second column contains the metadata value. You can download a sample from here: File:Metadata.csv.zip.
Property Group definition
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE property-groups PUBLIC "-//OpenKM//DTD Property Groups 2.1//EN"
"http://www.openkm.com/dtd/property-groups-2.1.dtd">
<!-- Property group definition, declared against the OpenKM Property Groups 2.1 DTD. -->
<property-groups>
<!-- One group, named "okg:metadata" and labelled "Metadata". -->
<property-group label="Metadata" name="okg:metadata">
<!-- Single free-text field "okp:metadata.value", shown with the label "Document ID". -->
<input label="Document ID" name="okp:metadata.value" type="text"/>
</property-group>
</property-groups>
import java.io.FileReader;
import java.io.Reader;
import java.util.Iterator;
import java.util.List;
import java.util.Collection;
import com.googlecode.jcsv.CSVStrategy;
import com.googlecode.jcsv.reader.CSVReader;
import com.googlecode.jcsv.reader.internal.CSVReaderBuilder;
import com.googlecode.jcsv.reader.internal.DefaultCSVEntryParser;
import com.openkm.dao.bean.QueryParams;
import com.openkm.bean.QueryResult;
import com.openkm.api.OKMSearch;
import com.openkm.util.FileLogger;
String FILE_LOG_NAME = "CSVLOG";
String META_PATH = "/home/openkm/csv/";
String META_FILE_NAME = "metadata.csv";
int uniqueFileName = 0;
int yearColumn = 2;
// Format defintion
char delimiter = ',';
char quoteCharacter = '"';
char commentIndicator = '#';
boolean skipHeader = true;
boolean ignoreEmptyLines = true;
CSVStrategy strategy = new CSVStrategy(delimiter, quoteCharacter, commentIndicator, skipHeader, ignoreEmptyLines);
// File reader
Reader reader = new FileReader(META_PATH + META_FILE_NAME);
// CSV reader
CSVReader csvParser = new CSVReaderBuilder(reader).strategy(strategy).entryParser(new DefaultCSVEntryParser()).build();
List data = csvParser.readAll();
int count = 1;
int countFound = 0;
int countNotDocument = 0;
int moreThanOneDocumentFound = 0;
int notFound = 0;
int noName = 0;
for (Iterator it = data.listIterator(); it.hasNext();) {
String[] row = (String[]) it.next();
String docName = row[uniqueFileName];
print(count + ">>>> " + docName);
if (docName != null && !docName.equals("")) {
QueryParams queryParams = new QueryParams();
queryParams.setDomain(QueryParams.DOCUMENT);
queryParams.setName(docName + ".*");
Collection results = OKMSearch.getInstance().find(null, queryParams);
if (results.size() == 1) {
QueryResult queryResult = (QueryResult) results.iterator().next();
if (queryResult.getDocument() != null) {
print("found");
countFound++;
} else {
print("error is not document");
countNotDocument++;
}
} else if (results.size() > 1) {
print("error more than one document found can not decide");
moreThanOneDocumentFound++;
} else {
print("not found");
notFound++;
}
} else {
print("error document has no name");
noName++;
}
print("</br>");
//FileLogger.info(FILE_LOG_NAME, "Document name ''{0}'' to ''{1}''", row[0], row[posDocRevNo]);
count++;
}
print("Total:" + count + "</br>");
print("Found:" + countFound + "</br>");
print("Error not document:" + countNotDocument + "</br>");
print("Error more then one document found:" + moreThanOneDocumentFound + "</br>");
print("Error not found:" + notFound + "</br>");
print("Error name empty:" + notFound + "</br>");